Python 爬虫爬取美女图片

直接上代码了

(以下为 Python 2 代码,原代码框的行号已省略)
#coding=utf-8
import urllib
import re
import os
import time
import threading
def getHtml(url):
    """Fetch ``url`` and return the response body.

    The response object is closed in a ``finally`` clause so the underlying
    connection is released even when ``read()`` raises.

    :param url: address handed to ``urllib.urlopen``.
    :return: the value of ``read()`` on the opened response.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Python 2's urlopen result is not a context manager, so close by hand.
        page.close()
def getImgUrl(html, src):
    """Return all non-overlapping matches of the regex ``src`` in ``html``.

    :param html: text to search.
    :param src: regular-expression source string.
    :return: list of matches, in the order ``findall`` reports them.
    """
    pattern = re.compile(src)
    return pattern.findall(html)
def getImgPage(html):
    """Return the ``http://`` links ending in ``.html`` that appear in ``html``.

    The previous pattern ``http://.*\\.html`` was greedy across the whole
    line, so two links on one line were returned as a single string that
    contained the markup between them. The match is now limited to one URL
    token: it stops at whitespace, quotes and angle brackets.

    :param html: page text to scan.
    :return: list of matching URLs in document order (duplicates are kept).
    """
    urlre = re.compile(r'http://[^\s"\'<>]*\.html')
    return urlre.findall(html)
def downloadImg(url):
    """Download the ``.jpg`` images referenced through ``rel=`` on one page.

    The page at ``url`` is fetched with ``getHtml``, every ``rel=...jpg``
    match is reduced to its first quoted value, and each resulting address
    is saved under ``pic/`` using the address itself (with ``:`` and ``/``
    replaced by ``_``) as the file name. Files that already exist are not
    downloaded again.

    Compared with the earlier version this one:
    * skips matches that contain no quoted value instead of raising
      ``IndexError``;
    * creates the ``pic`` directory when it is missing;
    * reports a failed image and carries on with the remaining ones.

    :param url: address of the page to scan for images.
    :return: ``None``.
    """
    html = getHtml(url)
    src = r'rel=.*\.jpg'

    imgUrls = []
    for match in getImgUrl(html, src):
        # Normalise single quotes so one split covers both quoting styles;
        # the address is the first quoted piece following ``rel=``.
        parts = match.replace("'", '"').split('"')
        if len(parts) > 1:
            imgUrls.append(parts[1])
    if not imgUrls:
        return

    if not os.path.isdir('pic'):
        try:
            os.makedirs('pic')
        except OSError:
            # Several worker threads run this function at once; another one
            # may have created the directory between the check and the call.
            if not os.path.isdir('pic'):
                raise

    for srcurl in imgUrls:
        imgName = srcurl.replace(':', '_').replace('/', '_')
        target = 'pic/%s' % imgName
        print('download pic %s .........' % srcurl)
        if os.path.isfile(target):
            continue
        try:
            urllib.urlretrieve(srcurl, target)
        except IOError as err:
            # One unreachable image should not abort the rest of the batch.
            print('download failed %s : %s' % (srcurl, err))
class MyThread(threading.Thread):
    """Thread that runs ``downloadImg`` over a list of page URLs."""

    def __init__(self, urllist):
        """Store ``urllist`` for later processing by ``run``."""
        super(MyThread, self).__init__()
        self.urllist = urllist

    def run(self):
        """Call ``downloadImg`` on each stored URL, in order."""
        for pageUrl in self.urllist:
            downloadImg(pageUrl)
def main():
    """Crawl the start page and download its linked pages' images in threads.

    The links returned by ``getImgPage`` are dealt out round-robin to at most
    seven ``MyThread`` workers. The earlier chunking used
    ``len(urllist) / 7`` as a fixed slice size, which silently dropped the
    last ``len(urllist) % 7`` links and processed nothing at all when fewer
    than seven links were found.

    :return: ``None``; the started threads keep running after this returns.
    """
    url = 'http://www.6188.net/'
    html = getHtml(url)
    urllist = getImgPage(html)

    workers = 7
    # Slice ``i::workers`` gives worker i every 7th link starting at i, so
    # every link lands in exactly one batch regardless of the list length.
    batches = [urllist[i::workers] for i in range(workers)]
    for batch in batches:
        if batch:  # no point starting a thread with nothing to do
            MyThread(batch).start()
# Start the crawl; this call sits at module level, so it runs whenever the
# file is executed or imported.
main()

热评文章