
Python crawler learning (1): scraping images from Jandan (煎蛋)

Date: 2021-05-29 10:33:37


# coding=utf-8
# python_demo: crawl the Jandan (煎蛋) "ooxx" girl pictures into a local folder
import os
import threading
import time

import requests
from bs4 import BeautifulSoup

# Forged request headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Accept-Encoding': 'gzip',
    'Cookie': '1024679722=aada4mZxRMxqvInd7D6PSgq%2FIkpGFeGlZWAH1gqP8Q; __auc=57bffd35154a91de3cd5d3b1ddb; 1024679722=ebeaLZUFikSR1OE6lm5MJYJSV0V1DbcooxQr0CHu; jdna=596e6fb28c1bb47f949e65e1ae03f7f5#1467948344088; Hm_lvt_fd93b7fb546adcfbcf80c4fc2b54da2c=1467001661,1467189261,1467685014,1467857178; Hm_lpvt_fd93b7fb546adcfbcf80c4fc2b54da2c=1467948345; _ga=GA1.2.1739476572.1438849462; _gat=1'
}


def saveImgs(*allUrl):
    # Create the local folder if it does not exist yet
    if not os.path.exists('/home/zhanyunwu/jiandanpic'):
        os.mkdir('/home/zhanyunwu/jiandanpic')
    print(allUrl)
    if len(allUrl) != 0:
        print('The current page has', len(allUrl), 'images about to be downloaded')
        for l in allUrl:
            filename = '/home/zhanyunwu/jiandanpic/' + parseName(l)
            saveImg(l, filename)
            time.sleep(1)
    else:
        print('No images to download on the current page')


def saveImg(url, filename):
    print('current image url:', str(url), 'current image name:', filename)
    response = requests.get(str(url), headers=headers)
    image = response.content
    with open(filename, 'wb') as f:
        f.write(image)


def parseName(url):
    # Build a filename from a slice of the URL plus its original extension
    u = str(url).split('.')
    filename = str(url)[30:55] + '.' + u[-1]
    return filename


def getAllImgUrl(url):
    # Collect the src of every image on one listing page
    allurl = []
    req = requests.get(url, headers=headers)
    if req.status_code != 200:
        return allurl
    soup = BeautifulSoup(req.content, "lxml")
    # Selector reconstructed as an assumption: the images sit inside the comment list
    links = soup.select('.commentlist img')
    print(links)
    for l in links:
        allurl.append(l.attrs.get('src'))
    return allurl


# Multi-threaded crawl over a range of listing pages
def crawler(n, m):
    for l in range(n, m):
        # Host assumed: the ooxx listing pages live under jandan.net
        url = 'http://jandan.net/ooxx/page-' + str(l) + '#comments'
        u = getAllImgUrl(url)
        saveImgs(*u)


c1 = threading.Thread(target=crawler, args=(1850, 1900))
c2 = threading.Thread(target=crawler, args=(1950, 2000))
c3 = threading.Thread(target=crawler, args=(2001, 2064))
c1.start()
c2.start()
c3.start()
c1.join()
c2.join()
c3.join()
print('success')
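
Before kicking off all three threads, it can be worth dry-running a single listing page to confirm that the selector and the output folder behave as expected. The snippet below is a minimal sketch that reuses getAllImgUrl and saveImgs from the script above; the sample page number and the scheme prefix for protocol-relative src values (links starting with //) are assumptions on my part, not part of the original code.

# Dry run (sketch): fetch one listing page and download its images.
test_url = 'http://jandan.net/ooxx/page-2000#comments'  # assumed sample page
urls = getAllImgUrl(test_url)
# Some image links may be protocol-relative ('//host/path'); prepend a scheme
# so requests.get() accepts them (an assumption about the page markup).
urls = ['http:' + u if u and u.startswith('//') else u for u in urls]
saveImgs(*urls)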
