100字范文,内容丰富有趣,生活中的好帮手!
100字范文 > Python爬虫抓取携程网机票信息并发邮件通知

Python爬虫抓取携程网机票信息并发邮件通知

时间:2022-05-29 21:14:33

相关推荐

Python爬虫抓取携程网机票信息并发邮件通知

背景:

由于要买机票,需要反复手动搜索票价,而爬虫可以帮我自动完成这件事;

用Python抓取携程网机票信息 过程纪实(上篇)

这篇文章解释得超级详细。

于是照着这个过程实践了一遍,对抓取的思路有了基本的了解;

查询 上海 到 西安 4.29~05.02的机票:

#coding:utf-8
"""Print cheap Shanghai -> Xi'an flights scraped from Ctrip (Python 2 script)."""
import urllib2
from lxml import etree
import json
import random
import sys
from contextlib import closing

# Python 2 only: make implicit str/unicode conversions use UTF-8 so that
# printing non-ASCII text does not fail when stdout reports no encoding.
reload(sys)
sys.setdefaultencoding('utf8')

# NOTE(review): the scheme and host were missing from every URL in the
# published listing, and urllib2 rejects scheme-less URLs with ValueError.
# They are restored here as the Ctrip flights site -- confirm before use.
HOST = "flights.ctrip.com"
BASE_URL = "http://" + HOST


def get_json2(date, rk, CK, r, max_price=600):
    """Fetch the flight data for one day and print the cheap flights.

    date: departure date string; it is substituted into the ``DDate1`` query
        parameter and into the Referer header.
    rk, CK, r: request tokens as returned by get_parameter().
    max_price: a flight is printed only when its ``lp`` value (presumably
        the lowest price -- TODO confirm) is strictly below this threshold.
        Defaults to 600.

    Prints one line per matching flight containing the ``lp``, ``dt`` and
    ``at`` fields separated by single spaces. Returns None.
    """
    url = (BASE_URL + "/domesticsearch/search/SearchFirstRouteFlights"
           "?DCity1=SHA&ACity1=SIA&SearchType=S&DDate1=%s"
           "&IsNearAirportRecommond=0&rk=%s&CK=%s&r=%s") % (date, rk, CK, r)
    headers = {
        'Host': HOST,
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/0101 Firefox/45.0",
        'Referer': BASE_URL + "/booking/hrb-sha-day-1.html?ddate1=%s" % date,
    }
    req = urllib2.Request(url, headers=headers)
    # closing() releases the connection even if read() raises.
    with closing(urllib2.urlopen(req)) as res:
        content = res.read()
    dict_content = json.loads(content, encoding="gb2312")
    # A response without a 'fis' list is treated as "no flights".
    for flight in dict_content.get('fis') or []:
        if flight[u'lp'] < max_price:
            print("%s %s %s" % (flight[u'lp'], flight[u'dt'], flight[u'at']))


def get_parameter(date):
    """Scrape the request tokens that get_json2() needs from the booking page.

    date: departure date string (the original docstring gives the format
        example ``-05-13``; NOTE(review): the year looks truncated).

    Returns the tuple ``(rk, CK, r)``.
    """
    url = BASE_URL + '/booking/hrb-sha-day-1.html?ddate1=%s' % date
    with closing(urllib2.urlopen(url)) as res:
        page = res.read()
    tree = etree.HTML(page)
    # All slice offsets below are tied to the exact text of the first
    # <script> in <body>; they break if the page layout changes.
    pp = tree.xpath('''//body/script[1]/text()''')[0].split()
    CK_original = pp[3][-34:-2]
    # Move the character at index 13 to position 5.
    CK = CK_original[0:5] + CK_original[13] + CK_original[5:13] + CK_original[14:]
    # rk is a random number in [0, 10) with 15 decimals, followed by six
    # characters taken from the last whitespace-separated token.
    rk = "%.15f" % (random.random() * 10) + pp[-1][18:24]
    r = pp[-1][27:-3]
    return rk, CK, r


if __name__ == '__main__':
    # NOTE(review): these dates appear to have lost their year prefix;
    # presumably the site expects YYYY-MM-DD -- confirm and fill in.
    dates = ['-04-29', '-04-30', '-05-01', '-05-02']
    for date in dates:
        rk, CK, r = get_parameter(date)
        get_json2(date, rk, CK, r)
        print("-----")

发送邮件程序(我找到出处就放上来):

# -*- coding: utf-8 -*-
"""Send a fixed test e-mail through an SMTP server (Python 2 script)."""
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
import getpass
import smtplib


def _format_addr(s):
    """Return the ``Name <addr>`` string *s* with its display name header-encoded.

    The display name is encoded as UTF-8 via email.header.Header; a unicode
    address part is encoded to a UTF-8 byte string.
    """
    name, addr = parseaddr(s)
    if isinstance(addr, unicode):
        addr = addr.encode('utf-8')
    return formataddr((Header(name, 'utf-8').encode(), addr))


from_addr = raw_input('From: ')
# getpass keeps the password from being echoed to the terminal.
password = getpass.getpass('Password: ')
to_addr = raw_input('To: ')
smtp_server = raw_input('SMTP server: ')

msg = MIMEText('Not just fly fight...', 'plain', 'utf-8')
msg['From'] = _format_addr(u'Air <%s>' % from_addr)
msg['To'] = _format_addr(u'126.Air <%s>' % to_addr)
msg['Subject'] = Header(u'flight……', 'utf-8').encode()

server = smtplib.SMTP(smtp_server, 25)
try:
    server.set_debuglevel(1)  # set to 0 for production use
    server.login(from_addr, password)
    server.sendmail(from_addr, [to_addr], msg.as_string())
    server.quit()
finally:
    # Release the socket even when login/sendmail fails; close() is
    # harmless after a successful quit().
    server.close()

将查询信息和发送邮件的程序整合起来,大概就是这样,

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print cheap Ctrip flights and e-mail an alert for very cheap ones (Python 2)."""
import urllib2
from lxml import etree
import json
import random
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
import smtplib
import sys
from contextlib import closing

# Python 2 only: make implicit str/unicode conversions use UTF-8 so that
# printing non-ASCII text does not fail when stdout reports no encoding.
reload(sys)
sys.setdefaultencoding('utf8')

# Mail settings. The values below are placeholders from the published
# listing and must be filled in before running.
# NOTE(review): a real password should not be stored in the source file.
from_addr = "****@"
password = "******"
to_addr = "********@"
smtp_server = ""

# NOTE(review): the scheme and host were missing from every URL in the
# published listing, and urllib2 rejects scheme-less URLs with ValueError.
# They are restored here as the Ctrip flights site -- confirm before use.
HOST = "flights.ctrip.com"
BASE_URL = "http://" + HOST


def _format_addr(s):
    """Return the ``Name <addr>`` string *s* with its display name header-encoded."""
    name, addr = parseaddr(s)
    if isinstance(addr, unicode):
        addr = addr.encode('utf-8')
    return formataddr((Header(name, 'utf-8').encode(), addr))


def _send_mail(subject, body):
    """Send one plain-text UTF-8 message from from_addr to to_addr."""
    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = _format_addr(u'Air <%s>' % from_addr)
    msg['To'] = _format_addr(u'126.Air <%s>' % to_addr)
    msg['Subject'] = Header(subject, 'utf-8').encode()
    server = smtplib.SMTP(smtp_server, 25)
    try:
        server.set_debuglevel(0)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())
        server.quit()
    finally:
        # Release the socket even when login/sendmail fails; close() is
        # harmless after a successful quit().
        server.close()


def get_json2(date, rk, CK, r, max_price=600, alert_price=450):
    """Fetch one day's flight data, print cheap flights and mail alerts.

    date: departure date string; it is substituted into the ``DDate1`` query
        parameter and into the Referer header.
    rk, CK, r: request tokens as returned by get_parameter().
    max_price: a flight is printed only when its ``lp`` value (presumably
        the lowest price -- TODO confirm) is strictly below this threshold.
        Defaults to 600.
    alert_price: among the printed flights, one e-mail is sent for each
        flight whose ``lp`` is at or below this threshold. Defaults to 450.

    Returns None.
    """
    url = (BASE_URL + "/domesticsearch/search/SearchFirstRouteFlights"
           "?DCity1=SHA&ACity1=SIA&SearchType=S&DDate1=%s"
           "&IsNearAirportRecommond=0&rk=%s&CK=%s&r=%s") % (date, rk, CK, r)
    headers = {
        'Host': HOST,
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/0101 Firefox/45.0",
        'Referer': BASE_URL + "/booking/hrb-sha-day-1.html?ddate1=%s" % date,
    }
    req = urllib2.Request(url, headers=headers)
    # closing() releases the connection even if read() raises.
    with closing(urllib2.urlopen(req)) as res:
        content = res.read()
    dict_content = json.loads(content, encoding="gb2312")
    # A response without a 'fis' list is treated as "no flights".
    for flight in dict_content.get('fis') or []:
        price = flight[u'lp']
        if price < max_price:
            print("%s %s %s %s" % (price, flight[u'dt'], flight[u'at'],
                                   flight[u'dpbn']))
            if price <= alert_price:
                _send_mail(
                    u'flight…%r ' % price,
                    '%r at %s in %s' % (price, flight[u'dt'], flight[u'dpbn']),
                )


def get_parameter(date):
    """Scrape the request tokens that get_json2() needs from the booking page.

    date: departure date string (the original docstring gives the format
        example ``-05-13``; NOTE(review): the year looks truncated).

    Returns the tuple ``(rk, CK, r)``.
    """
    url = BASE_URL + '/booking/hrb-sha-day-1.html?ddate1=%s' % date
    with closing(urllib2.urlopen(url)) as res:
        page = res.read()
    tree = etree.HTML(page)
    # All slice offsets below are tied to the exact text of the first
    # <script> in <body>; they break if the page layout changes.
    pp = tree.xpath('''//body/script[1]/text()''')[0].split()
    CK_original = pp[3][-34:-2]
    # Move the character at index 13 to position 5.
    CK = CK_original[0:5] + CK_original[13] + CK_original[5:13] + CK_original[14:]
    # rk is a random number in [0, 10) with 15 decimals, followed by six
    # characters taken from the last whitespace-separated token.
    rk = "%.15f" % (random.random() * 10) + pp[-1][18:24]
    r = pp[-1][27:-3]
    return rk, CK, r


if __name__ == '__main__':
    # NOTE(review): these dates appear to have lost their year prefix;
    # presumably the site expects YYYY-MM-DD -- confirm and fill in.
    dates = ['-04-29', '-04-30', '-05-01']
    for date in dates:
        rk, CK, r = get_parameter(date)
        get_json2(date, rk, CK, r)
        print("-----")

然后使用crontab 做一个定时任务,每20mins执行一次;

基本格式 :

* * * * * command
分 时 日 月 周 命令

so,

0,20,40 * * * * python ~/test.py

程序还有很多问题,我还在学习中。

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。