100字范文,内容丰富有趣,生活中的好帮手!
100字范文 > selenium实现登录百度(自动识别简单验证码)

selenium实现登录百度(自动识别简单验证码)

时间:2023-02-02 14:16:00

相关推荐

selenium实现登录百度(自动识别简单验证码)

需要做的工作

0、工程结构

1、代码:

①baidu_login.py

"""Log in to Baidu with Selenium, reading simple image captchas via Tesseract.

The helper functions below operate on the module-level ``driver`` object,
which is created in the ``__main__`` section at the bottom of this file.
"""
import os
import random
import re
import sys
import time

import pytesseract
from PIL import Image, ImageEnhance
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

from func import base642str, str2base64

# XPath of the login form's submit button (used by every login attempt).
SUBMIT_XPATH = "//input[@id='TANGRAM__PSP_3__submit']"
# Element id of the captcha <img>.
VERIFY_IMG_ID = 'TANGRAM__PSP_3__verifyCodeImg'


def input_account(account='请传入账号参数', xpath_rule="//input[@id='TANGRAM__PSP_3__userName']"):
    '''Type the account name into the account input box.

    :param account: account name to type
    :param xpath_rule: XPath locating the account input box
    :return: None
    '''
    input_box1 = driver.find_element_by_xpath(xpath_rule)
    input_box1.send_keys(account)
    time.sleep(0.5)


def input_pwd(pwd, xpath_rule="//input[@id='TANGRAM__PSP_3__password']"):
    '''Clear the password box and type the decoded password.

    :param pwd: base64-encoded password (decoded with ``base642str``)
    :param xpath_rule: XPath locating the password input box
    :return: None
    '''
    input_box2 = driver.find_element_by_xpath(xpath_rule)
    input_box2.clear()  # drop whatever a previous attempt left behind
    input_box2.send_keys(base642str(pwd))
    time.sleep(0.5)


def input_verify_code(verify_code, xpath_rule="TANGRAM__PSP_3__verifyCode"):
    '''Type the captcha text into the captcha input box.

    :param verify_code: recognised captcha text (surrounding whitespace is stripped)
    :param xpath_rule: despite its name, this is the element *id* of the
        captcha input box (the lookup is done by id, not by XPath)
    :return: None
    '''
    code_box = driver.find_element_by_id(xpath_rule)
    # Clear first so a retry does not append to the previous (wrong) code,
    # mirroring what input_pwd does for the password box.
    code_box.clear()
    code_box.send_keys(verify_code.strip())
    time.sleep(0.5)


def identify_verify_code(rootpath, pic_name="screenImg.png"):
    '''Screenshot the page, crop the captcha and OCR it with Tesseract.

    :param rootpath: directory in which the captcha image is saved
    :param pic_name: file name of the saved captcha image
    :return: text recognised by Tesseract (not stripped)
    '''
    # Path used first for the full screenshot, then for the cropped captcha.
    screenImg = os.path.join(rootpath, pic_name)
    # Wait before taking the full-page screenshot.
    time.sleep(3)
    driver.get_screenshot_as_file(screenImg)
    # Locate the captcha once and read both its position and its size.
    captcha = driver.find_element_by_id(VERIFY_IMG_ID)
    location = captcha.location
    size = captcha.size
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']
    # Crop the captcha out of the screenshot; the file handle is closed
    # before the same path is overwritten below.
    with Image.open(screenImg) as screenshot:
        img = screenshot.crop((left, top, right, bottom))
    img = img.convert('L')  # greyscale
    img = ImageEnhance.Contrast(img).enhance(2.0)  # double the contrast
    img.save(screenImg)
    print("图片验证码以保存:%s" % screenImg)
    # OCR the processed image (the saved PNG is kept for inspection).
    print("开始读取识别图片验证码:%s" % screenImg)
    verifycode = pytesseract.image_to_string(img)
    print("识别结果:%s" % verifycode)
    return verifycode


def click_a_link(xpath_rule="//p[@id='TANGRAM__PSP_3__footerULoginBtn']"):
    '''Click the element located by an XPath.

    :param xpath_rule: XPath locating the element to click
    :return: None
    '''
    driver.find_element_by_xpath(xpath_rule).click()


def click_a_id_link(id_rule="TANGRAM__PSP_3__verifyCodeChange"):
    '''Click the element located by an id.

    :param id_rule: id of the element to click
    :return: None
    '''
    driver.find_element_by_id(id_rule).click()


def is_need_verify_code():
    '''Tell whether the login form currently shows a captcha.

    :return: True if the captcha image's ``src`` points at the captcha
        generator, otherwise False
    '''
    imgsrc = driver.find_element_by_id(VERIFY_IMG_ID).get_attribute('src')
    # ``src`` is returned as an absolute URL (or None/'' when unset), so the
    # path has to be searched for rather than matched at position 0.
    return bool(imgsrc) and re.search(r'/cgi-bin/genimage', imgsrc) is not None


def get_id_node_text(id_rule="TANGRAM__PSP_3__error"):
    '''Return the text of the element with the given id.

    :param id_rule: id of the element to read
    :return: the element's text
    '''
    return driver.find_element_by_id(id_rule).text


def is_login_success():
    '''Tell whether the login succeeded, judged by the page title.

    :return: False while the title still starts with "登录", otherwise True
    '''
    return not driver.title.strip().startswith("登录")


def deal_much_pop_up_window():
    '''Dismiss the repeated "phone verification" pop-up and resubmit the form.

    Each pop-up's close button has an id of the form
    ``TANGRAM__<22 + i>__header_a``, where ``i`` counts the pop-ups already
    handled. The loop ends when no such button is found. If a resubmission
    leaves the login page, the program exits via ``sys.exit()``.

    :return: None
    '''
    i = 0
    while True:
        try:
            # 22 + i tracks the changing id of the pop-up's close button.
            one3_click = driver.find_element_by_id("TANGRAM__%s__header_a" % (22 + i))
        except NoSuchElementException:
            print("无需手机验证码")
            break

        print("第 %s 次弹出安全验证,要求获取手机验证码" % (i + 1))
        time.sleep(0.5)
        print("1s后自动选择无需手机验证码")
        time.sleep(1)
        one3_click.click()
        print("1s后自动点击登陆")
        time.sleep(1)
        click_a_link(xpath_rule=SUBMIT_XPATH)  # submit the form again
        time.sleep(2)

        if is_login_success():
            print("打印标题")
            print(driver.title)
            print('333-登录成功...')
            sys.exit()  # terminate the program

        print('333-登陆失败...')
        time.sleep(0.5)
        print('333-2秒后自动重试...')
        i = i + 1
        time.sleep(2)


def deal_a_pop_up_window(xpath_rule="//input[@id='TANGRAM__PSP_27__rebindGuideCancel']"):
    '''Dismiss the one-off "bind phone number" pop-up if it is present.

    :param xpath_rule: XPath locating the pop-up's cancel button
    :return: None
    '''
    try:
        one_click = driver.find_element_by_xpath(xpath_rule)
    except NoSuchElementException:
        print("无需绑定手机号确认")
    else:
        print("弹出了绑定手机号确认,1s后自动选择不需要")
        time.sleep(1)
        one_click.click()


def _fill_verify_code(rootpath, refresh):
    '''Recognise the captcha and type it in, optionally refreshing it first.'''
    if refresh:
        click_a_id_link()  # request a new captcha image
    code = identify_verify_code(rootpath=rootpath, pic_name="screenImg.png")
    input_verify_code(code)


if __name__ == '__main__':
    # The password is stored base64-encoded; produce the encoded form with
    # str2base64("<plain text>") and check it with base642str(...).
    account = random.randint(0, 1000000)  # a random number serves as the account
    pwd = "MTIzNDU2"
    print("account: %s" % account)
    # Maximum number of login attempts.
    max_login = 16
    # The current working directory is used as the root path.
    ROOT_PATH = os.getcwd()
    print('000-正在启用selenium...')
    # Create the Chrome browser object from the chromedriver next to the script.
    chromedriver_exe_path = os.path.join(ROOT_PATH, "chromedriver.exe")
    driver = webdriver.Chrome(chromedriver_exe_path)
    print('000-启用OK')

    # Request the login page.
    # NOTE(review): the published source only had the path '/v2/?login', which
    # driver.get() cannot load; the Baidu passport host is assumed here - confirm.
    url = 'https://passport.baidu.com/v2/?login'
    print('111-selenium正在请求页面:%s' % url)
    driver.get(url)
    print('111-请求OK')

    print("打印标题")
    print(driver.title)

    # Switch to the "account and password" login form.
    click_a_link()

    print('222-selenium正在填写表单...')
    time.sleep(1)

    # ---- first login attempt ----
    input_account(account)
    input_pwd(pwd)

    if is_need_verify_code():
        print("需要验证码")
        _fill_verify_code(ROOT_PATH, refresh=False)
    else:
        print("不需要验证码")
    print('222-填写表单OK')
    time.sleep(1)

    print('333-selenium提交表单...')
    click_a_link(xpath_rule=SUBMIT_XPATH)
    print("第 %s 次尝试登录" % 1)
    time.sleep(3)

    # Repeated pop-up (phone verification code): click its close button.
    deal_much_pop_up_window()
    # One-off pop-up (bind phone number): decline.
    deal_a_pop_up_window()

    # ---- retries ----
    for login_i in range(max_login - 1):
        if is_login_success():
            print("登录成功")
            break

        print("第 %s 次登录失败,正在尝试重新登录..." % (login_i + 1))
        print("=>第 %s 次尝试登录" % (login_i + 2))
        error_info = get_id_node_text()
        # Both operands must be membership tests; a bare non-empty string is
        # always truthy and would make the branches below unreachable.
        if ("帐号或密码错误" in error_info) or ("用户名或密码有误" in error_info):
            print("正在尝试重新输入密码...")
            input_pwd(pwd)
            if is_need_verify_code():
                print("需要验证码")
                _fill_verify_code(ROOT_PATH, refresh=True)
                print("提示:验证码错误,js会使得提交无效")
            else:
                print("不需要验证码")

        elif "验证码" in error_info:
            print("正在尝试重新输入了验证码和密码...")
            time.sleep(1)
            input_pwd(pwd)
            _fill_verify_code(ROOT_PATH, refresh=True)
            print("提示:验证码错误,js会使得提交无效")
            time.sleep(2)

        else:
            print("其他未知异常:登录失败")
            sys.exit()  # terminate the program

        print('222-填写表单OK')
        time.sleep(1)

        print('333-selenium提交表单...')
        click_a_link(xpath_rule=SUBMIT_XPATH)
        time.sleep(3)

    print("打印标题")
    print(driver.title)
    time.sleep(1)

    # Final verdict.
    if is_login_success():
        print('333-登录成功...')
    else:
        print('333-登陆失败...')

View Code

②func.py

import base64
import time


def timestamp2datems(timestamp):
    '''Format a timestamp as a local-time date string with milliseconds.

    :param timestamp: timestamp in seconds (may carry a fractional part)
    :return: string of the form ``YYYY-mm-dd_HH-MM-SS.mmm``
    '''
    local_time = time.localtime(timestamp)
    data_head = time.strftime("%Y-%m-%d_%H-%M-%S", local_time)
    # The fractional part of the timestamp, expressed in milliseconds.
    data_secs = (timestamp - int(timestamp)) * 1000
    return "%s.%03d" % (data_head, data_secs)


def bit2humanView(bit_val):
    '''Render a size as a human-readable string in GB, MB, KB or "bit".

    The largest unit whose truncated quotient is non-zero is used; values
    below 1024 are returned unscaled with the label "bit".

    :param bit_val: numeric size (the original notes describe it as a byte count)
    :return: e.g. ``"1.50 MB"`` or ``"512 bit"``
    '''
    for divisor, unit in ((1024 ** 3, "GB"), (1024 ** 2, "MB"), (1024, "KB")):
        # Compare by value: identity checks against int literals are unreliable.
        if int(bit_val / divisor) != 0:
            return "%.2f %s" % (bit_val / divisor, unit)
    return "%s bit" % bit_val


def str2base64(pwd_decode_str):
    '''Encode a plain-text string as base64.

    :param pwd_decode_str: plain-text string
    :return: base64 text of the string's UTF-8 bytes
    '''
    base64_encrypt = base64.b64encode(pwd_decode_str.encode('utf-8'))
    return str(base64_encrypt, 'utf-8')


def base642str(pwd_encode_str):
    '''Decode base64 text back into a plain-text string.

    :param pwd_encode_str: base64 text
    :return: the decoded bytes interpreted as UTF-8
    '''
    base64_decrypt = base64.b64decode(pwd_encode_str.encode('utf-8'))
    return str(base64_decrypt, 'utf-8')

③依赖包:requirements.txt

Pillow==6.0.0
pytesseract==0.2.6
selenium==3.141.0

2、selenium的相关支持:

①google浏览器;

②相应版本的浏览器驱动chromedriver.exe

3、PIL安装:pip install pillow

4、图像识别工具:

①windows安装tesseract.exe

②python安装pytesseract

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。