import requests
from pyquery import PyQuery as pq
from multiprocessing.dummy import Pool
from multiprocessing.dummy import Pool as ThreadPool# 线程类
import multiprocessing
import time
# Wall-clock reference for the elapsed-time report printed at the end of the run.
start = time.time()

# Index page of the book whose chapters are downloaded.
url = 'https://www.biquge.cm/6/6793/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

r = requests.get(url=url, headers=headers)
r.encoding = 'gbk'
doc = pq(r.content)

# Book title; used to build the output file name.
book_name = doc('#info h1').text()

# One href per anchor matched by '#list a'; nr() prefixes each with the
# site's scheme and host, so these are presumably site-relative paths.
url_k = [anchor.attr('href') for anchor in doc('#list a').items()]
def nr(a):
    """Download one chapter page and return its title and body text.

    Args:
        a: Path of the chapter page; it is appended to
            ``https://www.biquge.cm`` to form the request URL.

    Returns:
        A ``(title, body)`` tuple of plain strings: the text of
        ``.bookname h1`` and the text of the ``#content`` element.
    """
    headers1 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    h = requests.get(url=f"https://www.biquge.cm{a}", headers=headers1)
    # Note: this only influences h.text; the parser below is fed raw bytes.
    h.encoding = 'gbk'
    doc = pq(h.content)
    name1 = doc('.bookname h1').text()
    # Take the text of the whole #content element. Selecting '#content br'
    # yields the <br> elements themselves (a PyQuery collection), which can
    # neither be concatenated to a str when writing nor reliably handed back
    # from a pool worker.
    lis1 = doc('#content').text()
    print(name1)
    return name1, lis1
def writ(lst, path=None):
    """Write downloaded chapters to a UTF-8 text file.

    The file is opened here rather than at import time, so merely importing
    this module (as spawned worker processes do) never truncates the output,
    and the handle is closed even if a write fails.

    Args:
        lst: Iterable of items whose element 0 is the chapter title and
            element 1 is the chapter body. Each item is written as the
            title, a newline, the body, and a newline.
        path: Destination file. When omitted, ``./{book_name}.txt`` is used,
            built from the module-level ``book_name``.
    """
    if path is None:
        path = f'./{book_name}.txt'
    with open(path, 'w', encoding='utf-8') as f:
        for i in lst:
            title, body = i[0], i[1]
            f.write(f'{title}\n{body}\n')
if __name__ == '__main__':
    # Chapter downloads are I/O-bound, so a thread pool is sufficient.
    # A process pool started with the "spawn" method re-imports this script
    # in every worker, repeating the module-level index download, and it
    # requires results to be pickled; threads avoid both.
    pool = ThreadPool(4)
    try:
        # map() keeps results in the same order as url_k.
        lst = pool.map(nr, url_k, chunksize=1)
    finally:
        pool.close()  # stop accepting new tasks
        pool.join()   # wait for the worker threads to exit
    writ(lst)
    print(time.time() - start)
    print('结束')