pyppeteer 基于 asyncio 异步模块
基本使用
导入模块
from pyppeteer
import launch
实例化一个浏览器
browser
= await launch
()
实例化一个页面
page
= await self
.browser
.newPage
()
页面打开网站
await page
.goto
('https://www.baidu.com/',{'timeout':100*1000})
定位页面元素
ele
= (await page
.xpath
('//div[text()="免密码登录"]'))[0]
await ele
.click
()
输入框中输入内容
await page
.type('input[name=username]',uname
,{'delay':100})
获取元素属性值
check
= await page
.Jeval
('img[alt=图形验证码]','node => node.getAttribute("src")')
获取cookies
cookies
= await page
.evaluate
('document.cookie',force_expr
=True)
示例
import asyncio
import random
import traceback

from pyppeteer import launch
class Spider:
    """Walk the Zhihu SMS sign-in page in a pyppeteer-controlled browser.

    ``browser`` holds the launched browser (``None`` until one is started) and
    ``regist_url`` is the sign-in page every account is sent to.
    """

    # Options handed to pyppeteer's launch() wherever a browser is started.
    _LAUNCH_OPTIONS = {
        'headless': False,
        'dumpio': True,
        'ignoreDefaultArgs': ['--enable-automation'],
    }

    def __init__(self):
        self.browser = None
        self.regist_url = 'https://www.zhihu.com/signin?next=%2F'

    def input_time_random(self):
        """Return a random integer from 100 to 151 inclusive.

        ``registered`` subtracts 50 from it to get the typing delay option.
        """
        return random.randint(100, 151)

    @staticmethod
    def _has_captcha(src):
        """Return True when ``src`` is a string that does not contain 'null'.

        ``src`` is the captcha image's "src" attribute; it is ``None`` when
        the attribute is absent, which must not be fed to the ``in`` test.
        """
        return src is not None and 'null' not in src

    async def _ensure_browser(self):
        """Launch a browser if none is running yet and return it."""
        if not self.browser:
            # Pass a copy so the shared class-level options are never mutated.
            self.browser = await launch(dict(self._LAUNCH_OPTIONS))
        return self.browser

    @staticmethod
    async def _click_first(page, expression):
        """Click the first element matching the XPath ``expression``.

        Raises:
            LookupError: when the expression matches nothing on the page.
        """
        matches = await page.xpath(expression)
        if not matches:
            raise LookupError(f'no element matches XPath {expression!r}')
        await matches[0].click()

    @staticmethod
    async def _close_page(page):
        """Close ``page``, printing (not raising) any error from the close."""
        try:
            await page.close()
        except Exception:
            traceback.print_exc()

    async def _request_sms_code(self, page, uname):
        """Drive one page through the sign-in form for ``uname``."""
        await page.goto(self.regist_url, {'timeout': 100 * 1000})
        await self._click_first(page, '//div[text()="免密码登录"]')
        await asyncio.sleep(1)
        await page.type(
            'input[name=username]',
            uname,
            {'delay': self.input_time_random() - 50},
        )
        check = await page.Jeval(
            'img[alt=图形验证码]',
            'node => node.getAttribute("src")',
        )
        print('check', check)
        if self._has_captcha(check):
            # Placeholder kept from the original: captcha handling is not
            # implemented, so the flow simply continues.
            pass
        await self._click_first(page, '//button[text()="获取短信验证码"]')
        # Wait a minute before reading the cookies.
        await asyncio.sleep(60)
        cookies = await page.evaluate('document.cookie', force_expr=True)
        print('cookies', cookies)

    async def registered(self, unames: tuple):
        """Run the sign-in flow once per user name in ``unames``.

        A browser is launched on demand. Each account gets its own page,
        which is closed afterwards. A failure for one account prints its
        traceback and does not stop the remaining accounts.
        """
        await self._ensure_browser()
        for uname in unames:
            page = None
            try:
                page = await self.browser.newPage()
                await self._request_sms_code(page, uname)
            except Exception:
                # Not a bare except: task cancellation and KeyboardInterrupt
                # must still propagate.
                traceback.print_exc()
            finally:
                if page is not None:
                    await self._close_page(page)

    async def run(self):
        """Start the browser, run the registration task, then close the browser."""
        await self._ensure_browser()
        try:
            # 'XXXXX' is the placeholder account name from the original example.
            tasks = [asyncio.create_task(self.registered(('XXXXX',)))]
            await asyncio.gather(*tasks)
        finally:
            await self.browser.close()
            self.browser = None
if __name__ == "__main__":
    # Script entry point: build a Spider and drive its run() coroutine to
    # completion on a fresh event loop.
    asyncio.run(Spider().run())
转载请注明原文地址:https://tech.qufami.com/read-27738.html