pyppeteer用法总结

tech2026-04-10  2

pyppeteer基于asyncio异步模块，下文中所有带await的语句都需要写在async函数（协程）中运行。

基本使用

导入模块

from pyppeteer import launch

实例化一个浏览器

# Launch a browser instance using pyppeteer's default options.
browser = await launch()
# Alternative with explicit options: show the browser window (headless off),
# pipe the browser process output into this process (dumpio), and drop the
# '--enable-automation' default argument. NOTE(review): passing a list to
# 'ignoreDefaultArgs' depends on the installed pyppeteer version - confirm.
# browser = await launch({'headless':False,'dumpio':True,'ignoreDefaultArgs':['--enable-automation']})

实例化一个页面

page = await self.browser.newPage()

页面打开网站

# Navigate the page to the given URL. The 'timeout' option is 100*1000;
# pyppeteer's Page.goto reads it as milliseconds, i.e. 100 seconds.
await page.goto('https://www.baidu.com/',{'timeout':100*1000})

定位页面元素

ele = (await page.xpath('//div[text()="免密码登录"]'))[0] await ele.click() # 点击页面元素

输入框中输入内容

# Type the value of `uname` into the element matching the CSS selector.
# 'delay' is the pause between key presses (milliseconds per pyppeteer's
# Page.type documentation).
await page.type('input[name=username]',uname,{'delay':100})

获取元素属性值

# Jeval (pyppeteer's alias for querySelectorEval) runs the JavaScript function
# against the first element matching the CSS selector and returns its result;
# here that is the image's "src" attribute (None if the attribute is absent).
check = await page.Jeval('img[alt=图形验证码]','node => node.getAttribute("src")')

获取cookies

# Evaluate the JavaScript expression in the page and return its value.
# force_expr=True makes pyppeteer treat the string as an expression rather
# than a function. document.cookie yields one "name=value; ..." string and
# does not include HttpOnly cookies.
cookies = await page.evaluate('document.cookie',force_expr=True)

示例

import asyncio
import traceback
from random import randint

from pyppeteer import launch


class Spider:
    """Drive a pyppeteer browser through the Zhihu sign-in page.

    For each user name the spider opens the sign-in page, switches to the
    password-free (SMS) login tab, types the user name, reads the captcha
    image source, requests an SMS code and finally prints the page cookies.
    """

    # Launch options shared by every place that starts the browser: visible
    # window, browser output piped to this process, and the
    # '--enable-automation' default argument removed.
    LAUNCH_OPTIONS = {
        'headless': False,
        'dumpio': True,
        'ignoreDefaultArgs': ['--enable-automation'],
    }

    def __init__(self):
        self.browser = None
        self.regist_url = 'https://www.zhihu.com/signin?next=%2F'

    def input_time_random(self):
        """Return a random integer in [100, 151] used to vary typing delay."""
        return randint(100, 151)

    async def registered(self, unames: tuple):
        """Run the sign-in flow once for every user name in ``unames``.

        The browser is launched lazily if ``run`` has not created it yet.
        A failure for one user name is printed and does not stop the
        remaining ones.
        """
        if not self.browser:
            self.browser = await launch(dict(self.LAUNCH_OPTIONS))
        for uname in unames:
            try:
                await self._register_one(uname)
            except Exception:
                # Best effort per user: report the failure and move on.
                # ``Exception`` (not a bare except) lets task cancellation
                # and KeyboardInterrupt propagate.
                traceback.print_exc()

    async def _register_one(self, uname):
        """Perform the sign-in flow for a single user name on a fresh page."""
        page = await self.browser.newPage()
        try:
            await page.goto(self.regist_url, {'timeout': 100 * 1000})
            # Switch to the password-free login tab.
            login_tabs = await page.xpath('//div[text()="免密码登录"]')
            await login_tabs[0].click()
            await asyncio.sleep(1)
            # Type the user name with a randomised per-key delay (50-101).
            await page.type(
                'input[name=username]',
                uname,
                {'delay': self.input_time_random() - 50},
            )
            check = await page.Jeval(
                'img[alt=图形验证码]',
                'node => node.getAttribute("src")',
            )
            print('check', check)
            # ``check`` is None when the image has no src attribute, so test
            # for that before the substring check.
            if check and 'null' not in check:
                # TODO: a captcha image is present; send it to a
                # captcha-solving service here.
                pass
            # Request the SMS verification code.
            sms_buttons = await page.xpath('//button[text()="获取短信验证码"]')
            await sms_buttons[0].click()
            # NOTE(review): presumably leaves time for the SMS code to be
            # entered by hand before the cookies are read - confirm.
            await asyncio.sleep(60)
            cookies = await page.evaluate('document.cookie', force_expr=True)
            print('cookies', cookies)
        finally:
            # Close the tab so pages do not pile up across user names.
            await page.close()

    async def run(self):
        """Launch the browser, run the registration task(s), then close it."""
        self.browser = await launch(dict(self.LAUNCH_OPTIONS))
        try:
            tasks = [
                asyncio.create_task(self.registered(('XXXXX',)))
                for _ in range(1)
            ]
            # gather re-raises a task's exception instead of leaving it
            # unretrieved.
            await asyncio.gather(*tasks)
        finally:
            await self.browser.close()
            self.browser = None


if __name__ == "__main__":
    spider = Spider()
    asyncio.run(spider.run())
最新回复(0)