最近有些爬虫需求,需要持续爬取阿里云相关的数据,使用了 selenium + python 的架构,结果却时不时被阿里云识别为爬虫。无奈之下,多处寻求解决方案:
# Launch Chrome through Selenium with the most common automation
# fingerprints hidden, then open the target page.
# `webdriver` (selenium) and `url` are expected to be defined before this
# snippet runs; neither is defined in this excerpt.

# JavaScript injected into every new document so that
# `navigator.webdriver` reads as `undefined` instead of `true`.
hide_webdriver_script = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""

options = webdriver.ChromeOptions()
mobile_emulation = {"deviceName": "iPhone 6"}

# Drop the "enable-automation" switch to suppress the
# "Chrome is being controlled by automated test software" infobar.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

# Masquerade as a mobile device.
options.add_experimental_option("mobileEmulation", mobile_emulation)

# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of
# passing a Service object; confirm against the installed Selenium version.
driver = webdriver.Chrome(
    options=options,
    executable_path='chromedriver.exe',
)

# The key step: register the script via the DevTools protocol *before*
# navigating, so it is in place when the page's own scripts inspect
# `window.navigator.webdriver`.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": hide_webdriver_script},
)

driver.get(url)