Python 登录并爬取淘宝信息代码示例
本文主要分享使用 Python(Selenium)登录并爬取淘宝信息的相关代码,供大家参考。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs
import datetime
import logging
import os
import time
import traceback

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By


def login(driver, site, username=u"yourusername", password=u"yourpsd"):
    """Open *site* and submit its login form with the given credentials.

    The function is best-effort: any failure while locating or driving the
    form is reported (traceback plus the text ``failure``) and swallowed, so
    the caller continues either way.

    Args:
        driver: Selenium WebDriver instance used for all browser actions.
        site: URL to load before looking for the login entry point.
        username: Text typed into the ``TPL_username_1`` field.
        password: Text typed into the ``TPL_password_1`` field.
    """
    driver.get(site)
    time.sleep(5)
    try:
        # Click the "please log in" entry (element with class "h").
        driver.find_element(By.CLASS_NAME, "h").click()
        time.sleep(5)
        # Fill in account name and password.
        driver.find_element(By.ID, "TPL_username_1").send_keys(username)
        time.sleep(5)
        driver.find_element(By.ID, "TPL_password_1").send_keys(password)
        time.sleep(5)
        # Submit the form, then pause for a fixed 30 s before returning.
        driver.find_element(By.ID, "J_SubmitStatic").click()
        time.sleep(30)
    except Exception:
        # `except Exception` (not a bare `except`) lets Ctrl-C / SystemExit
        # propagate; the traceback keeps the real cause visible.
        traceback.print_exc()
        print(u"failure")


def _collect_link_texts(elements):
    """Return the stripped text of every non-empty element, one per line."""
    lines = []
    for element in elements:
        text = element.text  # read once instead of once per use
        if text != "":
            line = text.strip() + "\n"
            print(line)
            lines.append(line)
    return "".join(lines)


def _save_results(filename, site, text):
    """Write *text* to *filename*, or log ``filename,site`` to error.txt if empty."""
    if text != "":
        with codecs.open(filename, "w", "utf-8") as out:
            out.write(text)
        return
    entry = filename + "," + site
    print(entry)
    with codecs.open("error.txt", "a", "utf-8") as err:
        # Always terminate the entry with exactly one newline so successive
        # failures never run together, whether or not *site* carried one.
        err.write(entry.rstrip("\r\n") + "\n")


def crawlmarket(driver, filename, site):
    """Scrape the text of all ``<a class="J_ItemLink">`` links on *site*.

    If the page shows no such links, ``login`` is attempted and the links are
    looked up again. Collected texts are written to *filename* (UTF-8, one per
    line); if nothing was collected, ``filename,site`` is appended to
    ``error.txt`` instead.

    The driver is left open: the caller owns its lifetime and is responsible
    for calling ``driver.quit()`` once all pages have been processed.

    Args:
        driver: Selenium WebDriver instance, reused across calls.
        filename: Path of the output file for this page.
        site: URL of the page to scrape.
    """
    item_xpath = "//a[@class='J_ItemLink']"

    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    driver.refresh()
    time.sleep(10)

    links = driver.find_elements(By.XPATH, item_xpath)
    # No links found: try logging in.
    needs_login = not links
    if needs_login:
        login(driver, site)
    # NOTE(review): the flattened original does not show whether this pause
    # was conditional on the login; it is kept unconditional here.
    time.sleep(30)
    if needs_login:
        # Look again after the login attempt; without this the login could
        # never change the outcome.
        links = driver.find_elements(By.XPATH, item_xpath)

    _save_results(filename, site, _collect_link_texts(links))


def crawltaobaosousuo(driver, filename, site):
    """Scrape the text of all ``<a class="J_ClickStat">`` links on *site*.

    Collected texts are written to *filename* (UTF-8, one per line); if
    nothing was collected, ``filename,site`` is appended to ``error.txt``.

    The driver is left open: the caller owns its lifetime and is responsible
    for calling ``driver.quit()`` once all pages have been processed.

    Args:
        driver: Selenium WebDriver instance, reused across calls.
        filename: Path of the output file for this page.
        site: URL of the page to scrape.
    """
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    # The page is loaded a second time and then refreshed, as in the original.
    driver.get(site)
    time.sleep(30)
    driver.refresh()

    links = driver.find_elements(By.XPATH, "//a[@class='J_ClickStat']")
    _save_results(filename, site, _collect_link_texts(links))
def jiexi(driver):
    """Read crawl tasks from ``1.txt`` and dispatch each one to a crawler.

    Every task line has the form ``<output filename>,<url>``. URLs containing
    ``markets`` go to ``crawlmarket``; all others go to ``crawltaobaosousuo``.
    Lines without a comma (for example blank lines) are skipped.

    The file is decoded as UTF-8 while reading, which replaces the per-field
    ``.decode("utf-8")`` that only works on Python 2 byte strings.

    Args:
        driver: Selenium WebDriver instance passed on to the crawl functions.
    """
    with codecs.open("1.txt", "r", "utf-8") as tasks:
        for line in tasks:
            # Split on the FIRST comma only, so commas inside the URL are
            # kept instead of truncating it.
            filename, sep, href = line.partition(",")
            if not sep:
                continue  # blank or malformed line: nothing to crawl
            # Fixed pause before every task (presumably throttling; the
            # accompanying article mentions the site's risk control).
            time.sleep(60)
            print(filename)
            # href is passed through unmodified, including any trailing line
            # ending, exactly as the crawl functions have always received it.
            if "markets" in href:
                crawlmarket(driver, filename, href)
            else:
                crawltaobaosousuo(driver, filename, href)


if __name__ == '__main__':
    driver = webdriver.Firefox()
    try:
        jiexi(driver)
    finally:
        # Always shut the browser down, even if a task raised. The session
        # may already be gone (closed earlier or crashed), so a failure here
        # is reported rather than masking the original error.
        try:
            driver.quit()
        except Exception:
            traceback.print_exc()
小结
如有改进策略,欢迎一起探讨。该脚本可以抓取淘宝的部分网页内容,大家可根据自己的需求修改;需要注意的是,频繁抓取可能会触发淘宝的风控。个人觉得不登录的效果更好。
以上就是本文关于 Python 登录并爬取淘宝信息代码示例的全部内容,希望对大家有所帮助。感兴趣的朋友可以继续参阅本站其他相关专题。如有不足之处,欢迎留言指出。感谢朋友们对本站的支持!