『姵来晓晓看娱乐』python3快速爬取房源信息,并存入mysql数据库,超详细(二)


『姵来晓晓看娱乐』python3快速爬取房源信息,并存入mysql数据库,超详细
文章图片
以下为完整代码:
"""Scrape housing listings from tianjin.anjuke.com/sale into MySQL.

Fetches listing pages 1-10, pulls per-listing fields out of the raw HTML
with regular expressions, and inserts one row per listing into the
``shanghai_admin`` table of the local ``anjuke`` database.

NOTE(review): this script was recovered from a published copy in which the
HTML tags inside the regex literals -- and the statements that sat between
them -- were stripped out.  The ``*_PATTERN`` constants below therefore hold
only what survived; every one of them must be restored against the live
page markup before the scraper can produce meaningful rows.
"""
import re

import pymysql
import requests
from bs4 import BeautifulSoup  # noqa: F401  (script-level import; the regex-based extraction below does not use it)

FIRST_PAGE = 1
LAST_PAGE = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request hangs the run forever

FIRST_PAGE_URL = 'https://tianjin.anjuke.com/sale/'
PAGE_URL_TEMPLATE = 'https://tianjin.anjuke.com/sale/p%d/#filtersort'
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
)

# Image URL pattern -- only the empty residue survived.  TODO: restore.
IMAGE_PATTERN = r''
# Listing description pattern -- surrounding tags lost.  TODO: restore.
TITLE_PATTERN = r'(.*?)'
# Pattern whose matches are indexed [0]..[4] (localroom, localarea,
# localhigh, localtimes, local), so it needs at least five capture groups.
# The definition is missing entirely.  TODO: restore.
DETAILS_PATTERN = None
# Total price pattern -- the definition is missing entirely.  TODO: restore.
TOTAL_PRICE_PATTERN = None
# Unit price pattern -- surrounding tags lost.  TODO: restore.
UNIT_PRICE_PATTERN = r'(.*?)'

# Parameterized so that quotes in scraped text cannot break or inject SQL.
# Column order: jpgs, scripts, local, total, oneprice,
#               localroom, localarea, localhigh, localtimes.
INSERT_SQL = (
    'insert into shanghai_admin '
    'values (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
)


def check_patterns():
    """Raise RuntimeError if a pattern that is missing has not been restored."""
    required = {
        'DETAILS_PATTERN': DETAILS_PATTERN,
        'TOTAL_PRICE_PATTERN': TOTAL_PRICE_PATTERN,
    }
    missing = sorted(name for name, value in required.items() if value is None)
    if missing:
        raise RuntimeError(
            'regex pattern(s) not configured: ' + ', '.join(missing)
        )


def page_url(page):
    """Return the listing URL for *page* (page 1 has no ``p<N>`` suffix)."""
    if page == 1:
        return FIRST_PAGE_URL
    return PAGE_URL_TEMPLATE % page


def fetch_page(page):
    """Download listing page *page* and return its HTML as text.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    url = page_url(page)
    # The page's own URL is sent as the referer.
    headers = {'referer': url, 'user-agent': USER_AGENT}
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def extract_rows(html_text):
    """Return one tuple per listing, in ``INSERT_SQL`` column order."""
    images = re.findall(IMAGE_PATTERN, html_text)
    titles = re.findall(TITLE_PATTERN, html_text)
    details = re.findall(DETAILS_PATTERN, html_text)
    total_prices = re.findall(TOTAL_PRICE_PATTERN, html_text)
    unit_prices = re.findall(UNIT_PRICE_PATTERN, html_text)
    print(len(images))

    field_lists = (images, titles, details, total_prices, unit_prices)
    lengths = [len(values) for values in field_lists]
    if len(set(lengths)) > 1:
        # zip() stops at the shortest list; say so instead of failing with
        # IndexError part-way through a page.
        print('warning: field counts differ %s; extra matches ignored' % lengths)

    return [
        (image, title, detail[4], total, unit,
         detail[0], detail[1], detail[2], detail[3])
        for image, title, detail, total, unit in zip(*field_lists)
    ]


def save_rows(connection, rows):
    """Insert *rows* with a parameterized statement and commit."""
    if not rows:
        return
    with connection.cursor() as cursor:
        cursor.executemany(INSERT_SQL, rows)
    connection.commit()


def main():
    """Scrape pages FIRST_PAGE..LAST_PAGE and store every listing found."""
    check_patterns()
    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='',
        database='anjuke',
        charset='utf8mb4',  # listing text is Chinese
    )
    try:
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            print('这是第' + str(page) + '页')
            save_rows(connection, extract_rows(fetch_page(page)))
    finally:
        # Close the connection even when a request or insert fails.
        connection.close()


if __name__ == '__main__':
    main()