代碼如下:
# -*- coding: UTF-8 -*-
'''
Created on 2021-12-5

@author: good-temper
'''
import time
import urllib2

import bs4
def getPage(urlStr):
    """Fetch a page and return its raw content.

    Args:
        urlStr: URL passed straight to ``urllib2.urlopen``.

    Returns:
        The result of ``read()`` on the opened response.
    """
    response = urllib2.urlopen(urlStr)
    try:
        return response.read()
    finally:
        # Release the connection even when read() raises.
        response.close()
def getNextPageUrl(currPageNum):
    """Return the URL of the page after ``currPageNum``, or '' to stop.

    The candidate URL is ``currPageNum + 1`` followed by the fixed suffix
    ``-1-1-72-4137-33.html``. That page is downloaded, and its URL is
    returned only when it contains no ``<span class="next-disabled">``.

    NOTE(review): the URL prefix literal is empty in this file, so the
    result has no scheme/host; presumably the base URL was lost - confirm.
    NOTE(review): a page that does contain ``next-disabled`` yields '',
    so it looks like the final page is never returned - confirm intent.

    Args:
        currPageNum: number of the page that was just processed.

    Returns:
        The next page's URL, or an empty string when there is none.
    """
    # Page number + "-1-1-72-4137-33.html"
    url = u'' + str(currPageNum + 1) + '-1-1-72-4137-33.html'
    # Is there a next page?
    soup = bs4.BeautifulSoup(getPage(url))
    disabledNext = soup.findAll('span', {'class': 'next-disabled'})
    if not disabledNext:
        return url
    return ''
def analyzeList():
    """Collect the link of every ``div.p-name`` entry across the list pages.

    Starting from page number 0, repeatedly asks ``getNextPageUrl`` for a
    URL and stops as soon as it returns ''. For each page, the ``href`` of
    the first ``<a>`` inside every ``<div class="p-name">`` is recorded.
    The running page count is printed after each page.

    Returns:
        A list of the collected ``href`` values, in page order.
    """
    pageNum = 0
    hrefs = []
    url = getNextPageUrl(pageNum)
    while url != '':
        soup = bs4.BeautifulSoup(getPage(url))
        for elem in soup.findAll('div', {'class': 'p-name'}):
            # elem is already a parsed tag; search it directly instead of
            # serialising it and building a second soup.
            hrefs.append(elem.find('a')['href'])
        pageNum = pageNum + 1
        print(pageNum)
        url = getNextPageUrl(pageNum)
    return hrefs

def analyzeContent(url):
    """Placeholder: ignores ``url`` and always returns an empty string."""
    return ''
def writeToFile(list, path):
    """Append every element of ``list`` to the file at ``path``, one per line.

    Args:
        list: iterable of strings to write (name kept for compatibility,
            although it shadows the builtin inside this function).
        path: file to open in append mode.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(path, 'a') as f:
        for elem in list:
            f.write(elem + '\n')
if __name__ == '__main__':
    # Script entry point: scrape the list pages, report how many links
    # were collected, then append them to the output file.
    urls = analyzeList()

    print('共抓取' + str(len(urls)) + '條\n')
    writeToFile(urls, u'E:\\jd_phone_list.dat')
轉載請注明:seo-網站優化-網站建設?python抓取京東商城手機列表url實例代碼
文章地址:http://www.meyanliao.com/article/online/12705.html