代碼如下:
import json
import urllib2

from BeautifulSoup import BeautifulSoup
from creepy import Crawler
class MyCrawler(Crawler):
    """Crawler subclass that prints a page title and a price for fetched pages."""

    def process_document(self, doc):
        """Print the title and price found for one fetched document.

        Only documents whose ``doc.status`` is 200 are processed; any other
        status is ignored. For a processed document this prints, in order:
        the status and URL, the text of the ``h1`` under the first ``div`` of
        the element with id ``product-intro``, and the ``'p'`` field of the
        first entry of the JSON returned by the price lookup.

        Args:
            doc: fetched document; this method reads ``doc.status``,
                ``doc.url`` and ``doc.text``.
                # presumably supplied by the Crawler base class per page -
                # confirm against the creepy documentation.

        Raises:
            AttributeError: if the page has no ``product-intro`` element
                (``soup.find`` returns None in that case).
            urllib2.URLError: if the price lookup request fails.
        """
        if doc.status != 200:
            return

        print('[%d]%s' % (doc.status, doc.url))

        # Re-encode the body from gb18030 to utf-8 before parsing; if that
        # fails for any reason, report the error and parse the raw text.
        try:
            soup = BeautifulSoup(doc.text.decode('gb18030').encode('utf-8'))
        except Exception as e:
            print(e)
            soup = BeautifulSoup(doc.text)
        print(soup.find(id="product-intro").div.h1.text)

        # The id is the last path segment of the URL, up to its first dot.
        url_id = urllib2.unquote(doc.url).decode('utf8').split('/')[-1].split('.')[0]

        # NOTE(review): the URL below has no scheme/host - the prefix looks
        # like it was lost when this snippet was published. Restore the real
        # price endpoint before running.
        f = urllib2.urlopen('?skuid=J_' + url_id, timeout=5)
        try:
            price = json.loads(f.read())
        finally:
            # Close the response even when reading or JSON decoding fails.
            f.close()
        print(price[0]['p'])
# Configure the crawler and start it.
crawler = MyCrawler()
crawler.set_follow_mode(Crawler.F_SAME_HOST)
crawler.set_concurrency_level(16)
# URL filter pattern matching common image/script/style/flash extensions.
crawler.add_url_filter(r'\.(jpg|jpeg|gif|png|js|css|swf)$')
# NOTE(review): the start URL is empty - it looks like it was lost when this
# snippet was published. Supply the real start page before running.
crawler.crawl('')

轉載請注明:seo-網站優化-網站建設 » python抓取京東價格分析京東商品價格走勢

文章地址:http://www.meyanliao.com/article/online/13193.html