发布于2020-03-18 12:02 阅读(1448) 评论(0) 点赞(29) 收藏(4)
import requests,re,os,time
from lxml import etree
class TBZT():
    """Download the description images of a Taobao item page.

    Creating an instance prompts on stdin for a cookie and an item URL and
    fetches that page with ``requests``. ``desc_Url`` then extracts the item
    title and the description-document URL from the page, fetches the
    description document and saves every image referenced in it into a
    directory named after the title.

    Attributes:
        headers: dict of HTTP headers (``cookie`` and ``User-Agent``) built
            by ``cookiE``.
        req: the ``requests`` response for the item page, or ``None`` when
            the fetch failed.
    """

    # Placeholder cookie used when the user answers the prompt with Enter.
    DEFAULT_COOKIE = '你的cookie'
    USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/78.0.3904.44 Safari/537.36'
    )
    # Seconds before a request is abandoned; without a timeout a stalled
    # connection would block the script indefinitely.
    TIMEOUT = 30

    # Captures the description-document URL embedded in the page's script.
    _DESC_RULE = re.compile(r"location.protocol==='http:' \? '(.*?)' :")
    # Captures the value of every ``src="..."`` attribute followed by a space.
    _IMG_RULE = re.compile(r'src="(.*?)" ')
    # Characters that cannot appear in a directory name on Windows or POSIX.
    _BAD_PATH_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        """Prompt for the cookie and the item URL, then fetch the item page.

        A failed request is reported on stdout and leaves ``self.req`` as
        ``None`` instead of leaving the attribute undefined.
        """
        self.headers = self.cookiE()
        self.req = None
        try:
            url = str(input('请输入淘宝的网址:'))
            # ``headers`` must be passed by keyword: the second positional
            # parameter of requests.get is ``params`` (the query string).
            self.req = requests.get(url, headers=self.headers,
                                    timeout=self.TIMEOUT)
        except (EOFError, requests.RequestException) as e:
            print('网址错误', e)
            time.sleep(10)

    @classmethod
    def _build_headers(cls, cookie):
        """Return the request headers for *cookie* (placeholder if empty)."""
        return {
            'cookie': cookie if cookie != '' else cls.DEFAULT_COOKIE,
            'User-Agent': cls.USER_AGENT,
        }

    @classmethod
    def _find_image_urls(cls, html):
        """Return every image URL matched by ``_IMG_RULE`` in *html*."""
        return cls._IMG_RULE.findall(html)

    @classmethod
    def _safe_dirname(cls, title):
        """Return *title* stripped, with path-hostile characters as ``_``."""
        return cls._BAD_PATH_CHARS.sub('_', title).strip()

    @staticmethod
    def _abort(message):
        """Print *message*, pause briefly and terminate with exit status 1."""
        print(message)
        time.sleep(2)
        # SystemExit is raised directly; the ``exit`` helper is injected by
        # the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)

    def cookiE(self):
        """Prompt for a cookie and return the HTTP headers to send.

        An empty answer selects the placeholder cookie.

        Returns:
            dict: headers with the keys ``cookie`` and ``User-Agent``.
        """
        cookie = str(input('请输入淘宝的cookie:'))
        print('默认cookie' if cookie == '' else '自选cookie')
        # Pause so the user can read which cookie is in use. Both branches
        # pause now; previously the default branch returned before its sleep.
        time.sleep(1)
        return self._build_headers(cookie)

    def desc_Url(self):
        """Save every image of the item's description into a directory.

        The directory is named after the item title and created if missing.
        Returns early when the item page was never fetched.

        Raises:
            SystemExit: with status 1 when the title, the description URL
                or the image list cannot be found.
        """
        response = getattr(self, 'req', None)
        if response is None:
            print('网址错误')
            return
        req = response.text
        text = etree.HTML(req)
        # NOTE(review): guards against the parser yielding no tree for an
        # empty document - confirm against the lxml version in use.
        titles = [] if text is None else text.xpath(
            '//*[@id="J_Title"]/h3/@data-title')
        documentName = self._safe_dirname(titles[0]) if titles else ''
        if not documentName:
            self._abort('noTitle!')
        os.makedirs(documentName, exist_ok=True)

        matches = self._DESC_RULE.findall(req)
        if not matches:
            self._abort('noDescUrl!')
        # The captured value is prefixed with the scheme, as before.
        descUrl = 'https:{}'.format(matches[0])
        descUrls = requests.get(descUrl, headers=self.headers,
                                timeout=self.TIMEOUT).text
        imgUrls = self._find_image_urls(descUrls)
        if not imgUrls:
            self._abort('noSrc!')
        for Index, imgurl in enumerate(imgUrls):
            self.download_Img(str(Index), imgurl, documentName)
            print(Index, imgurl)

    def download_Img(self, Index, imgurl, doucumentName):
        """Download one image to ``<doucumentName>/<Index>.jpg``.

        Args:
            Index: file name stem, as a string.
            imgurl: image URL; a protocol-relative ``//host/...`` URL is
                completed with ``https:``.
            doucumentName: existing directory that receives the file.

        Network, HTTP-status and file errors are printed and swallowed so
        that one bad image does not stop the remaining downloads.
        """
        if imgurl.startswith('//'):
            imgurl = 'https:' + imgurl
        try:
            imgUrl = requests.get(imgurl, timeout=self.TIMEOUT)
            # Do not store an HTTP error page under a .jpg name.
            imgUrl.raise_for_status()
            # os.path.join instead of a hard-coded backslash keeps the
            # path valid on non-Windows systems as well.
            target = os.path.join(doucumentName, Index + '.jpg')
            with open(target, 'wb') as f:
                f.write(imgUrl.content)
        except (requests.RequestException, OSError) as e:
            print('downloadError:', e)
            time.sleep(3)
class TMZT():
    """Download the description images of a Tmall item page.

    Creating an instance prompts on stdin for a cookie and an item URL and
    fetches that page with ``requests``. ``desc_Url`` then extracts the item
    title and the ``descUrl`` value from the page, fetches the description
    document and saves every ``<img>`` referenced in it into a directory
    named after the title.

    Attributes:
        headers: dict of HTTP headers (``cookie`` and ``User-Agent``) built
            by ``cookiE``.
        req: the ``requests`` response for the item page, or ``None`` when
            the fetch failed.
    """

    # Placeholder cookie used when the user answers the prompt with Enter.
    DEFAULT_COOKIE = '你的cookie'
    # User-Agent sent together with the placeholder cookie.
    DEFAULT_USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 '
        'Edge/18.17763'
    )
    # User-Agent sent together with a user-supplied cookie.
    CUSTOM_USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/78.0.3904.44 Safari/537.36'
    )
    # Seconds before a request is abandoned; without a timeout a stalled
    # connection would block the script indefinitely.
    TIMEOUT = 30

    # Captures the description-document URL from the page's embedded JSON.
    _DESC_RULE = re.compile(r'"descUrl":"(.*?)",')
    # Captures the ``src`` of every ``<img src="..." `` occurrence.
    _IMG_RULE = re.compile(r'img src="(.*?)" ')
    # Characters that cannot appear in a directory name on Windows or POSIX.
    _BAD_PATH_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        """Prompt for the cookie and the item URL, then fetch the item page.

        A failed request is reported on stdout and leaves ``self.req`` as
        ``None`` instead of leaving the attribute undefined.
        """
        self.headers = self.cookiE()
        self.req = None
        try:
            url = str(input('请输入天猫的网址:'))
            # ``headers`` must be passed by keyword: the second positional
            # parameter of requests.get is ``params`` (the query string).
            self.req = requests.get(url, headers=self.headers,
                                    timeout=self.TIMEOUT)
        except (EOFError, requests.RequestException) as e:
            print('网址错误', e)
            time.sleep(10)

    @classmethod
    def _build_headers(cls, cookie):
        """Return the request headers for *cookie* (placeholder if empty)."""
        if cookie == '':
            return {
                'cookie': cls.DEFAULT_COOKIE,
                'User-Agent': cls.DEFAULT_USER_AGENT,
            }
        return {
            'cookie': cookie,
            'User-Agent': cls.CUSTOM_USER_AGENT,
        }

    @classmethod
    def _find_image_urls(cls, html):
        """Return every image URL matched by ``_IMG_RULE`` in *html*."""
        return cls._IMG_RULE.findall(html)

    @classmethod
    def _safe_dirname(cls, title):
        """Return *title* stripped, with path-hostile characters as ``_``."""
        return cls._BAD_PATH_CHARS.sub('_', title).strip()

    @staticmethod
    def _abort(message):
        """Print *message*, pause briefly and terminate with exit status 1."""
        print(message)
        time.sleep(2)
        # SystemExit is raised directly; the ``exit`` helper is injected by
        # the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)

    def cookiE(self):
        """Prompt for a cookie and return the HTTP headers to send.

        An empty answer selects the placeholder cookie.

        Returns:
            dict: headers with the keys ``cookie`` and ``User-Agent``.
        """
        cookie = str(input('请输入天猫的cookie:'))
        print('默认cookie' if cookie == '' else '自选cookie')
        # Pause so the user can read which cookie is in use. Both branches
        # pause now; previously the default branch returned before its sleep.
        time.sleep(1)
        return self._build_headers(cookie)

    def desc_Url(self):
        """Save every image of the item's description into a directory.

        The directory is named after the item title and created if missing.
        Returns early when the item page was never fetched.

        Raises:
            SystemExit: with status 1 when the title, the description URL
                or the image list cannot be found.
        """
        response = getattr(self, 'req', None)
        if response is None:
            print('网址错误')
            return
        req = response.text
        text = etree.HTML(req)
        # NOTE(review): guards against the parser yielding no tree for an
        # empty document - confirm against the lxml version in use.
        titles = [] if text is None else text.xpath(
            '//*[@id="J_DetailMeta"]/div[1]/div[1]/div/div[1]/h1/text()')
        documentName = self._safe_dirname(titles[0]) if titles else ''
        if not documentName:
            self._abort('noTitle!')
        os.makedirs(documentName, exist_ok=True)

        matches = self._DESC_RULE.findall(req)
        if not matches:
            self._abort('noDescUrl!')
        # The captured value is prefixed with the scheme, as before.
        descUrl = 'https:{}'.format(matches[0])
        descUrls = requests.get(descUrl, headers=self.headers,
                                timeout=self.TIMEOUT).text
        imgUrls = self._find_image_urls(descUrls)
        if not imgUrls:
            self._abort('noSrc!')
        for Index, imgurl in enumerate(imgUrls):
            self.download_Img(str(Index), imgurl, documentName)
            print(Index, imgurl)

    def download_Img(self, Index, imgurl, doucumentName):
        """Download one image to ``<doucumentName>/<Index>.jpg``.

        Args:
            Index: file name stem, as a string.
            imgurl: image URL; a protocol-relative ``//host/...`` URL is
                completed with ``https:``.
            doucumentName: existing directory that receives the file.

        Network, HTTP-status and file errors are printed and swallowed so
        that one bad image does not stop the remaining downloads.
        """
        if imgurl.startswith('//'):
            imgurl = 'https:' + imgurl
        try:
            imgUrl = requests.get(imgurl, timeout=self.TIMEOUT)
            # Do not store an HTTP error page under a .jpg name.
            imgUrl.raise_for_status()
            # os.path.join instead of a hard-coded backslash keeps the
            # path valid on non-Windows systems as well.
            target = os.path.join(doucumentName, Index + '.jpg')
            with open(target, 'wb') as f:
                f.write(imgUrl.content)
        except (requests.RequestException, OSError) as e:
            print('downloadError:', e)
            time.sleep(3)
if __name__ == '__main__':
    # Script entry point: ask which site to scrape, build the matching
    # downloader (its constructor prompts for cookie and URL) and run it.
    model = input('淘宝找图输入"tb或a",天猫找图输入"tm或b": ')
    if model in ('tb', 'a'):
        # Taobao mode.
        print('淘宝模式')
        tbzt = TBZT()
        tbzt.desc_Url()
    elif model in ('tm', 'b'):
        # Tmall mode.
        print('天猫模式')
        tmzt = TMZT()
        tmzt.desc_Url()
    else:
        # Unrecognised answer: report it, pause, then quit with status 1.
        print('输入错误,关闭')
        time.sleep(5)
        exit(1)
作者:战天
链接:https://www.pythonheidong.com/blog/article/265485/6659c2cd42787fa4f267/
来源:python黑洞网
任何形式的转载都请注明出处。如有侵权,一经发现,必将追究其法律责任。
昵称:
评论内容:(最多支持255个字符)
---无人问津也好,技不如人也罢,你都要试着安静下来,去做自己该做的事,而不是让内心的烦躁、焦虑,坏掉你本来就不多的热情和定力
Copyright © 2018-2021 python黑洞网 All Rights Reserved 版权所有,并保留所有权利。 京ICP备18063182号-1
投诉与举报,广告合作请联系vgs_info@163.com或QQ3083709327
免责声明:网站文章均由用户上传,仅供读者学习交流使用,禁止用做商业用途。若文章涉及色情,反动,侵权等违法信息,请向我们举报,一经核实我们会立即删除!