# url.txt stores the image links collected by earlier runs, one per line.
# Load them so that already-seen links can be recognised as duplicates.
try:
    with open('url.txt', 'r') as f:
        urllist = f.read().splitlines()
except (OSError, UnicodeError):
    # Missing or unreadable history file (e.g. first run): start empty.
    # Only I/O and decoding errors are caught, so Ctrl-C and genuine
    # programming errors are no longer silently swallowed.
    urllist = []
print(urllist)
def GetPic():
    """Query the wallpaper API once and return the URL of the request behind the response.

    The endpoint is called with a desktop-browser User-Agent and ``timeout=2``.
    The value returned is ``response.request.url``.
    NOTE(review): ``get`` is imported outside this block, presumably
    ``requests.get``, in which case this is the URL reached after any
    redirects. Confirm against the file's imports.

    Returns:
        The URL when it is truthy, otherwise ``None``.

    Any exception raised by ``get`` propagates to the caller.
    """
    # Site to crawl.
    endpoint = 'https://api.vvhan.com/api/bing?type=sj'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    }
    response = get(endpoint, headers=request_headers, timeout=2)
    final_url = response.request.url
    if not final_url:
        return None
    return final_url
def init(max_repeats=30, max_new=3000, delay=1):
    """Collect new image URLs until they dry up or the quota is reached.

    Repeatedly calls ``GetPic()``. Every URL that is not yet in the
    module-level ``urllist`` is printed, appended to ``urllist`` and appended
    as one line to ``url.txt``. The loop ends after ``max_repeats``
    consecutive attempts that produced nothing new, or once ``max_new`` new
    URLs have been stored during this call.

    Args:
        max_repeats: Consecutive unproductive attempts (duplicate URL or no
            URL at all) tolerated before stopping. Defaults to 30.
        max_new: Maximum number of new URLs to store in this call.
            Defaults to 3000.
        delay: Value passed to ``time.sleep`` after every attempt.
            Defaults to 1.

    Exceptions raised by ``GetPic()`` or by writing ``url.txt`` propagate to
    the caller.
    """
    repeats = 0    # consecutive attempts that yielded nothing new
    collected = 0  # new URLs stored during this call
    while repeats < max_repeats and collected < max_new:
        url = GetPic()
        if not url:
            # GetPic() returns None when it has no URL. Previously None was
            # appended to urllist and `url + '\n'` raised TypeError. Count it
            # as an unproductive attempt so the loop still terminates.
            repeats += 1
            print("获取图片链接失败")
        elif url not in urllist:
            print(url)
            urllist.append(url)
            # Re-open per URL so each link is on disk as soon as it is found,
            # even if a later iteration fails.
            with open('url.txt', 'a+') as f:
                f.write(url + '\n')
            collected += 1
            repeats = 0
        else:
            repeats += 1
            print("重复次数" + str(repeats) + ",重复的: " + url)
        # Throttle requests to the API.
        time.sleep(delay)