from requests import get, RequestException
import time

# Load previously collected image URLs so repeat runs don't store duplicates
try:
    with open('url.txt', 'r') as f:  # file holding the scraped image URLs
        urllist = f.read().splitlines()
except FileNotFoundError:
    urllist = []
print(urllist)


def GetPic():
    api = 'https://api.vvhan.com/api/bing?type=sj'  # random Bing wallpaper API to scrape
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
    try:
        # The API redirects to the actual image, so the final request URL is the image link
        url = get(api, headers=headers, timeout=2).request.url
    except RequestException:
        return None  # request failed or timed out
    if url:
        return url


def init():
    i = 0   # consecutive duplicates seen
    ii = 0  # new URLs collected this run
    while i < 30 and ii < 3000:  # stop after 30 consecutive duplicates or 3000 new images in this run
        url = GetPic()
        if url is None:  # request failed, wait and retry
            time.sleep(1)
            continue
        if url not in urllist:
            print(url)
            urllist.append(url)
            with open('url.txt', 'a+') as f:
                f.write(url + '\n')
            ii = ii + 1
            i = 0
        else:
            i = i + 1
            print("Duplicate count: " + str(i) + ", duplicate URL: " + url)
        time.sleep(1)


init()