【下拉加載】百度搜索圖片采集

2024年2月6日 21点热度 0人点赞

記得在請求中加上 headers(至少要有 User-Agent),否則請求會被認為是爬蟲而報錯。

import  requests
import  json
import  random
import  string
import os
# Endpoint and paging settings for the Baidu image-search JSON API.
SEARCH_URL = "https://image.baidu.com/search/acjson"
PAGE_SIZE = 30        # sent as `rn`; also the step used for the `pn` offset
MAX_RESULTS = 100     # stop requesting new pages once the offset reaches this
REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging forever
# Placeholder output directory -- replace with a real path before running.
SAVE_DIR = r"C:\xx\xxx\xxxxx\xxxxx\百度搜索圖片"
# Placeholder User-Agent -- replace with a real browser User-Agent string.
HEADERS = {'User-Agent': "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}


def build_search_params(word, offset):
    """Return the query parameters for one page of image-search results.

    Args:
        word: Search keyword entered by the user. It is used for both the
            ``word`` and ``queryWord`` parameters.
        offset: Value sent as ``pn``; the caller advances it by ``PAGE_SIZE``
            per page.

    Returns:
        A dict whose insertion order mirrors the original query string.
        Parameters that were empty in the original URL are kept as empty
        strings so they are still sent.
    """
    return {
        'tn': 'resultjson_com',
        'logid': '10080522294965570219',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'fr': '',
        'word': word,
        # Previously hard-coded to a fixed keyword; it must follow `word`.
        'queryWord': word,
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '',
        'hd': '',
        'latest': '',
        # The pasted URL had "&copy" rendered as the (c) symbol, which
        # corrupted this parameter name.
        'copyright': '',
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'expermode': '',
        'nojc': '',
        'isAsync': '',
        'pn': offset,
        'rn': PAGE_SIZE,
        'gsm': '1e',
        # Bare numeric key copied from the original request; presumably a
        # cache-busting timestamp -- TODO confirm.
        '1706710087306': '',
    }


def download_images(word, max_results=MAX_RESULTS, save_dir=SAVE_DIR):
    """Fetch search-result pages for *word* and save each thumbnail to disk.

    Images are written as ``0.jpg``, ``1.jpg``, ... inside *save_dir*.

    Args:
        word: Search keyword.
        max_results: Paging stops once the offset reaches this value. Whole
            pages are fetched, so slightly more images may be saved.
        save_dir: Directory that receives the image files; created if missing.

    Returns:
        The number of images written.

    Raises:
        requests.HTTPError: If a search-page request returns an error status.
    """
    os.makedirs(save_dir, exist_ok=True)
    saved = 0
    for offset in range(0, max_results, PAGE_SIZE):
        # Passing `params` lets requests URL-encode the keyword, so characters
        # such as "&" or "#" cannot break the query string.
        response = requests.get(
            SEARCH_URL,
            params=build_search_params(word, offset),
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        image_result = json.loads(response.content.decode('utf-8'))
        print(image_result)
        # `or []` tolerates a response without a usable "data" list.
        for record in image_result.get('data') or []:
            image_url = record.get('middleURL')
            if image_url is None:
                # Records without a thumbnail URL are skipped.
                continue
            print('正在下載圖片:', image_url)
            # Send the same headers as the search request.
            image = requests.get(
                image_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            if not image.ok:
                # Do not save an error page under a .jpg name.
                print('下載失敗,已跳過:', image_url)
                continue
            with open(os.path.join(save_dir, f"{saved}.jpg"), 'wb') as f:
                f.write(image.content)
            saved += 1
    return saved


def main():
    """Prompt for a keyword and download the matching images."""
    word = input("請輸入關鍵詞:")
    print(word)
    download_images(word)


if __name__ == "__main__":
    main()