1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
''' 请先安装好相应的 Python 模块 pip install requests bs4 lxml '''
import requests from bs4 import BeautifulSoup import lxml import re from glob import glob import os
headers = { 'cookie': '', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36', }
def get_pic_list(num): url = 'https://www.pixiv.net/ajax/user/%s/profile/all?lang=zh' % num r = requests.get(url, headers = headers) pic_list = re.findall('"(\d+?)":null', r.text) return pic_list
def get_file_list(): temp = glob('*') for item in temp: if '.txt' in item: temp.remove(item) file_list = [name.split('.')[0] for name in temp] return file_list
def download(num): pic_list = get_pic_list(num) file_list = get_file_list() for pic in pic_list: if pic not in file_list: url = 'https://www.pixiv.net/artworks/%s' % pic r = requests.get(url, headers = headers) result = re.search('"original":"(.+?)"', r.text) if result: download_url = result.group(1) suffix = download_url.rsplit('/', 1)[-1].rsplit('.', 1)[-1] h = { 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'zh-CN,zh;q=0.9', 'cache-control': 'max-age=0', 'dnt': '1', 'if-modified-since': 'Mon, 09 Sep 2019 23:00:01 GMT', 'referer': 'https://www.pixiv.net/artworks/76712185', 'sec-fetch-dest': 'document', 'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'none', 'sec-fetch-user': '?1', 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36', } s = requests.get(download_url, headers = h) with open(pic + '.' + suffix, 'wb') as f: f.write(s.content) print(pic)
def main(): filenames = os.listdir() filenames.remove('pixiv.py') cwd = os.getcwd() for filename in filenames: print('切换到%s' % filename) os.chdir(cwd + '\\' + filename) num = glob("*.txt")[0] num = re.sub('.txt', '', num) download(num)
if __name__ == '__main__': main()
|