功能:

根据图片 urls 和 filenames 采用多进程下载.

实现:

#!/usr/bin/python3
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import io
import os
import urllib3
import multiprocessing

from PIL import Image
from tqdm import tqdm
from urllib3.util import Retry

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def download_image(filenames_and_urls):
    """
    Download one image and store it as a JPEG at 90% quality.

    If the target file already exists the download is skipped.

    Args:
        filenames_and_urls: A ``(fname, url)`` pair; ``fname`` is the
            destination path and ``url`` is the image location.

    Returns:
        None.

    Raises:
        OSError: If the server answers with a non-2xx HTTP status.
        Errors raised by urllib3 (network failure) or PIL (undecodable
        payload) propagate unchanged.
    """
    fname, url = filenames_and_urls
    if os.path.exists(fname):
        return

    # The context manager releases the pooled connections on exit.
    # The body is fully read by request() (preload is the default), so it
    # is safe to use the payload after the pool has been cleared.
    with urllib3.PoolManager(
        retries=Retry(connect=3, read=2, redirect=3)
    ) as http:
        response = http.request("GET", url)
        status, payload = response.status, response.data

    # Fail with a clear message instead of handing an error page to PIL.
    if not 200 <= status < 300:
        raise OSError("HTTP {} while downloading {}".format(status, url))

    # Save to a per-process temporary name and rename afterwards, so an
    # interrupted worker never leaves a truncated file at ``fname`` that a
    # later run would skip as "already downloaded".
    tmp_name = "{}.{}.part".format(fname, os.getpid())
    try:
        with Image.open(io.BytesIO(payload)) as image:
            image.convert("RGB").save(tmp_name, format='JPEG', quality=90)
        os.replace(tmp_name, fname)
    finally:
        # Only present if saving or renaming failed.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
    

if __name__ == '__main__':
    print("[INFO]多进程下载图片")

    # Load the destination filenames and the image urls here.
    # NOTE: placeholders — fill both lists (same length, same order)
    # before running the script.
    filenames = []
    urls = []
    # download_image expects one (fname, url) pair per task.
    filenames_urls = list(zip(filenames, urls))

    # Download the images with a pool of worker processes; the context
    # manager shuts the pool down once every task has been consumed.
    with multiprocessing.Pool(processes=12) as pool:
        with tqdm(total=len(filenames_urls)) as progress_bar:
            for _ in pool.imap_unordered(download_image, filenames_urls):
                progress_bar.update(1)

    print("[INFO]Done.")
Last modification: September 8th, 2021 at 10:18 am