Python - 多进程下载multiprocessing

AIHGF

2019 年 05 月 16 日

1932 次浏览

暂无评论

1302字数

Python

功能：

根据图片的 URL 列表（urls）和对应的保存文件名列表（filenames），使用 multiprocessing 进程池并行下载图片.

实现：

#!/usr/bin/python3
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import io
import os
import urllib3
import multiprocessing

from PIL import Image
from tqdm import tqdm
from urllib3.util import Retry

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def download_image(filenames_and_urls):
    """
    Download one image and store it as a JPEG at 90% quality.

    The download is skipped when the target file already exists.

    The image is first written to a temporary file next to the target and
    only moved into place once the save has completed. A failed or
    interrupted save therefore never leaves a truncated file at ``fname``
    that later runs would mistake for a finished download and skip.

    Args:
        filenames_and_urls: A ``(fname, url)`` pair; ``fname`` is the output
            path and ``url`` is the address the image is fetched from.

    Returns:
        None.
    """
    fname, url = filenames_and_urls
    if os.path.exists(fname):
        return

    http = urllib3.PoolManager(retries=Retry(connect=3, read=2, redirect=3))
    response = http.request("GET", url)
    image = Image.open(io.BytesIO(response.data))
    # JPEG has no alpha channel or palette mode, so normalise to RGB first.
    image_rgb = image.convert("RGB")

    # The PID keeps the temporary name unique across worker processes.
    tmp_name = "{}.part.{}".format(fname, os.getpid())
    try:
        image_rgb.save(tmp_name, format='JPEG', quality=90)
        os.replace(tmp_name, fname)
    finally:
        # Only present here when saving or renaming failed.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
    

if __name__ == '__main__':
    print("[INFO]多进程下载图片")

    # Fill in the output file names and the matching image URLs here.
    # Both lists must have the same order and length.
    filenames = []
    urls = []
    # download_image expects one (fname, url) pair per task.
    filenames_urls = list(zip(filenames, urls))

    # Download the images; the with-block releases the worker processes
    # once every result has been consumed.
    with multiprocessing.Pool(processes=12) as pool:
        with tqdm(total=len(filenames_urls)) as progress_bar:
            # Results arrive in completion order; only the count matters.
            for _ in pool.imap_unordered(download_image, filenames_urls):
                progress_bar.update(1)

    print("[INFO]Done.")

发表评论 取消回复 使用cookie技术保留您的个人信息以便您下次快速评论，继续评论表示您已同意该条款

Python - 多进程下载multiprocessing

发表评论取消回复
使用cookie技术保留您的个人信息以便您下次快速评论，继续评论表示您已同意该条款