파이썬 이미지 크롤링



파이썬, 이미지 크롤링

import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image
# Upper bound (in pixels) on the height of one stitched output image.
# NOTE(review): presumably chosen to stay below the 65535-pixel JPEG
# dimension limit -- confirm before changing.
MAXPIXELS = 65500


def _save_stitched(fnames, widths, heights, index):
    """Stack image files vertically on a white canvas and save the result.

    The canvas is as wide as the widest image and as tall as the sum of all
    heights; every image is pasted left-aligned, top to bottom, in order.
    The output is written to ``all<index>.jpg`` in the working directory.

    Args:
        fnames: Paths of the image files to stitch, in top-to-bottom order.
        widths: Pixel width of each image, parallel to ``fnames``.
        heights: Pixel height of each image, parallel to ``fnames``.
        index: Number used in the output file name.
    """
    with Image.new('RGB', (max(widths), sum(heights)), (255, 255, 255)) as canvas:
        top = 0
        for fname, part_height in zip(fnames, heights):
            with Image.open(fname) as part:
                canvas.paste(part, box=(0, top))
            top += part_height
        savefilename = 'all{}.jpg'.format(index)
        canvas.save(savefilename)
    print('{} is created'.format(savefilename))


eq_url = '가져올 이미지의 referer가 될 주소'
headers = {
    'Referer' : eq_url
}
response = requests.get(eq_url)
if response.ok:
    soup = BeautifulSoup(response.text, 'lxml')
    img_rfs = soup.select('이미지를 구별할 태그')

    # State of the batch currently being collected for one output image.
    imgfnames = []
    imgwidths = []
    imgheights = []
    canvasheight = 0
    num = 0

    for img_rf in img_rfs:
        src = img_rf.get('src')
        if not src:
            # Tag matched the selector but carries no image address.
            continue
        # Resolve relative addresses against the page URL so requests gets
        # an absolute URL.
        img_url = urljoin(eq_url, src)
        img_fname = os.path.basename(img_url)
        if not img_fname:
            # No file name to store the download under.
            continue

        # Download the image (sending the page as Referer) and keep a local
        # copy; the copy is re-opened later when the batch is stitched.
        img_data = requests.get(img_url, headers=headers).content
        with open(img_fname, 'wb') as img:
            img.write(img_data)
        with Image.open(img_fname) as im:
            im_width, im_height = im.size

        # Flush the images collected so far when adding this one would push
        # the canvas past the height limit.  An empty batch is never flushed,
        # so an oversized first image does not produce a zero-height canvas.
        if imgfnames and canvasheight + im_height > MAXPIXELS:
            _save_stitched(imgfnames, imgwidths, imgheights, num)
            num += 1
            imgfnames = []
            imgwidths = []
            imgheights = []
            canvasheight = 0

        # The current image always joins the (possibly fresh) batch, and its
        # width and height are tracked with it so it is neither cropped nor
        # dropped when it is the first image after a flush.
        imgfnames.append(img_fname)
        imgwidths.append(im_width)
        imgheights.append(im_height)
        canvasheight += im_height

    # Write out whatever is left after the last image.
    if imgfnames:
        _save_stitched(imgfnames, imgwidths, imgheights, num)

이번엔 여기까지 : )