# birdscrape/birdscrape.py

import shutil
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

def downloadImage(url, filename, timeout=30):
    """Download the image at *url* and write it to the local file *filename*.

    The response body is streamed to disk rather than loaded into memory.
    A status message is printed for both success and failure; nothing is
    returned.

    Args:
        url: Address of the image to fetch.
        filename: Path of the local file, opened in binary write mode (an
            existing file is overwritten). It is only opened once the server
            has answered with status 200.
        timeout: Seconds to wait for the server before giving up; passed
            through to ``requests.get``.
    """
    try:
        # Using the response as a context manager releases the connection
        # even if writing the file fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as r:
            # Check if the image was retrieved successfully
            if r.status_code != 200:
                print('Image Couldn\'t be retreived')
                return

            # Ask the raw stream to undo any gzip/deflate content encoding so
            # the bytes written to disk are the image itself.
            r.raw.decode_content = True

            # Open a local file with wb ( write binary ) permission.
            with open(filename, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
    except requests.RequestException as err:
        # Connection problems and timeouts are reported like any other
        # failed download instead of aborting the caller's batch.
        print('Image Couldn\'t be retreived', err)
        return

    print('Image sucessfully Downloaded: ', filename)

# Page whose <ul class="level_1"> list links to the individual bird pages.
URL = "http://tirill.de/illustrierte-voegel-liste.html"
# A timeout keeps the script from hanging forever on an unresponsive server.
html_text = requests.get(URL, timeout=30)
soup = BeautifulSoup(html_text.text, 'html.parser')

# Dictionary for all bird URLs
bird_dict_urls = {"URL": []}

birdlist = soup.find('ul', class_="level_1")

print("Getting list of all illustrated birds on: " + URL)

# find() returns None when the page has no matching <ul>; treat that as an
# empty list instead of crashing on the attribute access.
bird_items = birdlist.find_all('li') if birdlist is not None else []

for bird in bird_items:
    # Skip list entries that do not contain a link.
    bird_link = bird.find('a', href=True)
    if bird_link is None:
        continue
    # urljoin gives the same result as plain concatenation for relative
    # hrefs, but also copes with root-relative ("/x") and absolute hrefs.
    bird_url = urljoin("http://tirill.de/", bird_link['href'])
    bird_dict_urls["URL"].append(bird_url)

print(str(len(bird_dict_urls["URL"])) + " Vochels gefunden.")

# Visit every collected bird page and download each image found in a
# <figure class="image_container"> element.
for i in bird_dict_urls["URL"]:
    ## SINGLE BIRD INFO + DOWNLOAD
    try:
        html_bird = requests.get(i, timeout=30)
    except requests.RequestException as err:
        # One unreachable page should not abort the whole run.
        print("Could not fetch bird page: " + i, err)
        continue
    bird_soup = BeautifulSoup(html_bird.text, 'html.parser')

    bird_images_container = bird_soup.find_all('figure', class_="image_container")

    for image in bird_images_container:
        # A figure without an <img src=...> has nothing to download.
        img_tag = image.find('img', src=True)
        if img_tag is None:
            continue

        # urljoin matches plain concatenation for relative paths and also
        # handles root-relative or absolute src values correctly.
        full_path_img = urljoin("https://tirill.de/", img_tag['src'])

        # Use only the last path segment as the local name, so a query string
        # or fragment never ends up in the filename.
        filename = urlsplit(full_path_img).path.split("/")[-1]
        if not filename:
            continue

        print(full_path_img)
        print(filename)
        downloadImage(full_path_img, filename)