56 lines
1.8 KiB
Python
56 lines
1.8 KiB
Python
import shutil
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
|
|
|
|
def downloadImage(url, filename, timeout=None):
    """Download the image at *url* and save it as *filename*.

    Prints a success message when the server answers with HTTP 200 and a
    failure message for any other status code. Nothing is written to disk
    in the failure case.

    Args:
        url: Address of the image to fetch.
        filename: Local path the image bytes are written to (binary mode).
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            ``None`` (the default) applies no timeout, exactly as before
            this parameter existed.
    """
    # stream=True defers the body download so it can be copied to disk
    # without loading the whole image into memory. The ``with`` block closes
    # the response on every path, so the connection is released even when
    # the status is not 200 and the body is never read.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Check if the image was retrieved successfully.
        if r.status_code != 200:
            print('Image Couldn\'t be retreived')
            return

        # Have the raw stream decode any content encoding (e.g. gzip) while
        # it is read, so the file receives the actual image bytes.
        r.raw.decode_content = True

        # Open a local file with wb (write binary) permission.
        with open(filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    print('Image sucessfully Downloaded: ', filename)
|
|
|
|
# Index page that lists every illustrated bird.
URL = "http://tirill.de/illustrierte-voegel-liste.html"
html_text = requests.get(URL)
# Fail loudly on an HTTP error instead of parsing an error page.
html_text.raise_for_status()
soup = BeautifulSoup(html_text.text, 'html.parser')

# Dictionary for all bird URLs
bird_dict_urls = {"URL": []}

# soup.find returns None when the page contains no matching <ul>.
birdlist = soup.find('ul', class_="level_1")

print("Getting list of all illustrated birds on: " + URL)

bird_items = birdlist.find_all('li') if birdlist is not None else []
for bird in bird_items:
    link = bird.a
    # Skip list entries that carry no usable link instead of crashing.
    if link is None or not link.get('href'):
        continue
    # urljoin also copes with hrefs that are absolute or start with "/".
    bird_url = urljoin("http://tirill.de/", link['href'])
    bird_dict_urls["URL"].append(bird_url)

print(str(len(bird_dict_urls["URL"])) + " Vochels gefunden.")
|
|
|
|
# Visit every collected bird page and download each image it shows.
for i in bird_dict_urls["URL"]:
    # Fetch and parse the detail page of a single bird.
    html_bird = requests.get(i)
    bird_soup = BeautifulSoup(html_bird.text, 'html.parser')

    bird_images_container = bird_soup.find_all('figure', class_="image_container")

    for image in bird_images_container:
        img_tag = image.img
        # Skip figures without an <img src=...> instead of crashing.
        if img_tag is None or not img_tag.get('src'):
            continue

        # urljoin also copes with src values that are absolute or start
        # with "/", where plain concatenation would build a broken URL.
        full_path_img = urljoin("https://tirill.de/", img_tag['src'])

        # Take the name from the URL path only, so a query string or
        # fragment does not end up in the local file name.
        filename = urlsplit(full_path_img).path.split("/")[-1]
        if not filename:
            continue

        print(full_path_img)
        print(filename)
        downloadImage(full_path_img, filename)