Initial commit
This commit is contained in:
56
birdscrape.py
Normal file
56
birdscrape.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import shutil
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
|
||||
|
||||
def downloadImage(url, filename, timeout=30):
    """Download the image at *url* and write it to the local file *filename*.

    A status line is printed for both outcomes: success (HTTP 200) or
    failure (any other status code). Nothing is written to disk on failure.

    Args:
        url: Address of the image to fetch.
        filename: Path of the local file to create or overwrite.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled server
            would block the caller indefinitely.

    Raises:
        requests.exceptions.RequestException: Network-level failures
            (connection errors, timeouts) are not caught here.
        OSError: If *filename* cannot be opened for writing.
    """
    # stream=True defers the body download so it can be copied to disk in
    # chunks. The ``with`` block guarantees the connection is released
    # again, including on the non-200 path where the body is never read.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Check if the image was retrieved successfully
        if r.status_code == 200:
            # Have the raw stream undo any Content-Encoding (gzip/deflate)
            # while it is read, so the file holds the decoded image bytes.
            r.raw.decode_content = True

            # Open a local file with wb (write binary) permission.
            with open(filename, 'wb') as f:
                shutil.copyfileobj(r.raw, f)

            print('Image sucessfully Downloaded: ', filename)
        else:
            print('Image Couldn\'t be retreived')
|
||||
|
||||
# Index page that links to one detail page per illustrated bird.
URL = "http://tirill.de/illustrierte-voegel-liste.html"
# A timeout keeps the script from hanging forever on a stalled server.
html_text = requests.get(URL, timeout=30)
soup = BeautifulSoup(html_text.text, 'html.parser')

# Dictionary for all bird URLs
bird_dict_urls = {"URL": []}

# The <ul class="level_1"> element is searched for the per-bird links.
birdlist = soup.find('ul', class_="level_1")

print("Getting list of all illustrated birds on: " + URL)

if birdlist is None:
    # soup.find() returns None when nothing matches; stop with a clear
    # message instead of failing later with an AttributeError.
    raise SystemExit("No <ul class=\"level_1\"> element found on " + URL)

for bird in birdlist.find_all('li'):
    link = bird.a
    # Skip list items that carry no usable link.
    if link is None or not link.get('href'):
        continue
    # urljoin yields the same result as plain concatenation for relative
    # hrefs, and additionally copes with absolute or root-relative ones.
    bird_url = urljoin("http://tirill.de/", link['href'])
    bird_dict_urls["URL"].append(bird_url)

print(str(len(bird_dict_urls["URL"])) + " Vochels gefunden.")

for i in bird_dict_urls["URL"]:
    # Fetch a single bird page and download every image found on it.
    html_bird = requests.get(i, timeout=30)
    bird_soup = BeautifulSoup(html_bird.text, 'html.parser')

    bird_images_container = bird_soup.find_all('figure', class_="image_container")

    for image in bird_images_container:
        img = image.img
        # Skip figures that contain no <img> tag or no src attribute.
        if img is None or not img.get('src'):
            continue
        full_path_img = urljoin("https://tirill.de/", img['src'])
        # The last path segment of the image URL is the local file name.
        filename = full_path_img.split("/")[-1]
        print(full_path_img)
        print(filename)
        downloadImage(full_path_img, filename)
|
||||
Reference in New Issue
Block a user