TheMathemagicians/penny_dreadful_downloader/downloader/downloader.py

62 lines
1.8 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import re

import requests
from bs4 import BeautifulSoup
# Directory into which write_season_lists() saves the per-season card list
# files.
# NOTE(review): hard-coded to one user's home directory; anyone else running
# the script has to change it.
SEASON_FILES_DIRECTORY = "/home/viciouscirce/dox/project_mathemagicians/penny_dreadful/legality/legality_data/"
def download_seasons_html(timeout=30):
    """Fetch the Penny Dreadful seasons page and parse it.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` document built from the response body using the
        ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    page = requests.get(
        "https://pennydreadfulmagic.com/seasons/", timeout=timeout
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the seasons listing.
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")
def is_season(tag):
    """Tell whether *tag* is a ``<section>`` whose classes include ``stats``.

    Used as the filter predicate when searching the parsed seasons page.

    Args:
        tag: Element exposing ``name`` and an ``attrs`` mapping.

    Returns:
        True for a ``section`` element carrying the ``stats`` class,
        False otherwise.
    """
    if tag.name != "section":
        return False
    # Elements without a class attribute fall back to an empty tuple, which
    # can never contain "stats".
    css_classes = tag.attrs.get("class", ())
    return "stats" in css_classes
def get_season_number(tag):
    """Extract the season number from a season section's ``<h2>`` heading.

    Args:
        tag: Element for one season; its first ``<h2>`` descendant is
            expected to contain the season number.

    Returns:
        The first run of ASCII digits in the heading text, as a string
        (for example ``"12"``).

    Raises:
        ValueError: If the section has no ``<h2>`` heading or the heading
            text contains no digits.
    """
    header = tag.find("h2")
    if header is None:
        raise ValueError("season section has no <h2> heading")
    # get_text() joins every text node under the heading; .string would be
    # None as soon as the heading holds more than one child node.
    header_text = header.get_text()
    # No upper bound on the digit count, so three-digit seasons are not
    # silently truncated to their first two digits.
    match = re.search(r"[0-9]+", header_text)
    if match is None:
        raise ValueError(
            "no season number found in heading {!r}".format(header_text)
        )
    return match.group(0)
def get_season_cards_url(tag, legal_cards_index=8):
    """Return the legal-cards list URL linked from a season section.

    Args:
        tag: Element for one season, containing ``<li>`` items.
        legal_cards_index: Zero-based position, among the section's ``<li>``
            descendants, of the item whose link points at the card list.
            NOTE(review): the default of 8 presumably mirrors the page
            layout the script was written against; confirm it still holds.

    Returns:
        The ``href`` of the selected item's first link, with a leading
        ``https://`` scheme rewritten to ``http://``. Any other URL is
        returned unchanged.

    Raises:
        IndexError: If the section has too few ``<li>`` items.
        AttributeError: If the selected item contains no ``<a>`` element.
        KeyError: If the link has no ``href`` attribute.
    """
    list_items = tag.find_all("li")
    try:
        legal_cards_item = list_items[legal_cards_index]
    except IndexError as err:
        raise IndexError(
            "expected a list item at index {} but the season section has "
            "only {}".format(legal_cards_index, len(list_items))
        ) from err
    season_cards_url = legal_cards_item.a.attrs["href"]
    # Downgrade only the scheme. A blanket str.replace would also rewrite
    # any later "https" occurring in the host, path or query string.
    # NOTE(review): why plain HTTP is wanted is not evident from this file.
    secure_prefix = "https://"
    if season_cards_url.startswith(secure_prefix):
        season_cards_url = "http://" + season_cards_url[len(secure_prefix):]
    return season_cards_url
def get_seasons(soup):
    """Map every season found in *soup* to the URL of its card list.

    Args:
        soup: Parsed seasons page; searched with ``is_season`` as the
            filter.

    Returns:
        A dict keyed by season number (as returned by
        ``get_season_number``) whose values are the URLs returned by
        ``get_season_cards_url``. If two sections yield the same number,
        the later one wins.
    """
    return {
        get_season_number(section): get_season_cards_url(section)
        for section in soup.find_all(is_season)
    }
def download_season_lists(seasons, timeout=30):
    """Download the card list for every season.

    Args:
        seasons: Mapping of season number to the URL of that season's
            card list.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        A new dict mapping each season number to the text of the
        downloaded response. The ``seasons`` mapping passed in is left
        untouched, so the caller keeps its URLs.

    Raises:
        requests.RequestException: If any request fails, times out, or
            returns an HTTP error status.
    """
    card_lists = {}
    for season_number, season_url in seasons.items():
        response = requests.get(season_url, timeout=timeout)
        # An HTTP error page must not be stored as though it were a card
        # list.
        response.raise_for_status()
        card_lists[season_number] = response.text
    return card_lists
def write_season_lists(responses, directory=None):
    """Write each season's card list to its own text file.

    Files are named ``season_<number>_legal_cards.txt``; an existing file
    with the same name is overwritten.

    Args:
        responses: Mapping of season number to the card list text.
        directory: Directory to write into. Defaults to
            ``SEASON_FILES_DIRECTORY``. The directory must already exist.

    Raises:
        OSError: If a file cannot be opened or written.
    """
    if directory is None:
        directory = SEASON_FILES_DIRECTORY
    for season_number, cards_list in responses.items():
        # os.path.join works whether or not the directory ends with a
        # path separator.
        filename = os.path.join(
            directory, "season_{}_legal_cards.txt".format(season_number)
        )
        # Pin the encoding so the output does not depend on the locale's
        # default encoding.
        with open(filename, "w", encoding="utf-8") as cards_file:
            cards_file.write(cards_list)
def main():
    """Run the full pipeline.

    Fetches the seasons page, collects each season's card list URL,
    downloads the lists and writes them to disk.
    """
    season_urls = get_seasons(download_seasons_html())
    write_season_lists(download_season_lists(season_urls))
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()