62 lines
1.8 KiB
Python
Executable File
62 lines
1.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Directory that receives the per-season legal-card files. Filenames are
# appended to this value by plain string concatenation, so it must end with a
# path separator.
SEASON_FILES_DIRECTORY = "/home/viciouscirce/dox/project_mathemagicians/penny_dreadful/legality/legality_data/"
|
|
|
|
def download_seasons_html(timeout=30):
    """Fetch the Penny Dreadful seasons page and parse it.

    Args:
        timeout: Seconds to wait on the server before giving up. requests
            applies no timeout by default, so without one a stalled
            connection would block the script forever.

    Returns:
        A BeautifulSoup tree of the page, built with the "html.parser"
        backend.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    page = requests.get("https://pennydreadfulmagic.com/seasons/", timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page that
    # contains no season sections.
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")
|
|
|
|
def is_season(tag):
    """Tell whether *tag* is a <section> whose "class" attribute contains "stats".

    Intended as a filter callable for BeautifulSoup's find_all().
    """
    if tag.name != "section":
        return False
    attributes = tag.attrs
    if "class" not in attributes:
        return False
    return "stats" in attributes["class"]
|
|
|
|
def get_season_number(tag):
    """Extract the season number from a season element's <h2> heading.

    Args:
        tag: A season element. The first <h2> found under it must hold a
            single text string containing the season number.

    Returns:
        The first run of ASCII digits in the heading text, as a string.
        The run may be any length, so seasons past 99 are not truncated.

    Raises:
        ValueError: If there is no <h2>, the heading has no single text
            string, or the text contains no digits.
    """
    header = tag.findChild("h2")
    header_text = header.string if header is not None else None
    if header_text is None:
        raise ValueError("season section has no <h2> heading with plain text")

    # "+" rather than "{1,2}": a two-digit cap would turn "Season 123" into
    # "12" and silently collide with the real season 12.
    match = re.search(r"[0-9]+", header_text)
    if match is None:
        raise ValueError(
            "no season number found in heading {!r}".format(header_text)
        )
    return match.group(0)
|
|
|
|
def get_season_cards_url(tag, legal_cards_index=8):
    """Return the plain-HTTP URL of a season's legal-cards list.

    Args:
        tag: A season element containing <li> items.
        legal_cards_index: Position, among all <li> elements found under
            *tag*, of the item whose link points at the legal-cards list.
            Defaults to 8, the position this script has always used.
            NOTE(review): this depends on the page layout; confirm it
            still matches the live site.

    Returns:
        The href of the first <a> inside the selected item, with a leading
        "https://" scheme rewritten to "http://". Other URLs are returned
        unchanged.

    Raises:
        IndexError: If *tag* has too few <li> items.
    """
    list_items = tag.findChildren("li")
    legal_cards_item = list_items[legal_cards_index]
    season_cards_url = legal_cards_item.a.attrs["href"]

    # Rewrite the scheme only. A blanket str.replace("https", "http") would
    # also corrupt any "https" appearing in the host, path or query string.
    # NOTE(review): the reason for downgrading to http is not visible here.
    secure_prefix = "https://"
    if season_cards_url.startswith(secure_prefix):
        season_cards_url = "http://" + season_cards_url[len(secure_prefix):]
    return season_cards_url
|
|
|
|
def get_seasons(soup):
    """Map every season found in *soup* to the URL of its legal-cards list.

    Season elements are located with the is_season() filter. The result is
    a dict keyed by season number (as returned by get_season_number()) whose
    values come from get_season_cards_url().
    """
    return {
        get_season_number(section): get_season_cards_url(section)
        for section in soup.find_all(is_season)
    }
|
|
|
|
def download_season_lists(seasons, timeout=30):
    """Download the card list behind every season URL.

    Args:
        seasons: Dict mapping season number to a URL. It is updated IN
            PLACE: each URL value is overwritten with the text downloaded
            from it.
        timeout: Seconds to wait on each request before giving up.
            requests applies no timeout by default.

    Returns:
        The same *seasons* dict object, now mapping season number to the
        response body text.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status. Seasons processed before the failure
            already hold text; the remaining ones still hold URLs.
    """
    # Only the values of existing keys are rebound, so the dict's size never
    # changes while it is being iterated.
    for season_number, season_url in seasons.items():
        cards_list = requests.get(season_url, timeout=timeout)
        # Without this check the body of an HTTP error page would be stored
        # and later written out as if it were a card list.
        cards_list.raise_for_status()
        seasons[season_number] = cards_list.text
    return seasons
|
|
|
|
def write_season_lists(responses):
    """Write each season's card list to its own text file.

    Files are named "season_<number>_legal_cards.txt" and are created in
    SEASON_FILES_DIRECTORY; an existing file of the same name is
    overwritten.

    Args:
        responses: Dict mapping season number (a string) to the card-list
            text to write.

    Raises:
        OSError: If a file cannot be opened or written.
    """
    for season_number, cards_list in responses.items():
        filename = SEASON_FILES_DIRECTORY + "season_" + season_number + "_legal_cards.txt"
        # Pin the encoding: the locale default differs between machines and
        # may be unable to represent non-ASCII characters in the text.
        with open(filename, "w", encoding="utf-8") as cards_file:
            cards_file.write(cards_list)
|
|
|
|
def main():
    """Run the whole pipeline: scrape season URLs, download the lists, save them."""
    season_urls = get_seasons(download_seasons_html())
    write_season_lists(download_season_lists(season_urls))
|
|
|
|
# Run the scraper only when executed as a script, so importing this module
# triggers no network or filesystem activity.
if __name__ == "__main__":
    main()
|