Initial commit

This commit is contained in:
The Magician 2023-12-11 18:30:58 +00:00
commit 63e4cef7d7
4 changed files with 238 additions and 0 deletions

View File

@ -0,0 +1,61 @@
#!/usr/bin/env python3
import re
import requests
from bs4 import BeautifulSoup
# Directory the per-season card lists are written to. write_season_lists()
# builds each output path by plain string concatenation, so this value must
# end with a path separator.
SEASON_FILES_DIRECTORY = "/home/viciouscirce/dox/project_mathemagicians/penny_dreadful/legality/legality_data/"
def download_seasons_html():
    """Fetch the Penny Dreadful seasons page and parse it.

    Returns:
        BeautifulSoup: the page body parsed with the built-in "html.parser".

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    page = requests.get("https://pennydreadfulmagic.com/seasons/")
    # Fail loudly on an error status instead of parsing an error page that
    # contains no seasons and silently producing an empty result.
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")
def is_season(tag):
    """Tell whether *tag* is a ``<section>`` element carrying the ``stats`` class.

    Args:
        tag: object exposing ``name`` and a dict-like ``attrs``.

    Returns:
        bool: True only for a "section" tag whose "class" attribute
        contains "stats"; False when the attribute is absent.
    """
    if tag.name != "section":
        return False
    # A single .get() replaces the separate key test and lookup; the empty
    # tuple default makes a missing "class" attribute read as "no classes".
    return "stats" in tag.attrs.get("class", ())
def get_season_number(tag):
    """Extract the season number from the first ``<h2>`` inside *tag*.

    Args:
        tag: element whose first ``<h2>`` holds text such as
            "Season 29 (ONE)".

    Returns:
        str: the first run of ASCII digits in the heading, e.g. "29".

    Raises:
        ValueError: if *tag* has no ``<h2>`` with a single text string, or
            the heading contains no digits.
    """
    # find() is the current name of the legacy findChild() alias.
    header = tag.find("h2")
    header_text = header.string if header is not None else None
    if header_text is None:
        raise ValueError("season section has no <h2> heading text")
    # "+" rather than "{1,2}" so a three-digit season is not truncated to its
    # first two digits (which would collide with an earlier season).
    match = re.search(r"[0-9]+", header_text)
    if match is None:
        raise ValueError("no season number in heading {!r}".format(header_text))
    return match.group(0)
def get_season_cards_url(tag):
    """Return the legal-cards list URL of a season, downgraded to plain HTTP.

    Args:
        tag: season element; the link is read from the ``<a>`` inside its
            ninth ``<li>``.

    Returns:
        str: the link's ``href``; a leading "https://" is rewritten to
        "http://", any other value is returned unchanged.

    Raises:
        IndexError: if *tag* contains fewer than nine ``<li>`` elements.
    """
    # find_all() is the current name of the legacy findChildren() alias.
    list_items = tag.find_all("li")
    # The position is hard-coded: index 8 is where the "Legal Cards" link sits
    # in the page layout the tests model.
    legal_cards_item = list_items[8]
    season_cards_url = legal_cards_item.a.attrs["href"]
    # NOTE(review): the downgrade to http is deliberate (the tests require
    # it); the reason is not visible in this file.
    # Only the scheme is rewritten, so an "https" occurring later in the URL
    # (path, query string) is left intact.
    secure_scheme = "https://"
    if season_cards_url.startswith(secure_scheme):
        season_cards_url = "http://" + season_cards_url[len(secure_scheme):]
    return season_cards_url
def get_seasons(soup):
    """Map every season found in *soup* to the URL of its legal-cards list.

    Args:
        soup: parsed seasons page; searched with ``find_all(is_season)``.

    Returns:
        dict: season number (as returned by get_season_number) -> cards URL
        (as returned by get_season_cards_url). A later section with the same
        number replaces an earlier one.
    """
    return {
        get_season_number(section): get_season_cards_url(section)
        for section in soup.find_all(is_season)
    }
def download_season_lists(seasons):
    """Download the card list of every season.

    Args:
        seasons: mapping of season number -> card-list URL.

    Returns:
        dict: a new mapping of season number -> response body text. The
        *seasons* argument is left untouched.

    Raises:
        requests.HTTPError: if any download answers with a 4xx/5xx status.
    """
    # Collect into a fresh dict rather than overwriting the caller's URLs
    # while iterating over them.
    card_lists = {}
    for season_number, season_url in seasons.items():
        response = requests.get(season_url)
        # Do not store an error page as if it were a card list.
        response.raise_for_status()
        card_lists[season_number] = response.text
    return card_lists
def write_season_lists(responses):
    """Write each season's card list to its own text file.

    Files are named ``season_<number>_legal_cards.txt`` and placed in
    SEASON_FILES_DIRECTORY; an existing file is overwritten.

    Args:
        responses: mapping of season number (str) -> card list text.
    """
    for season_number, cards_list in responses.items():
        filename = SEASON_FILES_DIRECTORY + "season_" + season_number + "_legal_cards.txt"
        # Explicit encoding: without it the output depends on the locale and
        # non-ASCII text can fail to encode.
        with open(filename, "w", encoding="utf-8") as cards_file:
            cards_file.write(cards_list)
def main():
    """Run the whole pipeline: fetch the seasons page, locate each season's
    card-list URL, download the lists and write them to disk."""
    seasons_page = download_seasons_html()
    season_urls = get_seasons(seasons_page)
    season_card_lists = download_season_lists(season_urls)
    write_season_lists(season_card_lists)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,2 @@
#!/bin/bash
# Re-run the test suite whenever downloader.py or test_downloader.py changes.
# entr reads the names of the files to watch from stdin:
#   -c  clear the screen before each run
#   -d  also track the files' directory and exit when a new file is added
# python3 (not bare "python") matches the shebang of both Python files.
ls -1 downloader.py test_downloader.py | entr -cd python3 test_downloader.py

View File

@ -0,0 +1,175 @@
#!/usr/bin/env python3
import unittest
from unittest.mock import Mock, call, mock_open
import requests
from bs4 import BeautifulSoup
import downloader
def get_mock_cards_response(text):
    """Build a stand-in for an HTTP response whose ``text`` attribute is *text*."""
    # Keyword arguments to Mock() are set as attributes on the new mock.
    return Mock(text=text)
class DownloaderTests(unittest.TestCase):
    """Unit tests for the downloader module.

    Every replacement of ``requests.get`` or of a downloader function is made
    with ``unittest.mock.patch.object`` so that it is undone when the ``with``
    block ends and cannot leak into other tests.
    """

    @staticmethod
    def _season_section_html(cards_url, season_number="23"):
        """Return the markup of one season section.

        The "Legal Cards" link is the ninth ``<li>``, preceded by eight empty
        items and followed by three.
        """
        empty_item = "<li></li>"
        return (
            '<div class="content content-season"><section class="stats">'
            "<h2>Season " + season_number + " (VOW)</h2><p></p><ul>"
            + empty_item * 8
            + '<li><a href="' + cards_url + '">14,990 Legal Cards</a></li>'
            + empty_item * 3
            + "</ul></section></div>"
        )

    def test_download_seasons_html_calls_requests_get_with_correct_url(self):
        with unittest.mock.patch.object(requests, "get") as mock_get:
            mock_get.return_value.text = ''
            downloader.download_seasons_html()
        mock_get.assert_called_once_with("https://pennydreadfulmagic.com/seasons/")

    def test_download_seasons_html_returns_soup_of_document(self):
        mock_html = '<html></html>'
        expected = BeautifulSoup(mock_html, "html.parser")
        with unittest.mock.patch.object(requests, "get") as mock_get:
            mock_get.return_value.text = mock_html
            result = downloader.download_seasons_html()
        self.assertEqual(expected, result)

    def test_is_season_returns_true_when_tag_is_season(self):
        tag = Mock()
        tag.name = "section"
        # Beautiful Soup exposes the multi-valued "class" attribute as a list.
        tag.attrs = {"class": ["stats"]}
        self.assertTrue(downloader.is_season(tag))

    def test_is_season_returns_false_when_tag_name_is_not_div(self):
        # The tag is a <p>, not the <section> that is_season looks for.
        tag = Mock()
        tag.name = "p"
        tag.attrs = {"class": ["stats"]}
        self.assertFalse(downloader.is_season(tag))

    def test_is_season_returns_false_when_tag_class_is_not_content_season(self):
        # The tag lacks the "stats" class that is_season looks for.
        tag = Mock()
        tag.name = "section"
        tag.attrs = {"class": ["not-a-season"]}
        self.assertFalse(downloader.is_season(tag))

    def test_is_season_returns_false_when_tag_has_no_class(self):
        tag = Mock()
        tag.name = "section"
        tag.attrs = {}
        self.assertFalse(downloader.is_season(tag))

    def test_get_season_number_returns_correct_number(self):
        expected_season_number = "29"
        mock_soup = BeautifulSoup("<div><h2>Season " + expected_season_number + " (ONE)</h2></div>", "html.parser")
        season_number = downloader.get_season_number(mock_soup.div)
        self.assertEqual(season_number, expected_season_number)

    def test_get_season_number_returns_correct_number_when_single_digit_season(self):
        expected_season_number = "7"
        mock_soup = BeautifulSoup("<div><h2>Season " + expected_season_number + " (RIX)</h2></div>", "html.parser")
        season_number = downloader.get_season_number(mock_soup.div)
        self.assertEqual(season_number, expected_season_number)

    def test_get_season_cards_url_returns_correct_url_from_list(self):
        expected_url = "TEST_SEASON_CARDS_URL"
        mock_soup = BeautifulSoup(self._season_section_html(expected_url), "html.parser")
        season_url = downloader.get_season_cards_url(mock_soup.div)
        self.assertEqual(season_url, expected_url)

    def test_get_season_cards_url_returns_http_when_url_is_https(self):
        expected_url = "http://pdmtgo.com/VOW_legal_cards.txt"
        source_url = "https://pdmtgo.com/VOW_legal_cards.txt"
        mock_soup = BeautifulSoup(self._season_section_html(source_url), "html.parser")
        season_url = downloader.get_season_cards_url(mock_soup.div)
        self.assertEqual(season_url, expected_url)

    def test_get_seasons_returns_dictionary_of_season_to_url(self):
        expected_season_number = "23"
        expected_url = "TEST_SEASON_CARDS_URL"
        page_html = (
            "<html><body><main>"
            + self._season_section_html(expected_url, expected_season_number)
            + "</main></body></html>"
        )
        mock_soup = BeautifulSoup(page_html, "html.parser")
        seasons_list = downloader.get_seasons(mock_soup)
        self.assertEqual(seasons_list, {expected_season_number: expected_url})

    def test_download_season_lists_pulls_file_from_web(self):
        expected_season_number = "23"
        expected_url = "TEST_SEASON_CARDS_URL"
        response = get_mock_cards_response("CARDS CARDS MORE CARDS")
        with unittest.mock.patch.object(requests, "get", return_value=response) as mock_get:
            downloader.download_season_lists({expected_season_number: expected_url})
        mock_get.assert_called_once_with(expected_url)

    def test_download_season_lists_gets_all_files(self):
        test_url_1 = "SEASON_23_URL"
        test_url_2 = "SEASON_42_URL"
        test_seasons = {
            "23": test_url_1,
            "42": test_url_2,
        }
        response = get_mock_cards_response("CARDS CARDS MORE CARDS")
        with unittest.mock.patch.object(requests, "get", return_value=response) as mock_get:
            downloader.download_season_lists(test_seasons)
        # call_args_list holds only the direct calls to requests.get, so the
        # check does not depend on what is done with each response.
        self.assertEqual(mock_get.call_args_list, [call(test_url_1), call(test_url_2)])

    def test_write_season_lists_writes_response_text_to_disk(self):
        test_cards = "CARDS CARDS MORE CARDS"
        test_responses = {"23": test_cards}
        open_mock = mock_open()
        with unittest.mock.patch("builtins.open", open_mock):
            downloader.write_season_lists(test_responses)
        # return_value is the file handle mock_open hands out; reading it does
        # not record an extra open() call.
        handle = open_mock.return_value
        handle.write.assert_called_once_with(test_cards)

    def test_main_runs_program(self):
        mock_soup = BeautifulSoup("<html></html>", "html.parser")
        mock_seasons = {"23": "URL"}
        mock_lists = {"23": "CARDS UPON CARDS"}
        # patch.object puts the real functions back on exit. Assigning Mocks
        # straight onto the module would leave them replaced for every test
        # that runs later (unittest orders tests by name, so
        # test_write_season_lists_... runs after this one and would end up
        # calling the Mock instead of the real write_season_lists).
        patch_object = unittest.mock.patch.object
        with patch_object(downloader, "download_seasons_html", return_value=mock_soup) as download_html, \
                patch_object(downloader, "get_seasons", return_value=mock_seasons) as get_seasons, \
                patch_object(downloader, "download_season_lists", return_value=mock_lists) as download_lists, \
                patch_object(downloader, "write_season_lists") as write_lists:
            downloader.main()
        download_html.assert_called_once()
        get_seasons.assert_called_once_with(mock_soup)
        download_lists.assert_called_once_with(mock_seasons)
        write_lists.assert_called_once_with(mock_lists)


if __name__ == '__main__':
    unittest.main()