#!/usr/bin/env python3
"""Download the Penny Dreadful legal-card list of every listed season.

Reconstructed from penny_dreadful_downloader/downloader/downloader.py as added
in commit 63e4cef7d709a5af9bbbeb80fe77a93db7d80ace ("Initial commit").

Pipeline (see ``main``):
    1. fetch the seasons overview page,
    2. find every ``<section>`` whose class list contains ``stats``,
    3. read the season number and the legal-cards URL out of each section,
    4. download each list and write it to ``season_<N>_legal_cards.txt``.
"""

import os
import re

SEASON_FILES_DIRECTORY = "/home/viciouscirce/dox/project_mathemagicians/penny_dreadful/legality/legality_data/"

SEASONS_URL = "https://pennydreadfulmagic.com/seasons/"

# Zero-based position of the <li> that carries the legal-cards link inside a
# season section.  NOTE(review): this mirrors the page layout at the time the
# script was written -- confirm it still holds before relying on it.
LEGAL_CARDS_ITEM_INDEX = 8

# One or more digits, so seasons >= 100 are not truncated to two digits.
_SEASON_NUMBER_RE = re.compile(r"[0-9]+")

_HTTPS_PREFIX = "https://"


def download_seasons_html():
    """Fetch the seasons overview page and return it parsed as BeautifulSoup.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # Imported here (not at module level) so the pure parsing helpers below
    # stay importable without the third-party packages installed.
    import requests
    from bs4 import BeautifulSoup

    page = requests.get(SEASONS_URL)
    # Fail loudly instead of parsing an error page as if it were the listing.
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")


def is_season(tag):
    """Return True when *tag* is a ``section`` whose class contains ``stats``."""
    return tag.name == "section" and "stats" in tag.attrs.get("class", ())


def get_season_number(tag):
    """Return the season number found in the ``<h2>`` of *tag*, as a string.

    The first run of digits in the header text is used, e.g. ``"Season 29
    (ONE)"`` yields ``"29"``.

    Raises:
        ValueError: if *tag* has no ``<h2>`` or the header contains no digits.
    """
    header = tag.find("h2")
    header_text = header.get_text() if header is not None else ""
    match = _SEASON_NUMBER_RE.search(header_text)
    if match is None:
        raise ValueError(f"no season number found in header {header_text!r}")
    return match.group(0)


def get_season_cards_url(tag):
    """Return the legal-cards URL of the season section *tag*.

    The link is taken from the ``<li>`` at ``LEGAL_CARDS_ITEM_INDEX``.  An
    ``https://`` scheme is downgraded to ``http://``; only the scheme prefix
    is touched, never an "https" that appears later in the URL.
    NOTE(review): the reason for the downgrade is not visible here --
    presumably the list host has TLS problems; confirm.

    Raises:
        IndexError: if the section has too few ``<li>`` elements.
    """
    list_items = tag.find_all("li")
    url = list_items[LEGAL_CARDS_ITEM_INDEX].a.attrs["href"]
    if url.startswith(_HTTPS_PREFIX):
        url = "http://" + url[len(_HTTPS_PREFIX):]
    return url


def get_seasons(soup):
    """Return ``{season_number: legal_cards_url}`` for every season in *soup*."""
    return {
        get_season_number(season_tag): get_season_cards_url(season_tag)
        for season_tag in soup.find_all(is_season)
    }


def download_season_lists(seasons):
    """Download every card list and return ``{season_number: list_text}``.

    *seasons* maps season numbers to URLs and is left unmodified; a new
    dictionary is returned.

    Raises:
        requests.HTTPError: if any download answers with a 4xx/5xx status.
    """
    import requests

    card_lists = {}
    for season_number, season_url in seasons.items():
        response = requests.get(season_url)
        # Do not let an error page end up on disk as a card list.
        response.raise_for_status()
        card_lists[season_number] = response.text
    return card_lists


def write_season_lists(responses, directory=SEASON_FILES_DIRECTORY):
    """Write each card list to ``<directory>/season_<N>_legal_cards.txt``.

    Args:
        responses: mapping of season number to the card-list text.
        directory: target directory; defaults to ``SEASON_FILES_DIRECTORY``.
            It must already exist.
    """
    for season_number, cards_list in responses.items():
        filename = os.path.join(
            directory, f"season_{season_number}_legal_cards.txt"
        )
        # Explicit encoding so the output does not depend on the locale.
        with open(filename, "w", encoding="utf-8") as cards_file:
            cards_file.write(cards_list)


def main():
    """Run the whole download pipeline."""
    soup = download_seasons_html()
    seasons = get_seasons(soup)
    lists = download_season_lists(seasons)
    write_season_lists(lists)


if __name__ == "__main__":
    main()
test_is_season_returns_true_when_tag_is_season(self): + tag = Mock() + tag.name = "section" + tag.attrs = {"class": "stats"} + + is_season = downloader.is_season(tag) + + self.assertEqual(is_season, True) + + def test_is_season_returns_false_when_tag_name_is_not_div(self): + tag = Mock() + tag.name = "p" + tag.attrs = {"class": "stats"} + + is_season = downloader.is_season(tag) + + self.assertEqual(is_season, False) + + def test_is_season_returns_false_when_tag_class_is_not_content_season(self): + tag = Mock() + tag.name = "section" + tag.attrs = {"class": "not-a-season"} + + is_season = downloader.is_season(tag) + + self.assertEqual(is_season, False) + + def test_is_season_returns_false_when_tag_has_no_class(self): + tag = Mock() + tag.name = "section" + tag.attrs = {} + + is_season = downloader.is_season(tag) + + self.assertEqual(is_season, False) + + def test_get_season_number_returns_correct_number(self): + expected_season_number = "29" + mock_soup = BeautifulSoup("

Season " + expected_season_number + " (ONE)

", "html.parser") + mock_tag = mock_soup.div + + season_number = downloader.get_season_number(mock_tag) + + self.assertEqual(season_number, expected_season_number) + + def test_get_season_number_returns_correct_number_when_single_digit_season(self): + expected_season_number = "7" + mock_soup = BeautifulSoup("

Season " + expected_season_number + " (RIX)

", "html.parser") + mock_tag = mock_soup.div + + season_number = downloader.get_season_number(mock_tag) + + self.assertEqual(season_number, expected_season_number) + + def test_get_season_cards_url_returns_correct_url_from_list(self): + expected_url = "TEST_SEASON_CARDS_URL" + mock_soup = BeautifulSoup('''

Season 23 (VOW)

''', "html.parser") + mock_tag = mock_soup.div + + season_url = downloader.get_season_cards_url(mock_tag) + + self.assertEqual(season_url, expected_url) + + def test_get_season_cards_url_returns_http_when_url_is_https(self): + expected_url = "http://pdmtgo.com/VOW_legal_cards.txt" + source_url = "https://pdmtgo.com/VOW_legal_cards.txt" + mock_soup = BeautifulSoup('''

Season 23 (VOW)

''', "html.parser") + mock_tag = mock_soup.div + + season_url = downloader.get_season_cards_url(mock_tag) + + self.assertEqual(season_url, expected_url) + + def test_get_seasons_returns_dictionary_of_season_to_url(self): + expected_season_number = "23" + expected_url = "TEST_SEASON_CARDS_URL" + mock_soup = BeautifulSoup('''

Season ''' + expected_season_number + ''' (VOW)

''', "html.parser") + + seasons_list = downloader.get_seasons(mock_soup) + + self.assertEqual(seasons_list, {expected_season_number: expected_url}) + + def test_download_season_lists_pulls_file_from_web(self): + expected_season_number = "23" + expected_url = "TEST_SEASON_CARDS_URL" + + test_cards = "CARDS CARDS MORE CARDS" + requests.get = Mock(return_value=get_mock_cards_response(test_cards)) + + downloader.download_season_lists({expected_season_number: expected_url}) + + requests.get.assert_called_once_with(expected_url) + + def test_download_season_lists_gets_all_files(self): + test_url_1 = "SEASON_23_URL" + test_url_2 = "SEASON_42_URL" + test_seasons = { + "23": test_url_1, + "42": test_url_2} + + test_cards = "CARDS CARDS MORE CARDS" + requests.get = Mock(return_value=get_mock_cards_response(test_cards)) + + downloader.download_season_lists(test_seasons) + + requests.get.assert_has_calls([call(test_url_1), call(test_url_2)]) + + def test_write_season_lists_writes_response_text_to_disk(self): + test_cards = "CARDS CARDS MORE CARDS" + test_responses = {"23": test_cards} + + open_mock = mock_open() + with unittest.mock.patch("builtins.open", open_mock): + downloader.write_season_lists(test_responses) + + handle = open_mock() + handle.write.assert_called_with(test_cards) + + def test_main_runs_program(self): + mock_soup = BeautifulSoup("", "html.parser") + downloader.download_seasons_html = Mock(return_value=mock_soup) + + mock_seasons = {"23": "URL"} + downloader.get_seasons = Mock(return_value=mock_seasons) + + mock_lists = {"23": "CARDS UPON CARDS"} + downloader.download_season_lists = Mock(return_value=mock_lists) + + # downloader.write_season_lists = Mock() # TODO: Find out why this causes the above test to fail + + with unittest.mock.patch("builtins.open", mock_open()): + downloader.main() + + downloader.download_seasons_html.assert_called_once() + downloader.get_seasons.assert_called_once_with(mock_soup) + 
downloader.download_season_lists.assert_called_once_with(mock_seasons) + # downloader.write_season_lists.assert_called_once_with(mock_lists) + +if __name__ == '__main__': + unittest.main()