Initial commit
This commit is contained in:
commit
63e4cef7d7
Binary file not shown.
|
@ -0,0 +1,61 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Directory that write_season_lists() writes the per-season card list files
# into. File names are appended with plain string concatenation, so the
# trailing slash is required.
SEASON_FILES_DIRECTORY = "/home/viciouscirce/dox/project_mathemagicians/penny_dreadful/legality/legality_data/"
|
||||||
|
|
||||||
|
def download_seasons_html():
    """Fetch the Penny Dreadful seasons page and parse it.

    Returns:
        BeautifulSoup: the response body parsed with the "html.parser" backend.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    page = requests.get("https://pennydreadfulmagic.com/seasons/")
    # Fail loudly on an error response instead of handing an error page to
    # the parser as if it were the seasons listing.
    page.raise_for_status()
    return BeautifulSoup(page.text, "html.parser")
|
||||||
|
|
||||||
|
def is_season(tag):
    """Tell whether ``tag`` is a season block.

    A season block is a ``<section>`` element whose ``class`` attribute
    contains ``"stats"``. Intended as a filter for ``find_all``.

    Args:
        tag: object exposing ``name`` and a dict-like ``attrs``.

    Returns:
        bool: True for a matching section, False otherwise.
    """
    if tag.name != "section":
        return False
    # A missing class attribute is treated as an empty collection of classes.
    return "stats" in tag.attrs.get("class", ())
|
||||||
|
|
||||||
|
def get_season_number(tag):
    """Extract the season number from a season block's ``<h2>`` header.

    Args:
        tag: element exposing BeautifulSoup's ``find``; expected to contain
            an ``<h2>`` such as ``"Season 29 (ONE)"``.

    Returns:
        str: the first run of decimal digits in the header text, of any
        length (so a three-digit season is not truncated).

    Raises:
        ValueError: if there is no ``<h2>`` or its text contains no digits.
    """
    header = tag.find("h2")
    if header is None:
        raise ValueError("season tag has no <h2> header")

    # get_text() also works when the header wraps its text in nested markup,
    # a case in which ``.string`` would be None.
    header_text = header.get_text()

    match = re.search(r"[0-9]+", header_text)
    if match is None:
        raise ValueError("no season number found in header {!r}".format(header_text))
    return match.group(0)
|
||||||
|
|
||||||
|
def get_season_cards_url(tag):
    """Return the legal-cards list URL of a season block, using plain HTTP.

    Args:
        tag: element exposing BeautifulSoup's ``find_all``; its ninth
            ``<li>`` descendant must contain an ``<a>`` with an ``href``.

    Returns:
        str: the link target, with a leading ``https://`` rewritten to
        ``http://``. URLs with any other scheme are returned unchanged.

    Raises:
        IndexError: if the block has fewer than nine ``<li>`` elements.
    """
    list_items = tag.find_all("li")
    # The legal-cards link is looked up by position: index 8, the ninth item.
    legal_cards_item = list_items[8]
    season_cards_url = legal_cards_item.a.attrs["href"]

    # Rewrite only a leading scheme so that "https" occurring later in the
    # URL (host, path or file name) is left intact.
    secure_scheme = "https://"
    if season_cards_url.startswith(secure_scheme):
        season_cards_url = "http://" + season_cards_url[len(secure_scheme):]
    return season_cards_url
|
||||||
|
|
||||||
|
def get_seasons(soup):
    """Map every season found in the parsed page to its card-list URL.

    Args:
        soup: parsed document exposing ``find_all``.

    Returns:
        dict: season number (str) -> legal-cards URL (str), one entry per
        element accepted by ``is_season``. If two blocks yield the same
        number, the later one wins.
    """
    return {
        get_season_number(section): get_season_cards_url(section)
        for section in soup.find_all(is_season)
    }
|
||||||
|
|
||||||
|
def download_season_lists(seasons):
    """Download the card list of every season.

    Args:
        seasons: dict mapping season number to the URL of its card list.
            The dict is not modified.

    Returns:
        dict: a new dict mapping each season number to the response body
        (text) fetched from its URL.

    Raises:
        requests.HTTPError: if any URL answers with a 4xx/5xx status.
    """
    card_lists = {}
    for season_number, season_url in seasons.items():
        response = requests.get(season_url)
        # Do not let an error page be stored as if it were a card list.
        response.raise_for_status()
        card_lists[season_number] = response.text
    return card_lists
|
||||||
|
|
||||||
|
def write_season_lists(responses):
    """Write each season's card list to its own text file.

    Files are created in ``SEASON_FILES_DIRECTORY`` and named
    ``season_<number>_legal_cards.txt``; an existing file is overwritten.

    Args:
        responses: dict mapping season number (str) to the card list text.
    """
    for season_number, cards_list in responses.items():
        filename = SEASON_FILES_DIRECTORY + "season_" + season_number + "_legal_cards.txt"
        # Pin the encoding so the output does not depend on the platform's
        # default locale.
        with open(filename, "w", encoding="utf-8") as cards_file:
            cards_file.write(cards_list)
|
||||||
|
|
||||||
|
def main():
    """Run the whole pipeline.

    Fetches the seasons page, resolves each season's card-list URL,
    downloads the lists and writes them to disk.
    """
    season_urls = get_seasons(download_seasons_html())
    write_season_lists(download_season_lists(season_urls))
|
||||||
|
|
||||||
|
# Run the pipeline only when this file is executed as a script, so that
# importing the module (as the test suite does) has no side effects.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/bash
# Feed the module and its test file to entr, which runs the test suite via
# `python test_downloader.py`.
# NOTE(review): the exact effect of the -c and -d flags is not visible here;
# confirm against the entr manual.
ls -1 downloader.py test_downloader.py | entr -cd python test_downloader.py
|
|
@ -0,0 +1,175 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from unittest.mock import Mock, call, mock_open
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import downloader
|
||||||
|
|
||||||
|
def get_mock_cards_response(text):
    """Build a stand-in for an HTTP response whose ``text`` is ``text``."""
    return Mock(text=text)
|
||||||
|
|
||||||
|
class DownloaderTests(unittest.TestCase):
    """Unit tests for the ``downloader`` module.

    Every replacement of a module attribute goes through ``_patch`` so that
    it is undone when the test finishes. Assigning a mock directly to
    ``requests.get`` or ``downloader.<function>`` would leak into the tests
    that run afterwards (unittest runs them in alphabetical order).
    """

    def _patch(self, target, attribute, **kwargs):
        """Replace ``target.attribute`` with a mock for the current test only.

        Returns the mock that was installed; the original attribute is
        restored automatically during test cleanup.
        """
        patcher = unittest.mock.patch.object(target, attribute, **kwargs)
        mocked = patcher.start()
        self.addCleanup(patcher.stop)
        return mocked

    @staticmethod
    def _season_section(cards_url, season_number="23"):
        """Return season markup whose ninth ``<li>`` links to ``cards_url``."""
        return (
            '<section class="stats"><h2>Season ' + season_number + ' (VOW)</h2><p></p><ul>'
            + '<li></li>' * 8
            + '<li><a href="' + cards_url + '">14,990 Legal Cards</a></li>'
            + '<li></li>' * 3
            + '</ul></section>'
        )

    def test_download_seasons_html_calls_requests_get_with_correct_url(self):
        mock_get = self._patch(requests, "get")
        mock_get.return_value.text = ''

        downloader.download_seasons_html()

        mock_get.assert_called_once_with("https://pennydreadfulmagic.com/seasons/")

    def test_download_seasons_html_returns_soup_of_document(self):
        mock_html = '<html></html>'
        expected = BeautifulSoup(mock_html, "html.parser")

        mock_get = self._patch(requests, "get")
        mock_get.return_value.text = mock_html

        result = downloader.download_seasons_html()

        self.assertEqual(expected, result)

    def test_is_season_returns_true_when_tag_is_season(self):
        tag = Mock()
        # ``name`` must be assigned after construction: Mock(name=...) names
        # the mock itself instead of setting the attribute.
        tag.name = "section"
        tag.attrs = {"class": "stats"}

        self.assertTrue(downloader.is_season(tag))

    def test_is_season_returns_false_when_tag_name_is_not_div(self):
        """A non-<section> tag is rejected even if its class matches."""
        tag = Mock()
        tag.name = "p"
        tag.attrs = {"class": "stats"}

        self.assertFalse(downloader.is_season(tag))

    def test_is_season_returns_false_when_tag_class_is_not_content_season(self):
        """A <section> whose class is not "stats" is rejected."""
        tag = Mock()
        tag.name = "section"
        tag.attrs = {"class": "not-a-season"}

        self.assertFalse(downloader.is_season(tag))

    def test_is_season_returns_false_when_tag_has_no_class(self):
        tag = Mock()
        tag.name = "section"
        tag.attrs = {}

        self.assertFalse(downloader.is_season(tag))

    def test_get_season_number_returns_correct_number(self):
        expected_season_number = "29"
        mock_soup = BeautifulSoup("<div><h2>Season " + expected_season_number + " (ONE)</h2></div>", "html.parser")
        mock_tag = mock_soup.div

        season_number = downloader.get_season_number(mock_tag)

        self.assertEqual(season_number, expected_season_number)

    def test_get_season_number_returns_correct_number_when_single_digit_season(self):
        expected_season_number = "7"
        mock_soup = BeautifulSoup("<div><h2>Season " + expected_season_number + " (RIX)</h2></div>", "html.parser")
        mock_tag = mock_soup.div

        season_number = downloader.get_season_number(mock_tag)

        self.assertEqual(season_number, expected_season_number)

    def test_get_season_cards_url_returns_correct_url_from_list(self):
        expected_url = "TEST_SEASON_CARDS_URL"
        mock_soup = BeautifulSoup(
            '<div class="content content-season">' + self._season_section(expected_url) + '</div>',
            "html.parser")
        mock_tag = mock_soup.div

        season_url = downloader.get_season_cards_url(mock_tag)

        self.assertEqual(season_url, expected_url)

    def test_get_season_cards_url_returns_http_when_url_is_https(self):
        expected_url = "http://pdmtgo.com/VOW_legal_cards.txt"
        source_url = "https://pdmtgo.com/VOW_legal_cards.txt"
        mock_soup = BeautifulSoup(
            '<div class="content content-season">' + self._season_section(source_url) + '</div>',
            "html.parser")
        mock_tag = mock_soup.div

        season_url = downloader.get_season_cards_url(mock_tag)

        self.assertEqual(season_url, expected_url)

    def test_get_seasons_returns_dictionary_of_season_to_url(self):
        expected_season_number = "23"
        expected_url = "TEST_SEASON_CARDS_URL"
        mock_soup = BeautifulSoup(
            '<html><body><main><div class="content content-seasons">'
            + self._season_section(expected_url, expected_season_number)
            + '</div></main></body></html>',
            "html.parser")

        seasons_list = downloader.get_seasons(mock_soup)

        self.assertEqual(seasons_list, {expected_season_number: expected_url})

    def test_download_season_lists_pulls_file_from_web(self):
        expected_season_number = "23"
        expected_url = "TEST_SEASON_CARDS_URL"

        test_cards = "CARDS CARDS MORE CARDS"
        mock_get = self._patch(requests, "get", return_value=get_mock_cards_response(test_cards))

        downloader.download_season_lists({expected_season_number: expected_url})

        mock_get.assert_called_once_with(expected_url)

    def test_download_season_lists_gets_all_files(self):
        test_url_1 = "SEASON_23_URL"
        test_url_2 = "SEASON_42_URL"
        test_seasons = {
            "23": test_url_1,
            "42": test_url_2,
        }

        test_cards = "CARDS CARDS MORE CARDS"
        mock_get = self._patch(requests, "get", return_value=get_mock_cards_response(test_cards))

        downloader.download_season_lists(test_seasons)

        # call_args_list holds only the direct calls to requests.get, so this
        # pins exactly one request per season, in dict order.
        self.assertEqual(mock_get.call_args_list, [call(test_url_1), call(test_url_2)])

    def test_write_season_lists_writes_response_text_to_disk(self):
        test_cards = "CARDS CARDS MORE CARDS"
        test_responses = {"23": test_cards}

        open_mock = mock_open()
        with unittest.mock.patch("builtins.open", open_mock):
            downloader.write_season_lists(test_responses)

        # First positional argument of the open() call is the target path.
        self.assertEqual(
            open_mock.call_args[0][0],
            downloader.SEASON_FILES_DIRECTORY + "season_23_legal_cards.txt")

        handle = open_mock()
        handle.write.assert_called_with(test_cards)

    def test_main_runs_program(self):
        mock_soup = BeautifulSoup("<html></html>", "html.parser")
        mock_seasons = {"23": "URL"}
        mock_lists = {"23": "CARDS UPON CARDS"}

        # These patches are reverted on cleanup, so mocking write_season_lists
        # here no longer breaks the test that exercises the real function.
        download_html = self._patch(downloader, "download_seasons_html", return_value=mock_soup)
        get_seasons = self._patch(downloader, "get_seasons", return_value=mock_seasons)
        download_lists = self._patch(downloader, "download_season_lists", return_value=mock_lists)
        write_lists = self._patch(downloader, "write_season_lists")

        downloader.main()

        download_html.assert_called_once_with()
        get_seasons.assert_called_once_with(mock_soup)
        download_lists.assert_called_once_with(mock_seasons)
        write_lists.assert_called_once_with(mock_lists)
|
||||||
|
|
||||||
|
# Allow running the suite directly with `python test_downloader.py`.
if __name__ == '__main__':
    unittest.main()
|
Loading…
Reference in New Issue