diff --git a/.gitignore b/.gitignore index 894a44c..af7597f 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ wheels/ *.egg-info/ .installed.cfg *.egg +.idea MANIFEST # PyInstaller diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..fe036a6 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include app/support_files/templates/fb2/* +include app/support_files/templates/html/* \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f7143a --- /dev/null +++ b/README.md @@ -0,0 +1,114 @@ +## It is a one-shot command-line RSS reader by Zviger. +### Installation +Install [Python3.8](https://www.python.org/downloads/) + +Install [pip](https://pip.pypa.io/en/stable/installing/) + +Install GIT. +This [link](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) +may be useful in this matter. +Clone this repository in the folder you need using this command +```text +git clone https://github.com/Zviger/PythonHomework +``` +after change the branch to the project branch and run +```text +git checkout final_proj +``` +Now you can install the application itself using this command from the application folder +```text +pip install . --user +``` +You will also need MongoDB. You can install and run +MongoDB on your system using this [link](https://docs.mongodb.com/manual/installation/). +But you can install [Docker](https://docs.docker.com/install/) and +[Docker-Compose](https://docs.docker.com/compose/install/). + + +To start the container with MongoDB, run the following command in the application folder +```text +docker-compose up +``` +* The application and database are connected through port 27017. + +You can stop container with MongoDB by command +```text +docker-compose stop +``` +and run again +```text +docker-compose start +``` +You can execute the command +```text +docker-compose down +``` +but you will lose all saved data from the database. + + +Congratulations! 
+ +Run +```text +rss-reader --help +``` +to learn about the features of the application and start using it. +### User interface +```text +usage: rss-reader [-h] [--version] [-l LIMIT] [--verbose] [--json] [--length LENGTH] [--date DATE] [--to_html PATH] [--to_fb2 PATH] [--colorize] source + +positional arguments: + source RSS URL + +optional arguments: + -h, --help show this help message and exit + --version Print version info + -l LIMIT, --limit LIMIT + Limit news topics if this parameter provided + --verbose Outputs verbose status messages + --json Print result as JSON in stdout + --length LENGTH Sets the length of each line of news output + --date DATE Search past news by date in format yearmonthday (19991113) + --to_html PATH Save news by path in html format + --to_fb2 PATH Save news by path in fb2 format + --colorize Make console text display colorful +``` + +### Json structure +```json +[ + { + "title": "Yahoo News - Latest News & Headlines", + "link": "https://www.yahoo.com/news", + "items": + [ + { + "title": "Sorry, Hillary: Democrats don't need a savior", + "link": "https://news.yahoo.com/sorry-hillary-democrats-dont-need-a-savior-194253123.html", + "author": "no author", + "published_parsed": [2019, 11, 13, 19, 42, 53, 2, 317, 0], + "description": "With the Iowa caucuses fast approaching, Hillary Clinton is just the latest in the colorful cast of characters who seem to have surveyed the sprawling Democratic field, sensed something lacking and decided that \u201csomething\u201d might be them.", + "img_links": + [ + "http://l.yimg.com/uu/api/res/1.2/xq3Ser6KXPfV6aeoxbq9Uw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/14586fd0-064d-11ea-b7df-7288f8d8c1a7" + ] + } + ] + } +] +``` +### Caching +The news is saved to the database when news output commands are executed. MongoDB is used as a database management system. 
+When the --date parameter is used, news is downloaded from the database by the entered date and the entered RSS link. + +Features: +* The --limit parameter affects the amount of data loaded into the database. +* Date must be written in the yearmonthday (example - 19991113) format. + +### Saving in files +Using the "--to_html" and "--to_fb2" parameters, you can save files at a given path. +The path should be written in the style of UNIX systems (example: ./some/folder). +File names are formed using the "feed[index].[format]" template (example: feed13.html). +File indices go sequentially and a new file fills this sequence or is set to the end. +What does this mean: if, for example, there are files "feed1.html" and "feed3.html", +a new file will be created with the name "feed2.html". \ No newline at end of file diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..07f3d47 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ +__version__ = "5.1" diff --git a/app/core.py b/app/core.py new file mode 100644 index 0000000..a7713b3 --- /dev/null +++ b/app/core.py @@ -0,0 +1,9 @@ +from app.support_files.rss_reader import Reader + + +def main() -> None: + Reader.exec_console_args() + + +if __name__ == "__main__": + main() diff --git a/app/support_files/__init__.py b/app/support_files/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/support_files/app_logger.py b/app/support_files/app_logger.py new file mode 100644 index 0000000..5a5e6c8 --- /dev/null +++ b/app/support_files/app_logger.py @@ -0,0 +1,30 @@ +""" +This module provides functions to work with logging. +""" +import logging +import sys +from logging import Logger + + +def init_logger(name: str) -> Logger: + """ + Initialize and return logger object. + :param name: Name of the logger object. 
# ----------------------------------------------------------------------------
# app/support_files/app_logger.py  - functions to work with logging
# app/support_files/args_parser.py - parser of console arguments
# ----------------------------------------------------------------------------
import argparse
import logging
import sys
from argparse import Namespace
from logging import Logger


def init_logger(name: str) -> Logger:
    """
    Initialize and return logger object.

    The logger is switched to INFO level and writes to stdout. When the logger
    already owns a stdout stream handler (the function was called before for
    the same name) no second handler is attached, so a message is never
    printed twice.
    :param name: Name of the logger object.
    :return: Configured logger object.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    already_attached = any(
        isinstance(handler, logging.StreamHandler) and getattr(handler, "stream", None) is sys.stdout
        for handler in logger.handlers
    )
    if not already_attached:
        # create the stdout stream handler and add it to the logger object
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(message)s'))
        logger.addHandler(stream_handler)
    return logger


def get_logger(name: str) -> Logger:
    """
    Return logger object.
    :param name: Name of the logger object.
    :return: Logger registered under the given name.
    """
    return logging.getLogger(name)


def get_args() -> Namespace:
    """
    Function, that parse console args.
    :return: An object that provides the values of parsed arguments.
    """
    # Imported here because only the version string of the package is needed.
    import app

    parser = argparse.ArgumentParser(description="It is a python command-line rss reader")
    parser.add_argument("source", help="RSS URL")
    parser.add_argument("--version", action="version", version=f"%(prog)s {app.__version__}", help="Print version info")
    parser.add_argument("-l", "--limit", type=int, help="Limit news topics if this parameter provided", default=-1)
    # The help texts of --verbose and --json used to be swapped.
    parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages", default=False)
    parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout", default=False)
    parser.add_argument("--length", type=int, help="Sets the length of each line of news output", default=120)
    # The date is parsed with "%Y%m%d", i.e. year, month, day.
    parser.add_argument("--date", type=str, help="Search past news by date in format yearmonthday (19991113)",
                        default=None)
    parser.add_argument("--to_html", metavar="PATH", type=str, help="Save news by path in html format", default=None)
    parser.add_argument("--to_fb2", metavar="PATH", type=str, help="Save news by path in fb2 format", default=None)
    parser.add_argument("--colorize", action="store_true", help="Make console text display colorful", default=False)
    # Parse exactly once.
    return parser.parse_args()
# ----------------------------------------------------------------------------
# app/support_files/config.py     - configuration strings
# app/support_files/dtos.py       - data classes to work with feeds
# app/support_files/db_manager.py - class to work with database
# ----------------------------------------------------------------------------
import os
from calendar import timegm
from dataclasses import asdict, dataclass, field
from time import localtime, strptime, struct_time
from typing import List, Optional

APP_NAME = "rss-reader"


@dataclass
class Item:
    """
    This class represents each item in feed.
    """
    title: str = "no title"
    link: str = "no link"
    author: str = "no author"
    # default_factory: the fallback date is taken when the item is created,
    # not once when the module is imported.
    published_parsed: struct_time = field(default_factory=localtime)
    description: str = "description"
    img_links: List[str] = field(default_factory=list)

    def __hash__(self) -> int:
        # Items are put into sets/dict keys while merging feeds; the hash is
        # built from the textual form of all fields.
        return hash(str(self.__dict__))


@dataclass
class Feed:
    """
    This class represents feed.
    """
    rss_link: str
    title: str = "no title"
    link: str = "no link"
    items: List[Item] = field(default_factory=list)


class DBManager:
    """
    Class to work with database.
    """

    def __init__(self) -> None:
        """
        Connect to MongoDB on the host from the MONGO_HOST environment
        variable (127.0.0.1 by default), port 27017.
        :raise DBConnectError: If the server can not be reached.
        """
        # Driver and project modules are needed only to open the connection.
        from pymongo import MongoClient, errors

        from app.support_files.app_logger import get_logger
        from app.support_files.exeptions import DBConnectError

        mongo_host = os.getenv('MONGO_HOST', '127.0.0.1')
        client = MongoClient(f"mongodb://{mongo_host}:27017/")
        try:
            client.server_info()
        except errors.ServerSelectionTimeoutError as err:
            raise DBConnectError(f"Can't connect to database with host - {mongo_host} and port - 27017") from err
        self._db = client["feed_db"]
        self._collection = self._db["feed_collection"]
        self._logger = get_logger(APP_NAME)

    @staticmethod
    def _item_from_document(raw_item: dict) -> Item:
        """
        Build an Item from a stored document.
        The stored date is turned back into struct_time so that a loaded item
        compares (and hashes) equal to the same item freshly parsed.
        """
        fields = dict(raw_item)
        if "published_parsed" in fields:
            fields["published_parsed"] = struct_time(tuple(fields["published_parsed"]))
        return Item(**fields)

    def insert_feed(self, feed: Feed) -> None:
        """
        Insert feed in database.
        If this feed exists in the database, then news is added that was not there.
        New items go first, already stored items follow; duplicates are dropped.
        :param feed: Feed, which should be inserted.
        """
        self._logger.info("Loading data from database to join with inserted data is started")
        cashed_feed = self.find_feed_by_link(feed.rss_link)
        self._logger.info("Loading data from database to join with inserted data is finished")

        if cashed_feed is None:
            self._collection.insert_one(asdict(feed))
        else:
            # dict.fromkeys removes duplicates and keeps a stable order.
            merged_items = list(dict.fromkeys(list(feed.items) + list(cashed_feed.items)))
            self._collection.update_one({"rss_link": feed.rss_link},
                                        {"$set": {"items": [asdict(item) for item in merged_items]}})
        self._logger.info("New and old data are joined")

    def find_feed_by_link(self, link: str) -> Optional[Feed]:
        """
        Looks for feed in the database by rss link and returns it.
        :param link: Rss link.
        :return: Feed, if it exist, otherwise None.
        """
        document = self._collection.find_one({"rss_link": link})
        if document is None:
            return None
        fields = {key: value for key, value in document.items() if key != "_id"}
        raw_items = fields.pop("items", [])
        feed = Feed(**fields)
        feed.items = [self._item_from_document(raw_item) for raw_item in raw_items]
        return feed

    def find_feed_by_link_and_date(self, link: str, date: str, limit: int = -1) -> Feed:
        """
        Looks for feed in the database by rss link and date and returns it.
        :param link: Rss link.
        :param date: Need date in format "%Y%m%d" (example - 19991113).
        :param limit: Limit count of returned items; values below 1 mean no limit.
        :return: Feed with the items published on the given local date.
        :raise DateError: If the date can not be parsed.
        :raise FindFeedError: If the feed is not stored in the database.
        """
        from app.support_files.exeptions import DateError, FindFeedError

        try:
            wanted = strptime(date, "%Y%m%d")
        except ValueError as err:
            raise DateError(str(err)) from err
        feed = self.find_feed_by_link(link)
        if feed is None:
            raise FindFeedError("This feed is not cashed")
        wanted_day = (wanted.tm_year, wanted.tm_mon, wanted.tm_mday)
        result_items = []
        for item in feed.items:
            # NOTE(review): assumes published_parsed is a UTC time tuple, as
            # feedparser reports it - confirm. timegm + localtime converts it
            # to local time with the offset valid for that instant;
            # "mktime(...) - altzone" was wrong by an hour wherever a DST
            # zone is defined.
            published = localtime(timegm(item.published_parsed))
            if (published.tm_year, published.tm_mon, published.tm_mday) != wanted_day:
                continue
            result_items.append(item)
            if len(result_items) == limit:
                break
        feed.items = result_items
        return feed

    def truncate_collection(self) -> None:
        """
        Truncate database.
        """
        self._collection.delete_many({})


if __name__ == "__main__":
    DBManager()
# ----------------------------------------------------------------------------
# app/support_files/exeptions.py    - exception classes
# app/support_files/file_manager.py - functions to work with files
#   (file_manager.py imports DirError and DirExistsError from exeptions.py)
# ----------------------------------------------------------------------------
import re
from pathlib import Path


class FindFeedError(Exception):
    """
    This class should be raised, if received some problems with getting feed.
    """


class DateError(ValueError):
    """
    This class should be raised, if received some problems with converting date.
    """


class DirError(Exception):
    """
    This class should be raised, if received path is not a directory.
    """


class DirExistsError(Exception):
    """
    This class should be raised, if directory which was received by path not exists.
    """


class DBConnectError(Exception):
    """
    This class should be raised, if received some problems with connection with database.
    """


def store_str_to_file(data: str, path: str, file_format: str, file_name: str = "feed") -> None:
    """
    Saves data to a folder at a given path to a file with a given name, to which the index is added, and the format.
    The file name is based on files with a specific file name format that are already in the folder.
    File indices go sequentially and a new file fills this sequence or is set to the end:
    with "feed1.html" and "feed3.html" present the new file is "feed2.html".
    An existing file is never overwritten.
    :param data: Text to store.
    :param path: Existing directory that receives the file.
    :param file_format: File extension without the dot (example - html).
    :param file_name: File name prefix placed before the index.
    :raise DirExistsError: If the path does not exist.
    :raise DirError: If the path is not a directory.
    """
    directory = Path(path)
    if not directory.exists():
        raise DirExistsError(f"This directory not exists: {directory}")
    if not directory.is_dir():
        raise DirError(f"Is not a directory: {directory}")

    # Only names that are exactly "<file_name><digits>.<file_format>" count;
    # the prefix and the extension are escaped so they are matched literally.
    name_pattern = re.compile(rf"{re.escape(file_name)}(\d+)\.{re.escape(file_format)}")
    taken_indexes = set()
    for entry in directory.iterdir():
        matched = name_pattern.fullmatch(entry.name)
        if matched:
            taken_indexes.add(int(matched.group(1)))

    # Smallest free index starting from 1.
    current_index = 1
    while current_index in taken_indexes:
        current_index += 1

    target = directory / f"{file_name}{current_index}.{file_format}"
    # Explicit encoding so the result does not depend on the platform default.
    with open(target, "w", encoding="utf-8") as file:
        file.write(data)
# ----------------------------------------------------------------------------
# app/support_files/format_converter.py
# This module contains class for converting parsed data from RSS.
# ----------------------------------------------------------------------------
import base64
import dataclasses
import functools
import json
import textwrap
from calendar import timegm
from html import escape
from pathlib import Path
from time import ctime, localtime, strftime, struct_time, time
from typing import TYPE_CHECKING, Callable, Dict, List

if TYPE_CHECKING:
    from app.support_files.dtos import Feed

# Picture used for items that carry no image links.
_NO_PHOTO_URL = "http://view.dreamstalk.ca/breeze5/images/no-photo.png"


def convert_date(date: struct_time) -> str:
    """
    Converts date to human readable format.
    :param date: Time tuple of the publication.
    :return: Local date and time followed by the UTC offset in hours (example - "... 3.0").
    """
    # NOTE(review): assumes the tuple is in UTC, as feedparser reports it -
    # confirm. timegm + localtime applies the offset valid for that instant;
    # "mktime(...) - altzone" was wrong by an hour wherever a DST zone is defined.
    published = localtime(timegm(date))
    return " ".join([strftime('%a, %d %b %Y %X', published), str(published.tm_gmtoff / 3600)])


def get_templates(template_type: str, template_names: List[str]) -> List[str]:
    """
    Reads templates from files.
    :param template_type: Name of the folder inside "support_files/templates" (html or fb2).
    :param template_names: Names of the template files.
    :return: Contents of the templates in the order of template_names.
    """
    # Imported here because only the location of the package is needed.
    import app

    template_dir = Path(app.__path__[0]) / "support_files" / "templates" / template_type
    return [(template_dir / template_name).read_text() for template_name in template_names]


def set_length(str_len: int) -> Callable:
    """
    Makes text the same length wide.
    Decorator factory: the first positional argument of the decorated function
    must be a str; it is re-wrapped to str_len columns before the call.
    :param str_len: Maximal width of a line.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            text = textwrap.fill(args[0], width=str_len)
            return func(text, *args[1:], **kwargs)
        return wrapper
    return decorator


def get_img_by_url(url: str) -> str:
    """
    Gets img in base64 format by url.
    :param url: Address of the picture.
    :return: Downloaded bytes encoded as base64 text.
    """
    import urllib3

    http = urllib3.PoolManager()
    response = http.request("GET", url)
    return base64.b64encode(response.data).decode("utf-8")


class Converter:
    """
    This class represents format converter for parsed data from RSS.
    """

    def __init__(self, feeds: "List[Feed]") -> None:
        """
        :param feeds: Parsed data from RSS.
        """
        from app.support_files.app_logger import get_logger
        from app.support_files.config import APP_NAME

        self.__feeds = feeds
        self._logger = get_logger(APP_NAME)

    @staticmethod
    def _item_fields(item) -> Dict[str, str]:
        """
        Template fields shared by the html and fb2 item templates.
        Texts come from the feed, so they are escaped before they are put into markup.
        """
        return {"item_title": escape(item.title),
                "item_link": escape(item.link),
                "item_author": escape(item.author),
                "item_published": convert_date(item.published_parsed),
                "item_description": escape(item.description)}

    def to_console_format(self, str_len: int = 80, col_en: bool = False) -> str:
        """
        Convert data to console format.
        :param col_en: Enable colorizing, if true.
        :param str_len: Length of output strings.
        :return: Converted data; empty string when there are no feeds.
        """
        @set_length(str_len)
        def set_color(text: str, text_color: str, enabled: bool) -> str:
            """
            Changes the text_color of the text if enabled is true.
            :return: Colorized text.
            """
            if not enabled:
                return text
            from colored import attr, fg
            return f"{fg(text_color)}" + text + f" {attr('reset')}"

        out_separator = set_color(f"{'*' * str_len}", "green_4", col_en)
        in_separator = set_color(f"{'-' * str_len}", "chartreuse_3b", col_en)
        strings = []
        for feed in self.__feeds:
            strings.append(out_separator)
            strings.append(set_color(f"Feed: {feed.title}", "gold_1", col_en))
            for item in feed.items:
                strings.append(in_separator)
                strings.append(set_color(f"Author: {item.author}", "light_green_3", col_en))
                strings.append(set_color(f"Published: {convert_date(item.published_parsed)}", "light_cyan_1", col_en))
                strings.append("\n")
                strings.append(set_color("Title:", "yellow_3a", col_en))
                strings.append(set_color(f"\t{item.title}", "gold_1", col_en))
                strings.append("\n")
                strings.append(set_color("Description:", "yellow_3a", col_en))
                strings.append(set_color(f"\t{item.description}", "light_green_3", col_en))
                strings.append("\n")
                strings.append(set_color("Link:", "yellow_3a", col_en))
                strings.append(set_color(f"\t{item.link}", "wheat_1", col_en))
                strings.append(set_color("Image links:", "yellow_3a", col_en))
                for img_link in item.img_links:
                    strings.append(set_color(f"\t{img_link}", "light_cyan_1", col_en))
            # NOTE(review): indentation was lost in the reviewed source; the
            # closing separators are placed once per feed - confirm.
            strings.append(in_separator)
            strings.append(out_separator)
        # A plain join also works for an empty list of feeds, where reducing
        # over the characters of the joined string failed.
        return "\n".join(strings)

    def to_json_format(self, str_len: int = 80) -> str:
        """
        Convert data to json format.
        :param str_len: Length of output strings.
        :return: Converted data.
        """
        dicts_of_feeds = [dataclasses.asdict(feed) for feed in self.__feeds]
        return textwrap.fill(json.dumps(dicts_of_feeds, ensure_ascii=False), width=str_len)

    def to_html_format(self) -> str:
        """
        Convert data to html format.
        :return: Converted data.
        """
        template_names = ["main", "feed", "item", "image"]
        main_template, feed_template, item_template, image_template = get_templates("html", template_names)
        feed_str_s = []
        for feed in self.__feeds:
            item_str_s = []
            for item in feed.items:
                img_str_s = [image_template.format(item_img_link=escape(item_img_link))
                             for item_img_link in (item.img_links or [_NO_PHOTO_URL])]
                item_str_s.append(item_template.format(item_images="\n".join(img_str_s),
                                                       **self._item_fields(item)))
            feed_str_s.append(feed_template.format(feed_title=escape(feed.title),
                                                   feed_link=escape(feed.link),
                                                   items="\n".join(item_str_s)))
        return main_template.format(feeds="\n".join(feed_str_s), title="Feeds")

    def to_fb2_format(self) -> str:
        """
        Convert data to fb2 format.
        Pictures are downloaded and embedded as base64; only the first picture of an item is used.
        :return: Converted data.
        """
        template_names = ["main", "feed", "item", "image", "binary"]
        main_template, feed_template, item_template, image_template, binary_template = \
            get_templates("fb2", template_names)
        feed_str_s = []
        img_content_str_s = []
        img_index = 0
        for feed in self.__feeds:
            item_str_s = []
            for item in feed.items:
                img_str_s = []
                for item_img_link in (item.img_links or [_NO_PHOTO_URL])[:1]:
                    img_str_s.append(image_template.format(img_index=img_index))
                    self._logger.info(f"Downloading and converting image from {item_img_link} to binary are started")
                    img_content_str_s.append(binary_template.format(img_index=img_index,
                                                                    img_content=get_img_by_url(item_img_link)))
                    self._logger.info(f"Downloading and converting image from {item_img_link} to binary are finished")
                    img_index += 1
                item_str_s.append(item_template.format(item_images="\n".join(img_str_s),
                                                       **self._item_fields(item)))
            feed_str_s.append(feed_template.format(feed_title=escape(feed.title),
                                                   feed_link=escape(feed.link),
                                                   items="\n".join(item_str_s)))
        return main_template.format(date=ctime(time()),
                                    feeds="\n".join(feed_str_s),
                                    img_contents="\n".join(img_content_str_s))


if __name__ == "__main__":
    from app.support_files.rss_parser import Parser

    print(Converter([Parser("https://news.yahoo.com/rss/").parse_feed(items_limit=1)]).
          to_console_format(col_en=True, str_len=120))
# ----------------------------------------------------------------------------
# app/support_files/rss_parser.py
# This module contains class for parsing RSS.
# ----------------------------------------------------------------------------
from typing import TYPE_CHECKING, Any, Dict

if TYPE_CHECKING:
    from app.support_files.dtos import Feed

# Source key of the parsed feed -> field name of the Feed data class.
FEED_FIELD_MAPPING = {"title": "title",
                      "link": "link"}

# Source key of a parsed entry -> field name of the Item data class.
ITEM_FIELD_MAPPING = {"title": "title",
                      "link": "link",
                      "author": "author",
                      "description": "description",
                      "published_parsed": "published_parsed",
                      "media_content": "img_links"}


def apply_field_mapping(field_mapping: Dict[str, str], source: Dict[str, Any]) -> Dict[str, Any]:
    """
    Rename the keys of source according to field_mapping.
    Keys which are missing in source or hold an empty value are skipped.
    :param field_mapping: Source key -> result key.
    :param source: Data with the source keys.
    :return: Data with the result keys.
    """
    return {target: value for key, target in field_mapping.items() if (value := source.get(key))}


class Parser:
    """
    This class provides methods to parse RSS.
    """

    def __init__(self, url: str):
        """
        :param url: Url of RSS.
        """
        from app.support_files.app_logger import get_logger
        from app.support_files.config import APP_NAME

        self.url = url
        self._logger = get_logger(APP_NAME)

    def parse_feed(self, items_limit: int = -1) -> "Feed":
        """
        Parse the RSS file.
        :param items_limit: Limit count of returned items; values below 1 mean no limit.
        :return: Feed with the parsed items.
        :raise ConnectionError: If the parser reports a problem (bozo flag) or the status is not 200.
        """
        import feedparser
        from bs4 import BeautifulSoup

        from app.support_files.dtos import Feed, Item

        self._logger.info(f"Reading {self.url} is started")
        data = feedparser.parse(self.url)
        if data.bozo != 0:
            raise ConnectionError("Some problems with connection")
        # A result without a status attribute is treated like a successful
        # response instead of failing with AttributeError.
        if getattr(data, "status", 200) != 200:
            raise ConnectionError("Invalid url")
        self._logger.info(f"Reading {self.url} is finished")
        self._logger.info("Converting read data to standard form is started")
        feed_data = apply_field_mapping(FEED_FIELD_MAPPING, data.get("feed", {}))
        feed_data["rss_link"] = self.url
        entries = data.get("entries", [])
        if items_limit > 0:
            entries = entries[:items_limit]
        items_data = [apply_field_mapping(ITEM_FIELD_MAPPING, entry) for entry in entries]
        for item_data in items_data:
            # The description may contain markup; only its text is kept.
            soup = BeautifulSoup(item_data.get("description", ""), 'html.parser')
            item_data["description"] = soup.text
            # Media entries without an url are skipped instead of raising KeyError.
            item_data["img_links"] = [media["url"] for media in item_data.get("img_links", [])
                                      if media.get("url")]

        feed = Feed(**feed_data)
        feed.items = [Item(**item_data) for item_data in items_data]
        self._logger.info("Converting read data to standard form is finished")
        return feed


if __name__ == "__main__":
    parser = Parser("http://www.bbc.co.uk/music/genres/classical/reviews.rss")
    print(parser.parse_feed(1))
# ----------------------------------------------------------------------------
# app/support_files/rss_reader.py
# This module contains class for work with RSS.
# ----------------------------------------------------------------------------
from app.support_files import (
    rss_parser,
    args_parser,
    format_converter,
    app_logger,
    db_manager,
    exeptions)
from app.support_files.config import APP_NAME
from app.support_files.file_manager import store_str_to_file


class Reader:
    """
    Class for work with RSS.
    """

    @staticmethod
    def _load_feed(_args, source, logger):
        """
        Get the feed: from the database when --date is given, otherwise from
        the RSS source (the parsed feed is then stored in the database).
        :return: Feed, or None when an error was printed.
        """
        logger.info("Connecting with database")
        try:
            db = db_manager.DBManager()
        except exeptions.DBConnectError as err:
            print(err)
            return None

        if _args.date is not None:
            logger.info("Load data from database is started")
            try:
                feed = db.find_feed_by_link_and_date(source, _args.date, _args.limit)
            except (exeptions.FindFeedError, exeptions.DateError) as err:
                print(err)
                return None
            logger.info("Load data from database is finished")
            return feed

        logger.info(f"Parsing {source} started")
        try:
            feed = rss_parser.Parser(source).parse_feed(_args.limit)
        except ConnectionError as err:
            print(err)
            return None
        logger.info(f"Parsing {source} finished")

        logger.info("Loading parsed data to database is started")
        db.insert_feed(feed)
        logger.info("Loading parsed data to database is finished")
        return feed

    @staticmethod
    def _save(render, path, file_format, logger) -> bool:
        """
        Render the feed and store it in the directory at path.
        :return: True on success, False when an error was printed.
        """
        logger.info(f"Saving data in {file_format} format in file is started")
        try:
            store_str_to_file(render(), path, file_format)
        except (exeptions.DirExistsError, exeptions.DirError) as err:
            print(err)
            return False
        logger.info(f"Saving data in {file_format} format in file is finished")
        return True

    @staticmethod
    def exec_console_args() -> None:
        """
        Execute console commands.
        Arguments are validated before any network or database work is done.
        When --to_html and/or --to_fb2 are given the feed is saved to files,
        otherwise it is printed (as JSON with --json).
        """
        logger = app_logger.init_logger(APP_NAME)
        _args = args_parser.get_args()
        logger.disabled = not _args.verbose
        logger.info("Program started")

        if _args.limit < 1 and _args.limit != -1:
            print("The limit must be -1 or greater than 0")
            return None
        if _args.length < 60:
            print("The length must be greater than 60")
            return None

        source = _args.source.rstrip("/")
        feed = Reader._load_feed(_args, source, logger)
        if feed is None:
            return None
        converter = format_converter.Converter([feed])

        # Both file formats are honoured when both paths are given; before,
        # --to_fb2 was silently ignored next to --to_html.
        exports = ((_args.to_html, "html", converter.to_html_format),
                   (_args.to_fb2, "fb2", converter.to_fb2_format))
        saved = False
        for path, file_format, render in exports:
            if not path:
                continue
            if not Reader._save(render, path, file_format, logger):
                return None
            saved = True
        if saved:
            return None

        if _args.json:
            logger.info("Data is converted to json format and printing is started")
            print(converter.to_json_format(str_len=_args.length))
        else:
            logger.info("Data is converted to console format and printing is started")
            print(converter.to_console_format(str_len=_args.length, col_en=_args.colorize))
        logger.info("Printing is finished")
        return None
exeptions.DirExistsError as err: + print(err) + return None + except exeptions.DirError as err: + print(err) + return None + logger.info("Saving data in fb2 format in file is finished") + else: + if to_json: + logger.info("Data is converted to json format and printing is started") + print(converter.to_json_format(str_len=len_each_line)) + else: + logger.info("Data is converted to console format and printing is started") + print(converter.to_console_format(str_len=len_each_line, col_en=colorize)) + logger.info("Printing is finished") + return None + + +if __name__ == "__main__": + Reader.exec_console_args() diff --git a/app/support_files/templates/fb2/binary b/app/support_files/templates/fb2/binary new file mode 100644 index 0000000..6620e68 --- /dev/null +++ b/app/support_files/templates/fb2/binary @@ -0,0 +1,3 @@ + +{img_content} + \ No newline at end of file diff --git a/app/support_files/templates/fb2/feed b/app/support_files/templates/fb2/feed new file mode 100644 index 0000000..6475567 --- /dev/null +++ b/app/support_files/templates/fb2/feed @@ -0,0 +1,11 @@ + +<p> +{feed_title} +</p> + +

+Link: {feed_link} +

+
+{items} +
\ No newline at end of file diff --git a/app/support_files/templates/fb2/image b/app/support_files/templates/fb2/image new file mode 100644 index 0000000..9c6cdd9 --- /dev/null +++ b/app/support_files/templates/fb2/image @@ -0,0 +1 @@ +

\ No newline at end of file diff --git a/app/support_files/templates/fb2/item b/app/support_files/templates/fb2/item new file mode 100644 index 0000000..50391c3 --- /dev/null +++ b/app/support_files/templates/fb2/item @@ -0,0 +1,20 @@ + +

+{item_title} +

+
+

+Link: {item_link} +

+

+Author: {item_author} +

+

+Published: {item_published} +

+ +

+{item_description} +

+ +{item_images} \ No newline at end of file diff --git a/app/support_files/templates/fb2/main b/app/support_files/templates/fb2/main new file mode 100644 index 0000000..2fc2fdf --- /dev/null +++ b/app/support_files/templates/fb2/main @@ -0,0 +1,23 @@ + + + + + feeds + ZvigerRogert + Feeds + + + ZvigerRogert + calibre 4.4.0 + {date} + + + + + +
+ {feeds} +
+ +{img_contents} +
\ No newline at end of file diff --git a/app/support_files/templates/html/feed b/app/support_files/templates/html/feed new file mode 100644 index 0000000..0d9ea5a --- /dev/null +++ b/app/support_files/templates/html/feed @@ -0,0 +1,6 @@ +
+{feed_title} +
+{items} +
+
\ No newline at end of file diff --git a/app/support_files/templates/html/image b/app/support_files/templates/html/image new file mode 100644 index 0000000..b6ce13d --- /dev/null +++ b/app/support_files/templates/html/image @@ -0,0 +1 @@ +Responsive image \ No newline at end of file diff --git a/app/support_files/templates/html/item b/app/support_files/templates/html/item new file mode 100644 index 0000000..58eeb24 --- /dev/null +++ b/app/support_files/templates/html/item @@ -0,0 +1,11 @@ +
+{item_title} +
+
+
Author: {item_author}
+
Published: {item_published}
+
+

{item_description}

+{item_images} +
+
\ No newline at end of file diff --git a/app/support_files/templates/html/main b/app/support_files/templates/html/main new file mode 100644 index 0000000..ee4fcb8 --- /dev/null +++ b/app/support_files/templates/html/main @@ -0,0 +1,14 @@ + + + + +{title} + + + +
+{feeds} +
+ + \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..61356b9 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,6 @@ +version: '3.6' +services: + mongo: + image: mongo + ports: + - "27017:27017" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..68ccbd6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +colored==1.4.1 +urllib3==1.25.7 +feedparser==5.2.1 +beautifulsoup4==4.8.1 +pymongo==3.9.0 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c26f628 --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +import os + +import setuptools + +import app +from app.support_files.config import APP_NAME + +with open("requirements.txt") as fp: + install_requires = fp.read() + +setuptools.setup( + name=APP_NAME, + version=app.__version__, + author="Budzich Maxim", + author_email="131119999@gmail.com", + long_description=open(os.path.join(os.path.dirname(__file__), "README.md")).read(), + url="https://github.com/Zviger/PythonHomework/tree/final_proj", + packages=setuptools.find_packages(), + python_requires=">=3.8", + install_requires=install_requires, + include_package_data=True, + entry_points={ + "console_scripts": [f"{APP_NAME}=app.core:main"], + } +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_args_parser.py b/tests/test_args_parser.py new file mode 100644 index 0000000..045140d --- /dev/null +++ b/tests/test_args_parser.py @@ -0,0 +1,69 @@ +import unittest +from unittest import TestCase +from unittest.mock import patch, Mock +import time + +from app.support_files.rss_parser import Parser +from app.support_files.dtos import Item, Feed + + +class TestArgsParser(TestCase): + + def setUp(self): + self.parser = Parser("rss_link") + + def test_parsing_method(self): + with patch("feedparser.parse") as parse_mock: + def mock_get(key, 
default=None): + mock_dict = {"feed": {"title": "feed_title", + "link": "feed_link"}, + "entries": + [{"title": "item_title", + "link": "item_link", + "author": "item_author", + "published_parsed": time.struct_time((2019, 11, 30, 13, 45, 0, 5, 334, 0)), + "description": "item_description", + "media_content": []}]} + + return mock_dict.get(key, default) + + attrs = {"bozo": 0, + "status": 200, + "get": mock_get} + parse_object_mock = Mock() + parse_object_mock.configure_mock(**attrs) + parse_mock.return_value = parse_object_mock + test_feed = Feed(rss_link="rss_link", + title="feed_title", + link="feed_link", + items=[Item(title="item_title", + link="item_link", + author="item_author", + published_parsed=time.struct_time((2019, 11, 30, 13, 45, 0, 5, 334, 0)), + description="item_description", + img_links=[])]) + self.assertEqual(test_feed, self.parser.parse_feed()) + + def test_parsing_method_exception_1(self): + with patch("feedparser.parse") as parse_mock: + attrs = {"bozo": 1, + "status": 200} + parse_object_mock = Mock() + parse_object_mock.configure_mock(**attrs) + parse_mock.return_value = parse_object_mock + with self.assertRaises(ConnectionError): + self.parser.parse_feed() + + def test_parsing_method_exception_2(self): + with patch("feedparser.parse") as parse_mock: + attrs = {"bozo": 0, + "status": 404} + parse_object_mock = Mock() + parse_object_mock.configure_mock(**attrs) + parse_mock.return_value = parse_object_mock + with self.assertRaises(ConnectionError): + self.parser.parse_feed() + + +if __name__ == "__main__": + unittest.main()