From e41834a8d1ac5fcd0d1681782d81154de42c290b Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Sun, 10 Nov 2019 18:48:59 +0300 Subject: [PATCH 01/15] output news in readable format --- rss_reader.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 rss_reader.py diff --git a/rss_reader.py b/rss_reader.py new file mode 100644 index 0000000..9504383 --- /dev/null +++ b/rss_reader.py @@ -0,0 +1,97 @@ +import feedparser +from bs4 import BeautifulSoup + + +class RssReader(): + + def __init__(self, url, limit=None): + self.url = url + self.feeds = feedparser.parse(url) + self.tags = ['title', 'published', 'pubDate', 'link', 'date'] + self.list_of_news = [] + self.limit = limit + + + def print_date(self, news): + """print date""" + if news.get('published') != 'Unknown': + print('Date:', news['published']) + elif news.get('pubDate') != 'Unknown': + print('Date:', news['pubDate']) + elif news.get('date') != 'Unknown': + print('Date:', news['date']) + else print('Date: unknown') + + + def print_news(self): + """print news""" + self.make_list_of_news() + print('Feed:', self.feeds.feed.get('title'), "\n\n") + + for news in self.list_of_news: + print('Title:', news['title']) + self.print_date(news) + print('Link:', news['link'], '\n') + + if news.get('text'): + print(news['text'], '\n') + + if news.get('images'): + print('Images:') + for link in news['images']: + print(link) + print() + + if news.get('links'): + print('Links:') + for link in news['links']: + print(link) + print() + + print('-' * 50) + + + def make_list_of_news(self): + """Make a list of news + + type of news: dict + """ + + if self.limit == None or self.limit > len(self.feeds): + self.limit = len(self.feeds) + + for news in self.feeds['entries'][:self.limit]: + one_news = {} + for tag in self.tags: + if tag in news: + one_news[tag] = news[tag] + else: + one_news[tag] = 'Unknown' + one_news.update(self.read_description(news)) + self.list_of_news.append(one_news) + + + def read_description(self, news)->dict: + """Return dict with keys 'text', 'images', 'links' + + 'text' value is description(str) + 'images' value is a list of images sources + 'links' value is a list of urls + + """ + soup = BeautifulSoup(news.description, features="html.parser") + + list_of_images = [] + images = soup.findAll('img') + for image in images: + if image.get('src'): + list_of_images.append(image['src']) + + list_of_links = [] + for tag in soup.findAll(): + if tag.get('href'): + list_of_links.append(tag['href']) + if tag.get('url'): + list_of_links.append(tag['url']) + + return {'text': soup.text,'images': list_of_images, 'links': list_of_links} From 4a1e887cc2460e57f1b6e402a83a23eb87d54552 Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Wed, 13 Nov 2019 22:14:07 +0300 Subject: [PATCH 02/15] Add ability to convert into JSON and work with command line --- README.md | 44 +++++++++++++ news.py | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++ rss_reader.py | 128 ++++++++++++++----------------------- 3 files changed, 260 insertions(+), 82 deletions(-) create mode 100644 README.md create mode 100644 news.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..fdc057e --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# RSS reader + +RSS reader is a command-line utility which receives RSS URL and prints results in human-readable format. 
+ +## Specification + +usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT] + source + +Pure Python command-line RSS reader. + +positional arguments: + source RSS URL + +optional arguments: + -h, --help show this help message and exit + --version Print version info + --json Print result as JSON in stdout + --verbose Outputs verbose status messages + --limit LIMIT Limit news topics if this parameter provided + +## JSON structure + +{ + "news": { + "feed": "Yahoo News - Latest News & Headlines", + "items": [ + { + "title": "Ukrainian energy company tied to Hunter Biden supported American think tank, paid for trips", + "link": "https://news.yahoo.com/ukrainian-energy-company-tied-to-hunter-biden-supported-american-think-tank-paid-for-trips-015132322.html", + "date": "Tue, 12 Nov 2019 20:51:32 -0500", + "description": { + "text": "Burisma gave more than $450,000 to the Atlantic Council, a prominent Washington think tank.", + "images": [ + "http://l1.yimg.com/uu/api/res/1.2/2Q92DOIaZFmDeg0l9DbhAg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-images/2019-11/42dec8d0-05a9-11ea-adcf-9417cbbb4d35" + ], + "links": [ + "https://news.yahoo.com/ukrainian-energy-company-tied-to-hunter-biden-supported-american-think-tank-paid-for-trips-015132322.html" + ] + } + } + ] + } +} diff --git a/news.py b/news.py new file mode 100644 index 0000000..5d257e8 --- /dev/null +++ b/news.py @@ -0,0 +1,170 @@ +"""Module contains class related to news""" + +import json +import logging +import sys + +import feedparser +from bs4 import BeautifulSoup + + +class RssReader(): + """This class parse, process and output news.""" + + def __init__(self, url: str, limit=None): + logging.info('Initialization') + + self.url = url + self.feeds = feedparser.parse(url) + self._check_url() + + self.feed_title = self.feeds.feed.get('title') + self.list_of_news = [] + self.limit = limit + + self._check_limit() + self.make_list_of_news() + + def _check_url(self): + """Check if the url is valid.""" + + logging.info('Check URL') + if self.feeds['bozo'] or self.feeds.status != 200: + logging.error('Something wrong with URL or Internet connection') + sys.exit(1) + + def _check_limit(self): + """Check if the limit >= 0.""" + + logging.info('Check limit') + if self.limit is not None and self.limit < 0: + logging.error('Limit < 0') + sys.exit(1) + + def print_news(self): + """Print news in human-readable format.""" + + logging.info('Print news') + + print('Feed:', self.feed_title, "\n\n") + + news_number = 1 + for news in self.list_of_news: + print('№', news_number) + news_number += 1 + print('Title:', news['title']) + print('Date:', news['date']) + print('Link:', news['link'], '\n') + + if news['description']['text']: + print(news['description']['text'], '\n') + + if news['description']['images']: + print('Images:') + for item in news['description']['images']: + print(item) + + if news['description']['links']: + print('Links:') + for item in news['description']['links']: + print(item) + + print('-' * 50) + + def _find_date_tag(self, news: dict) -> str: + """ + Find date tag and return its value, + or return 'Unknown' if tag not found. + """ + + logging.info('Find date tag') + + if news.get('published'): + return news['published'] + elif news.get('pubDate'): + return news['pubDate'] + elif news.get('Date:'): + return news['Date'] + else: + return 'Unknown' + + def make_list_of_news(self): + """Make a list of news. 
+ + type of news: dict + """ + + logging.info('Make a list of news') + + if self.limit is None or self.limit > len(self.feeds): + self.limit = len(self.feeds) + + + for news in self.feeds['entries'][:self.limit]: + one_news = {} + + if news.get('title'): + one_news['title'] = news['title'] + else: + one_news['title'] = 'Unknown' + + if news.get('link'): + one_news['link'] = news['link'] + else: + one_news['link'] = 'Unknown' + + one_news['date'] = self._find_date_tag(news) + one_news.update(self._read_description(news)) + self.list_of_news.append(one_news) + + def _read_description(self, news: dict) -> dict: + """Return dict with keys 'text', 'images', 'links'. + + 'text' value is description(str) + 'images' value is a list of images sources + 'links' value is a list of urls + """ + + logging.info('Get information from description') + soup = BeautifulSoup(news.description, features="html.parser") + + logging.info('Get text of description') + text = soup.text + if not text: + text = 'Nothing' + + logging.info('Get list of images') + list_of_images = [] + images = soup.findAll('img') + for image in images: + if image.get('src'): + list_of_images.append(image['src']) + + if not list_of_images: + list_of_images = None + + logging.info('Get list of links') + list_of_links = [] + for tag in soup.findAll(): + if tag.get('href'): + list_of_links.append(tag['href']) + if tag.get('url'): + list_of_links.append(tag['url']) + + if not list_of_links: + list_of_links = None + + return {'description': {'text': text, 'images': list_of_images, + 'links': list_of_links}} + + def convert_to_json(self): + """Return news in JSON format.""" + + logging.info('Convert news into JSON format') + try: + result = json.dumps({'news': {'feed': self.feed_title, 'items': self.list_of_news}}, + indent=4, ensure_ascii=False) + except Exception as e: + logging.error("Can't convert to JSON:", e) + + return result diff --git a/rss_reader.py b/rss_reader.py index 9504383..cd1ebb3 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -1,97 +1,61 @@ -import feedparser -from bs4 import BeautifulSoup +"""Module provides user interface""" +import argparse +import logging +import sys -class RssReader(): +from news import RssReader - def __init__(self, url, limit=None): - self.url = url - self.feeds = feedparser.parse(url) - self.tags = ['title', 'published', 'pubDate', 'link', 'date'] - self.list_of_news = [] - self.limit = limit +VERSION = "1.0" - def print_date(self, news): - """print date""" - if news.get('published') != 'Unknown': - print('Date:', news['published']) - elif news.get('pubDate') != 'Unknown': - print('Date:', news['pubDate']) - elif news.get('date') != 'Unknown': - print('Date:', news['date']) - else print('Date: unknown') +def add_args(parser): + """Add arguments and return new parser.""" + parser.add_argument('source', help='RSS URL', type=str) + parser.add_argument('--version', help='Print version info', action='version') + parser.add_argument('--json', help='Print result as JSON in stdout', action="store_true") + parser.add_argument('--verbose', help='Outputs verbose status messages', action="store_true") + parser.add_argument('--limit', help='Limit news topics if this parameter provided', type=int) + return parser - def print_news(self): - """print news""" - self.make_list_of_news() - print('Feed:', self.feeds.feed.get('title'), "\n\n") +def start_parsing(url: str, limit: int, json_mode: bool): + """This function create rss feed and print news. 
- for news in self.list_of_news: - print('Title:', news['title']) - self.print_date(news) - print('Link:', news['link'], '\n') + Arguments: + url - RSS URL + limit - news amount that will be printed + json_mode - if true then news will be printed in JSON format + """ - if news.get('text'): - print(news['text'], '\n') + logging.info('Create feed') + feed = RssReader(url, limit) - if news.get('images'): - print('Images:') - for link in news['images']: - print(link) - print() + if json_mode: + print(feed.convert_to_json()) + else: + feed.print_news() - if news.get('links'): - print('Links:') - for link in news['links']: - print(link) - print() +def main(): + """This function works with arguments, start parsing.""" - print('-' * 50) + parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader') + parser = add_args(parser) + parser.version = VERSION + args = parser.parse_args() + if args.verbose: + logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s') + try: + start_parsing(args.source, args.limit, args.json) + except Exception as e: + logging.ERROR("Something wrong with parsing: {e}") - def make_list_of_news(self): - """Make a list of news + logging.info('Program is completed') - type of news: dict - """ - - if self.limit == None or self.limit > len(self.feeds): - self.limit = len(self.feeds) - - for news in self.feeds['entries'][:self.limit]: - one_news = {} - for tag in self.tags: - if tag in news: - one_news[tag] = news[tag] - else: - one_news[tag] = 'Unknown' - one_news.update(self.read_description(news)) - self.list_of_news.append(one_news) - - - def read_description(self, news)->dict: - """Return dict with keys 'text', 'images', 'links' - - 'text' value is description(str) - 'images' value is a list of images sources - 'links' value is a list of urls - - """ - soup = BeautifulSoup(news.description, features="html.parser") - - list_of_images = [] - images = soup.findAll('img') - for image in images: - if image.get('src'): - list_of_images.append(image['src']) - - list_of_links = [] - for tag in soup.findAll(): - if tag.get('href'): - list_of_links.append(tag['href']) - if tag.get('url'): - list_of_links.append(tag['url']) - - return {'text': soup.text,'images': list_of_images, 'links': list_of_links} +if __name__ == '__main__': + try: + main() + except Exception as e: + print('Something went wrong: ', e) + sys.exit(1) From 2fb6715276bd557b41f5714a4b0b0dbb8f3445eb Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Wed, 13 Nov 2019 22:14:07 +0300 Subject: [PATCH 03/15] Add ability to convert into JSON and work with command line --- README.md | 46 ++++++++++++++ news.py | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++ rss_reader.py | 128 ++++++++++++++----------------------- 3 files changed, 262 insertions(+), 82 deletions(-) create mode 100644 README.md create mode 100644 news.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..a1a0b28 --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +# RSS reader + +RSS reader is a command-line utility which receives RSS URL and prints results in human-readable format. + +## Specification +
+usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT]
+                     source
+
+Pure Python command-line RSS reader.
+
+positional arguments:
+  source         RSS URL
+
+optional arguments:
+  -h, --help     show this help message and exit
+  --version      Print version info
+  --json         Print result as JSON in stdout
+  --verbose      Outputs verbose status messages
+  --limit LIMIT  Limit news topics if this parameter provided
+
+ +## JSON structure +
+{
+    "news": {
+        "feed": "Yahoo News - Latest News & Headlines",
+        "items": [
+            {
+                "title": "Ukrainian energy company tied to Hunter Biden supported American think tank, paid for trips",
+                "link": "https://news.yahoo.com/ukrainian-energy-company-tied-to-hunter-biden-supported-american-think-tank-paid-for-trips-015132322.html",
+                "date": "Tue, 12 Nov 2019 20:51:32 -0500",
+                "description": {
+                    "text": "Burisma gave more than $450,000 to the Atlantic Council, a prominent Washington think tank.",
+                    "images": [
+                        "http://l1.yimg.com/uu/api/res/1.2/2Q92DOIaZFmDeg0l9DbhAg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-images/2019-11/42dec8d0-05a9-11ea-adcf-9417cbbb4d35"
+                    ],
+                    "links": [
+                        "https://news.yahoo.com/ukrainian-energy-company-tied-to-hunter-biden-supported-american-think-tank-paid-for-trips-015132322.html"
+                    ]
+                }
+            }
+        ]
+    }
+}
+
diff --git a/news.py b/news.py new file mode 100644 index 0000000..5d257e8 --- /dev/null +++ b/news.py @@ -0,0 +1,170 @@ +"""Module contains class related to news""" + +import json +import logging +import sys + +import feedparser +from bs4 import BeautifulSoup + + +class RssReader(): + """This class parse, process and output news.""" + + def __init__(self, url: str, limit=None): + logging.info('Initialization') + + self.url = url + self.feeds = feedparser.parse(url) + self._check_url() + + self.feed_title = self.feeds.feed.get('title') + self.list_of_news = [] + self.limit = limit + + self._check_limit() + self.make_list_of_news() + + def _check_url(self): + """Check if the url is valid.""" + + logging.info('Check URL') + if self.feeds['bozo'] or self.feeds.status != 200: + logging.error('Something wrong with URL or Internet connection') + sys.exit(1) + + def _check_limit(self): + """Check if the limit >= 0.""" + + logging.info('Check limit') + if self.limit is not None and self.limit < 0: + logging.error('Limit < 0') + sys.exit(1) + + def print_news(self): + """Print news in human-readable format.""" + + logging.info('Print news') + + print('Feed:', self.feed_title, "\n\n") + + news_number = 1 + for news in self.list_of_news: + print('№', news_number) + news_number += 1 + print('Title:', news['title']) + print('Date:', news['date']) + print('Link:', news['link'], '\n') + + if news['description']['text']: + print(news['description']['text'], '\n') + + if news['description']['images']: + print('Images:') + for item in news['description']['images']: + print(item) + + if news['description']['links']: + print('Links:') + for item in news['description']['links']: + print(item) + + print('-' * 50) + + def _find_date_tag(self, news: dict) -> str: + """ + Find date tag and return its value, + or return 'Unknown' if tag not found. + """ + + logging.info('Find date tag') + + if news.get('published'): + return news['published'] + elif news.get('pubDate'): + return news['pubDate'] + elif news.get('Date:'): + return news['Date'] + else: + return 'Unknown' + + def make_list_of_news(self): + """Make a list of news. + + type of news: dict + """ + + logging.info('Make a list of news') + + if self.limit is None or self.limit > len(self.feeds): + self.limit = len(self.feeds) + + + for news in self.feeds['entries'][:self.limit]: + one_news = {} + + if news.get('title'): + one_news['title'] = news['title'] + else: + one_news['title'] = 'Unknown' + + if news.get('link'): + one_news['link'] = news['link'] + else: + one_news['link'] = 'Unknown' + + one_news['date'] = self._find_date_tag(news) + one_news.update(self._read_description(news)) + self.list_of_news.append(one_news) + + def _read_description(self, news: dict) -> dict: + """Return dict with keys 'text', 'images', 'links'. 
+ + 'text' value is description(str) + 'images' value is a list of images sources + 'links' value is a list of urls + """ + + logging.info('Get information from description') + soup = BeautifulSoup(news.description, features="html.parser") + + logging.info('Get text of description') + text = soup.text + if not text: + text = 'Nothing' + + logging.info('Get list of images') + list_of_images = [] + images = soup.findAll('img') + for image in images: + if image.get('src'): + list_of_images.append(image['src']) + + if not list_of_images: + list_of_images = None + + logging.info('Get list of links') + list_of_links = [] + for tag in soup.findAll(): + if tag.get('href'): + list_of_links.append(tag['href']) + if tag.get('url'): + list_of_links.append(tag['url']) + + if not list_of_links: + list_of_links = None + + return {'description': {'text': text, 'images': list_of_images, + 'links': list_of_links}} + + def convert_to_json(self): + """Return news in JSON format.""" + + logging.info('Convert news into JSON format') + try: + result = json.dumps({'news': {'feed': self.feed_title, 'items': self.list_of_news}}, + indent=4, ensure_ascii=False) + except Exception as e: + logging.error("Can't convert to JSON:", e) + + return result diff --git a/rss_reader.py b/rss_reader.py index 9504383..76564bc 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -1,97 +1,61 @@ -import feedparser -from bs4 import BeautifulSoup +"""Module provides work with command line""" +import argparse +import logging +import sys -class RssReader(): +from news import RssReader - def __init__(self, url, limit=None): - self.url = url - self.feeds = feedparser.parse(url) - self.tags = ['title', 'published', 'pubDate', 'link', 'date'] - self.list_of_news = [] - self.limit = limit +VERSION = "1.0" - def print_date(self, news): - """print date""" - if news.get('published') != 'Unknown': - print('Date:', news['published']) - elif news.get('pubDate') != 'Unknown': - print('Date:', news['pubDate']) - elif news.get('date') != 'Unknown': - print('Date:', news['date']) - else print('Date: unknown') +def add_args(parser) -> parser: + """Add arguments and return new parser.""" + parser.add_argument('source', help='RSS URL', type=str) + parser.add_argument('--version', help='Print version info', action='version') + parser.add_argument('--json', help='Print result as JSON in stdout', action="store_true") + parser.add_argument('--verbose', help='Outputs verbose status messages', action="store_true") + parser.add_argument('--limit', help='Limit news topics if this parameter provided', type=int) + return parser - def print_news(self): - """print news""" - self.make_list_of_news() - print('Feed:', self.feeds.feed.get('title'), "\n\n") +def start_parsing(url: str, limit: int, json_mode: bool): + """This function create rss feed and print news. 
- for news in self.list_of_news: - print('Title:', news['title']) - self.print_date(news) - print('Link:', news['link'], '\n') + Arguments: + url - RSS URL + limit - news amount that will be printed + json_mode - if true then news will be printed in JSON format + """ - if news.get('text'): - print(news['text'], '\n') + logging.info('Create feed') + feed = RssReader(url, limit) - if news.get('images'): - print('Images:') - for link in news['images']: - print(link) - print() + if json_mode: + print(feed.convert_to_json()) + else: + feed.print_news() - if news.get('links'): - print('Links:') - for link in news['links']: - print(link) - print() +def main(): + """This function works with arguments, starts parsing.""" - print('-' * 50) + parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader') + parser = add_args(parser) + parser.version = VERSION + args = parser.parse_args() + if args.verbose: + logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s') + try: + start_parsing(args.source, args.limit, args.json) + except Exception as e: + logging.ERROR("Something wrong with parsing: {e}") - def make_list_of_news(self): - """Make a list of news + logging.info('Program is completed') - type of news: dict - """ - - if self.limit == None or self.limit > len(self.feeds): - self.limit = len(self.feeds) - - for news in self.feeds['entries'][:self.limit]: - one_news = {} - for tag in self.tags: - if tag in news: - one_news[tag] = news[tag] - else: - one_news[tag] = 'Unknown' - one_news.update(self.read_description(news)) - self.list_of_news.append(one_news) - - - def read_description(self, news)->dict: - """Return dict with keys 'text', 'images', 'links' - - 'text' value is description(str) - 'images' value is a list of images sources - 'links' value is a list of urls - - """ - soup = BeautifulSoup(news.description, features="html.parser") - - list_of_images = [] - images = soup.findAll('img') - for image in images: - if image.get('src'): - list_of_images.append(image['src']) - - list_of_links = [] - for tag in soup.findAll(): - if tag.get('href'): - list_of_links.append(tag['href']) - if tag.get('url'): - list_of_links.append(tag['url']) - - return {'text': soup.text,'images': list_of_images, 'links': list_of_links} +if __name__ == '__main__': + try: + main() + except Exception as e: + print('Something went wrong: ', e) + sys.exit(1) From dec51ec84ad3454e48f07d1da19bdc1d5ffdb386 Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Sun, 17 Nov 2019 17:41:05 +0300 Subject: [PATCH 04/15] add setup.py --- rss/__init__.py | 0 news.py => rss/news.py | 0 rss_reader.py => rss/rss_reader.py | 0 setup.py | 25 +++++++++++++++++++++++++ 4 files changed, 25 insertions(+) create mode 100644 rss/__init__.py rename news.py => rss/news.py (100%) rename rss_reader.py => rss/rss_reader.py (100%) create mode 100644 setup.py diff --git a/rss/__init__.py b/rss/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/news.py b/rss/news.py similarity index 100% rename from news.py rename to rss/news.py diff --git a/rss_reader.py b/rss/rss_reader.py similarity index 100% rename from rss_reader.py rename to rss/rss_reader.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..562a2dc --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +from setuptools import setup, find_packages +from os import path + + +with open('README.md', 'r') as f: + long_description = f.read() + +setup( + name='rss-reader', + version='2.0', + description='Pure Python command-line RSS 
reader', + long_description=long_description, + long_description_content_type="text/markdown", + url='https://github.com/yanaShcherbich/PythonHomework', + author='Yana Shcherbich', + author_email='vilikdf@gmail.com', + packages=find_packages(), + python_requires='>=3.8', + install_requires=['feedparser', 'bs4'], # Optional + + + entry_points={ # Optional + 'console_scripts': ['rss-reader=rss.rss_reader:main'], + } +) From c6ccd2ea6647d1a954749a4e0c51f17a901aa94f Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Tue, 26 Nov 2019 18:06:32 +0300 Subject: [PATCH 05/15] Add ability to work with cache --- README.md | 2 + rss/cache.py | 130 ++++++++++++++++++++++++++++++++++++++++++++++ rss/news.py | 122 +++++++++++++++++++++---------------------- rss/rss_reader.py | 63 +++++++++++++++------- setup.py | 7 ++- 5 files changed, 237 insertions(+), 87 deletions(-) create mode 100644 rss/cache.py diff --git a/README.md b/README.md index a1a0b28..dda0247 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ optional arguments: --json Print result as JSON in stdout --verbose Outputs verbose status messages --limit LIMIT Limit news topics if this parameter provided + --date DATE Take a date in %Y%m%d format. The news from the specified day + will be printed out. ## JSON structure diff --git a/rss/cache.py b/rss/cache.py new file mode 100644 index 0000000..0eb6b51 --- /dev/null +++ b/rss/cache.py @@ -0,0 +1,130 @@ +"""This module provides work with cashed news.""" + +import logging +import shelve +import datetime +import sys +import json + +import dateparser + + +class Cache: + """This class creates cache file, updates it and prints cached news.""" + + def __init__(self): + logging.info("Cache initialization") + self.db_file_name = 'cache.txt' + + def _create_key(self, date: str, url: str) -> str: + """Create key for db""" + + logging.info('Create key') + return date + url + + def _convert_date(self, date: str) -> str: + """Convert date to %Y%m%d format.""" + + logging.info('Convert date') + converted_date = dateparser.parse(date) + return converted_date.strftime('%Y%m%d') + + def insert_news(self, news, url: str): + """Insert news into cache file. + Create cache file if it doesn't exist. 
+ """ + + date = news['date'] + key = self._create_key(self._convert_date(date), url) + logging.info("Open db or create if it doesn't exist for inserting news") + with shelve.open(self.db_file_name) as db: + if db.get(key): + logging.info("Update record") + record = db[key] + if not record.count(news): + record.append(news) + db[key] = record + else: + logging.info("Create new record") + record = [] + record.append(news) + db[key] = record + + def _check_entered_date(self, key: str): + """Check length and characters in entered string""" + + logging.info('Check entered date') + if len(key) != 8 or not key.isdigit(): + raise ValueError('Invalid entered date') + + def _get_news(self, key: str) -> list: + """Get news from db by key""" + + logging.info("Open db or create if it doesn't exist for getting news") + with shelve.open(self.db_file_name) as db: + try: + record = db[key] + return record + except KeyError: + raise Exception("Can't find the news") + + def set_printing_news(self, url: str, date: str, limit=None, json_mode=None): + """Set print format""" + + logging.info("Set print format") + + self._check_entered_date(date) + self._check_limit(limit) + + key = self._create_key(date, url) + db = self._get_news(key) + + if json_mode: + print(json.dumps(db[:limit], indent=4, ensure_ascii=False)) + else: + self.print_news(db, limit) + + def _check_limit(self, limit): + """Check if the limit > 0.""" + + logging.info('Check limit') + if limit is not None and limit <= 0: + raise ValueError('Invalid limit: limit <= 0') + + def print_news(self, list_of_news, limit): + """Print news""" + + logging.info('Start printing cached news') + news_number = 1 + #if self.list_of_news consists of 1 element + if type(list_of_news) == dict: + print('№', news_number) + self._print_entries(list_of_news) + else: + for news in list_of_news[:limit]: + print('№', news_number) + news_number += 1 + self._print_entries(news) + + def _print_entries(self, news: dict): + """Print one news.""" + + logging.info('Print one news') + print('Title:', news['title']) + print('Date:', news['date']) + print('Link:', news['link'], '\n') + + if news['description']['text'] != 'Nothing': + print(news['description']['text'], '\n') + + if news['description']['images']: + print('Images:') + for item in news['description']['images']: + print(item) + + if news['description']['links']: + print('Links:') + for item in news['description']['links']: + print(item) + + print('-' * 50) diff --git a/rss/news.py b/rss/news.py index eb27bdf..d42b868 100644 --- a/rss/news.py +++ b/rss/news.py @@ -1,28 +1,31 @@ -"""Module contains class related to news""" +"""Module contains class related to news.""" import json import logging import sys +import datetime import feedparser from bs4 import BeautifulSoup +from rss.cache import Cache -class RssReader(): - """This class parse, process and output news.""" + +class News: + """This class parses, processes and outputs news.""" def __init__(self, url: str, limit=None): - logging.info('Initialization') + logging.info('News initialization') self.url = url - self.feeds = feedparser.parse(url) + logging.info('Parsing url') + self.feeds = feedparser.parse(self.url) self._check_url() self.feed_title = self.feeds.feed.get('title') self.list_of_news = [] - self.limit = limit - self._check_limit() + self._check_limit(limit) self.make_list_of_news() def _check_url(self): @@ -30,51 +33,59 @@ def _check_url(self): logging.info('Check URL') if self.feeds['bozo'] or self.feeds.status != 200: - logging.error('Something wrong with 
URL or Internet connection') - sys.exit(1) + raise Exception('Something wrong with URL or Internet connection') - def _check_limit(self): - """Check if the limit >= 0.""" + def _check_limit(self, limit): + """Check if the limit > 0.""" logging.info('Check limit') - if self.limit is not None and self.limit < 0: - logging.error('Limit < 0') - sys.exit(1) + if limit is not None and limit <= 0: + raise ValueError('Invalid limit: limit <= 0') - def print_news(self): + def print_news(self, limit): """Print news in human-readable format.""" - logging.info('Print news') - - print('Feed:', self.feed_title, "\n\n") + logging.info("Start printing news") + print('\nFeed:', self.feed_title, "\n\n") news_number = 1 - for news in self.list_of_news: + #if self.list_of_news consists of 1 element + if type(list_of_news) == dict: print('№', news_number) - news_number += 1 - print('Title:', news['title']) - print('Date:', news['date']) - print('Link:', news['link'], '\n') + self._print_entries(list_of_news) + else: + for news in list_of_news[:limit]: + print('№', news_number) + news_number += 1 + self._print_entries(news) - if news['description']['text']: - print(news['description']['text'], '\n') + def _print_entries(self, news: dict): + """Print one news.""" - if news['description']['images']: - print('Images:') - for item in news['description']['images']: - print(item) + logging.info('Print one news') + print('Title:', news['title']) + print('Date:', news['date']) + print('Link:', news['link'], '\n') - if news['description']['links']: - print('Links:') - for item in news['description']['links']: - print(item) + if news['description']['text'] != 'Nothing': + print(news['description']['text'], '\n') - print('-' * 50) + if news['description']['images']: + print('Images:') + for item in news['description']['images']: + print(item) + + if news['description']['links']: + print('Links:') + for item in news['description']['links']: + print(item) + + print('-' * 50) def _find_date_tag(self, news: dict) -> str: """ Find date tag and return its value, - or return 'Unknown' if tag not found. + or return the current local date if tag not found. """ logging.info('Find date tag') @@ -86,7 +97,8 @@ def _find_date_tag(self, news: dict) -> str: elif news.get('Date:'): return news['Date'] else: - return 'Unknown' + date = datetime.today() + return date.isoformat() def make_list_of_news(self): """Make a list of news. @@ -96,26 +108,15 @@ def make_list_of_news(self): logging.info('Make a list of news') - if self.limit is None or self.limit > len(self.feeds): - self.limit = len(self.feeds) - - - for news in self.feeds['entries'][:self.limit]: - one_news = {} - - if news.get('title'): - one_news['title'] = news['title'] - else: - one_news['title'] = 'Unknown' - - if news.get('link'): - one_news['link'] = news['link'] - else: - one_news['link'] = 'Unknown' - - one_news['date'] = self._find_date_tag(news) + cache = Cache() + for news in self.feeds['entries']: + title = news.get('title', 'Unknown') + one_news = {'title': title.replace(''', "'"), + 'link': news.get('link', 'Unknown'), + 'date': self._find_date_tag(news)} one_news.update(self._read_description(news)) self.list_of_news.append(one_news) + cache.insert_news(one_news, self.url) def _read_description(self, news: dict) -> dict: """Return dict with keys 'text', 'images', 'links'. 
@@ -129,8 +130,7 @@ def _read_description(self, news: dict) -> dict: soup = BeautifulSoup(news.description, features="html.parser") logging.info('Get text of description') - text = soup.text - text.replace(''', "'") + text = soup.text.replace(''', "'") if not text: text = 'Nothing' @@ -158,14 +158,10 @@ def _read_description(self, news: dict) -> dict: return {'description': {'text': text, 'images': list_of_images, 'links': list_of_links}} - def convert_to_json(self): + def convert_to_json(self, limit=None): """Return news in JSON format.""" logging.info('Convert news into JSON format') - try: - result = json.dumps({'news': {'feed': self.feed_title, 'items': self.list_of_news}}, - indent=4, ensure_ascii=False) - except Exception as e: - logging.error("Can't convert to JSON:", e) - + result = json.dumps({'news': {'feed': self.feed_title, 'items': self.list_of_news[:limit]}}, + indent=4, ensure_ascii=False) return result diff --git a/rss/rss_reader.py b/rss/rss_reader.py index d0cece6..8561bab 100644 --- a/rss/rss_reader.py +++ b/rss/rss_reader.py @@ -1,40 +1,54 @@ -"""Module provides work with command line""" +"""Module provides work with command line.""" import argparse import logging import sys -from rss.news import RssReader +from rss.news import News +from rss.cache import Cache -VERSION = "1.0" +VERSION = "3.0" def add_args(parser): """Add arguments and return new parser.""" + logging.info('Add arguments') parser.add_argument('source', help='RSS URL', type=str) parser.add_argument('--version', help='Print version info', action='version') parser.add_argument('--json', help='Print result as JSON in stdout', action="store_true") parser.add_argument('--verbose', help='Outputs verbose status messages', action="store_true") parser.add_argument('--limit', help='Limit news topics if this parameter provided', type=int) + parser.add_argument('--date', help="""Take a date in %%Y%%m%%d format. + The news from the specified day will be printed out.""", type=str) return parser + def start_parsing(url: str, limit: int, json_mode: bool): """This function create rss feed and print news. 
- Arguments: - url - RSS URL - limit - news amount that will be printed - json_mode - if true then news will be printed in JSON format + :param url: RSS URL + :param limit: news amount that will be printed + :param json_mode: if true then news will be printed in JSON format """ logging.info('Create feed') - feed = RssReader(url, limit) - + feed = News(url, limit) if json_mode: - print(feed.convert_to_json()) + print(feed.convert_to_json(limit)) else: - feed.print_news() + feed.print_news(limit) + + +def set_verbose_mode(verbose_mode: bool): + """Set logging level and format""" + + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + if verbose_mode: + logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s') + logging.info('Set verbose mode') + def main(): """This function works with arguments, starts parsing.""" @@ -43,19 +57,28 @@ def main(): parser = add_args(parser) parser.version = VERSION args = parser.parse_args() - if args.verbose: - logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s') - try: - start_parsing(args.source, args.limit, args.json) - except Exception as e: - logging.ERROR("Something wrong with parsing: {e}") + set_verbose_mode(args.verbose) + + if args.date: + try: + cache = Cache() + cache.set_printing_news(args.source, args.date, args.limit, args.json) + except Exception as e: + print('Errors with cache:', e) + else: + try: + start_parsing(args.source, args.limit, args.json) + except Exception as e: + print('Errors with parsing:', e) logging.info('Program is completed') -if __name__ == '__main__': + +def run(): + """Entry point""" + try: main() except Exception as e: - print('Something went wrong: ', e) - sys.exit(1) + print('There are some errors: ', e) diff --git a/setup.py b/setup.py index 0666c9a..4060436 100644 --- a/setup.py +++ b/setup.py @@ -5,17 +5,16 @@ setup( name='rss-reader', - version='2.0', + version='3.0', description='Pure Python command-line RSS reader', long_description=long_description, - long_description_content_type="text/markdown", url='https://github.com/yanaShcherbich/PythonHomework', author='Yana Shcherbich', author_email='vilikdf@gmail.com', packages=find_packages(), python_requires='>=3.8', - install_requires=['feedparser', 'bs4'], + install_requires=['feedparser', 'bs4', 'dateparser'], entry_points={ - 'console_scripts': ['rss-reader=rss.rss_reader:main'], + 'console_scripts': ['rss-reader=rss.rss_reader:run'], } ) From 769c461a4a5e068e93bb813e2663456ce55c862c Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Tue, 26 Nov 2019 23:29:58 +0300 Subject: [PATCH 06/15] fix news module --- rss/news.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rss/news.py b/rss/news.py index d42b868..7b9e75a 100644 --- a/rss/news.py +++ b/rss/news.py @@ -50,11 +50,11 @@ def print_news(self, limit): news_number = 1 #if self.list_of_news consists of 1 element - if type(list_of_news) == dict: + if type(self.list_of_news) == dict: print('№', news_number) - self._print_entries(list_of_news) + self._print_entries(self.list_of_news) else: - for news in list_of_news[:limit]: + for news in self.list_of_news[:limit]: print('№', news_number) news_number += 1 self._print_entries(news) From 51cd201349acf9fa0f24fdb75b705a7c92fce80a Mon Sep 17 00:00:00 2001 From: Yana Shcherbich Date: Wed, 27 Nov 2019 02:16:48 +0300 Subject: [PATCH 07/15] Add description of news caching in README.md --- README.md | 3 +++ rss/cache.py | 2 +- rss/news.py | 2 +- 3 files changed, 5 
insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dda0247..7d9e08b 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,9 @@ optional arguments: will be printed out. +## News caching +The RSS news are stored in a local storage while reading. Local storage is implemented using shelve. The cached news are stored by key which consists of RSS URL and date. + ## JSON structure
 {
diff --git a/rss/cache.py b/rss/cache.py
index 0eb6b51..6f22a12 100644
--- a/rss/cache.py
+++ b/rss/cache.py
@@ -96,7 +96,7 @@ def print_news(self, list_of_news, limit):
 
         logging.info('Start printing cached news')
         news_number = 1
-        #if self.list_of_news consists of 1 element
+        # if self.list_of_news consists of 1 element
         if type(list_of_news) == dict:
             print('№', news_number)
             self._print_entries(list_of_news)
diff --git a/rss/news.py b/rss/news.py
index 7b9e75a..84a635f 100644
--- a/rss/news.py
+++ b/rss/news.py
@@ -49,7 +49,7 @@ def print_news(self, limit):
         print('\nFeed:', self.feed_title, "\n\n")
 
         news_number = 1
-        #if self.list_of_news consists of 1 element
+        # if self.list_of_news consists of 1 element
         if type(self.list_of_news) == dict:
             print('№', news_number)
             self._print_entries(self.list_of_news)

From 2b144289bddafbaf1cb015671c80059420a5d431 Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Wed, 27 Nov 2019 02:16:48 +0300
Subject: [PATCH 08/15] Add description of news caching in README.md

---
 README.md    | 3 +++
 rss/cache.py | 4 ++--
 rss/news.py  | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index dda0247..29e2576 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,9 @@ optional arguments:
                  will be printed out.
 
+## News caching +The RSS news are stored in a local storage while reading. Local storage is implemented using shelve. The cached news are stored by key which consists of date and RSS URL. The cashed news can be read with optional argument --date. + ## JSON structure
 {
diff --git a/rss/cache.py b/rss/cache.py
index 0eb6b51..e47251b 100644
--- a/rss/cache.py
+++ b/rss/cache.py
@@ -14,7 +14,7 @@ class Cache:
 
     def __init__(self):
         logging.info("Cache initialization")
-        self.db_file_name = 'cache.txt'
+        self.db_file_name = 'cache.db'
 
     def _create_key(self, date: str, url: str) -> str:
         """Create key for db"""
@@ -96,7 +96,7 @@ def print_news(self, list_of_news, limit):
 
         logging.info('Start printing cached news')
         news_number = 1
-        #if self.list_of_news consists of 1 element
+        # if self.list_of_news consists of 1 element
         if type(list_of_news) == dict:
             print('№', news_number)
             self._print_entries(list_of_news)
diff --git a/rss/news.py b/rss/news.py
index 7b9e75a..84a635f 100644
--- a/rss/news.py
+++ b/rss/news.py
@@ -49,7 +49,7 @@ def print_news(self, limit):
         print('\nFeed:', self.feed_title, "\n\n")
 
         news_number = 1
-        #if self.list_of_news consists of 1 element
+        # if self.list_of_news consists of 1 element
         if type(self.list_of_news) == dict:
             print('№', news_number)
             self._print_entries(self.list_of_news)

From aa97ba4c54a914a9f2e10160f684c3dd10d8b6b6 Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Wed, 27 Nov 2019 14:14:54 +0300
Subject: [PATCH 09/15] Fix date and links

---
 rss/news.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/rss/news.py b/rss/news.py
index 84a635f..f0a3897 100644
--- a/rss/news.py
+++ b/rss/news.py
@@ -97,7 +97,7 @@ def _find_date_tag(self, news: dict) -> str:
         elif news.get('Date:'):
             return news['Date']
         else:
-            date = datetime.today()
+            date = datetime.datetime.now()
             return date.isoformat()
 
     def make_list_of_news(self):
@@ -134,29 +134,29 @@ def _read_description(self, news: dict) -> dict:
         if not text:
             text = 'Nothing'
 
-        logging.info('Get list of images')
-        list_of_images = []
+        logging.info('Get set of images')
+        set_of_images = set()
         images = soup.findAll('img')
         for image in images:
             if image.get('src'):
-                list_of_images.append(image['src'])
+                set_of_images.add(image['src'])
 
-        if not list_of_images:
-            list_of_images = None
+        if not set_of_images:
+            set_of_images = None
 
-        logging.info('Get list of links')
-        list_of_links = []
+        logging.info('Get set of links')
+        set_of_links = set()
         for tag in soup.findAll():
             if tag.get('href'):
-                list_of_links.append(tag['href'])
+                set_of_links.add(tag['href'])
             if tag.get('url'):
-                list_of_links.append(tag['url'])
+                set_of_links.add(tag['url'])
 
-        if not list_of_links:
-            list_of_links = None
+        if not set_of_links:
+            set_of_links = None
 
-        return {'description': {'text': text, 'images': list_of_images,
-                'links': list_of_links}}
+        return {'description': {'text': text, 'images': set_of_images,
+                'links': set_of_links}}
 
     def convert_to_json(self, limit=None):
         """Return news in JSON format."""

From 5781a3716b608ba9431950bfca32fd2b96f2882a Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Wed, 27 Nov 2019 22:05:37 +0300
Subject: [PATCH 10/15] Fix getting description

---
 README.md    | 19 ++++++++++---------
 rss/cache.py |  2 +-
 rss/news.py  | 33 +++++++++++++++++++--------------
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 29e2576..a39aa24 100644
--- a/README.md
+++ b/README.md
@@ -29,20 +29,21 @@ The RSS news are stored in a local storage while reading. Local storage is imple
 
 {
     "news": {
-        "feed": "Yahoo News - Latest News & Headlines",
+        "feed": "TUT.BY: Новости ТУТ - Главные новости",
         "items": [
             {
-                "title": "Ukrainian energy company tied to Hunter Biden supported American think tank, paid for trips",
-                "link": "https://news.yahoo.com/ukrainian-energy-company-tied-to-hunter-biden-supported-american-think-tank-paid-for-trips-015132322.html",
-                "date": "Tue, 12 Nov 2019 20:51:32 -0500",
+                "title": "Охрана, неприкосновенность, пенсия. Канопацкая предлагает закон о гарантиях для экс-президента Беларуси",
+                "link": "https://news.tut.by/economics/662957.html?utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news",
+                "date": "Wed, 27 Nov 2019 15:41:00 +0300",
                 "description": {
-                    "text": "Burisma gave more than $450,000 to the Atlantic Council, a prominent Washington think tank.",
+                    "text": "Депутат Анна Канопацкая разработала законопроект «О гарантиях президенту Республики Беларусь, прекратившему исполнение своих полномочий, и членам его семьи» и в ближайшее время внесет его на рассмотрение в Палату представителей.",
                     "images": [
-                        "http://l1.yimg.com/uu/api/res/1.2/2Q92DOIaZFmDeg0l9DbhAg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-images/2019-11/42dec8d0-05a9-11ea-adcf-9417cbbb4d35"
+                        {
+                            "src": "https://img.tyt.by/thumbnails/n/politika/04/4/c5109116a72e8f8029fecf5ca544c9d4.jpg",
+                            "alt": "Фото: sb.by"
+                        }
                     ],
-                    "links": [
-                        "https://news.yahoo.com/ukrainian-energy-company-tied-to-hunter-biden-supported-american-think-tank-paid-for-trips-015132322.html"
-                    ]
+                    "links": null
                 }
             }
         ]
diff --git a/rss/cache.py b/rss/cache.py
index e47251b..2f4da02 100644
--- a/rss/cache.py
+++ b/rss/cache.py
@@ -120,7 +120,7 @@ def _print_entries(self, news: dict):
         if news['description']['images']:
             print('Images:')
             for item in news['description']['images']:
-                print(item)
+                print(item['src'])
 
         if news['description']['links']:
             print('Links:')
diff --git a/rss/news.py b/rss/news.py
index f0a3897..d492d71 100644
--- a/rss/news.py
+++ b/rss/news.py
@@ -73,7 +73,7 @@ def _print_entries(self, news: dict):
         if news['description']['images']:
             print('Images:')
             for item in news['description']['images']:
-                print(item)
+                print(item['src'])
 
         if news['description']['links']:
             print('Links:')
@@ -122,8 +122,8 @@ def _read_description(self, news: dict) -> dict:
         """Return dict with keys 'text', 'images', 'links'.
 
         'text' value is description(str)
-        'images' value is a list of images sources
-        'links' value is a list of urls
+        'images' value is a dict
+        'links' value is a set of urls
         """
 
         logging.info('Get information from description')
@@ -134,15 +134,25 @@ def _read_description(self, news: dict) -> dict:
         if not text:
             text = 'Nothing'
 
-        logging.info('Get set of images')
-        set_of_images = set()
+        return {'description': {'text': text, 'images': self._get_img_list(soup),
+                'links': self._get_links_set(soup),
+                }}
+
+    def _get_img_list(self, soup) -> list:
+        """Get images src and alt from soup object.
+        Return list of dicts.
+        """
+
+        logging.info('Get images')
+        list_of_images =[]
         images = soup.findAll('img')
         for image in images:
             if image.get('src'):
-                set_of_images.add(image['src'])
+                list_of_images.append({'src': image['src'], 'alt': image['alt']})
+        return list_of_images if list_of_images else None
 
-        if not set_of_images:
-            set_of_images = None
+    def _get_links_set(self, soup):
+        """Get links from soup object."""
 
         logging.info('Get set of links')
         set_of_links = set()
@@ -151,12 +161,7 @@ def _read_description(self, news: dict) -> dict:
                 set_of_links.add(tag['href'])
             if tag.get('url'):
                 set_of_links.add(tag['url'])
-
-        if not set_of_links:
-            set_of_links = None
-
-        return {'description': {'text': text, 'images': set_of_images,
-                'links': set_of_links}}
+        return set_of_links if set_of_links else None
 
     def convert_to_json(self, limit=None):
         """Return news in JSON format."""

From 4362fbbffee3da2acfd8ae8730c6ea7d3efd96a5 Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Fri, 29 Nov 2019 03:29:20 +0300
Subject: [PATCH 11/15] Fix errors with tag published

---
 rss/cache.py |  8 ++++++++
 rss/news.py  | 14 ++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/rss/cache.py b/rss/cache.py
index 2f4da02..087de70 100644
--- a/rss/cache.py
+++ b/rss/cache.py
@@ -26,7 +26,15 @@ def _convert_date(self, date: str) -> str:
         """Convert date to %Y%m%d format."""
 
         logging.info('Convert date')
+
         converted_date = dateparser.parse(date)
+        if not converted_date:
+            logging.info("Date isn't clear. Try to parse again")
+            try:
+                converted_date = datetime.datetime.strptime(date, "%a, %d %b %Y %X %z")
+                return converted_date.strftime('%Y%m%d')
+            except Exception:
+                raise Exception('Something wrong with date')
         return converted_date.strftime('%Y%m%d')
 
     def insert_news(self, news, url: str):
diff --git a/rss/news.py b/rss/news.py
index d492d71..9851dc0 100644
--- a/rss/news.py
+++ b/rss/news.py
@@ -24,6 +24,7 @@ def __init__(self, url: str, limit=None):
 
         self.feed_title = self.feeds.feed.get('title')
         self.list_of_news = []
+        self.list_of_row_descriptions = []
 
         self._check_limit(limit)
         self.make_list_of_news()
@@ -115,6 +116,7 @@ def make_list_of_news(self):
                         'link': news.get('link', 'Unknown'),
                         'date': self._find_date_tag(news)}
             one_news.update(self._read_description(news))
+
             self.list_of_news.append(one_news)
             cache.insert_news(one_news, self.url)
 
@@ -123,7 +125,7 @@ def _read_description(self, news: dict) -> dict:
 
         'text' value is description(str)
         'images' value is a dict
-        'links' value is a set of urls
+        'links' value is a list of urls
         """
 
         logging.info('Get information from description')
@@ -134,9 +136,9 @@ def _read_description(self, news: dict) -> dict:
         if not text:
             text = 'Nothing'
 
+        self.list_of_row_descriptions.append(news.description)
         return {'description': {'text': text, 'images': self._get_img_list(soup),
-                'links': self._get_links_set(soup),
-                }}
+                'links': self._get_links_list(soup)}}
 
     def _get_img_list(self, soup) -> list:
         """Get images src and alt from soup object.
@@ -144,14 +146,14 @@ def _get_img_list(self, soup) -> list:
         """
 
         logging.info('Get images')
-        list_of_images =[]
+        list_of_images = []
         images = soup.findAll('img')
         for image in images:
             if image.get('src'):
                 list_of_images.append({'src': image['src'], 'alt': image['alt']})
         return list_of_images if list_of_images else None
 
-    def _get_links_set(self, soup):
+    def _get_links_list(self, soup):
         """Get links from soup object."""
 
         logging.info('Get set of links')
@@ -161,7 +163,7 @@ def _get_links_set(self, soup):
                 set_of_links.add(tag['href'])
             if tag.get('url'):
                 set_of_links.add(tag['url'])
-        return set_of_links if set_of_links else None
+        return list(set_of_links) if set_of_links else None
 
     def convert_to_json(self, limit=None):
         """Return news in JSON format."""

From 34609d668c9ebecfa7dddb37d125d9af4df888a1 Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Sat, 30 Nov 2019 13:00:34 +0300
Subject: [PATCH 12/15] Add ability to convert to fb2 and html

---
 rss/cache.py             |  40 +++++++++----
 rss/converter_to_fb2.py  | 124 +++++++++++++++++++++++++++++++++++++++
 rss/converter_to_html.py |  52 ++++++++++++++++
 rss/news.py              |   4 +-
 rss/rss_reader.py        |  32 +++++++---
 setup.py                 |   6 +-
 6 files changed, 235 insertions(+), 23 deletions(-)
 create mode 100644 rss/converter_to_fb2.py
 create mode 100644 rss/converter_to_html.py

diff --git a/rss/cache.py b/rss/cache.py
index 087de70..fff253c 100644
--- a/rss/cache.py
+++ b/rss/cache.py
@@ -8,6 +8,9 @@
 
 import dateparser
 
+from rss.converter_to_fb2 import Fb2Converter
+from rss.converter_to_html import HTMLConverter
+
 
 class Cache:
     """This class creates cache file, updates it and prints cached news."""
@@ -17,7 +20,7 @@ def __init__(self):
         self.db_file_name = 'cache.db'
 
     def _create_key(self, date: str, url: str) -> str:
-        """Create key for db"""
+        """Create key for db."""
 
         logging.info('Create key')
         return date + url
@@ -37,7 +40,7 @@ def _convert_date(self, date: str) -> str:
                 raise Exception('Something wrong with date')
         return converted_date.strftime('%Y%m%d')
 
-    def insert_news(self, news, url: str):
+    def insert_news(self, news, row_description, url: str):
         """Insert news into cache file.
            Create cache file if it doesn't exist.
         """
@@ -49,13 +52,17 @@ def insert_news(self, news, url: str):
             if db.get(key):
                 logging.info("Update record")
                 record = db[key]
-                if not record.count(news):
-                    record.append(news)
+                if not list(record['list_of_news']).count(news):
+                    record['list_of_news'].append(news)
+                    record['list_of_row_descriptions'].append(row_description)
                 db[key] = record
             else:
                 logging.info("Create new record")
-                record = []
-                record.append(news)
+                record = {}
+                record['list_of_news'] = []
+                record['list_of_news'].append(news)
+                record['list_of_row_descriptions'] = []
+                record['list_of_row_descriptions'].append(row_description)
                 db[key] = record
 
     def _check_entered_date(self, key: str):
@@ -76,7 +83,9 @@ def _get_news(self, key: str) -> list:
             except KeyError:
                 raise Exception("Can't find the news")
 
-    def set_printing_news(self, url: str, date: str, limit=None, json_mode=None):
+    def set_printing_news(self, url: str, date: str,
+                          limit: int, json_mode: bool,
+                          fb2_path: str, html_path: str):
         """Set print format"""
 
         logging.info("Set print format")
@@ -88,9 +97,18 @@ def set_printing_news(self, url: str, date: str, limit=None, json_mode=None):
         db = self._get_news(key)
 
         if json_mode:
-            print(json.dumps(db[:limit], indent=4, ensure_ascii=False))
+            print(json.dumps(db['list_of_news'][:limit], indent=4, ensure_ascii=False))
         else:
-            self.print_news(db, limit)
+            self.print_news(db['list_of_news'], limit)
+
+        if fb2_path:
+            conv = Fb2Converter(fb2_path)
+            conv.convert_to_fb2(db['list_of_news'][:limit])
+            conv.save_fb2()
+        if html_path:
+            conv = HTMLConverter(html_path)
+            conv.save_html(conv.convert_to_html(db['list_of_news'][:limit],
+                                                db['list_of_row_descriptions'][:limit]))
 
     def _check_limit(self, limit):
         """Check if the limit > 0."""
@@ -100,11 +118,11 @@ def _check_limit(self, limit):
             raise ValueError('Invalid limit: limit <= 0')
 
     def print_news(self, list_of_news, limit):
-        """Print news"""
+        """Print news."""
 
         logging.info('Start printing cached news')
         news_number = 1
-        # if self.list_of_news consists of 1 element
+        # check if self.list_of_news consists of 1 element
         if type(list_of_news) == dict:
             print('№', news_number)
             self._print_entries(list_of_news)
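
For orientation, a minimal sketch of the record layout insert_news now keeps in shelve. Only the two list names come from the diff; the key parts and the news values are illustrative:

    import shelve

    key = '20191130' + 'https://news.yahoo.com/rss'  # _create_key: date + url
    with shelve.open('cache.db') as db:
        record = db.get(key) or {'list_of_news': [],
                                 'list_of_row_descriptions': []}
        record['list_of_news'].append({'title': 'Example news'})
        record['list_of_row_descriptions'].append('<p>Example news</p>')
        db[key] = record  # reassign so shelve persists the mutated record
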
diff --git a/rss/converter_to_fb2.py b/rss/converter_to_fb2.py
new file mode 100644
index 0000000..23c32fd
--- /dev/null
+++ b/rss/converter_to_fb2.py
@@ -0,0 +1,124 @@
+"""This module converts news to fb2 format and saves."""
+
+import os
+import logging
+from base64 import b64encode
+import xml.etree.ElementTree as tree
+from xml.etree.ElementTree import Element
+import xml.dom.minidom as minidom
+
+import requests
+
+
+class Fb2Converter:
+    """Class provides work with conversation to fb2."""
+
+    def __init__(self, path='rss-news.fb2'):
+        logging.info('Fb2Converter initialization')
+        self.path = path
+        self.root = tree.Element('FictionBook')
+        self.root.set('xmlns:l', "http://www.w3.org/1999/xlink")
+        self.description = tree.SubElement(self.root, 'description')
+        self.body = tree.SubElement(self.root, 'body')
+
+    def insert_file_description(self):
+        """Insert file description."""
+
+        logging.info('Insert description')
+        title_info = tree.SubElement(self.description, 'title-info')
+        tree.SubElement(title_info, 'book-title').text = 'RSS news'
+
+    def insert_body(self, list_of_news, limit):
+        """Insert body."""
+
+        logging.info("Insert body")
+        for news in list_of_news[:limit]:
+            self.insert_section(news)
+
+    def insert_section(self, news):
+        """Insert section."""
+
+        logging.info('Insert describing single news section')
+        section = tree.SubElement(self.body, 'section')
+
+        self.insert_tag_p(section, news['title'], True)
+        self.insert_tag_empty_line(section)
+        self.insert_tag_p(section, 'Link: ' + news['link'])
+        self.insert_tag_p(section, 'Date: ' + news['date'])
+        self.insert_tag_empty_line(section)
+
+        if news['description']['images']:
+            try:
+                for img in news['description']['images']:
+                    self.insert_image(section, img['src'], img['alt'])
+            except Exception as e:
+                print("Errors with images: ", e)
+
+        self.insert_tag_empty_line(section)
+        self.insert_tag_p(section, news['description']['text'])
+
+        if news['description']['links']:
+            self.insert_tag_empty_line(section)
+            self.insert_tag_p(section, 'Links:')
+            for link in news['description']['links']:
+                self.insert_tag_p(section, link)
+
+        self.insert_tag_empty_line(section)
+        self.insert_tag_p(section, '-'*50)
+
+    def insert_tag_empty_line(self, parent):
+        """Insert empty line """
+
+        logging.info('Insert empty line')
+        tree.SubElement(parent, 'empty-line')
+
+    def insert_tag_p(self, parent, text, strong_mode=None):
+        """
+        Insert tag p with text.
+        If strong_mode is set, the text will be bold.
+        """
+
+        if strong_mode:
+            logging.info('Insert tag p with strong text')
+            tag_p = tree.SubElement(parent, 'p')
+            tree.SubElement(tag_p, 'strong').text = text
+        else:
+            logging.info('Insert tag p')
+            tree.SubElement(parent, 'p').text = text
+
+    def convert_to_fb2(self, news, limit=None):
+        """Return news converted into fb2."""
+
+        logging.info('Start conversion to fb2')
+        self.insert_file_description()
+        self.insert_body(news, limit)
+
+    def save_fb2(self):
+        """Save fb2 converted news on the received path."""
+
+        logging.info('Save fb2 converted news')
+        with open(self.path, 'w') as file:
+            file.write(tree.tostring(self.root).decode('UTF-8'))
+
+        pretty_xml_as_string = minidom.parse(self.path).toprettyxml()
+
+        with open(self.path, 'w') as file:
+            file.write(pretty_xml_as_string)
+
+    def insert_image(self, parent, img_url, img_name):
+        """Insert image tag in format: ."""
+
+        logging.info('Insert image')
+        image = tree.SubElement(parent, 'image')
+        image.set('l:href', '#' + img_name)
+        binary = tree.SubElement(self.root, 'binary')
+        binary.set('id', img_name)
+        binary.set('content-type', 'image/png')
+        binary.text = self.get_binary_img(img_url)
+
+    def get_binary_img(self, src):
+        """Return img as base64 in string form"""
+
+        logging.info('Get binary img')
+        resource = requests.get(src)
+        return b64encode(resource.content).decode('UTF-8')
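
A hedged usage sketch for Fb2Converter, not part of the patch; the news dict mirrors the shape news.py builds, and every value is made up. Images are left empty here because insert_image expects dict entries with 'src' and 'alt' keys:

    from rss.converter_to_fb2 import Fb2Converter

    news_item = {
        'title': 'Example title',
        'link': 'https://example.com/article',
        'date': 'Sat, 30 Nov 2019 13:00:34 +0300',
        'description': {'text': 'Example text.', 'images': [], 'links': []},
    }

    conv = Fb2Converter('rss-news.fb2')
    conv.convert_to_fb2([news_item])  # limit=None converts the whole list
    conv.save_fb2()                   # writes pretty-printed XML to the path
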
diff --git a/rss/converter_to_html.py b/rss/converter_to_html.py
new file mode 100644
index 0000000..1814531
--- /dev/null
+++ b/rss/converter_to_html.py
@@ -0,0 +1,52 @@
+"""This module converts news to HTML and fb2 and saves."""
+
+import os
+import logging
+
+from bs4 import BeautifulSoup
+from lxml import html
+from lxml import etree
+from lxml.builder import E
+
+
+class HTMLConverter:
+    """Class provides work with conversation to HTML."""
+
+    def __init__(self, path='rss-news.html'):
+        logging.info('HTMLConverter initialization')
+        self.path = path
+
+    def convert_to_html(self, list_of_news, list_of_row_descriptions):
+        """Return news converted into HTML."""
+
+        logging.info('Start conversion to HTML')
+        page = (
+            E.html(
+                E.head(E.title("RSS news")),
+            )
+        )
+
+        for single_news, single_description in \
+                zip(list_of_news, list_of_row_descriptions):
+            logging.info('Convert one news item')
+            page.append(E.P(
+                E.center(E.h2(single_news['title'])),
+                E.h2(E.a(single_news['link'], href=single_news['link'])),
+                E.h4(single_news['date']),
+            ))
+            page.append(html.fromstring(single_description))
+            page.append(E.BR())
+            page.append(E.BR())
+            page.append(E.HR())
+        return page
+
+    def save_html(self, html_news):
+        """Save HTML converted news on the received path."""
+
+        logging.info('Save HTML converted news')
+        with open(self.path, 'w') as file:
+            file.write(html.tostring(html_news,
+                                     pretty_print=True,
+                                     encoding='unicode',
+                                     method='html',
+                                     doctype=''))
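
The matching sketch for HTMLConverter (again with illustrative values); convert_to_html pairs each news dict with its raw html description via zip:

    from rss.converter_to_html import HTMLConverter

    news_item = {
        'title': 'Example title',
        'link': 'https://example.com/article',
        'date': 'Sat, 30 Nov 2019 13:00:34 +0300',
    }

    conv = HTMLConverter('rss-news.html')
    page = conv.convert_to_html([news_item], ['<p>Example description</p>'])
    conv.save_html(page)  # serialized by lxml with pretty_print=True
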
diff --git a/rss/news.py b/rss/news.py
index 9851dc0..2bd1361 100644
--- a/rss/news.py
+++ b/rss/news.py
@@ -50,7 +50,7 @@ def print_news(self, limit):
         print('\nFeed:', self.feed_title, "\n\n")
 
         news_number = 1
-        # if self.list_of_news consists of 1 element
+        # check if self.list_of_news consists of 1 element
         if type(self.list_of_news) == dict:
             print('№', news_number)
             self._print_entries(self.list_of_news)
@@ -118,7 +118,7 @@ def make_list_of_news(self):
             one_news.update(self._read_description(news))
 
             self.list_of_news.append(one_news)
-            cache.insert_news(one_news, self.url)
+            cache.insert_news(one_news, self.list_of_row_descriptions[-1], self.url)
 
     def _read_description(self, news: dict) -> dict:
         """Return dict with keys 'text', 'images', 'links'.
diff --git a/rss/rss_reader.py b/rss/rss_reader.py
index 8561bab..7059b35 100644
--- a/rss/rss_reader.py
+++ b/rss/rss_reader.py
@@ -6,8 +6,10 @@
 
 from rss.news import News
 from rss.cache import Cache
+from rss.converter_to_fb2 import Fb2Converter
+from rss.converter_to_html import HTMLConverter
 
-VERSION = "3.0"
+VERSION = "4.0"
 
 
 def add_args(parser):
@@ -21,10 +23,15 @@ def add_args(parser):
     parser.add_argument('--limit', help='Limit news topics if this parameter provided', type=int)
     parser.add_argument('--date', help="""Take a date in %%Y%%m%%d format.
                          The news from the specified day will be printed out.""", type=str)
+    parser.add_argument('--to-html', help="""Convers news into html and print in stdout.
+                        Argument receives the path where new file will be saved.""", type=str)
+    parser.add_argument('--to-fb2', help="""Convers news into fb2  and print in stdout.
+                        Argument receives the path where new file will be saved.""", type=str)
     return parser
 
 
-def start_parsing(url: str, limit: int, json_mode: bool):
+def start_parsing(url: str, limit: int, json_mode: bool,
+                  fb2_path: str, html_path: str):
     """This function create rss feed and print news.
 
     :param url: RSS URL
@@ -33,11 +40,20 @@ def start_parsing(url: str, limit: int, json_mode: bool):
     """
 
     logging.info('Create feed')
-    feed = News(url, limit)
+    news = News(url, limit)
     if json_mode:
-        print(feed.convert_to_json(limit))
+        print(news.convert_to_json(limit))
     else:
-        feed.print_news(limit)
+        news.print_news(limit)
+
+    if fb2_path:
+        conv = Fb2Converter(fb2_path)
+        conv.convert_to_fb2(news.list_of_news[:limit])
+        conv.save_fb2()
+    if html_path:
+        conv = HTMLConverter(html_path)
+        conv.save_html(conv.convert_to_html(news.list_of_news[:limit],
+                                            news.list_of_row_descriptions[:limit]))
 
 
 def set_verbose_mode(verbose_mode: bool):
@@ -63,12 +79,14 @@ def main():
     if args.date:
         try:
             cache = Cache()
-            cache.set_printing_news(args.source, args.date, args.limit, args.json)
+            cache.set_printing_news(args.source, args.date, args.limit,
+                                    args.json, args.to_fb2, args.to_html)
         except Exception as e:
             print('Errors with cache:', e)
     else:
         try:
-            start_parsing(args.source, args.limit, args.json)
+            start_parsing(args.source, args.limit, args.json,
+                          args.to_fb2, args.to_html)
         except Exception as e:
             print('Errors with parsing:', e)
 
diff --git a/setup.py b/setup.py
index 4060436..fdc70c4 100644
--- a/setup.py
+++ b/setup.py
@@ -5,15 +5,15 @@
 
 setup(
     name='rss-reader',
-    version='3.0',
+    version='4.0',
     description='Pure Python command-line RSS reader',
     long_description=long_description,
     url='https://github.com/yanaShcherbich/PythonHomework',
     author='Yana Shcherbich',
     author_email='vilikdf@gmail.com',
     packages=find_packages(),
-    python_requires='>=3.8',
-    install_requires=['feedparser', 'bs4', 'dateparser'],
+    python_requires='>=3.6',
+    install_requires=['feedparser', 'bs4', 'dateparser', 'requests', 'lxml'],
     entry_points={
         'console_scripts': ['rss-reader=rss.rss_reader:run'],
     }
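
After the version bump, a local install would look like this (a sketch); the two new dependencies are pulled in automatically:

    pip install .
    rss-reader --version  # console_scripts entry point, prints version info
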

From 463e19165c52317b4b5b4e1a47bb77e4ac04cd6f Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Sat, 30 Nov 2019 13:23:59 +0300
Subject: [PATCH 13/15] Change README.md

---
 README.md         | 23 ++++++++++++++---------
 rss/rss_reader.py |  4 ++--
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index a39aa24..df7db94 100644
--- a/README.md
+++ b/README.md
@@ -12,18 +12,23 @@ Pure Python command-line RSS reader.
 positional arguments:
   source         RSS URL
 
-optional arguments:
-  -h, --help     show this help message and exit
-  --version      Print version info
-  --json         Print result as JSON in stdout
-  --verbose      Outputs verbose status messages
-  --limit LIMIT  Limit news topics if this parameter provided
-  --date DATE    Take a date in %Y%m%d format. The news from the specified day
-                 will be printed out.
+optional arguments:
+  -h, --help         show this help message and exit
+  --version          Print version info
+  --json             Print result as JSON in stdout
+  --verbose          Outputs verbose status messages
+  --limit LIMIT      Limit news topics if this parameter provided
+  --date DATE        Take a date in %Y%m%d format. The news from the specified
+                     day will be printed out.
+  --to-html TO_HTML  Convert news into html and print in stdout. Argument
+                     receives the path where new file will be saved.
+  --to-fb2 TO_FB2    Convert news into fb2 and print in stdout. Argument
+                     receives the path where new file will be saved.
+
 
 ## News caching
 
-The RSS news are stored in a local storage while reading. Local storage is implemented using shelve. The cached news are stored by a key which consists of the date and the RSS URL. The cached news can be read with the optional argument --date.
+The RSS news are stored in a local storage while reading. Local storage is implemented using shelve. The cached news are dicts with the news itself and its raw (html) description, stored by key. The key consists of the date and the RSS URL. The cached news can be read with the optional argument --date.
 
 ## JSON structure
diff --git a/rss/rss_reader.py b/rss/rss_reader.py
index 7059b35..a59b04d 100644
--- a/rss/rss_reader.py
+++ b/rss/rss_reader.py
@@ -23,9 +23,9 @@ def add_args(parser):
     parser.add_argument('--limit', help='Limit news topics if this parameter provided', type=int)
     parser.add_argument('--date', help="""Take a date in %%Y%%m%%d format.
                          The news from the specified day will be printed out.""", type=str)
-    parser.add_argument('--to-html', help="""Convers news into html and print in stdout.
+    parser.add_argument('--to-html', help="""Convert news into html and print in stdout.
                         Argument receives the path where new file will be saved.""", type=str)
-    parser.add_argument('--to-fb2', help="""Convers news into fb2  and print in stdout.
+    parser.add_argument('--to-fb2', help="""Convert news into fb2 and print in stdout.
                         Argument receives the path where new file will be saved.""", type=str)
     return parser
 

From 23d1d0c73533dcbd2b44c043412b7425081a172c Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Sat, 30 Nov 2019 16:10:50 +0300
Subject: [PATCH 14/15] Change README.md, add check_path

---
 README.md         |  2 +-
 rss/rss_reader.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index df7db94..dc63f5a 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ optional arguments:
 
 ## News caching
 
-The RSS news are stored in a local storage while reading. Local storage is implemented using shelve. The cached news are dicts with the news itself and its raw (html) description, stored by key. The key consists of the date and the RSS URL. The cached news can be read with the optional argument --date.
+The RSS news are stored in a local storage while reading. Local storage is implemented using shelve. The cached news are dicts with the news itself and its raw (html) description, stored by key. The key consists of the date and the RSS URL. The cached news can be read with the optional argument --date. The utility creates a binary db file 'cache.db' in the current directory. If you change the current directory, the db file from the previous one will not be copied over.
 
 ## JSON structure
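
Cached news for a given day can then be read back like this (URL and date are illustrative):

    rss-reader https://news.yahoo.com/rss --date 20191130 --limit 3
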
diff --git a/rss/rss_reader.py b/rss/rss_reader.py
index a59b04d..56349a4 100644
--- a/rss/rss_reader.py
+++ b/rss/rss_reader.py
@@ -3,6 +3,7 @@
 import argparse
 import logging
 import sys
+from pathlib import Path
 
 from rss.news import News
 from rss.cache import Cache
@@ -30,6 +31,15 @@ def add_args(parser):
     return parser
 
 
+def check_path(input_path: str):
+    """Check file path."""
+
+    logging.info('Check path')
+    try:
+        Path(input_path)
+    except Exception as e:
+        ptint("Invalid path: ", e)
+
 def start_parsing(url: str, limit: int, json_mode: bool,
                   fb2_path: str, html_path: str):
     """This function create rss feed and print news.
@@ -76,6 +86,12 @@ def main():
 
     set_verbose_mode(args.verbose)
 
+    if args.to_fb2:
+        check_path(args.to_fb2)
+
+    if args.to_html:
+        check_path(args.to_html)
+
     if args.date:
         try:
             cache = Cache()
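
Note: pathlib.Path() accepts almost any string, so the try/except in check_path above will rarely fire. A stricter variant (an assumption, not what the patch does) could verify that the directory the file would land in exists:

    from pathlib import Path

    def check_path(input_path: str):
        """Check that the parent directory of input_path exists."""
        if not Path(input_path).parent.exists():
            print('Invalid path:', input_path)
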

From 08f7331074090a66eab5f9d258bc932e9d03a041 Mon Sep 17 00:00:00 2001
From: Yana Shcherbich 
Date: Sun, 1 Dec 2019 15:30:34 +0300
Subject: [PATCH 15/15] Add requirements.txt

---
 requirements.txt | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..588303e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+feedparser==2.2.1
+bs4==0.0.1
+dateparser==0.7.2
+requests==2.22.0
+lxml==4.4.2
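
With the pins above, a reproducible environment is one command away:

    pip install -r requirements.txt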