diff --git a/.gitignore b/.gitignore
index 894a44c..fa532a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
# mypy
.mypy_cache/
+
+# PyCharm
+.idea
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..b4e4cc3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Elia Onishchouk
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..540b720
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3882315
--- /dev/null
+++ b/README.md
@@ -0,0 +1,50 @@
+# Introduction to Python. Hometask
+
+RSS reader is a command-line utility which receives an [RSS](https://en.wikipedia.org/wiki/RSS) URL and prints the results in a human-readable format.
+
+
+Utility provides the following interface:
+```shell
+usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT]
+ [--date DATE] [--to-html TO_HTML] [--to-fb2 TO_FB2]
+ [--colorize]
+ source
+
+Pure Python command-line RSS reader.
+
+positional arguments:
+ source RSS URL
+
+optional arguments:
+ -h, --help show this help message and exit
+ --version Prints version info
+ --json Prints result as JSON in stdout
+ --verbose Outputs verbose status messages
+ --limit LIMIT Limits news topics if this parameter provided
+ --date DATE Shows news of specific date
+ --to-html TO_HTML Converts news into html format and save to a specified
+ path
+ --to-fb2 TO_FB2 Converts news into fb2 format and save to a specified
+ path
+ --colorize Colorizes the cmd output
+```
+
+With the argument `--json` the program converts the news into [JSON](https://en.wikipedia.org/wiki/JSON) format.
+
+With the argument `--limit` the program prints only the given number of news items.
+
+With the argument `--verbose` the program prints all logs in stdout.
+
+With the argument `--version` the program prints its current version to stdout and exits.
+
+With the argument `--date` the program prints or saves the stored news of the source from the specified date, if there are any.
+
+With the argument `--to-html` the program saves news from source to the given path as a html file.
+
+With the argument `--to-fb2` the program saves news from source to the given path as a fb2 file.
+
+With the argument `--colorize` the program colorizes output in cmd.
+
+# Caching
+
+This program stores data in `"home directory"/rss_reader_cache`. Images are stored in the `image` folder of this directory, and the news are kept in the `cache.json` file located there. All data is stored independently of the `--date` argument.
\ No newline at end of file
diff --git a/json_schema.json b/json_schema.json
new file mode 100644
index 0000000..78aba63
--- /dev/null
+++ b/json_schema.json
@@ -0,0 +1,91 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "title": "News feed json schema",
+ "required": [
+ "Feed",
+ "Items"
+ ],
+ "properties": {
+ "Feed": {
+ "type": "string",
+ "title": "Feed",
+ "description": "The title of the feed"
+ },
+ "Items": {
+ "type": "array",
+ "title": "News",
+ "items": {
+ "type": "object",
+ "title": "news",
+ "required": [
+ "title",
+ "description",
+ "link",
+ "pubDate",
+ "source"
+ ],
+ "properties": {
+ "title": {
+ "type": "string",
+ "title": "Title",
+ "description": "The title of the news"
+ },
+ "description": {
+ "type": "string",
+ "title": "Description",
+ "description": "The description of the news"
+ },
+ "link": {
+ "type": "string",
+ "title": "Link",
+ "description": "The origin link of the news"
+ },
+ "pubDate": {
+ "type": "string",
+ "title": "Date",
+ "description": "The date this news was published"
+ },
+ "source": {
+ "type": "object",
+ "title": "Links inside the description",
+ "required": [
+ "images_links",
+ "href_links",
+ "video_links"
+ ],
+ "properties": {
+ "images_links": {
+ "type": "array",
+ "title": "Images links",
+ "items": {
+ "type": "string",
+ "title": "Image link",
+ "description": "The source of the image"
+ }
+ },
+ "href_links": {
+ "type": "array",
+ "title": "Hyper references",
+ "items": {
+ "type": "string",
+ "title": "URL link",
+ "description": "The source of the hyper reference"
+ }
+ },
+ "video_links": {
+ "type": "array",
+ "title": "Video links",
+ "items": {
+ "type": "string",
+ "title": "Video link",
+ "description": "The source of the video"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2a6104e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+feedparser
+bs4
+colorama
\ No newline at end of file
diff --git a/rss_reader/__init__.py b/rss_reader/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rss_reader/rss_reader.py b/rss_reader/rss_reader.py
new file mode 100644
index 0000000..084a89f
--- /dev/null
+++ b/rss_reader/rss_reader.py
@@ -0,0 +1,675 @@
+import argparse
+import feedparser
+import logging
+import html
+import json
+import urllib
+import colorama
+from base64 import b64encode
+from pathlib import Path
+from bs4 import BeautifulSoup
+from rss_reader import version as vers
+
+
+class Converter:
+ """
+ This class is used to convert news feed to either html or fb2 version
+
+ Attributes:
+ NewsFeed news_feed The feed whose items are converted
+ str encoding Encoding of the first item of the feed, used when the fb2 file is written
+ """
+
+ def __init__(self, news_feed):
+ """
+ Constructor of Converter class. It stores the feed and additionally remembers the encoding of its first item
+
+ :param NewsFeed news_feed: A NewsFeed object that contains news feed
+ """
+
+ self.news_feed = news_feed
+ # NOTE(review): items[0] raises IndexError if the feed has no items - confirm callers never pass an empty feed
+ self.encoding = self.news_feed.items[0].encoding
+
+ def convert_to_html(self, path, limit, date):
+ """
+ This function creates a html file with news feed
+ The file is named 'news feed.html' and is placed into the directory given by path
+
+ :param str path: The directory where new file will be saved (created together with its parents if missing)
+ :param int limit: The number of news to be saved
+ :param str date: Optional: forwarded to create_html, where it ends up selecting the image sources
+ """
+
+ logging.info('Converting to html')
+ path_object = Path(path)
+ path_object.mkdir(parents=True, exist_ok=True)
+ path_object /= 'news feed.html'
+ # The html file is always written as utf-8, while the fb2 file uses self.encoding
+ with path_object.open('w', encoding="utf-8") as html_file:
+ html_file.write(self.create_html(limit, date))
+
+ logging.info('Converting to html successful')
+
+ def convert_to_fb2(self, path, limit):
+ """
+ This function creates a fb2 file with news feed
+ The file is named 'news feed.fb2' and is placed into the directory given by path
+
+ :param str path: The directory where new file will be saved (created together with its parents if missing)
+ :param int limit: The number of news to be saved
+ """
+
+ logging.info('Converting to fb2')
+ path_object = Path(path)
+ path_object.mkdir(parents=True, exist_ok=True)
+ path_object /= 'news feed.fb2'
+ # The fb2 file is written in the encoding remembered by the constructor
+ with path_object.open('w', encoding=self.encoding) as fb2_file:
+ fb2_file.write(self.create_fb2(limit))
+
+ logging.info('Converting to fb2 successful')
+
+ def create_html(self, limit, date):
+ """
+ This function creates a html version of news feed
+
+ :param int limit: The number of news to be created
+ :param str date: Optional: passed to create_div of every item; a non-empty value makes the items refer to
+ images in the local cache directory instead of their original sources
+ :return: a html like news feed
+ :rtype: str
+ """
+
+ logging.info('Creating html text')
+ # checking_limit is defined elsewhere in this module; presumably it normalises limit against the
+ # number of available items - TODO confirm
+ limit = checking_limit(limit, self.news_feed.items)
+ news = '\n'.join([item.create_div(date) for item in self.news_feed.items[:limit]]) # one block per item
+
+ # NOTE(review): the template below looks as if its html markup was lost in this copy - confirm against
+ # the real file before relying on it
+ return """
+
+
+
+ News Feed
+ {0}
+
+
+ """.format(news)
+
+ def create_fb2(self, limit):
+ """
+ This function creates a fb2 version of news feed
+
+ :param int limit: The number of news to be created
+ :return: a fb2 like news feed
+ :rtype: str
+ """
+
+ logging.info('Creating fb2 text')
+ # checking_limit is defined elsewhere in this module; presumably it normalises limit against the
+ # number of available items - TODO confirm
+ limit = checking_limit(limit, self.news_feed.items)
+ # Sections hold the text of the news, binaries hold the data of their images
+ news = ''.join([item.create_section() for item in self.news_feed.items[:limit]])
+ binaries = ''.join([item.create_binary() for item in self.news_feed.items[:limit]])
+
+ # NOTE(review): the template below looks as if its fb2 markup was lost in this copy (for instance no
+ # {encoding} field is visible although encoding is passed to format) - confirm against the real file
+ return """
+
+
+
+ newspapers
+ RSS Reader
+ el0ny
+
+
+ {myprog}
+ {vers}
+
+
+
+ {news}
+
+ {binaries}
+
+ """.format(myprog=__name__, vers=vers.__version__,
+ news=news, encoding=self.encoding,
+ binaries=binaries)
+
+
+class NewsFeed:
+ """Base class for news feed. It keeps the title of the feed and the list of its news items"""
+
+ def __init__(self, feed_title, items):
+ """
+ This constructor only initializes two values, nothing else
+
+ :param str feed_title: The title of news feed
+ :param list items: A list of Item objects, basically, a list of news
+ """
+
+ self.feed_title = feed_title
+ self.items = items
+
+ def print_feed(self, _json, limit, colorize):
+ """
+ This function allows to print news in cmd either in json or str format
+
+ :param bool _json: If true then the news will be in json format, otherwise in str format
+ :param int limit: The number of news to be printed
+ :param colorize: If true then colorize the output in cmd (only used for the str format)
+ """
+
+ # checking_limit is defined elsewhere in this module; presumably it normalises limit against the
+ # number of available items - TODO confirm
+ limit = checking_limit(limit, self.items)
+ if _json:
+ self.print_to_json(limit)
+ else:
+ self.print_to_console(limit, colorize)
+
+ def create_json(self, is_cached, limit):
+ """
+ This function allows to create json like dict of news
+ The dict has two keys: 'Feed' with the feed title and 'Items' with the list of item dicts
+
+ :param bool is_cached: If true then json will be ready to be saved, otherwise, to be printed
+ :param int limit: The number of news to be included (the first limit items are taken)
+ :return: A json like dict of news
+ :rtype: dict
+ """
+
+ return {'Feed': self.feed_title, 'Items': [item.return_item(is_cached) for item in self.items[:limit]]}
+
+ def print_to_json(self, limit):
+ """
+ This function allows to print news in cmd in json format
+
+ :param int limit: The number of news to be printed
+ """
+
+ logging.info('Printing news in json format')
+ # 0 is passed as a falsy is_cached, so the cache-only fields are left out of the printed json
+ print(json.dumps(self.create_json(0, limit)))
+
+ def print_to_console(self, limit, colorize):
+ """
+ This function allows to print news in cmd in str format
+
+ :param int limit: The number of news to be printed
+ :param colorize: If true then colorize the output in cmd
+ """
+
+ logging.info('Printing news in console format')
+ print('Feed: {0}'.format(self.feed_title))
+ for item in self.items[:limit]:
+ item.print_to_console(colorize)
+ logging.info('Printed %s news', limit)
+
+ def save_news(self, limit):
+ """
+ This function allows to save news in a json file in homedirectory/rss_reader_cache/cache.json
+ The news of this feed are written first, followed by the already cached news that are not among them
+
+ :param int limit: The number of news to be saved
+ """
+ logging.info('Saving news')
+ # 1 is passed as a truthy is_cached, so the item dicts also carry the cache-only fields
+ news_to_save = self.create_json(1, limit)['Items']
+ # load_from_cache is defined elsewhere in this module; presumably it returns the list of item dicts
+ # saved earlier - TODO confirm
+ existing_news = load_from_cache()
+ # Keep the old entries, skipping the ones equal to an item that is about to be saved
+ news_to_save += [item for item in existing_news if item not in news_to_save]
+ path = Path.home().joinpath('rss_reader_cache')
+ cache_file = "cache.json"
+ path.mkdir(parents=True, exist_ok=True)
+ filepath = path / cache_file
+ # The cache file is rewritten as a whole
+ with filepath.open('w') as json_file:
+ json.dump(news_to_save, json_file)
+ logging.info('Saving news successful')
+
+
+class Item:
+ """
+ Class for single news item from news feed
+ Attributes of the class can vary depending on whether this item is created from the cache or from parsed feed
+ They are:
+ str title News title
+ str pubDate Published date in its original form
+ str link Link to the news
+ str description Description of the news
+ dict links A dict with href, image, video links
+ (keys 'href_links', 'images_links' and 'video_links')
+ str date_string (optional: only from cache) Published date in YYYYMMDD format
+ str source (optional: only from cache) Rss source
+ str encoding (optional: only from cache) Encoding of the news
+ """
+ def __init__(self, news_dict):
+ """
+ Every key of the given dict becomes an attribute of the item
+
+ :param dict news_dict: Attribute names mapped to their values
+ """
+ for key in news_dict:
+ setattr(self, key, news_dict[key])
+
+ def print_to_console(self, colorize):
+ """
+ This function allows to print one news item in console
+ It prints the title, date, link and description, then the non-empty groups of href, image and video
+ links, and a divider line at the end
+
+ :param colorize: If true then colorize the output in cmd
+ """
+ # The colour prefixes stay empty strings unless colorize is set
+ title_color = ''
+ date_color = ''
+ link_color = ''
+ description_color = ''
+ href_color = ''
+ image_color = ''
+ video_color = ''
+ divider_color = ''
+ if colorize:
+ colorama.init(autoreset=True)
+ title_color = colorama.Fore.MAGENTA
+ date_color = colorama.Fore.WHITE
+ link_color = colorama.Fore.LIGHTBLACK_EX
+ description_color = colorama.Fore.LIGHTYELLOW_EX + colorama.Back.BLACK
+ href_color = colorama.Fore.GREEN
+ image_color = colorama.Fore.LIGHTGREEN_EX
+ video_color = colorama.Fore.CYAN
+ divider_color = colorama.Fore.LIGHTWHITE_EX + colorama.Back.LIGHTWHITE_EX
+ # NOTE(review): this prints a bare colour code on a line of its own - presumably a leftover, confirm
+ print(colorama.Fore.GREEN)
+ print(title_color + '\nTitle: {0}'.format(self.title))
+ print(date_color + 'Date: {0}'.format(self.pubDate))
+ print(link_color + 'Link: {0} \n'.format(self.link))
+ print(description_color + self.description)
+ print()
+
+ if self.links['href_links']:
+ print(href_color + '\nLinks:')
+ for link in self.links['href_links']:
+ print(href_color + link)
+
+ if self.links['images_links']:
+ print(image_color + '\nImages:')
+ for link in self.links['images_links']:
+ print(image_color + link)
+
+ if self.links['video_links']:
+ print(video_color + '\nVideos:')
+ for link in self.links['video_links']:
+ print(video_color + link)
+
+ print(divider_color + '\n//////////////////////////////////////////////////////////////////////////')
+
+ def create_div(self, date):
+ """
+ This function creates a div block of news needed for html conversion
+ The title is html-escaped; the description goes through insert_hrefs
+
+ :param str date: Optional: passed on to insert_hrefs, where it ends up selecting the image sources
+ :return: A string representation of div block of news
+ :rtype: str
+ """
+
+ # NOTE(review): the template below looks as if its html markup was lost in this copy (no {link} field is
+ # visible although link is passed to format) - confirm against the real file
+ return """
+
+
{title}
+
{pubDate}
+
+
{description}
+
Read More
+
+
+ """.format(title=html.escape(self.title), pubDate=self.pubDate,
+ description=self.insert_hrefs(self.description, date), link=self.link)
+
+ def create_section(self):
+ """
+ This function creates a section block of news needed for fb2 conversion
+ The title is html-escaped; the description goes through insert_hrefs_fb2
+
+ :return: A string representation of section block of news
+ :rtype: str
+ """
+
+ logging.info('Creating section')
+ # NOTE(review): this local is never read, the format call below works on self.description directly
+ description = html.escape(self.description)
+ # NOTE(review): the template below looks as if its fb2 markup was lost in this copy - confirm
+ return """
+
+ {title}
+ {pubDate}
+ {description}
+
+ """.format(title=html.escape(self.title), pubDate=self.pubDate,
+ description=self.insert_hrefs_fb2(self.description))
+
+ def insert_hrefs(self, description, date):
+ """
+ This function inserts href links in description needed for html conversion
+ The description is html-escaped first, then images and videos are inserted, then the ' [link ...]'
+ placeholders are replaced one by one
+
+ :param str description: The original description of news
+ :param str date: Optional: passed on to insert_images, where it selects the image sources
+ :return: A description with inserted href links
+ :rtype: str
+ """
+
+ description = self.insert_images(html.escape(description), date)
+ description = self.insert_videos(description)
+ for href_link in self.links['href_links']:
+ # First ' [link ' placeholder still present, up to and including its closing bracket
+ href_raw = description[description.find(' [link '):description.find(']', description.find(' [link '))+1]
+ # Text between ' | ' and the closing bracket of the placeholder
+ href_content = href_raw[href_raw.find(' | ')+3:len(href_raw)-1]
+ # The target is the part of the link entry after ': '
+ # NOTE(review): the template only shows {content}; its markup was presumably lost in this copy - confirm
+ href_html = '{content}'.format(href=href_link[href_link.find(': ')+2:],
+ content=href_content)
+ description = description.replace(href_raw, href_html)
+ logging.info('href inserted')
+ return description
+
+ def insert_images(self, description, date):
+ """
+ This function inserts images in description needed for html conversion
+
+ :param str description: The description of news with ' [image ...]' placeholders
+ :param str date: Optional: if not empty, the image source becomes a file in
+ homedirectory/rss_reader_cache/image instead of the original link
+ :return: A description with inserted image links
+ :rtype: str
+ """
+ logging.info('Image inserted')
+ for image_link in self.links['images_links']:
+ # First ' [image ' placeholder still present, up to and including its closing bracket
+ image_raw = description[description.find(' [image '):description.find(']', description.find(' [image '))+1]
+ # Text between ' | ' and the closing bracket of the placeholder
+ image_alt = image_raw[image_raw.find(' | ') + 3:len(image_raw) - 1]
+ # The source is the part of the link entry after ': '
+ source = image_link[image_link.find(': ') + 2:]
+ if date:
+ # File name: last segment of the source without the characters .?><"*:| plus '.jpg'
+ image_name = source.split('/')[-1]
+ image_name = image_name.translate(str.maketrans('', '', '.?><"*:|')) + '.jpg'
+ path = Path.home().joinpath('rss_reader_cache/image')
+ source = path / image_name
+ # NOTE(review): the string literal below is split over two lines and shows no markup - it was
+ # presumably damaged in this copy, confirm against the real file
+ image_html = '
'.format(src=source, alt=image_alt)
+ description = description.replace(image_raw, image_html)
+ return description
+
+ def insert_videos(self, description):
+ """
+ This function inserts video links in description needed for html conversion
+ (I thought that I can convert them into full videos, but then I realised that it was a bad idea,
+ so I decided to just keep that part, although it isn't necessary anymore)
+
+ :param str description: The description of news with ' [video ...]' placeholders
+ :return: A description with inserted video links
+ :rtype: str
+ """
+ logging.info('Video inserted')
+ for video_link in self.links['video_links']:
+ # First ' [video ' placeholder still present, up to and including its closing bracket
+ video_href = description[description.find(' [video '):description.find(']', description.find(' [video '))+1]
+ logging.info(video_href)
+ # The source is the part of the link entry after ': '
+ source = video_link[video_link.find(': ') + 2:]
+ # The placeholder without its leading space is used as the content
+ # NOTE(review): the template only shows {content}; its markup was presumably lost in this copy - confirm
+ image_html = '{content}'.format(src=source, content=video_href[1:])
+ description = description.replace(video_href, image_html)
+ return description
+
+ def create_binary(self):
+ """
+ This function creates the base64 encoded image data needed for fb2 conversion
+ The images are read from homedirectory/rss_reader_cache/image
+
+ :return: A string with the encoded images inside, an empty string if the item has no images
+ :rtype: str
+ """
+
+ logging.info('Creating binaries')
+ binaries = ''
+ if not self.links['images_links']:
+ return ''
+ for image_link in self.links['images_links']:
+ # The source is the part of the link entry after ': '
+ source = image_link[image_link.find(': ') + 2:]
+ image_name = source.split('/')[-1]
+ if source == '':
+ # An image without a source gets a bare '.jpg' name and no data
+ image_name = '.jpg'
+ encoded_string = ''
+ else:
+ logging.info('Image name %s', image_name)
+ # File name: last segment of the source without the characters .?><"*:| plus '.jpg'
+ image_name = image_name.translate(str.maketrans('', '', '.?><"*:|')) + '.jpg'
+ path = Path.home().joinpath('rss_reader_cache/image')
+ source = path / image_name
+ # NOTE(review): open raises if the image file is missing from the cache - confirm it always exists
+ with open(source, "rb") as image_file:
+ encoded_string = b64encode(image_file.read()).decode()
+
+ # NOTE(review): the template only shows {data}; its markup was presumably lost in this copy - confirm
+ binaries += '{data}'\
+ .format(src=image_name, data=encoded_string)
+ return binaries
+
+ def insert_hrefs_fb2(self, description):
+ """
+ This function allows to find and insert links into description
+ (That is also a rudimental function. Originally I wanted to make hrefs to web links, which are stored as
+ notes. But something went wrong and not all rss were working correctly. So now it just makes links that are
+ empty)
+ The description is html-escaped first, then images are inserted, then the ' [link ...]' placeholders
+ are replaced one by one
+
+ :param str description: Where to find those hrefs
+ :return: Resulting description with inserted href links
+ :rtype: str
+ """
+
+ logging.info('href inserted')
+ description = self.insert_images_fb2(html.escape(description))
+ for href_link in self.links['href_links']:
+ # First ' [link ' placeholder still present, up to and including its closing bracket
+ href_raw = description[description.find(' [link '):description.find(']', description.find(' [link '))+1]
+ # Text between ' | ' and the closing bracket of the placeholder
+ href_content = href_raw[href_raw.find(' | ')+3:len(href_raw)-1]
+ # NOTE(review): the template only shows {content}; its markup was presumably lost in this copy - confirm
+ href_fb2 = '{content}'.format(href=href_link[href_link.find(': ')+2:],
+ content=href_content)
+ description = description.replace(href_raw, href_fb2)
+ return description
+
+ def insert_images_fb2(self, description):
+ """
+ This function allows to find and insert links to images into description
+
+ :param str description: Where to find those images
+ :return: Resulting description with inserted image links
+ :rtype: str
+ """
+ logging.info('Image inserted')
+ for image_link in self.links['images_links']:
+ # First ' [image ' placeholder still present, up to and including its closing bracket
+ image_raw = description[description.find(' [image '):description.find(']', description.find(' [image '))+1]
+ # Text between ' | ' and the closing bracket of the placeholder
+ image_alt = image_raw[image_raw.find(' | ') + 3:len(image_raw) - 1]
+ source = image_link[image_link.find(': ') + 2:]
+ # Same file name rule as in create_binary: last segment without .?><"*:| plus '.jpg'
+ image_name = source.split('/')[-1]
+ image_name = image_name.translate(str.maketrans('', '', '.?><"*:|')) + '.jpg'
+ # NOTE(review): the template below is an empty string, so the placeholder is simply removed; its
+ # markup was presumably lost in this copy - confirm against the real file
+ image_html = ''.format(src=image_name, alt=image_alt)
+ description = description.replace(image_raw, image_html)
+ return description
+
+ def return_item(self, is_cached):
+ """
+ This function returns the content of this object as a dict
+ The keys are always 'title', 'description', 'link', 'pubDate' and 'links'
+
+ :param bool is_cached: If true then 'date_string', 'source' and 'encoding' are added to the dict, so that
+ it can be cached
+ :return: A dict with this object's content
+ :rtype: dict
+ """
+
+ item_content = {'title': self.title, 'description': self.description,
+ 'link': self.link, 'pubDate': self.pubDate, 'links': self.links}
+ if is_cached:
+ item_content['date_string'] = self.date_string
+ item_content['source'] = self.source
+ item_content['encoding'] = self.encoding
+ return item_content
+
+
+def set_argparse():
+ """
+ This function allows to get needed parameters from command line
+
+ :return: An object with all needed parameters inside
+ """
+
+ parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')
+ parser.add_argument('source', type=str, help='RSS URL')
+
+ parser.add_argument('--version', action='version', version='%(prog)s v'+vers.__version__,
+ help='Prints version info')
+ parser.add_argument('--json', action='store_true', help='Prints result as JSON in stdout')
+ parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages')
+ parser.add_argument('--limit', type=int, default=-1, help='Limits news topics if this parameter provided')
+ parser.add_argument('--date', type=str, help='Shows news of specific date')
+ parser.add_argument('--to-html', dest='to_html', type=str,
+ help='Converts news into html format and save to a specified path')
+ parser.add_argument('--to-fb2', dest='to_fb2', type=str,
+ help='Converts news into fb2 format and save to a specified path')
+ parser.add_argument('--colorize', action='store_true', help='Colorizes the cmd output')
+ return parser.parse_args()
+
+
+def find_images(soup):
+ """
+ This function allows to extract image links from parsed feed
+ Every img tag found is numbered starting from 1 and replaced in the soup with a text placeholder
+ (' [image N | alt] ' or ' [image N]'). Images with a non-empty src that are not yet present in
+ homedirectory/rss_reader_cache/image are downloaded there
+
+ :param bs4.BeautifulSoup soup: A beautifulsoup representation of parsed news feed
+ :return: A list of found image links in '[N]: src' format
+ :rtype: list
+ """
+
+ logging.info('Starting image finding')
+ image_iterator = 0
+ images_links = []
+
+ for img in soup.findAll('img'):
+
+ image_iterator += 1
+ # The alt text, when there is one, is kept inside the placeholder
+ if 'alt' in img.attrs and img['alt'] != '':
+ replaced_data = ' [image {0} | {1}] '.format(image_iterator, img['alt'])
+ else:
+ replaced_data = ' [image {0}]'.format(image_iterator)
+ # NOTE(review): presumably raises KeyError for an img tag without a src attribute - confirm
+ src = img['src']
+
+ if src != '':
+ # File name: last segment of the source without the characters .?><"*:| plus '.jpg'
+ image_name = src.split('/')[-1]
+ image_name = image_name.translate(str.maketrans('', '', '.?><"*:|')) + '.jpg'
+ path = Path.home().joinpath('rss_reader_cache/image')
+ path.mkdir(parents=True, exist_ok=True)
+ filepath = path / image_name
+ if filepath.is_file():
+ logging.info('Image already exists')
+ else:
+ # NOTE(review): only `urllib` is imported at the top of this file, so `urllib.request` has to be
+ # loaded by some other import for this call to resolve - confirm
+ urllib.request.urlretrieve(src, filepath)
+ images_links.append('[{0}]: {1}'.format(image_iterator, src))
+ # NOTE(review): replaces whatever soup.find('img') returns, not the loop variable; presumably that is
+ # the current tag because the earlier ones are already replaced - confirm
+ soup.find('img').replace_with(replaced_data)
+
+ logging.info('Image finding finished. Found %s images', image_iterator)
+ return images_links
+
+
+def find_href(soup):
+ """
+ This function allows to extract href links from parsed feed
+ Every a tag that has a href attribute is numbered starting from 1 and replaced in the soup with a text
+ placeholder (' [link N | text] ' or ' [link N] ')
+
+ :param bs4.BeautifulSoup soup: A beautifulsoup representation of parsed news feed
+ :return: A list of found href links in '[N]: href' format
+ :rtype: list
+ """
+
+ logging.info('Starting link finding')
+ href_iterator = 0
+ href_links = []
+ for href in soup.findAll('a'):
+ if 'href' in href.attrs:
+ href_iterator += 1
+ link = href['href']
+ # The text of the tag, when there is one, is kept inside the placeholder
+ if href.text != '':
+ replaced_data = ' [link {0} | {1}] '.format(href_iterator, href.text)
+ else:
+ replaced_data = ' [link {0}] '.format(href_iterator)
+ href_links.append('[{0}]: {1}'.format(href_iterator, link))
+ href.replace_with(replaced_data)
+ logging.info('Link finding finished. Found %s links', href_iterator)
+ return href_links
+
+
+def find_videos(soup):
+ """
+ This function allows to extract