diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c1bc424
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+# One-shot RSS reader
+
+1. --date - takes a date in YYYYMMDD format and returns cached news with that publication date
+2. --to_fb2 - converts output to fb2 format
+3. --to_html - converts output to html format
+4. --path - chooses the path where output files are saved
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/project/SQL_cache.py b/project/SQL_cache.py
new file mode 100644
index 0000000..d46ba65
--- /dev/null
+++ b/project/SQL_cache.py
@@ -0,0 +1,92 @@
+import sqlite3
+from os.path import exists
+import sys
+from .log_helper import stdout_write, write_progressbar
+
+
+class Database():
+ """Class working with SQLite3 database"""
+
+ def __init__(self):
+ super(Database, self).__init__()
+ if not exists("cache.db"):
+ conn = sqlite3.connect("cache.db")
+ cursor = conn.cursor()
+ cursor.execute("""
+ CREATE TABLE `feed` (`source` text unique, `name` text)
+ """)
+ cursor.execute("""
+ CREATE TABLE "news" ( `source` text, `date` text,
+ `title` text, `link` text UNIQUE,
+ `description` text, `links` text )
+ """)
+ conn.commit()
+ conn.close()
+ self.conn = None
+ self.cursor = None
+
+ def _open(self):
+ self.conn = sqlite3.connect("cache.db")
+ self.cursor = self.conn.cursor()
+
+ def _close(self):
+ self.conn.close()
+
+ def write_data(self, data, feed, url, verbose, color):
+ """Write news to database
+ Params:
+ data: tuple - article data
+ feed: str - rss_channel feed
+ url: str
+ verbose: bool
+ """
+ try:
+ self._open()
+ counter = 0
+ if verbose:
+ write_progressbar(len(data)+1, counter)
+ for news in data:
+ self.cursor.execute("""
+ INSERT INTO news
+ VALUES (?,?,?,?,?,?)
+ """, news)
+ counter += 1
+ if verbose:
+ write_progressbar(len(data)+1, counter)
+ self.conn.commit()
+ self.cursor.execute("""
+ INSERT INTO feed
+ VALUES (?,?)
+ """, (url, feed))
+ self.conn.commit()
+ except sqlite3.IntegrityError:
+ pass
+ except sqlite3.DatabaseError:
+ stdout_write("Database error", color="red", colorize=color)
+ finally:
+ self._close()
+ counter = len(data)+1
+ if verbose:
+ write_progressbar(len(data)+1, counter)
+
+ def read_data(self, url, date, color):
+ """Get url & date
+ Return feed & data
+ """
+ feed, data = None, None
+ try:
+ self._open()
+ self.cursor.execute(f"""
+ SELECT name from feed WHERE source = '{url}'
+ """)
+ feed = self.cursor.fetchall()
+ self.cursor.execute(f"""
+ SELECT * from news WHERE source = '{url}' and date = '{date}'
+ """)
+ data = self.cursor.fetchall()
+ except Exception as e:
+ stdout_write(f"Database reading error {e}", color="red", colorize=color)
+ sys.exit()
+ finally:
+ self._close()
+ return feed, data
diff --git a/project/__init__.py b/project/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/project/converter.py b/project/converter.py
new file mode 100644
index 0000000..10e9292
--- /dev/null
+++ b/project/converter.py
@@ -0,0 +1,217 @@
+from .log_helper import stdout_write, write_progressbar
+from random import randint
+from time import time
+from base64 import b64encode
+import os
+import urllib.request
+import urllib.error
+
+
+def _download_image(url, verbose, sv_path, color=False):
+ """Download an image from the given URL and save it under sv_path"""
+ stdout_write("Downloading image", verbose=verbose, color="blue", colorize=color)
+ try:
+ local_name, headers = urllib.request.urlretrieve(
+ url, sv_path + '/' + url.split('/')[-1])
+ stdout_write(f'Image "{url}" was downloaded.', verbose=verbose, color="green", colorize=color)
+ return local_name
+ except (urllib.error.URLError, urllib.error.HTTPError):
+ stdout_write("Error occurred during downloading image", color="red", colorize=color)
+ return ""
+ except ValueError:
+ stdout_write("Error: image not found", color="red", colorize=color)
+ return ""
+
+
+class Converter():
+ """Converter class. Convert data to some format"""
+
+ def to_json(self, feed, column, verbose, color):
+ """Take data and return it in json"""
+ stdout_write("Convert to json...", verbose=verbose, color="blue", colorize=color)
+ counter = 0
+ if verbose:
+ write_progressbar(len(column), counter)
+ json_text = '{\n "title": "' + feed + '",\n "news": ['
+ separ = False
+ for news in column:
+ if separ:
+ json_text += ','
+ separ = True
+ json_text += '{\n "title": "' + news['title'] + '",'
+ if 'date' in news:
+ json_text += '\n "date": "' + news['date'] + '",'
+ json_text += '\n "link": "' + news['link'] + '",'
+ json_text += '\n "description": "' + (news['text']) + '",'
+ json_text += '\n "links": ['
+ links = ""
+ for lin in news['links']:
+ links += f'\n "{lin}",'
+ if len(links) != 0:
+ json_text += links[:-1] + "\n ]"
+ else:
+ json_text += ']'
+ json_text += "\n }"
+ counter += 1
+ if verbose:
+ write_progressbar(len(column), counter)
+ json_text += ']\n}'
+ return json_text
+
+ def to_fb2(self, feed, column, url, sv_path=os.getcwd(), verbose=False, color=False):
+ """Function convert data to fb2 and save as file
+ Params:
+ feed - rss_channel feed
+ column - data from rss_channel
+ sv_path - path for html doc
+ url - link to source
+ """
+ def next_article(id, title, images, description, feed, date="Unknown"):
+ """return code for single article and
+ binary files for used images
+ """
+ stdout_write("Converting an article...", verbose=verbose, color="blue", colorize=color)
+ binary = []
+ for img in images:
+ binary += [f'{img}']
+ return f"""
+
+ {title}
+
+ {' '.join([f'' for img in images])}
+ {date}
+ {description}
+ Source: {feed}
+
+""", binary
+
+ stdout_write("Creating FB2 file", verbose=verbose, color="blue", colorize=color)
+ fb2_begin = '\n' + \
+ ''
+ fb2_end = ''
+ fb2_desc = f"""
+
+
+ <genre>sci_business</genre>
+
+ {url}
+
+ {feed}
+ en
+
+
+
+ {url}
+
+ 11.11.2011
+ 3.14
+ {hash(time()+randint(10000000, 1000000000000))}
+
+
+
+"""
+ binary = []
+ fb2_text = fb2_begin + fb2_desc
+
+ stdout_write("Convert news", verbose=verbose, color="blue", colorize=color)
+ for news in column:
+ image_links = []
+ text_links = []
+ for link in news["links"]:
+ if "(image)" in link:
+ image_links += [link[:-8]]
+ else:
+ text_links += [link[:-7]]
+ images = []
+ for link in image_links:
+ img_path = _download_image(link, verbose, sv_path, color)
+ try:
+ with open(img_path, 'rb') as binfile:
+ images += [b64encode(binfile.read()).decode()]
+ except FileNotFoundError:
+ pass
+ article, temp_bin = next_article(id=hash(hash(news["title"]) + randint(1, 10000)),
+ title=news["title"],
+ images=images,
+ date=news["date"],
+ description=news["text"] +
+ 'links' + "\n".join(text_links),
+ feed=feed
+ )
+ fb2_text += article
+ binary += temp_bin
+ stdout_write("Text data converted", verbose=verbose, color="green", colorize=color)
+ binary = set(binary)
+ fb2_text += " "
+ for img in binary:
+ fb2_text += '\n'+img+'\n'
+ fb2_text += fb2_end
+ stdout_write("Add binary part", verbose=verbose, color="green", colorize=color)
+
+ file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.fb2"
+ open(file_path, 'a').close()
+ with open(file_path, "w") as file:
+ file.write(fb2_text)
+ stdout_write("FB2 document created", verbose=verbose, color="green", colorize=color)
+
+ def to_html(self, feed, column, sv_path=os.getcwd(), verbose=False, color=False):
+ """Function convert data to html and save as file
+ Params:
+ feed - rss_channel feed
+ column - data from rss_channel
+ sv_path - path for html doc
+ """
+
+ def next_article(title, images, description, feed, links, date="Unknown"):
+ """create html-code for single article"""
+ return f"""
+
+
{title}
+ {' '.join(f'

' for img in images)}
+
{description}
+ {' '.join(f'
link ' for link in links)}
+
Date: {date}
+
+ """
+
+ def create_html(feed, main_part):
+ return f"""
+
+
+
+ {feed}
+
+
+{main_part}
+
+
+"""
+
+ html_text = ""
+ stdout_write("Creating HTML version", verbose=verbose, color="blue", colorize=color)
+ for news in column:
+ image_links = []
+ text_links = []
+ for link in news["links"]:
+ if "(image)" in link:
+ image_links += [link[:-8]]
+ else:
+ text_links += [link[:-7]]
+ images = []
+ for link in image_links:
+ img_path = _download_image(link, verbose, sv_path, color)
+ images += [img_path]
+ html_text += next_article(links=text_links,
+ title=news["title"],
+ images=images,
+ date=news["date"],
+ description=news["text"],
+ feed=feed
+ )
+ html_text = create_html(feed, html_text)
+ file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.html"
+ open(file_path, 'a').close()
+ with open(file_path, "w") as file:
+ file.write(html_text)
+ stdout_write("Finish HTML document", verbose=verbose, color="green", colorize=color)
diff --git a/project/html_parser.py b/project/html_parser.py
new file mode 100644
index 0000000..fcfd62e
--- /dev/null
+++ b/project/html_parser.py
@@ -0,0 +1,37 @@
+from html.parser import HTMLParser
+
+
+class _HTMLTagsParser(HTMLParser):
+ """Class using for parsing html-formatted text"""
+
+ def __init__(self):
+ super().__init__()
+ self.links = []
+ self.text = ""
+
+ def handle_starttag(self, tag, attrs):
+ """Convert <img> and <a>
tags to text form"""
+ if tag == "img":
+ num = len(self.links)+1
+ self.text += "[Image"
+ for attr in attrs:
+ if attr[0] == "alt" and attr[1] != "":
+ self.text += f": {attr[1]}"
+ elif attr[0] == "src":
+ self.links += [attr[1] + " (image)"]
+ self.text += f"][{num}]"
+ elif tag == "a":
+ for attr in attrs:
+ if attr[0] == "href":
+ self.links += [attr[1] + " (text)"]
+
+ def handle_data(self, data):
+ """Take text from HTML"""
+ self.text += data
+
+
+def parse_HTML(text):
+ """Return text without tags or links and a list with links"""
+ parser = _HTMLTagsParser()
+ parser.feed(text)
+ return parser.text, parser.links
diff --git a/project/log_helper.py b/project/log_helper.py
new file mode 100644
index 0000000..79d6249
--- /dev/null
+++ b/project/log_helper.py
@@ -0,0 +1,37 @@
+def stdout_write(string, sep=' ', end='\n', flush=False, verbose=True, color="", colorize=False):
+ """Output function for a single string; converts &#39; to '"""
+ if colorize:
+ RED = '\033[31m'
+ BLUE = '\033[34m'
+ GREEN = '\033[92m'
+ RESET = '\033[0m'
+ else:
+ RED, BLUE, GREEN, RESET = "", "", "", ""
+
+ if color == "red":
+ color = RED
+ elif color == "blue":
+ color = BLUE
+ elif color == "green":
+ color = GREEN
+ else:
+ color, RESET = "", ""
+
+ if verbose:
+ string = string.replace("&#39;", "'")
+ print(color+string+RESET, sep=sep, end=end, flush=flush)
+
+
+def write_progressbar(elems, done, length=20):
+ """Take arguments
+ elems: count of elements
+ done: progress (in elements)
+ length: progress bar length
+ Write progress bar to stdout
+ """
+ if done != 0:
+ print("\r", end="")
+ col = int(length * (done/elems))
+ print(f"[{'='*col + ' '*(length-col)}] {int(100*done/elems)}%", end="")
+ if elems == done:
+ print()
diff --git a/project/reader.py b/project/reader.py
new file mode 100644
index 0000000..4ec4ed1
--- /dev/null
+++ b/project/reader.py
@@ -0,0 +1,154 @@
+import sys
+import urllib.request
+import urllib.error
+from xml.dom.minidom import parseString
+import dateutil.parser
+from .html_parser import parse_HTML
+from .converter import Converter
+from .log_helper import stdout_write, write_progressbar
+from .SQL_cache import Database
+
+
+class RSSReader():
+ """RSSReader: Class for reading rss channels.
+ Methods:
+ show_news() - print news to stdout
+ show_json() - print news to stdout in json format
+ save_fb2() - save news as fb2 file
+ save_html() - save news as html file
+ """
+
+ def __init__(self, source, limit, verbose, date, sv_path, colorize):
+ super(RSSReader, self).__init__()
+ self.__source = source
+ self.__limit = limit
+ self.__verbose = verbose
+ self.__date = date
+ self.__sv_path = sv_path
+ self.__colorize = colorize
+ self.__text = ""
+
+ def __find_news(self):
+ """Ask database for news from entered date
+ Return data in the same format with __parse function
+ """
+ stdout_write("Reading data from database...", verbose=self.__verbose, color="blue", colorize=self.__colorize)
+ feed, data = Database().read_data(self.__source, self.__date, self.__colorize)
+ column = []
+ if not data:
+ stdout_write("Error: Articles from the entered date not found", color="red", colorize=self.__colorize)
+ sys.exit()
+ counter = 0
+ if self.__verbose:
+ write_progressbar(len(data), counter)
+ for news in data:
+ column += [{"title": news[2],
+ "link": news[3],
+ "text": news[4],
+ "links": news[5].split('\n')}]
+ counter += 1
+ if self.__verbose:
+ write_progressbar(len(data), counter)
+ return feed[0][0], column
+
+ def __cache_data(self, column, feed):
+ """Take parsed data and write it to database"""
+ stdout_write("Writing data to database...", verbose=self.__verbose, color="blue", colorize=self.__colorize)
+ date = lambda pubDate: dateutil.parser.parse(pubDate).strftime("%Y%m%d")
+ formated_data = [
+ (self.__source, date(col["date"]), col["title"],
+ col["link"], col["text"], "\n".join(col["links"])) for col in column]
+ Database().write_data(formated_data, feed, self.__source, self.__verbose, self.__colorize)
+
+ def __read_news(self):
+ """Read data from link"""
+ try:
+ stdout_write(f"Reading information from {self.__source}", end='...\n',
+ verbose=self.__verbose, color="blue", colorize=self.__colorize)
+ with urllib.request.urlopen(self.__source) as rss:
+ bytestr = rss.read()
+ self.__text = bytestr.decode("utf8")
+ stdout_write("Complete.", verbose=self.__verbose, color="green", colorize=self.__colorize)
+ except ValueError:
+ stdout_write("Error: Can't connect, please try with https://", color="red", colorize=self.__colorize)
+ sys.exit()
+ except urllib.error.URLError:
+ stdout_write("Error: Can't connect to web-site, please check URL", color="red", colorize=self.__colorize)
+ sys.exit()
+ except Exception:
+ stdout_write("Unknown error", color="red", colorize=self.__colorize)
+ sys.exit()
+
+ def __parse(self):
+ """Parse XML data to python structures"""
+ stdout_write("Parsing information...", verbose=self.__verbose, color="blue", colorize=self.__colorize)
+ xml = parseString(self.__text)
+ feed = xml.getElementsByTagName("title")[0].firstChild.data
+ items = xml.getElementsByTagName("item")
+ counter = 0
+ if self.__verbose:
+ write_progressbar(self.__limit, counter)
+ column = []
+ for item in items:
+ if counter == self.__limit:
+ break
+ counter += 1
+ a = item.getElementsByTagName("description")[0].firstChild.data
+ text, links = parse_HTML(a)
+ column += [{"title": item.getElementsByTagName("title")[0].firstChild.data,
+ "date": item.getElementsByTagName("pubDate")[0].firstChild.data,
+ "link": item.getElementsByTagName("link")[0].firstChild.data,
+ "text": text,
+ "links": links}]
+ if self.__verbose:
+ write_progressbar(self.__limit, counter)
+ self.__cache_data(column, feed)
+ return feed, column
+
+ def __read(self):
+ """Information source selection"""
+ if not self.__date:
+ self.__read_news()
+ return self.__parse()
+ return self.__find_news()
+
+ def show_news(self):
+ """Read data and print info in stdout"""
+ feed, column = self.__read()
+ stdout_write(f"{feed}", end="\n\n")
+ for news in column:
+ stdout_write(f"Title: {news['title']}")
+ if 'date' in news:
+ stdout_write(f"Date: {news['date']}")
+ stdout_write(f"Link: {news['link']}", end="\n\n")
+ stdout_write(news['text'], end="\n\n")
+ if len(news['links']) != 0:
+ stdout_write("Links:")
+ link_num = 1
+ for link in news['links']:
+ stdout_write(f"[{link_num}]: {link}", color="blue", colorize=self.__colorize)
+ link_num += 1
+ stdout_write("\n\n")
+
+ def show_json(self):
+ """Read data, convert into json and print info in stdout"""
+ feed, column = self.__read()
+ json_text = Converter().to_json(feed, column, self.__verbose, color=self.__colorize)
+ stdout_write(json_text)
+
+ def save_fb2(self):
+ """Read data, convert to fb2 & save it as file"""
+ feed, column = self.__read()
+ if self.__sv_path:
+ Converter().to_fb2(feed, column, self.__source, self.__sv_path,
+ verbose=self.__verbose, color=self.__colorize)
+ else:
+ Converter().to_fb2(feed, column, self.__source, verbose=self.__verbose, color=self.__colorize)
+
+ def save_html(self):
+ """Read data, convert to html & save it as file"""
+ feed, column = self.__read()
+ if self.__sv_path:
+ Converter().to_html(feed, column, self.__sv_path, verbose=self.__verbose, color=self.__colorize)
+ else:
+ Converter().to_html(feed, column, verbose=self.__verbose, color=self.__colorize)
diff --git a/project/rss_reader.py b/project/rss_reader.py
new file mode 100755
index 0000000..259537f
--- /dev/null
+++ b/project/rss_reader.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+import argparse
+from .reader import RSSReader
+
+
+def parse_arguments():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--version", action='version', help="Print version info", version="Version 0.5")
+ parser.add_argument("source", type=str, help="RSS URL")
+ parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout")
+ parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages")
+ parser.add_argument("--to_fb2", action="store_true", help="Save as fb2 file")
+ parser.add_argument("--to_html", action="store_true", help="Save as html file")
+ parser.add_argument("--colorize",action="store_true", help="Add colors to console output")
+ parser.add_argument("--path", type=str, help="Save news to file at entered path.")
+ parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided")
+ parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD")
+ return parser.parse_args()
+
+
+def main():
+ args = parse_arguments()
+ if ' ' in args.path:
+ args.path = None
+ if args.colorize:
+ print('\033[31m' + 'Error: path cannot contain spaces.' + '\033[0m')
+ else:
+ print('Error: path cannot contain spaces.')
+ working_dir = input("Input Y to use working directory")
+ if 'y' in working_dir or 'Y' in working_dir:
+ pass
+ else:
+ return 0
+ rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path, args.colorize)
+ used = False
+ if args.json:
+ rss.show_json()
+ used = True
+ if args.to_fb2:
+ rss.save_fb2()
+ used = True
+ if args.to_html:
+ rss.save_html()
+ used = True
+ if not used:
+ rss.show_news()
+ return 0
+
+
+if __name__ == "__main__":
+ main()
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..3ddf13a
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+from setuptools import setup, find_packages
+
+setup(
+ name='RSSReader_Kiryl',
+ version='0.5',
+ url='https://github.com/KirylDv/PythonHomework/tree/FinalTask',
+ packages=find_packages(),
+ python_requires='>=3.6',
+ py_modules=['project.rss_reader', 'project.reader',
+ 'project.html_parser', 'project.converter',
+ 'project.SQL_cache', 'project.log_helper'],
+ install_requires=['python-dateutil'],
+ # To provide executable scripts, use entry points in preference to the
+ # "scripts" keyword. Entry points provide cross-platform support and allow
+ # `pip` to create the appropriate form of executable for the target
+ # platform.
+ #
+ # For example, the following would provide a command called `sample` which
+ # executes the function `main` from this package when invoked:
+ entry_points={ # Optional
+ 'console_scripts': ['rss_reader=project.rss_reader:main'],
+ },
+)