From f265ddbf67666e321d1c960eab02dee6bec12fdf Mon Sep 17 00:00:00 2001 From: Kiryl Dvorakovsky <39598590+KirylDv@users.noreply.github.com> Date: Sun, 10 Nov 2019 19:39:37 +0300 Subject: [PATCH 01/35] Create rss_reader.py --- rss_reader.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 rss_reader.py diff --git a/rss_reader.py b/rss_reader.py new file mode 100644 index 0000000..3339ef2 --- /dev/null +++ b/rss_reader.py @@ -0,0 +1,5 @@ +def main(): + pass + +if __name__ == "__main__": + main() From df719207f3d4bf516562a52033d0a338ab54e9ad Mon Sep 17 00:00:00 2001 From: Kiryl Dvorakovsky <39598590+KirylDv@users.noreply.github.com> Date: Sun, 10 Nov 2019 19:41:40 +0300 Subject: [PATCH 02/35] Delete rss_reader.py --- rss_reader.py | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 rss_reader.py diff --git a/rss_reader.py b/rss_reader.py deleted file mode 100644 index 3339ef2..0000000 --- a/rss_reader.py +++ /dev/null @@ -1,5 +0,0 @@ -def main(): - pass - -if __name__ == "__main__": - main() From bb229b9414ced30e9c424fb4005f056f1f6656ee Mon Sep 17 00:00:00 2001 From: Kiryl Dvorakovsky <39598590+KirylDv@users.noreply.github.com> Date: Sun, 10 Nov 2019 19:42:30 +0300 Subject: [PATCH 03/35] Create rss_reader.py --- rss_reader.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 rss_reader.py diff --git a/rss_reader.py b/rss_reader.py new file mode 100644 index 0000000..3339ef2 --- /dev/null +++ b/rss_reader.py @@ -0,0 +1,5 @@ +def main(): + pass + +if __name__ == "__main__": + main() From aec5dd76acf17224eb99ebe787a45da0bcc416c9 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Tue, 12 Nov 2019 19:38:15 +0300 Subject: [PATCH 04/35] Add: arguments for parser --- rss_reader.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/rss_reader.py b/rss_reader.py index 3339ef2..079e428 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -1,5 +1,14 @@ +import argparse + def main(): - pass + parser = argparse.ArgumentParser() + parser.add_argument("source", type=str, help="RSS URL") + parser.add_argument("--version", action="store_true", help="Print version info") + parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") + parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") + parser.add_argument("--limit", action="store_true", help="Limit news topics if this parameter provided") + args = parser.parse_args() + print(args) if __name__ == "__main__": main() From 7d412eff1f097d22ebedcaf6a6125f82a61cfc3d Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 15 Nov 2019 16:33:57 +0300 Subject: [PATCH 05/35] Create file reader.py --- reader.py | 28 ++++++++++++++++++++++++++++ rss_reader.py | 11 +++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 reader.py diff --git a/reader.py b/reader.py new file mode 100644 index 0000000..1ea8a77 --- /dev/null +++ b/reader.py @@ -0,0 +1,28 @@ +import urllib.request + + +class RSSReader(): + """docstring for RSSReader""" + + def __init__(self, source, limit=None): + super(RSSReader, self).__init__() + self.source = source + self.limit = limit + self.text = "" + + def read_news(self): + try: + with passurllib.request.urlopen(source) as rss: + bytestr = rss.read() + self.text = bytestr.decode("utf8") + except Exception as e: + if type(e) is AttributeError: + print("Error: URL not found") + if type(e) is ValueError: + print("Error: Invalid URL") + + def parse(self): + pass + + def show_news(self): + self.read_news() diff --git a/rss_reader.py b/rss_reader.py index 079e428..8ad456d 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -1,14 +1,21 @@ import argparse +import reader -def main(): + +def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--version", action="store_true", help="Print version info") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") parser.add_argument("--limit", action="store_true", help="Limit news topics if this parameter provided") - args = parser.parse_args() + return parser.parse_args() + + +def main(): + args = parse_arguments() print(args) + if __name__ == "__main__": main() From 416704e569d4e3403cb7224fcbfee81b95f384d4 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 15 Nov 2019 20:56:51 +0300 Subject: [PATCH 06/35] Add: parser --- html_parser.py | 29 +++++++++++++++++++++++++++++ reader.py | 49 +++++++++++++++++++++++++++++++++++++------------ rss_reader.py | 9 ++++++--- 3 files changed, 72 insertions(+), 15 deletions(-) create mode 100644 html_parser.py diff --git a/html_parser.py b/html_parser.py new file mode 100644 index 0000000..ea4d8ab --- /dev/null +++ b/html_parser.py @@ -0,0 +1,29 @@ +from html.parser import HTMLParser + +class _html_parser(HTMLParser): + def __init__(self): + super().__init__() + self.links = [] + self.text = "" + + def handle_starttag(self, tag, attrs): + if tag == "img": + num = len(self.links)+1 + self.text += f"[Image {num}: " + for attr in attrs: + if attr[0] == "alt": + self.text += attr[1] + f"][{num}]" + elif attr[0] == "src": + self.links += [attr[1]] + elif tag == "a": + for attr in attrs: + if attr[0] == "href": + self.links += [attr[1]] + + def handle_data(self, data): + self.text += data + +def parse_HTML(text): + parser = _html_parser(); + parser.feed(text) + return parser.text, parser.links \ No newline at end of file diff --git a/reader.py b/reader.py index 1ea8a77..b10a64a 100644 --- a/reader.py +++ b/reader.py @@ -1,28 +1,53 @@ import urllib.request +from xml.dom.minidom import parseString +from xml.dom.minidom import parse as parseFile +from html_parser import parse_HTML class RSSReader(): - """docstring for RSSReader""" + """RSSReader: class for reading rss channels""" - def __init__(self, source, limit=None): + def __init__(self, args): super(RSSReader, self).__init__() - self.source = source - self.limit = limit + self.source = args.source + self.limit = args.limit + self.json = args.json + self.verbose = args.verbose self.text = "" - def read_news(self): + def __read_news(self): try: - with passurllib.request.urlopen(source) as rss: + with urllib.request.urlopen(self.source) as rss: bytestr = rss.read() self.text = bytestr.decode("utf8") except Exception as e: - if type(e) is AttributeError: - print("Error: URL not found") - if type(e) is ValueError: + if type(e) == ValueError: print("Error: Invalid URL") + else: + print("Unknown error") - def parse(self): - pass + def __parse(self): + xml = parseString(self.text) + feed = xml.getElementsByTagName("title")[0].firstChild.data + items = xml.getElementsByTagName("item") + counter = 0 + column = [] + for item in items: + if counter == self.limit: + break + counter += 1 + a = item.getElementsByTagName("description")[0].firstChild.data + text, links = parse_HTML(a) + column += [item.getElementsByTagName("title")[0].firstChild.data, + item.getElementsByTagName("pubDate")[0].firstChild.data, + item.getElementsByTagName("link")[0].firstChild.data, + text, + links] + return feed, column def show_news(self): - self.read_news() + self.__read_news() + feed, column = self.__parse() + feed.replace("'","'") + print(f"Feed: {feed}") + diff --git a/rss_reader.py b/rss_reader.py index 8ad456d..110511e 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -4,17 +4,20 @@ def parse_arguments(): parser = argparse.ArgumentParser() + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.01") parser.add_argument("source", type=str, help="RSS URL") - parser.add_argument("--version", action="store_true", help="Print version info") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") - parser.add_argument("--limit", action="store_true", help="Limit news topics if this parameter provided") + parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") return parser.parse_args() def main(): args = parse_arguments() - print(args) + rss = reader.RSSReader(args) + rss.show_news() + #print(args.json) + if __name__ == "__main__": From e46c7cc8265f9dfc12d59c2f87288825880cac69 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 15 Nov 2019 22:30:58 +0300 Subject: [PATCH 07/35] Add: console output for news --- html_parser.py | 13 ++++++---- reader.py | 65 +++++++++++++++++++++++++++++++++----------------- rss_reader.py | 2 -- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/html_parser.py b/html_parser.py index ea4d8ab..2e6c2ff 100644 --- a/html_parser.py +++ b/html_parser.py @@ -1,6 +1,9 @@ from html.parser import HTMLParser + class _html_parser(HTMLParser): + """Class using for parsing html-formatted text""" + def __init__(self): super().__init__() self.links = [] @@ -14,16 +17,18 @@ def handle_starttag(self, tag, attrs): if attr[0] == "alt": self.text += attr[1] + f"][{num}]" elif attr[0] == "src": - self.links += [attr[1]] + self.links += [attr[1] + " (image)"] elif tag == "a": for attr in attrs: if attr[0] == "href": - self.links += [attr[1]] + self.links += [attr[1] + " (text)"] def handle_data(self, data): self.text += data + def parse_HTML(text): - parser = _html_parser(); + """Return text without tags or links and a list with links""" + parser = _html_parser() parser.feed(text) - return parser.text, parser.links \ No newline at end of file + return parser.text, parser.links diff --git a/reader.py b/reader.py index b10a64a..4aa9d88 100644 --- a/reader.py +++ b/reader.py @@ -1,53 +1,74 @@ +import sys import urllib.request +import urllib.error from xml.dom.minidom import parseString -from xml.dom.minidom import parse as parseFile from html_parser import parse_HTML +def output(string, sep=' ', end='\n', flush=False): + """Output function for singe string but convert ' to '""" + string = string.replace("'", "'") + print(string, sep=sep, end=end, flush=flush) + + class RSSReader(): - """RSSReader: class for reading rss channels""" + """RSSReader: Class for reading rss channels. + + """ def __init__(self, args): super(RSSReader, self).__init__() - self.source = args.source - self.limit = args.limit - self.json = args.json - self.verbose = args.verbose - self.text = "" + self.__source = args.source + self.__limit = args.limit + self.__json = args.json + self.__verbose = args.verbose + self.__text = "" def __read_news(self): + """ """ try: - with urllib.request.urlopen(self.source) as rss: + with urllib.request.urlopen(self.__source) as rss: bytestr = rss.read() - self.text = bytestr.decode("utf8") + self.__text = bytestr.decode("utf8") except Exception as e: - if type(e) == ValueError: - print("Error: Invalid URL") + if type(e) is ValueError: + output("Error: Can't connect, please try with https://") + elif type(e) is urllib.error.URLError: + output("Error: Can't connect to web-site, please check URL") else: - print("Unknown error") + output("Unknown error") + sys.exit() + def __parse(self): - xml = parseString(self.text) + xml = parseString(self.__text) feed = xml.getElementsByTagName("title")[0].firstChild.data items = xml.getElementsByTagName("item") counter = 0 column = [] for item in items: - if counter == self.limit: + if counter == self.__limit: break counter += 1 a = item.getElementsByTagName("description")[0].firstChild.data text, links = parse_HTML(a) - column += [item.getElementsByTagName("title")[0].firstChild.data, - item.getElementsByTagName("pubDate")[0].firstChild.data, - item.getElementsByTagName("link")[0].firstChild.data, - text, - links] + column += [[item.getElementsByTagName("title")[0].firstChild.data, + item.getElementsByTagName("pubDate")[0].firstChild.data, + item.getElementsByTagName("link")[0].firstChild.data, + text, + links]] return feed, column def show_news(self): self.__read_news() feed, column = self.__parse() - feed.replace("'","'") - print(f"Feed: {feed}") - + output(f"Feed: {feed}", end="\n\n") + for news in column: + output(f"Title: {news[0]}") + output(f"Date: {news[1]}") + output(f"Link: {news[2]}", end="\n\n") + output(news[3], end="\n\n") + output("Links:") + for i in range(len(news[4])): + output(f"[{i+1}]: {news[4][i]}") + output("\n\n") diff --git a/rss_reader.py b/rss_reader.py index 110511e..259a2d0 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -16,8 +16,6 @@ def main(): args = parse_arguments() rss = reader.RSSReader(args) rss.show_news() - #print(args.json) - if __name__ == "__main__": From 48baada5b4c813df8d85b24ea6825dee6dcd7ad0 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 16 Nov 2019 20:01:51 +0300 Subject: [PATCH 08/35] Add: --verbose --- reader.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/reader.py b/reader.py index 4aa9d88..126dc86 100644 --- a/reader.py +++ b/reader.py @@ -5,16 +5,17 @@ from html_parser import parse_HTML -def output(string, sep=' ', end='\n', flush=False): +def output(string, sep=' ', end='\n', flush=False, verbose=True): """Output function for singe string but convert ' to '""" - string = string.replace("'", "'") - print(string, sep=sep, end=end, flush=flush) + if verbose: + string = string.replace("'", "'") + print(string, sep=sep, end=end, flush=flush) class RSSReader(): """RSSReader: Class for reading rss channels. - - + Methods: + show_news() - output news to stdout """ def __init__(self, args): super(RSSReader, self).__init__() @@ -25,11 +26,13 @@ def __init__(self, args): self.__text = "" def __read_news(self): - """ """ + """Read data from link""" try: + output(f"Reading information from {self.__source}", end='...\n', verbose=self.__verbose) with urllib.request.urlopen(self.__source) as rss: bytestr = rss.read() self.__text = bytestr.decode("utf8") + output("Complete.", verbose=self.__verbose) except Exception as e: if type(e) is ValueError: output("Error: Can't connect, please try with https://") @@ -41,6 +44,8 @@ def __read_news(self): def __parse(self): + """Parse XML data to python structures""" + output("Parsing information...", verbose=self.__verbose) xml = parseString(self.__text) feed = xml.getElementsByTagName("title")[0].firstChild.data items = xml.getElementsByTagName("item") @@ -57,18 +62,22 @@ def __parse(self): item.getElementsByTagName("link")[0].firstChild.data, text, links]] + output(f"{counter} article parsed.", verbose=self.__verbose) + output("Complete.", verbose=self.__verbose) return feed, column def show_news(self): + """Read, parse and print info in stdout""" self.__read_news() feed, column = self.__parse() - output(f"Feed: {feed}", end="\n\n") + output(f"{feed}", end="\n\n") for news in column: output(f"Title: {news[0]}") output(f"Date: {news[1]}") output(f"Link: {news[2]}", end="\n\n") output(news[3], end="\n\n") - output("Links:") + if len(news[4]) != 0: + output("Links:") for i in range(len(news[4])): output(f"[{i+1}]: {news[4][i]}") output("\n\n") From 5a75dfbacac2fcfbb217a11d7e1af14c5f9fa0e7 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 16 Nov 2019 20:46:04 +0300 Subject: [PATCH 09/35] Add: call --json function --- rss_reader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rss_reader.py b/rss_reader.py index 259a2d0..d4b366f 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -15,7 +15,10 @@ def parse_arguments(): def main(): args = parse_arguments() rss = reader.RSSReader(args) - rss.show_news() + if args.json: + rss.show_json() + else: + rss.show_news() if __name__ == "__main__": From 4617b7dd74d92f9e984f0b1c75a97cbc33191361 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 16 Nov 2019 21:18:57 +0300 Subject: [PATCH 10/35] Add: --json --- reader.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/reader.py b/reader.py index 126dc86..22b6188 100644 --- a/reader.py +++ b/reader.py @@ -10,13 +10,29 @@ def output(string, sep=' ', end='\n', flush=False, verbose=True): if verbose: string = string.replace("'", "'") print(string, sep=sep, end=end, flush=flush) - + + +def progress(elems, done, length=20): + """Take arguments + elems: count of elements + done: progress (in elements) + length: progress bar length + Write progress bar to stdout + """ + if done != 0: + print("\r", end="") + col = int(length * (done/elems)) + print(f"[{'='*col + ' '*(length-col)}] {int(100*done/elems)}%", end="") + if elems == done: + print() + class RSSReader(): """RSSReader: Class for reading rss channels. Methods: show_news() - output news to stdout """ + def __init__(self, args): super(RSSReader, self).__init__() self.__source = args.source @@ -42,7 +58,6 @@ def __read_news(self): output("Unknown error") sys.exit() - def __parse(self): """Parse XML data to python structures""" output("Parsing information...", verbose=self.__verbose) @@ -50,6 +65,8 @@ def __parse(self): feed = xml.getElementsByTagName("title")[0].firstChild.data items = xml.getElementsByTagName("item") counter = 0 + if self.__verbose: + progress(self.__limit, counter) column = [] for item in items: if counter == self.__limit: @@ -62,8 +79,8 @@ def __parse(self): item.getElementsByTagName("link")[0].firstChild.data, text, links]] - output(f"{counter} article parsed.", verbose=self.__verbose) - output("Complete.", verbose=self.__verbose) + if self.__verbose: + progress(self.__limit, counter) return feed, column def show_news(self): @@ -81,3 +98,32 @@ def show_news(self): for i in range(len(news[4])): output(f"[{i+1}]: {news[4][i]}") output("\n\n") + + def show_json(self): + """Read, parse, convert into json and print info in stdout""" + self.__read_news() + feed, column = self.__parse() + output("Convert to json...", verbose=self.__verbose) + counter = 0 + if self.__verbose: + progress(len(column), counter) + json = '{\n "title": "' + feed + '",\n "news": [' + separ = False + for news in column: + if separ: + json += ',' + separ = True + json += '\n {\n "title": "' + news[0] + '",' + json += '\n "date": "' + news[1] + '",' + json += '\n "link": "' + news[2] + '",' + json += '\n "description": "' + news[3] + '",' + json += '\n "links": [' + links = "" + for lin in news[4]: + links += f'\n "{lin}",' + json += links[:-1] + "\n ]" + "\n }" + counter += 1 + if self.__verbose: + progress(len(column), counter) + json += '\n ]\n}' + output(json) From 751a4d0b5dffe19cf7fb75c80443d1632130399f Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 16 Nov 2019 22:09:01 +0300 Subject: [PATCH 11/35] Add shebang & correct codestyle --- reader.py | 12 ++++++++---- rss_reader.py | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) mode change 100644 => 100755 rss_reader.py diff --git a/reader.py b/reader.py index 22b6188..fc87e4f 100644 --- a/reader.py +++ b/reader.py @@ -13,7 +13,7 @@ def output(string, sep=' ', end='\n', flush=False, verbose=True): def progress(elems, done, length=20): - """Take arguments + """Take arguments elems: count of elements done: progress (in elements) length: progress bar length @@ -113,7 +113,7 @@ def show_json(self): if separ: json += ',' separ = True - json += '\n {\n "title": "' + news[0] + '",' + json += '{\n "title": "' + news[0] + '",' json += '\n "date": "' + news[1] + '",' json += '\n "link": "' + news[2] + '",' json += '\n "description": "' + news[3] + '",' @@ -121,9 +121,13 @@ def show_json(self): links = "" for lin in news[4]: links += f'\n "{lin}",' - json += links[:-1] + "\n ]" + "\n }" + if len(links) != 0: + json += links[:-1] + "\n ]" + else: + json += ']' + json += "\n }" counter += 1 if self.__verbose: progress(len(column), counter) - json += '\n ]\n}' + json += ']\n}' output(json) diff --git a/rss_reader.py b/rss_reader.py old mode 100644 new mode 100755 index d4b366f..2343a3d --- a/rss_reader.py +++ b/rss_reader.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import argparse import reader From 3dcb76a7b3c8ff2aed3e5f99f7c0458bfed4bc0e Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 16 Nov 2019 23:05:35 +0300 Subject: [PATCH 12/35] Fix: images without alt text --- html_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/html_parser.py b/html_parser.py index 2e6c2ff..67b27c8 100644 --- a/html_parser.py +++ b/html_parser.py @@ -12,12 +12,13 @@ def __init__(self): def handle_starttag(self, tag, attrs): if tag == "img": num = len(self.links)+1 - self.text += f"[Image {num}: " + self.text += "[Image" for attr in attrs: - if attr[0] == "alt": - self.text += attr[1] + f"][{num}]" + if attr[0] == "alt" and attr[1] != "": + self.text += f": {attr[1]}" elif attr[0] == "src": self.links += [attr[1] + " (image)"] + self.text += f"][{num}]" elif tag == "a": for attr in attrs: if attr[0] == "href": From cce26c199c1077ccc008be23502de4eae35c35d9 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 16 Nov 2019 23:55:45 +0300 Subject: [PATCH 13/35] 1st Iteration --- rss_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rss_reader.py b/rss_reader.py index 2343a3d..d50e636 100755 --- a/rss_reader.py +++ b/rss_reader.py @@ -5,7 +5,7 @@ def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--version", action='version', help="Print version info", version="Version 0.01") + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.1") parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") From 4ddda5c43dea60430b88d3dd210342e671eb9db6 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 17 Nov 2019 17:36:09 +0300 Subject: [PATCH 14/35] Add: setup.py --- setup.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..84e7b9d --- /dev/null +++ b/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup, find_packages +from os import path + +here = path.abspath(path.dirname(__file__)) + +setup( + name='RSSReader_Kiryl', + version='0.2', + url='https://github.com/KirylDv/PythonHomework/tree/FinalTask', + packages=find_packages(), + python_requires='>=3.6.8', + # To provide executable scripts, use entry points in preference to the + # "scripts" keyword. Entry points provide cross-platform support and allow + # `pip` to create the appropriate form of executable for the target + # platform. + # + # For example, the following would provide a command called `sample` which + # executes the function `main` from this package when invoked: + entry_points={ # Optional + 'console_scripts': ['rss-reader=rss_reader.py:main'], + }, +) \ No newline at end of file From 92c78bfcbd9874135545f01ff78da6e9aef083d1 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 17 Nov 2019 19:42:21 +0300 Subject: [PATCH 15/35] 2nd Iteration. Make changes for setup --- __init__.py | 0 project/__init__.py | 0 html_parser.py => project/html_parser.py | 0 reader.py => project/reader.py | 2 +- rss_reader.py => project/rss_reader.py | 6 +++--- setup.py | 8 +++----- 6 files changed, 7 insertions(+), 9 deletions(-) create mode 100644 __init__.py create mode 100644 project/__init__.py rename html_parser.py => project/html_parser.py (100%) rename reader.py => project/reader.py (99%) rename rss_reader.py => project/rss_reader.py (88%) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project/__init__.py b/project/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/html_parser.py b/project/html_parser.py similarity index 100% rename from html_parser.py rename to project/html_parser.py diff --git a/reader.py b/project/reader.py similarity index 99% rename from reader.py rename to project/reader.py index fc87e4f..8247918 100644 --- a/reader.py +++ b/project/reader.py @@ -2,7 +2,7 @@ import urllib.request import urllib.error from xml.dom.minidom import parseString -from html_parser import parse_HTML +from .html_parser import parse_HTML def output(string, sep=' ', end='\n', flush=False, verbose=True): diff --git a/rss_reader.py b/project/rss_reader.py similarity index 88% rename from rss_reader.py rename to project/rss_reader.py index d50e636..a7cf5fb 100755 --- a/rss_reader.py +++ b/project/rss_reader.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 import argparse -import reader +from .reader import RSSReader def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--version", action='version', help="Print version info", version="Version 0.1") + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.2") parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") @@ -15,7 +15,7 @@ def parse_arguments(): def main(): args = parse_arguments() - rss = reader.RSSReader(args) + rss = RSSReader(args) if args.json: rss.show_json() else: diff --git a/setup.py b/setup.py index 84e7b9d..ff3a0c8 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,12 @@ from setuptools import setup, find_packages -from os import path - -here = path.abspath(path.dirname(__file__)) setup( name='RSSReader_Kiryl', version='0.2', url='https://github.com/KirylDv/PythonHomework/tree/FinalTask', packages=find_packages(), - python_requires='>=3.6.8', + python_requires='>=3.6', + py_modules=['project.rss_reader', 'project.reader', 'project.html_parser'], # To provide executable scripts, use entry points in preference to the # "scripts" keyword. Entry points provide cross-platform support and allow # `pip` to create the appropriate form of executable for the target @@ -17,6 +15,6 @@ # For example, the following would provide a command called `sample` which # executes the function `main` from this package when invoked: entry_points={ # Optional - 'console_scripts': ['rss-reader=rss_reader.py:main'], + 'console_scripts': ['rss_reader=project.rss_reader:main'], }, ) \ No newline at end of file From c2abc7c16196dac1a5a8d250bf503ff48ec9a17b Mon Sep 17 00:00:00 2001 From: KirylDv Date: Tue, 26 Nov 2019 12:27:53 +0300 Subject: [PATCH 16/35] Refactor convertion to json --- project/converter.py | 35 ++++++++++++++++++++++++++++++ project/reader.py | 50 ++++++++++++++++--------------------------- project/rss_reader.py | 1 + 3 files changed, 54 insertions(+), 32 deletions(-) create mode 100644 project/converter.py diff --git a/project/converter.py b/project/converter.py new file mode 100644 index 0000000..eed8a0a --- /dev/null +++ b/project/converter.py @@ -0,0 +1,35 @@ +from .reader import output, progress + + +class Converter(): + """docstring for Converter""" + + def to_json(feed, column, verbose): + output("Convert to json...", verbose=verbose) + counter = 0 + if verbose: + progress(len(column), counter) + json = '{\n "title": "' + feed + '",\n "news": [' + separ = False + for news in column: + if separ: + json += ',' + separ = True + json += '{\n "title": "' + news[0] + '",' + json += '\n "date": "' + news[1] + '",' + json += '\n "link": "' + news[2] + '",' + json += '\n "description": "' + news[3] + '",' + json += '\n "links": [' + links = "" + for lin in news[4]: + links += f'\n "{lin}",' + if len(links) != 0: + json += links[:-1] + "\n ]" + else: + json += ']' + json += "\n }" + counter += 1 + if verbose: + progress(len(column), counter) + json += ']\n}' + return json diff --git a/project/reader.py b/project/reader.py index 8247918..24657f4 100644 --- a/project/reader.py +++ b/project/reader.py @@ -3,6 +3,7 @@ import urllib.error from xml.dom.minidom import parseString from .html_parser import parse_HTML +from .converter import Converter def output(string, sep=' ', end='\n', flush=False, verbose=True): @@ -41,6 +42,12 @@ def __init__(self, args): self.__verbose = args.verbose self.__text = "" + def __find_news(self): + pass + + def __cache_data(self): + pass + def __read_news(self): """Read data from link""" try: @@ -81,12 +88,18 @@ def __parse(self): links]] if self.__verbose: progress(self.__limit, counter) + self.__cache_data(column) return feed, column + def __read(): + if not self.__date: + self.__read_news() + return self.__parse() + return self.__find_news() + def show_news(self): - """Read, parse and print info in stdout""" - self.__read_news() - feed, column = self.__parse() + """Read and print info in stdout""" + feed, column = self.__read() output(f"{feed}", end="\n\n") for news in column: output(f"Title: {news[0]}") @@ -101,33 +114,6 @@ def show_news(self): def show_json(self): """Read, parse, convert into json and print info in stdout""" - self.__read_news() - feed, column = self.__parse() - output("Convert to json...", verbose=self.__verbose) - counter = 0 - if self.__verbose: - progress(len(column), counter) - json = '{\n "title": "' + feed + '",\n "news": [' - separ = False - for news in column: - if separ: - json += ',' - separ = True - json += '{\n "title": "' + news[0] + '",' - json += '\n "date": "' + news[1] + '",' - json += '\n "link": "' + news[2] + '",' - json += '\n "description": "' + news[3] + '",' - json += '\n "links": [' - links = "" - for lin in news[4]: - links += f'\n "{lin}",' - if len(links) != 0: - json += links[:-1] + "\n ]" - else: - json += ']' - json += "\n }" - counter += 1 - if self.__verbose: - progress(len(column), counter) - json += ']\n}' + feed, column = self.__read() + json = Converter.to_json(feed, column, self.__verbose) output(json) diff --git a/project/rss_reader.py b/project/rss_reader.py index a7cf5fb..c59f878 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -10,6 +10,7 @@ def parse_arguments(): parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD") return parser.parse_args() From f0aa4946b29bdcf8e86671f2edeec42d016a6d8e Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 29 Nov 2019 14:26:25 +0300 Subject: [PATCH 17/35] Add parts for cahce --- project/SQLcache.py | 40 ++++++++++++++++++++++++++++++++++++++++ project/cache.db | Bin 0 -> 16384 bytes project/reader.py | 10 +++++++--- setup.py | 4 ++-- 4 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 project/SQLcache.py create mode 100644 project/cache.db diff --git a/project/SQLcache.py b/project/SQLcache.py new file mode 100644 index 0000000..7fc271c --- /dev/null +++ b/project/SQLcache.py @@ -0,0 +1,40 @@ +import sqlite3 + + +class Database(): + """docstring for Database""" + + def __init__(self): + super(ClassName, self).__init__() + self.conn = None + self.cursor = None + + + def _open(self): + self.conn = sqllite3.connect("cache.db") + self.cursor = conn.cursore() + + def _close(self): + self.conn.close() + + def write_data(self, data, feed, url): + try: + self._open() + self.cursor.execute(""" + INSERT INTO news + VALUES (?,?,?,?,?,?) + """, data) + self.cursor.execute(""" + INSERT INTO feed + VALUES (?,?) + """, (url, feed)) + except sqlite3.DatabaseError as err: + print("Database error") + else: + self.conn.commit() + finally: + self._close() + + def read_data(self, url, date): + data = None + return data diff --git a/project/cache.db b/project/cache.db new file mode 100644 index 0000000000000000000000000000000000000000..4caaffd1a56a46fdb3e6bbb035e5cff885679e3d GIT binary patch literal 16384 zcmeI%K}*9h7=Yol72O7-yNq2t=YHF%; z`{4C_Px`%^fhPl#3S_%h4f1r6O;k|zy(eFe?)t-#*DcaXINv5~oipdSb=9;)luXq! ze~*i6_BdSVblo4DSB)=BYp&Q8bGqExY@xs2LZUu$Q|(_{Fq!sdajDf(cjYuJuJ4;r zM>;O!I7(j287iM-(VLFaWScRA`8w{@&B_haP!K=>0R#|0009ILKmY**5J2Ei1-Sno z>X+qQ2q1s}0tg_000IagfB*sr>;!5i Date: Fri, 29 Nov 2019 15:17:19 +0300 Subject: [PATCH 18/35] Add: database exists checking, change news from list fo dict --- project/{SQLcache.py => SQL_cache.py} | 19 ++++++- project/cache.db | Bin 16384 -> 16384 bytes project/converter.py | 34 ++++++------ project/html_parser.py | 6 +- project/reader.py | 77 +++++++++++++------------- project/rss_reader.py | 2 +- 6 files changed, 78 insertions(+), 60 deletions(-) rename project/{SQLcache.py => SQL_cache.py} (59%) diff --git a/project/SQLcache.py b/project/SQL_cache.py similarity index 59% rename from project/SQLcache.py rename to project/SQL_cache.py index 7fc271c..d8b023a 100644 --- a/project/SQLcache.py +++ b/project/SQL_cache.py @@ -1,18 +1,31 @@ import sqlite3 - +from os.path import exists class Database(): """docstring for Database""" def __init__(self): - super(ClassName, self).__init__() + super(Database, self).__init__() + if not exists("cache.db"): + conn = sqlite3.connect("cache.db") + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE `feed` (`source` text unique, `name` text) + """) + cursor.execute(""" + CREATE TABLE news(source text, + date text, title text, link text, + description text, links text) + """) + conn.commit() + conn.close() self.conn = None self.cursor = None def _open(self): self.conn = sqllite3.connect("cache.db") - self.cursor = conn.cursore() + self.cursor = conn.cursor() def _close(self): self.conn.close() diff --git a/project/cache.db b/project/cache.db index 4caaffd1a56a46fdb3e6bbb035e5cff885679e3d..eee0bc47307a62c5969b5e60ea99a66b56475799 100644 GIT binary patch delta 134 zcmZo@U~Fh$oRG$oz`)G+ih)0a?-jrK#>NUh4sRwlaZypm>dAuq%8Cia`K3k4sR;@t zsTCy(rFof!rKvg!33-XRP_fC8{6<`jqKxbyJx#omr||0va4DdI$p`qAF$6b1;+Hc3 E0LaE6&j0`b delta 105 zcmZo@U~Fh$oRG%T%)rdg!NA|m&%qzHSx{gaA7=m)ySS(*V and tags to text form""" if tag == "img": num = len(self.links)+1 self.text += "[Image" @@ -25,11 +26,12 @@ def handle_starttag(self, tag, attrs): self.links += [attr[1] + " (text)"] def handle_data(self, data): + """Take text from HTML""" self.text += data def parse_HTML(text): """Return text without tags or links and a list with links""" - parser = _html_parser() + parser = _HTMLTagsParser() parser.feed(text) return parser.text, parser.links diff --git a/project/reader.py b/project/reader.py index 800d532..9746f40 100644 --- a/project/reader.py +++ b/project/reader.py @@ -6,14 +6,14 @@ from .converter import Converter -def output(string, sep=' ', end='\n', flush=False, verbose=True): +def stdout_write(string, sep=' ', end='\n', flush=False, verbose=True): """Output function for singe string but convert ' to '""" if verbose: string = string.replace("'", "'") print(string, sep=sep, end=end, flush=flush) -def progress(elems, done, length=20): +def write_progressbar(elems, done, length=20): """Take arguments elems: count of elements done: progress (in elements) @@ -31,15 +31,15 @@ def progress(elems, done, length=20): class RSSReader(): """RSSReader: Class for reading rss channels. Methods: - show_news() - output news to stdout + show_news() - print news to stdout """ - def __init__(self, args): + def __init__(self, source, limit, verbose, date): super(RSSReader, self).__init__() - self.__source = args.source - self.__limit = args.limit - self.__json = args.json - self.__verbose = args.verbose + self.__source = source + self.__limit = limit + self.__verbose = averbose + self.__date = date self.__text = "" def __find_news(self): @@ -48,30 +48,31 @@ def __find_news(self): def __cache_data(self, column, feed): Date = format(pubDate) formated_data = [ - (self.__source, Date, col[0], col[2], col[3], col[4]) - for col in column] + (self.__source, Date, col["title"], + col["link"], col["text"], col["links"]) for col in column] Database().write_data(formated_data, feed, self.__source) def __read_news(self): """Read data from link""" try: - output(f"Reading information from {self.__source}", end='...\n', verbose=self.__verbose) + stdout_write(f"Reading information from {self.__source}", end='...\n', verbose=self.__verbose) with urllib.request.urlopen(self.__source) as rss: bytestr = rss.read() self.__text = bytestr.decode("utf8") - output("Complete.", verbose=self.__verbose) - except Exception as e: - if type(e) is ValueError: - output("Error: Can't connect, please try with https://") - elif type(e) is urllib.error.URLError: - output("Error: Can't connect to web-site, please check URL") - else: - output("Unknown error") + stdout_write("Complete.", verbose=self.__verbose) + except ValueError: + stdout_write("Error: Can't connect, please try with https://") + sys.exit() + except urllib.error.URLError: + stdout_write("Error: Can't connect to web-site, please check URL") + sys.exit() + except Exception: + stdout_write("Unknown error") sys.exit() def __parse(self): """Parse XML data to python structures""" - output("Parsing information...", verbose=self.__verbose) + stdout_write("Parsing information...", verbose=self.__verbose) xml = parseString(self.__text) feed = xml.getElementsByTagName("title")[0].firstChild.data items = xml.getElementsByTagName("item") @@ -85,11 +86,11 @@ def __parse(self): counter += 1 a = item.getElementsByTagName("description")[0].firstChild.data text, links = parse_HTML(a) - column += [[item.getElementsByTagName("title")[0].firstChild.data, - item.getElementsByTagName("pubDate")[0].firstChild.data, - item.getElementsByTagName("link")[0].firstChild.data, - text, - links]] + column += [{"title": item.getElementsByTagName("title")[0].firstChild.data, + "date": item.getElementsByTagName("pubDate")[0].firstChild.data, + "link": item.getElementsByTagName("link")[0].firstChild.data, + "text": text, + "links": links}] if self.__verbose: progress(self.__limit, counter) self.__cache_data(column, feed) @@ -104,20 +105,22 @@ def __read(): def show_news(self): """Read and print info in stdout""" feed, column = self.__read() - output(f"{feed}", end="\n\n") + stdout_write(f"{feed}", end="\n\n") for news in column: - output(f"Title: {news[0]}") - output(f"Date: {news[1]}") - output(f"Link: {news[2]}", end="\n\n") - output(news[3], end="\n\n") - if len(news[4]) != 0: - output("Links:") - for i in range(len(news[4])): - output(f"[{i+1}]: {news[4][i]}") - output("\n\n") + stdout_write(f"Title: {news["title"]}") + stdout_write(f"Date: {news["date"]}") + stdout_write(f"Link: {news["link"]}", end="\n\n") + stdout_write(news["text"], end="\n\n") + if len(news["links"]) != 0: + stdout_write("Links:") + link_num = 1 + for link in news["links"]: + stdout_write(f"[{link_num}]: {link}") + link_num += 1 + stdout_write("\n\n") def show_json(self): """Read, parse, convert into json and print info in stdout""" feed, column = self.__read() - json = Converter.to_json(feed, column, self.__verbose) - output(json) + json_text = Converter.to_json(feed, column, self.__verbose) + stdout_write(json_text) diff --git a/project/rss_reader.py b/project/rss_reader.py index c59f878..17a7b04 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -16,7 +16,7 @@ def parse_arguments(): def main(): args = parse_arguments() - rss = RSSReader(args) + rss = RSSReader(args.source, args.limit, args.verbose, args.date) if args.json: rss.show_json() else: From 5b0ba26383a95fc1da9f89aba1d6335f3e5b988c Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 29 Nov 2019 17:29:37 +0300 Subject: [PATCH 19/35] Add: working with cache methods --- project/SQL_cache.py | 58 +++++++++++++++++++++++++++++++----------- project/cache.db | Bin 16384 -> 0 bytes project/converter.py | 13 +++++----- project/log_helper.py | 20 +++++++++++++++ project/reader.py | 58 +++++++++++++++++------------------------- setup.py | 4 ++- 6 files changed, 97 insertions(+), 56 deletions(-) delete mode 100644 project/cache.db create mode 100644 project/log_helper.py diff --git a/project/SQL_cache.py b/project/SQL_cache.py index d8b023a..90e10dc 100644 --- a/project/SQL_cache.py +++ b/project/SQL_cache.py @@ -1,5 +1,7 @@ import sqlite3 from os.path import exists +import sys + class Database(): """docstring for Database""" @@ -13,41 +15,67 @@ def __init__(self): CREATE TABLE `feed` (`source` text unique, `name` text) """) cursor.execute(""" - CREATE TABLE news(source text, - date text, title text, link text, - description text, links text) + CREATE TABLE "news" ( `source` text, `date` text, + `title` text, `link` text UNIQUE, + `description` text, `links` text ) """) conn.commit() conn.close() self.conn = None self.cursor = None - def _open(self): - self.conn = sqllite3.connect("cache.db") - self.cursor = conn.cursor() + self.conn = sqlite3.connect("cache.db") + self.cursor = self.conn.cursor() def _close(self): self.conn.close() def write_data(self, data, feed, url): + """Write news to database + Params: + data: turple + feed: str + url: str + """ try: self._open() - self.cursor.execute(""" - INSERT INTO news - VALUES (?,?,?,?,?,?) - """, data) + for news in data: + self.cursor.execute(""" + INSERT INTO news + VALUES (?,?,?,?,?,?) + """, news) + self.conn.commit() self.cursor.execute(""" INSERT INTO feed VALUES (?,?) """, (url, feed)) - except sqlite3.DatabaseError as err: - print("Database error") - else: self.conn.commit() + except sqlite3.IntegrityError: + pass + except sqlite3.DatabaseError: + print("Database error") finally: self._close() def read_data(self, url, date): - data = None - return data + """Get url & date + Return feed & data + """ + feed, data = None, None + try: + self._open() + self.cursor.execute(f""" + SELECT name from feed WHERE source = '{url}' + """) + feed = self.cursor.fetchall() + self.cursor.execute(f""" + SELECT * from news WHERE source = '{url}' and date = '{date}' + """) + data = self.cursor.fetchall() + except Exception as e: + print("Database reading error", e) + sys.exit() + finally: + self._close() + return feed, data diff --git a/project/cache.db b/project/cache.db deleted file mode 100644 index eee0bc47307a62c5969b5e60ea99a66b56475799..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeI%Jx{_w7{KvsF=$A5SsE6H+k}Le7zYOz(J;xPveODjY%HzNYb0^fPsHcqQ`o0M zh$OPOk^d!ky|?zc{yIJB(?dVBMo*IYG_d+oHI(D18?BX6ZeEY_x+%M3XS1;9>ZTMR^WA z3^JP(0R#|0009ILKmY**5J2Ei1<3yo z^~>^J2q1s}0tg_000IagfB*srYy~RP<^TT{gdPG2Ab Date: Fri, 29 Nov 2019 17:37:04 +0300 Subject: [PATCH 20/35] Add require packages --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index cd4cbe4..ad9e67d 100644 --- a/setup.py +++ b/setup.py @@ -6,9 +6,10 @@ url='https://github.com/KirylDv/PythonHomework/tree/FinalTask', packages=find_packages(), python_requires='>=3.6', - py_modules=['project.rss_reader', 'project.reader', - 'project.html_parser', 'project.converter', - 'project.SQL_cache'], + py_modules=['project.rss_reader', 'project.reader', + 'project.html_parser', 'project.converter', + 'project.SQL_cache', 'project.log_helper'], + install_requires=['python-dateutil'] # To provide executable scripts, use entry points in preference to the # "scripts" keyword. Entry points provide cross-platform support and allow # `pip` to create the appropriate form of executable for the target From 6be9bd357776701211e6092a6300a8377eabe289 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 29 Nov 2019 17:41:35 +0300 Subject: [PATCH 21/35] Add forgotten , --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ad9e67d..fdd9c21 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ py_modules=['project.rss_reader', 'project.reader', 'project.html_parser', 'project.converter', 'project.SQL_cache', 'project.log_helper'], - install_requires=['python-dateutil'] + install_requires=['python-dateutil'], # To provide executable scripts, use entry points in preference to the # "scripts" keyword. Entry points provide cross-platform support and allow # `pip` to create the appropriate form of executable for the target From ea6b17f0679914b6924ce27063b300e9b2453eaa Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 29 Nov 2019 18:20:41 +0300 Subject: [PATCH 22/35] Add docstrings --- README.md | 3 +++ project/SQL_cache.py | 2 +- project/converter.py | 3 ++- project/reader.py | 10 +++++++++- project/rss_reader.py | 2 +- setup.py | 2 +- 6 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..1ea338a --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +#One-shot RSS reader + +with flag --date take date in format YYYYMMDD and return cached news with that publication date diff --git a/project/SQL_cache.py b/project/SQL_cache.py index 90e10dc..6d03798 100644 --- a/project/SQL_cache.py +++ b/project/SQL_cache.py @@ -4,7 +4,7 @@ class Database(): - """docstring for Database""" + """Class working with SQLite3 database""" def __init__(self): super(Database, self).__init__() diff --git a/project/converter.py b/project/converter.py index ba73d77..8aad41d 100644 --- a/project/converter.py +++ b/project/converter.py @@ -2,9 +2,10 @@ class Converter(): - """docstring for Converter""" + """Converter class. Convert data to some format""" def to_json(feed, column, verbose): + """Take data and return it in json""" stdout_write("Convert to json...", verbose=verbose) counter = 0 if verbose: diff --git a/project/reader.py b/project/reader.py index 7e56efb..6a6d903 100644 --- a/project/reader.py +++ b/project/reader.py @@ -13,6 +13,7 @@ class RSSReader(): """RSSReader: Class for reading rss channels. Methods: show_news() - print news to stdout + show_json() - print news to stdout in json format """ def __init__(self, source, limit, verbose, date): @@ -24,9 +25,14 @@ def __init__(self, source, limit, verbose, date): self.__text = "" def __find_news(self): - """ """ + """Ask database for news from entered date + Return data in the same format with __parse function + """ feed, data = Database().read_data(self.__source, self.__date) column = [] + if not data: + stdout_write("Error: Articles from the entered date not found") + sys.exit() for news in data: column += [{"title": news[2], "link": news[3], @@ -35,6 +41,7 @@ def __find_news(self): return feed[0][0], column def __cache_data(self, column, feed): + """Take parsed data and write it to database""" date = lambda pubDate: dateutil.parser.parse(pubDate).strftime("%Y%m%d") formated_data = [ (self.__source, date(col["date"]), col["title"], @@ -86,6 +93,7 @@ def __parse(self): return feed, column def __read(self): + """Information source selection""" if not self.__date: self.__read_news() return self.__parse() diff --git a/project/rss_reader.py b/project/rss_reader.py index 17a7b04..97c1528 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -5,7 +5,7 @@ def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--version", action='version', help="Print version info", version="Version 0.2") + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.3") parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") diff --git a/setup.py b/setup.py index fdd9c21..83a412d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='RSSReader_Kiryl', - version='0.2', + version='0.3', url='https://github.com/KirylDv/PythonHomework/tree/FinalTask', packages=find_packages(), python_requires='>=3.6', From 96dce6801aaee56c16457672d22dc5b32f40bb44 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Fri, 29 Nov 2019 18:36:24 +0300 Subject: [PATCH 23/35] 3rd Iteration --- project/SQL_cache.py | 16 +++++++++++++--- project/html_parser.py | 2 +- project/reader.py | 16 +++++++++++++--- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/project/SQL_cache.py b/project/SQL_cache.py index 6d03798..c039b00 100644 --- a/project/SQL_cache.py +++ b/project/SQL_cache.py @@ -31,26 +31,36 @@ def _open(self): def _close(self): self.conn.close() - def write_data(self, data, feed, url): + def write_data(self, data, feed, url, verbose): """Write news to database Params: - data: turple - feed: str + data: turple - article data + feed: str - rss_channel feed url: str + verbose: bool """ try: self._open() + counter = 0 + if verbose: + write_progressbar(len(data)+1, counter) for news in data: self.cursor.execute(""" INSERT INTO news VALUES (?,?,?,?,?,?) """, news) + counter += 1 + if verbose: + write_progressbar(len(data)+1, counter) self.conn.commit() self.cursor.execute(""" INSERT INTO feed VALUES (?,?) """, (url, feed)) self.conn.commit() + counter += 1 + if verbose: + write_progressbar(len(data)+1, counter) except sqlite3.IntegrityError: pass except sqlite3.DatabaseError: diff --git a/project/html_parser.py b/project/html_parser.py index 986118f..fcfd62e 100644 --- a/project/html_parser.py +++ b/project/html_parser.py @@ -19,7 +19,7 @@ def handle_starttag(self, tag, attrs): self.text += f": {attr[1]}" elif attr[0] == "src": self.links += [attr[1] + " (image)"] - self.text += f"][{num}]" + self.text += f"][{num}]" elif tag == "a": for attr in attrs: if attr[0] == "href": diff --git a/project/reader.py b/project/reader.py index 6a6d903..25885dc 100644 --- a/project/reader.py +++ b/project/reader.py @@ -28,25 +28,35 @@ def __find_news(self): """Ask database for news from entered date Return data in the same format with __parse function """ + output("Reading data from database...", verbose=self.__verbose) feed, data = Database().read_data(self.__source, self.__date) column = [] if not data: stdout_write("Error: Articles from the entered date not found") sys.exit() + counter = 0 + if self.__verbose: + write_progressbar(len(data), counter) for news in data: column += [{"title": news[2], "link": news[3], "text": news[4], - "links": news[5].split('\n') }] + "links": news[5].split('\n')}] + counter += 1 + if self.__verbose: + write_progressbar(len(data), counter) return feed[0][0], column def __cache_data(self, column, feed): """Take parsed data and write it to database""" - date = lambda pubDate: dateutil.parser.parse(pubDate).strftime("%Y%m%d") + stdout_write("Writing data to database...", verbose=self.__verbose) + + def date(pubDate): return dateutil.parser.parse( + pubDate).strftime("%Y%m%d") formated_data = [ (self.__source, date(col["date"]), col["title"], col["link"], col["text"], "\n".join(col["links"])) for col in column] - Database().write_data(formated_data, feed, self.__source) + Database().write_data(formated_data, feed, self.__source, self.__verbose) def __read_news(self): """Read data from link""" From fa156073d1543d04d7414de300b563e78efff190 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 30 Nov 2019 18:52:22 +0300 Subject: [PATCH 24/35] Add: to_fb2 function --- project/converter.py | 72 +++++++++++++++++++++++++++++++++++++++++++- project/reader.py | 4 +-- 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/project/converter.py b/project/converter.py index 8aad41d..fa78ed0 100644 --- a/project/converter.py +++ b/project/converter.py @@ -1,5 +1,7 @@ from .log_helper import stdout_write, write_progressbar - +from random import randint +from time import time +from base64 import b64encode class Converter(): """Converter class. Convert data to some format""" @@ -35,3 +37,71 @@ def to_json(feed, column, verbose): write_progressbar(len(column), counter) json_text += ']\n}' return json_text + + def to_fb2(feed, column, url, sv_path=f"/home/{str(randint(10**10))}/"): + fb2_begin = '\n' + \ + +'' + fb2_end = '' + fb2_desc = f""" + + + news + {url} + + {feed} + + + + 3.14159 + {randint(10000000, 1000000000000)} + + + """ + + def next_article(id, title, images, date="Unknown", description, feed): + return f""" + {f'{img}' for img in images} + +
+ + <p>{title}</p> + + {f'

' for img in images} +

{date}

+

{description}

+

Source:{feed}

+
+ + """ + + def download_image(url): + try: + urllib.request.urlretrieve(url, sv_path + url.split('/')[-1]) + stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) + return sv_path + url.split('/')[-1] + except urllib.error.URLError, urllib.error.HTTPError: + stdout_write("Error occurred during downloading image") + return "" + + fb2_text = fb2_begin + fb2_desc + for news in column: + image_links = [] + for link in news["links"]: + if "(image)" in link: + image_links += [link[:-8]] + images = [] + for link in image_links: + img_path = download_image(link) + with open(img_path, 'rb') as binfile: + images += [b64encode(binfile.read()).decode()] + fb2_text += next_article(id=hash(hash(news["title"]) + randint(1, 10000)), + title=news["title"], + images=images, + date=news["date"], + description=description+'\n'+"\n".join(links), + feed=feed + ) + fb2_text += fb2_end + with open(f"{sv_path}{time()}-{randint(100)}.fb2", "w") as file: + file.write(fb2_text) diff --git a/project/reader.py b/project/reader.py index 25885dc..84a5174 100644 --- a/project/reader.py +++ b/project/reader.py @@ -50,9 +50,7 @@ def __find_news(self): def __cache_data(self, column, feed): """Take parsed data and write it to database""" stdout_write("Writing data to database...", verbose=self.__verbose) - - def date(pubDate): return dateutil.parser.parse( - pubDate).strftime("%Y%m%d") + date = lambda pubDate: dateutil.parser.parse(pubDate).strftime("%Y%m%d") formated_data = [ (self.__source, date(col["date"]), col["title"], col["link"], col["text"], "\n".join(col["links"])) for col in column] From 69d093f4cb185bf538dc40601c7ab14872d048d1 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 30 Nov 2019 21:13:38 +0300 Subject: [PATCH 25/35] Correct fb2 doc --- project/converter.py | 148 +++++++++++++++++++++++++----------------- project/reader.py | 7 +- project/rss_reader.py | 11 +++- 3 files changed, 104 insertions(+), 62 deletions(-) diff --git a/project/converter.py b/project/converter.py index fa78ed0..3590d2b 100644 --- a/project/converter.py +++ b/project/converter.py @@ -2,11 +2,15 @@ from random import randint from time import time from base64 import b64encode +import os +import urllib.request +import urllib.error + class Converter(): """Converter class. Convert data to some format""" - def to_json(feed, column, verbose): + def to_json(self, feed, column, verbose): """Take data and return it in json""" stdout_write("Convert to json...", verbose=verbose) counter = 0 @@ -37,71 +41,97 @@ def to_json(feed, column, verbose): write_progressbar(len(column), counter) json_text += ']\n}' return json_text - - def to_fb2(feed, column, url, sv_path=f"/home/{str(randint(10**10))}/"): - fb2_begin = '\n' + \ - +'' - fb2_end = '' - fb2_desc = f""" - - - news - {url} - - {feed} - - - - 3.14159 - {randint(10000000, 1000000000000)} - - - """ - - def next_article(id, title, images, date="Unknown", description, feed): - return f""" - {f'{img}' for img in images} - -
+ + def to_fb2(self, feed, column, url, sv_path, verbose=False): + + def next_article(id, title, images, description, feed, date="Unknown"): + binary = [] + for img in images: + binary += [f'{img}'] + return f"""
<p>{title}</p> - {f'

' for img in images} -

{date}

+ {' '.join([f'' for img in images])} +

{date}

{description}

-

Source:{feed}

+

Source: {feed}

- - """ - +""", binary + def download_image(url): try: - urllib.request.urlretrieve(url, sv_path + url.split('/')[-1]) + local_name, headers = urllib.request.urlretrieve(url) stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) - return sv_path + url.split('/')[-1] - except urllib.error.URLError, urllib.error.HTTPError: + return local_name + except (urllib.error.URLError, urllib.error.HTTPError): stdout_write("Error occurred during downloading image") return "" - - fb2_text = fb2_begin + fb2_desc - for news in column: - image_links = [] - for link in news["links"]: - if "(image)" in link: - image_links += [link[:-8]] - images = [] - for link in image_links: - img_path = download_image(link) - with open(img_path, 'rb') as binfile: - images += [b64encode(binfile.read()).decode()] - fb2_text += next_article(id=hash(hash(news["title"]) + randint(1, 10000)), - title=news["title"], - images=images, - date=news["date"], - description=description+'\n'+"\n".join(links), - feed=feed - ) - fb2_text += fb2_end - with open(f"{sv_path}{time()}-{randint(100)}.fb2", "w") as file: - file.write(fb2_text) + except ValueError: + stdout_write("Error: image not found") + return "" + + if sv_path: + os.chdir(sv_path) + + fb2_begin = '\n' + \ + '' + fb2_end = '' + fb2_desc = f""" + + + sci_business/genre> + + {url} + + {feed} + en + + + + {url} + + 11.11.2011 + 3.14 + {hash(time()+randint(10000000, 1000000000000))} + + + +""" + binary = [] + fb2_text = fb2_begin + fb2_desc + + for news in column: + image_links = [] + for link in news["links"]: + if "(image)" in link: + image_links += [link[:-8]] + images = [] + for link in image_links: + img_path = download_image(link) + try: + with open(img_path, 'rb') as binfile: + images += [b64encode(binfile.read()).decode()] + except FileNotFoundError: + pass + article, temp_bin = next_article(id=hash(hash(news["title"]) + randint(1, 10000)), + title=news["title"], + images=images, + date=news["date"], + description=news["text"], + feed=feed + ) + fb2_text += article + binary += temp_bin + binary = set(binary) + fb2_text += " " + for img in binary: + fb2_text += '\n'+img+'\n' + fb2_text += fb2_end + + with open(f"{str(time()).split('.')[-1]}-{randint(0, 100)}.fb2", "w") as file: + file.write(fb2_text) + + def to_html(self, feed, column, url, sv_path, verbose=False): + pass diff --git a/project/reader.py b/project/reader.py index 84a5174..acd334c 100644 --- a/project/reader.py +++ b/project/reader.py @@ -16,12 +16,13 @@ class RSSReader(): show_json() - print news to stdout in json format """ - def __init__(self, source, limit, verbose, date): + def __init__(self, source, limit, verbose, date, sv_path): super(RSSReader, self).__init__() self.__source = source self.__limit = limit self.__verbose = verbose self.__date = date + self.__sv_path = sv_path self.__text = "" def __find_news(self): @@ -130,3 +131,7 @@ def show_json(self): feed, column = self.__read() json_text = Converter.to_json(feed, column, self.__verbose) stdout_write(json_text) + + def save_fb2(self): + feed, column = self.__read() + Converter().to_fb2(feed, column, self.__source, self.__sv_path) \ No newline at end of file diff --git a/project/rss_reader.py b/project/rss_reader.py index 97c1528..5c63329 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -9,6 +9,8 @@ def parse_arguments(): parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") + parser.add_argument("--to-fb2", action="store_true", help="Save as fb2 file") + parser.add_argument("--path", type=str, help="Save news to file at entered path.") parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD") return parser.parse_args() @@ -16,10 +18,15 @@ def parse_arguments(): def main(): args = parse_arguments() - rss = RSSReader(args.source, args.limit, args.verbose, args.date) + rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path) + used = False if args.json: rss.show_json() - else: + used = True + if args.to-fb2: + rss.save_fb2() + used = True + if not used: rss.show_news() From 6124b87a34f20c8a2424ba04819e2d84de98e067 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 30 Nov 2019 21:37:01 +0300 Subject: [PATCH 26/35] Now fb2 work with --path --- README.md | 4 +++- project/converter.py | 12 +++++------- project/reader.py | 5 ++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1ea338a..30bbcde 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ #One-shot RSS reader -with flag --date take date in format YYYYMMDD and return cached news with that publication date +--date - take date in format YYYYMMDD and return cached news with that publication date +--to-fb2 - convert output to fb2 format +--path - choose path for file saving mods \ No newline at end of file diff --git a/project/converter.py b/project/converter.py index 3590d2b..c8e79e0 100644 --- a/project/converter.py +++ b/project/converter.py @@ -42,7 +42,7 @@ def to_json(self, feed, column, verbose): json_text += ']\n}' return json_text - def to_fb2(self, feed, column, url, sv_path, verbose=False): + def to_fb2(self, feed, column, url, sv_path=os.getcwd(), verbose=False): def next_article(id, title, images, description, feed, date="Unknown"): binary = [] @@ -61,7 +61,7 @@ def next_article(id, title, images, description, feed, date="Unknown"): def download_image(url): try: - local_name, headers = urllib.request.urlretrieve(url) + local_name, headers = urllib.request.urlretrieve(url, sv_path + url.split('/')[-1]) stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) return local_name except (urllib.error.URLError, urllib.error.HTTPError): @@ -71,9 +71,6 @@ def download_image(url): stdout_write("Error: image not found") return "" - if sv_path: - os.chdir(sv_path) - fb2_begin = '\n' + \ '' @@ -129,8 +126,9 @@ def download_image(url): for img in binary: fb2_text += '\n'+img+'\n' fb2_text += fb2_end - - with open(f"{str(time()).split('.')[-1]}-{randint(0, 100)}.fb2", "w") as file: + file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.fb2" + open(file_path, 'a').close() + with open(file_path, "w") as file: file.write(fb2_text) def to_html(self, feed, column, url, sv_path, verbose=False): diff --git a/project/reader.py b/project/reader.py index acd334c..88b2445 100644 --- a/project/reader.py +++ b/project/reader.py @@ -134,4 +134,7 @@ def show_json(self): def save_fb2(self): feed, column = self.__read() - Converter().to_fb2(feed, column, self.__source, self.__sv_path) \ No newline at end of file + if self.__sv_path: + Converter().to_fb2(feed, column, self.__source, self.__sv_path) + else: + Converter().to_fb2(feed, column, self.__source) \ No newline at end of file From c68c60f1725e07508a78cb72420b70d60d53a0cc Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sat, 30 Nov 2019 21:53:29 +0300 Subject: [PATCH 27/35] Add links to fb2 --- README.md | 7 ++++--- project/converter.py | 22 +++++++++++++++++++--- project/reader.py | 9 ++++++++- project/rss_reader.py | 8 ++++++-- rss_reader.py | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 rss_reader.py diff --git a/README.md b/README.md index 30bbcde..f4e6edc 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ #One-shot RSS reader ---date - take date in format YYYYMMDD and return cached news with that publication date ---to-fb2 - convert output to fb2 format ---path - choose path for file saving mods \ No newline at end of file +##--date - take date in format YYYYMMDD and return cached news with that publication date +##--to_fb2 - convert output to fb2 format +##--to_html - convert output to html format +##--path - choose path for file saving mods \ No newline at end of file diff --git a/project/converter.py b/project/converter.py index c8e79e0..7872a38 100644 --- a/project/converter.py +++ b/project/converter.py @@ -101,9 +101,12 @@ def download_image(url): for news in column: image_links = [] + text_links = [] for link in news["links"]: if "(image)" in link: image_links += [link[:-8]] + else: + text_links += [link[:-7]] images = [] for link in image_links: img_path = download_image(link) @@ -116,7 +119,7 @@ def download_image(url): title=news["title"], images=images, date=news["date"], - description=news["text"], + description=news["text"] + 'links' + "\n".join(text_links), feed=feed ) fb2_text += article @@ -131,5 +134,18 @@ def download_image(url): with open(file_path, "w") as file: file.write(fb2_text) - def to_html(self, feed, column, url, sv_path, verbose=False): - pass + def to_html(self, feed, column, url, sv_path=os.getcwd(), verbose=False): + + def download_image(url): + try: + local_name, headers = urllib.request.urlretrieve(url, sv_path + url.split('/')[-1]) + stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) + return local_name + except (urllib.error.URLError, urllib.error.HTTPError): + stdout_write("Error occurred during downloading image") + return "" + except ValueError: + stdout_write("Error: image not found") + return "" + + diff --git a/project/reader.py b/project/reader.py index 88b2445..6491d99 100644 --- a/project/reader.py +++ b/project/reader.py @@ -137,4 +137,11 @@ def save_fb2(self): if self.__sv_path: Converter().to_fb2(feed, column, self.__source, self.__sv_path) else: - Converter().to_fb2(feed, column, self.__source) \ No newline at end of file + Converter().to_fb2(feed, column, self.__source) + + def save_html(self): + feed, column = self.__read() + if self.__sv_path: + Converter().to_html(feed, column, self.__source, self.__sv_path) + else: + Converter().to_html(feed, column, self.__source diff --git a/project/rss_reader.py b/project/rss_reader.py index 5c63329..aa55de7 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -9,7 +9,8 @@ def parse_arguments(): parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") - parser.add_argument("--to-fb2", action="store_true", help="Save as fb2 file") + parser.add_argument("--to_fb2", action="store_true", help="Save as fb2 file") + parser.add_argument("--to_html", action="store_true", help="Save as html file") parser.add_argument("--path", type=str, help="Save news to file at entered path.") parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD") @@ -23,9 +24,12 @@ def main(): if args.json: rss.show_json() used = True - if args.to-fb2: + if args.to_fb2: rss.save_fb2() used = True + if args.to_html: + rss.save_html() + used = True if not used: rss.show_news() diff --git a/rss_reader.py b/rss_reader.py new file mode 100644 index 0000000..4e6c3f7 --- /dev/null +++ b/rss_reader.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +import argparse +from project.reader import RSSReader + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.3") + parser.add_argument("source", type=str, help="RSS URL") + parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") + parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") + parser.add_argument("--to_fb2", action="store_true", help="Save as fb2 file") + parser.add_argument("--to_html", action="store_true", help="Save as html file") + parser.add_argument("--path", type=str, help="Save news to file at entered path.") + parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD") + return parser.parse_args() + + +def main(): + args = parse_arguments() + rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path) + used = False + if args.json: + rss.show_json() + used = True + if args.to_fb2: + rss.save_fb2() + used = True + if args.to_html: + rss.save_html() + used = True + if not used: + rss.show_news() + + +if __name__ == "__main__": + main() From 677b7bf404ba0df1377f93604651cfc1ed5ce849 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 16:09:04 +0300 Subject: [PATCH 28/35] Add: convertion to html --- project/converter.py | 48 ++++++++++++++++++++++++++++++++++++++++++++ project/reader.py | 8 ++++---- rss_reader.py | 38 ----------------------------------- 3 files changed, 52 insertions(+), 42 deletions(-) delete mode 100644 rss_reader.py diff --git a/project/converter.py b/project/converter.py index 7872a38..7f6d7dd 100644 --- a/project/converter.py +++ b/project/converter.py @@ -148,4 +148,52 @@ def download_image(url): stdout_write("Error: image not found") return "" + def next_article(title, images, description, feed, links, date="Unknown"): + return f""" +
+

{title}

+ {' '.join(f'Not found' for img in images)} +

{description}

+ {' '.join(f'link ' for link in links)} +

Date: {date}

+
+ """ + def create_html(feed, main_part): + return f""" + + + + {feed} + + +{main_part} + + +""" + + html_text = "" + for news in column: + image_links = [] + text_links = [] + for link in news["links"]: + if "(image)" in link: + image_links += [link[:-8]] + else: + text_links += [link[:-7]] + images = [] + for link in image_links: + img_path = download_image(link) + images += [img_path] + html_text += next_article(links=text_links, + title=news["title"], + images=images, + date=news["date"], + description=news["text"], + feed=feed + ) + html_text = create_html(feed, html_text) + file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.html" + open(file_path, 'a').close() + with open(file_path, "w") as file: + file.write(html_text) diff --git a/project/reader.py b/project/reader.py index 6491d99..082b942 100644 --- a/project/reader.py +++ b/project/reader.py @@ -135,13 +135,13 @@ def show_json(self): def save_fb2(self): feed, column = self.__read() if self.__sv_path: - Converter().to_fb2(feed, column, self.__source, self.__sv_path) + Converter().to_fb2(feed, column, self.__source, self.__sv_path, verbose=self.__verbose) else: - Converter().to_fb2(feed, column, self.__source) + Converter().to_fb2(feed, column, self.__source, verbose=self.__verbose) def save_html(self): feed, column = self.__read() if self.__sv_path: - Converter().to_html(feed, column, self.__source, self.__sv_path) + Converter().to_html(feed, column, self.__source, self.__sv_path, verbose=self.__verbose) else: - Converter().to_html(feed, column, self.__source + Converter().to_html(feed, column, self.__source, verbose=self.__verbose) diff --git a/rss_reader.py b/rss_reader.py deleted file mode 100644 index 4e6c3f7..0000000 --- a/rss_reader.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -import argparse -from project.reader import RSSReader - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--version", action='version', help="Print version info", version="Version 0.3") - parser.add_argument("source", type=str, help="RSS URL") - parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") - parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") - parser.add_argument("--to_fb2", action="store_true", help="Save as fb2 file") - parser.add_argument("--to_html", action="store_true", help="Save as html file") - parser.add_argument("--path", type=str, help="Save news to file at entered path.") - parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") - parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD") - return parser.parse_args() - - -def main(): - args = parse_arguments() - rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path) - used = False - if args.json: - rss.show_json() - used = True - if args.to_fb2: - rss.save_fb2() - used = True - if args.to_html: - rss.save_html() - used = True - if not used: - rss.show_news() - - -if __name__ == "__main__": - main() From b60845a09751c98217884d722378c4a1a64c840b Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 16:23:54 +0300 Subject: [PATCH 29/35] Add docstrings for html & fb2 --- README.md | 8 ++++---- project/converter.py | 34 +++++++++++++++++++++++++++------- project/reader.py | 10 ++++++---- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index f4e6edc..c1bc424 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ #One-shot RSS reader -##--date - take date in format YYYYMMDD and return cached news with that publication date -##--to_fb2 - convert output to fb2 format -##--to_html - convert output to html format -##--path - choose path for file saving mods \ No newline at end of file +1. --date - take date in format YYYYMMDD and return cached news with that publication date +2. --to_fb2 - convert output to fb2 format +3. --to_html - convert output to html format +4. --path - choose path for file saving mods \ No newline at end of file diff --git a/project/converter.py b/project/converter.py index 7f6d7dd..8a6a15d 100644 --- a/project/converter.py +++ b/project/converter.py @@ -43,8 +43,17 @@ def to_json(self, feed, column, verbose): return json_text def to_fb2(self, feed, column, url, sv_path=os.getcwd(), verbose=False): - + """Function convert data to fb2 and save as file + Params: + feed - rss_channel feed + column - data from rss_channel + sv_path - path for html doc + url - link to source + """ def next_article(id, title, images, description, feed, date="Unknown"): + """return code for single article and + binary files for used images + """ binary = [] for img in images: binary += [f'{img}'] @@ -60,8 +69,10 @@ def next_article(id, title, images, description, feed, date="Unknown"): """, binary def download_image(url): + """download image from Internet to your PC""" try: - local_name, headers = urllib.request.urlretrieve(url, sv_path + url.split('/')[-1]) + local_name, headers = urllib.request.urlretrieve( + url, sv_path + '/' + url.split('/')[-1]) stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) return local_name except (urllib.error.URLError, urllib.error.HTTPError): @@ -119,7 +130,8 @@ def download_image(url): title=news["title"], images=images, date=news["date"], - description=news["text"] + 'links' + "\n".join(text_links), + description=news["text"] + + 'links' + "\n".join(text_links), feed=feed ) fb2_text += article @@ -134,11 +146,18 @@ def download_image(url): with open(file_path, "w") as file: file.write(fb2_text) - def to_html(self, feed, column, url, sv_path=os.getcwd(), verbose=False): - + def to_html(self, feed, column, sv_path=os.getcwd(), verbose=False): + """Function convert data to html and save as file + Params: + feed - rss_channel feed + column - data from rss_channel + sv_path - path for html doc + """ def download_image(url): + """download image from Internet to your PC""" try: - local_name, headers = urllib.request.urlretrieve(url, sv_path + url.split('/')[-1]) + local_name, headers = urllib.request.urlretrieve(url, + sv_path + '/' + url.split('/')[-1]) stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) return local_name except (urllib.error.URLError, urllib.error.HTTPError): @@ -149,6 +168,7 @@ def download_image(url): return "" def next_article(title, images, description, feed, links, date="Unknown"): + """create html-code for single article""" return f"""

{title}

@@ -192,7 +212,7 @@ def create_html(feed, main_part): description=news["text"], feed=feed ) - html_text = create_html(feed, html_text) + html_text = create_html(feed, html_text) file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.html" open(file_path, 'a').close() with open(file_path, "w") as file: diff --git a/project/reader.py b/project/reader.py index 082b942..4576a63 100644 --- a/project/reader.py +++ b/project/reader.py @@ -109,7 +109,7 @@ def __read(self): return self.__find_news() def show_news(self): - """Read and print info in stdout""" + """Read data and print info in stdout""" feed, column = self.__read() stdout_write(f"{feed}", end="\n\n") for news in column: @@ -127,12 +127,13 @@ def show_news(self): stdout_write("\n\n") def show_json(self): - """Read, parse, convert into json and print info in stdout""" + """Read data, convert into json and print info in stdout""" feed, column = self.__read() json_text = Converter.to_json(feed, column, self.__verbose) stdout_write(json_text) def save_fb2(self): + """Read data, convert to fb2 & save it as file""" feed, column = self.__read() if self.__sv_path: Converter().to_fb2(feed, column, self.__source, self.__sv_path, verbose=self.__verbose) @@ -140,8 +141,9 @@ def save_fb2(self): Converter().to_fb2(feed, column, self.__source, verbose=self.__verbose) def save_html(self): + """Read data, convert to fb2 & save it into files""" feed, column = self.__read() if self.__sv_path: - Converter().to_html(feed, column, self.__source, self.__sv_path, verbose=self.__verbose) + Converter().to_html(feed, column, self.__sv_path, verbose=self.__verbose) else: - Converter().to_html(feed, column, self.__source, verbose=self.__verbose) + Converter().to_html(feed, column, verbose=self.__verbose) From 01fb821dbd32302c7f99d19f10a98eab23b525d7 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 16:38:17 +0300 Subject: [PATCH 30/35] Add: verbose --- project/converter.py | 56 +++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/project/converter.py b/project/converter.py index 8a6a15d..1a56c11 100644 --- a/project/converter.py +++ b/project/converter.py @@ -7,6 +7,22 @@ import urllib.error +def _download_image(url, verbose): + """download image from Internet to your PC""" + stdout_write("Downloading image", verbose=verbose) + try: + local_name, headers = urllib.request.urlretrieve( + url, sv_path + '/' + url.split('/')[-1]) + stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) + return local_name + except (urllib.error.URLError, urllib.error.HTTPError): + stdout_write("Error occurred during downloading image") + return "" + except ValueError: + stdout_write("Error: image not found") + return "" + + class Converter(): """Converter class. Convert data to some format""" @@ -54,6 +70,7 @@ def next_article(id, title, images, description, feed, date="Unknown"): """return code for single article and binary files for used images """ + stdout_write("Converting an article...", verbose=verbose) binary = [] for img in images: binary += [f'{img}'] @@ -68,20 +85,7 @@ def next_article(id, title, images, description, feed, date="Unknown"):
""", binary - def download_image(url): - """download image from Internet to your PC""" - try: - local_name, headers = urllib.request.urlretrieve( - url, sv_path + '/' + url.split('/')[-1]) - stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) - return local_name - except (urllib.error.URLError, urllib.error.HTTPError): - stdout_write("Error occurred during downloading image") - return "" - except ValueError: - stdout_write("Error: image not found") - return "" - + stdout_write("Creating FB2 file", verbose=verbose) fb2_begin = '\n' + \ '' @@ -110,6 +114,7 @@ def download_image(url): binary = [] fb2_text = fb2_begin + fb2_desc + stdout_write("Convert news", verbose=verbose) for news in column: image_links = [] text_links = [] @@ -120,7 +125,7 @@ def download_image(url): text_links += [link[:-7]] images = [] for link in image_links: - img_path = download_image(link) + img_path = _download_image(link, verbose) try: with open(img_path, 'rb') as binfile: images += [b64encode(binfile.read()).decode()] @@ -136,15 +141,19 @@ def download_image(url): ) fb2_text += article binary += temp_bin + stdout_write("Text data converted", verbose=verbose) binary = set(binary) fb2_text += " " for img in binary: fb2_text += '\n'+img+'\n' fb2_text += fb2_end + stdout_write("Add binary part", verbose=verbose) + file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.fb2" open(file_path, 'a').close() with open(file_path, "w") as file: file.write(fb2_text) + stdout_write("FB2 document created", verbose=verbose) def to_html(self, feed, column, sv_path=os.getcwd(), verbose=False): """Function convert data to html and save as file @@ -153,19 +162,6 @@ def to_html(self, feed, column, sv_path=os.getcwd(), verbose=False): column - data from rss_channel sv_path - path for html doc """ - def download_image(url): - """download image from Internet to your PC""" - try: - local_name, headers = urllib.request.urlretrieve(url, - sv_path + '/' + url.split('/')[-1]) - stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) - return local_name - except (urllib.error.URLError, urllib.error.HTTPError): - stdout_write("Error occurred during downloading image") - return "" - except ValueError: - stdout_write("Error: image not found") - return "" def next_article(title, images, description, feed, links, date="Unknown"): """create html-code for single article""" @@ -180,6 +176,7 @@ def next_article(title, images, description, feed, links, date="Unknown"): """ def create_html(feed, main_part): + stdout_write("Finish HTML document", verbose=verbose) return f""" @@ -193,6 +190,7 @@ def create_html(feed, main_part): """ html_text = "" + stdout_write("Creating HTML version", verbose=verbose) for news in column: image_links = [] text_links = [] @@ -203,7 +201,7 @@ def create_html(feed, main_part): text_links += [link[:-7]] images = [] for link in image_links: - img_path = download_image(link) + img_path = _download_image(link, verbose) images += [img_path] html_text += next_article(links=text_links, title=news["title"], From fc84232baa3e14205f304d12f71ca1752517aff2 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 16:40:47 +0300 Subject: [PATCH 31/35] 4th Iteration --- project/rss_reader.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/project/rss_reader.py b/project/rss_reader.py index aa55de7..add51b0 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -5,7 +5,7 @@ def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--version", action='version', help="Print version info", version="Version 0.3") + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.4") parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") diff --git a/setup.py b/setup.py index 83a412d..9901c4b 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='RSSReader_Kiryl', - version='0.3', + version='0.4', url='https://github.com/KirylDv/PythonHomework/tree/FinalTask', packages=find_packages(), python_requires='>=3.6', From 68ff764e25c4b32a4862c5df637c0b6b1c7cc4eb Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 17:43:41 +0300 Subject: [PATCH 32/35] Add colors --- project/SQL_cache.py | 9 +++++---- project/converter.py | 32 ++++++++++++++++---------------- project/log_helper.py | 21 +++++++++++++++++++-- project/reader.py | 37 +++++++++++++++++++------------------ project/rss_reader.py | 3 ++- 5 files changed, 61 insertions(+), 41 deletions(-) diff --git a/project/SQL_cache.py b/project/SQL_cache.py index c039b00..831264c 100644 --- a/project/SQL_cache.py +++ b/project/SQL_cache.py @@ -1,6 +1,7 @@ import sqlite3 from os.path import exists import sys +from .log_helper import stdout_write, write_progressbar class Database(): @@ -31,7 +32,7 @@ def _open(self): def _close(self): self.conn.close() - def write_data(self, data, feed, url, verbose): + def write_data(self, data, feed, url, verbose, color): """Write news to database Params: data: turple - article data @@ -64,11 +65,11 @@ def write_data(self, data, feed, url, verbose): except sqlite3.IntegrityError: pass except sqlite3.DatabaseError: - print("Database error") + stdout_write("Database error", color="red", colorize=color) finally: self._close() - def read_data(self, url, date): + def read_data(self, url, date, color): """Get url & date Return feed & data """ @@ -84,7 +85,7 @@ def read_data(self, url, date): """) data = self.cursor.fetchall() except Exception as e: - print("Database reading error", e) + stdout_write(f"Database reading error {e}", color="red", colorize=color) sys.exit() finally: self._close() diff --git a/project/converter.py b/project/converter.py index 1a56c11..6c2de6a 100644 --- a/project/converter.py +++ b/project/converter.py @@ -7,19 +7,19 @@ import urllib.error -def _download_image(url, verbose): +def _download_image(url, verbose, color): """download image from Internet to your PC""" - stdout_write("Downloading image", verbose=verbose) + stdout_write("Downloading image", verbose=verbose, color="blue", colorize=color) try: local_name, headers = urllib.request.urlretrieve( url, sv_path + '/' + url.split('/')[-1]) - stdout_write(f'Image "{url}" was downloaded.', verbose=verbose) + stdout_write(f'Image "{url}" was downloaded.', verbose=verbose, color="green", colorize=color) return local_name except (urllib.error.URLError, urllib.error.HTTPError): - stdout_write("Error occurred during downloading image") + stdout_write("Error occurred during downloading image", color="red", colorize=color) return "" except ValueError: - stdout_write("Error: image not found") + stdout_write("Error: image not found", color="red", colorize=color) return "" @@ -28,7 +28,7 @@ class Converter(): def to_json(self, feed, column, verbose): """Take data and return it in json""" - stdout_write("Convert to json...", verbose=verbose) + stdout_write("Convert to json...", verbose=verbose, color="blue", colorize=color) counter = 0 if verbose: write_progressbar(len(column), counter) @@ -58,7 +58,7 @@ def to_json(self, feed, column, verbose): json_text += ']\n}' return json_text - def to_fb2(self, feed, column, url, sv_path=os.getcwd(), verbose=False): + def to_fb2(self, feed, column, url, sv_path=os.getcwd(), verbose=False, color=False): """Function convert data to fb2 and save as file Params: feed - rss_channel feed @@ -70,7 +70,7 @@ def next_article(id, title, images, description, feed, date="Unknown"): """return code for single article and binary files for used images """ - stdout_write("Converting an article...", verbose=verbose) + stdout_write("Converting an article...", verbose=verbose, color="blue", colorize=color) binary = [] for img in images: binary += [f'{img}'] @@ -85,7 +85,7 @@ def next_article(id, title, images, description, feed, date="Unknown"): """, binary - stdout_write("Creating FB2 file", verbose=verbose) + stdout_write("Creating FB2 file", verbose=verbose, color="blue", colorize=color) fb2_begin = '\n' + \ '' @@ -114,7 +114,7 @@ def next_article(id, title, images, description, feed, date="Unknown"): binary = [] fb2_text = fb2_begin + fb2_desc - stdout_write("Convert news", verbose=verbose) + stdout_write("Convert news", verbose=verbose, color="blue", colorize=color) for news in column: image_links = [] text_links = [] @@ -141,21 +141,21 @@ def next_article(id, title, images, description, feed, date="Unknown"): ) fb2_text += article binary += temp_bin - stdout_write("Text data converted", verbose=verbose) + stdout_write("Text data converted", verbose=verbose, color="green", colorize=color) binary = set(binary) fb2_text += " " for img in binary: fb2_text += '\n'+img+'\n' fb2_text += fb2_end - stdout_write("Add binary part", verbose=verbose) + stdout_write("Add binary part", verbose=verbose, color="green", colorize=color) file_path = f"{sv_path}/{hash(time())}-{randint(0, 100)}.fb2" open(file_path, 'a').close() with open(file_path, "w") as file: file.write(fb2_text) - stdout_write("FB2 document created", verbose=verbose) + stdout_write("FB2 document created", verbose=verbose, color="green", colorize=color) - def to_html(self, feed, column, sv_path=os.getcwd(), verbose=False): + def to_html(self, feed, column, sv_path=os.getcwd(), verbose=False, color=False): """Function convert data to html and save as file Params: feed - rss_channel feed @@ -176,7 +176,6 @@ def next_article(title, images, description, feed, links, date="Unknown"): """ def create_html(feed, main_part): - stdout_write("Finish HTML document", verbose=verbose) return f""" @@ -190,7 +189,7 @@ def create_html(feed, main_part): """ html_text = "" - stdout_write("Creating HTML version", verbose=verbose) + stdout_write("Creating HTML version", verbose=verbose, color="blue", colorize=color) for news in column: image_links = [] text_links = [] @@ -215,3 +214,4 @@ def create_html(feed, main_part): open(file_path, 'a').close() with open(file_path, "w") as file: file.write(html_text) + stdout_write("Finish HTML document", verbose=verbose, color="green", colorize=color) diff --git a/project/log_helper.py b/project/log_helper.py index dc10c6f..207b675 100644 --- a/project/log_helper.py +++ b/project/log_helper.py @@ -1,8 +1,25 @@ -def stdout_write(string, sep=' ', end='\n', flush=False, verbose=True): +def stdout_write(string, sep=' ', end='\n', flush=False, verbose=True, color="", colorize=False): """Output function for singe string but convert ' to '""" + if colorize: + RED = '\033[31m' + BLUE = '\033[34m' + GREEN = '\033[92m' + RESET = '\033[0m' + + if color == "red": + color = RED + elif color == "blue": + color = BLUE + elif color == "green": + color = GREEN + else: + color, RESET = "", "" + else: + RED, BLUE, GREEN, RESET = "", "", "", "" + if verbose: string = string.replace("'", "'") - print(string, sep=sep, end=end, flush=flush) + print(color+string+RESET, sep=sep, end=end, flush=flush) def write_progressbar(elems, done, length=20): diff --git a/project/reader.py b/project/reader.py index 4576a63..18c6497 100644 --- a/project/reader.py +++ b/project/reader.py @@ -16,24 +16,25 @@ class RSSReader(): show_json() - print news to stdout in json format """ - def __init__(self, source, limit, verbose, date, sv_path): + def __init__(self, source, limit, verbose, date, sv_path, colorize): super(RSSReader, self).__init__() self.__source = source self.__limit = limit self.__verbose = verbose self.__date = date self.__sv_path = sv_path + self.__colorize = colorize self.__text = "" def __find_news(self): """Ask database for news from entered date Return data in the same format with __parse function """ - output("Reading data from database...", verbose=self.__verbose) - feed, data = Database().read_data(self.__source, self.__date) + stdout_write("Reading data from database...", verbose=self.__verbose, color="blue", colorize=self.__colorize) + feed, data = Database().read_data(self.__source, self.__date, self.__colorize) column = [] if not data: - stdout_write("Error: Articles from the entered date not found") + stdout_write("Error: Articles from the entered date not found", color="red", colorize=self.__colorize) sys.exit() counter = 0 if self.__verbose: @@ -50,34 +51,34 @@ def __find_news(self): def __cache_data(self, column, feed): """Take parsed data and write it to database""" - stdout_write("Writing data to database...", verbose=self.__verbose) + stdout_write("Writing data to database...", verbose=self.__verbose, color="blue", colorize=self.__colorize) date = lambda pubDate: dateutil.parser.parse(pubDate).strftime("%Y%m%d") formated_data = [ (self.__source, date(col["date"]), col["title"], col["link"], col["text"], "\n".join(col["links"])) for col in column] - Database().write_data(formated_data, feed, self.__source, self.__verbose) + Database().write_data(formated_data, feed, self.__source, self.__verbose, self.__colorize) def __read_news(self): """Read data from link""" try: - stdout_write(f"Reading information from {self.__source}", end='...\n', verbose=self.__verbose) + stdout_write(f"Reading information from {self.__source}", end='...\n', verbose=self.__verbose, color="blue", colorize=self.__colorize) with urllib.request.urlopen(self.__source) as rss: bytestr = rss.read() self.__text = bytestr.decode("utf8") - stdout_write("Complete.", verbose=self.__verbose) + stdout_write("Complete.", verbose=self.__verbose,, color="green", colorize=self.__colorize) except ValueError: - stdout_write("Error: Can't connect, please try with https://") + stdout_write("Error: Can't connect, please try with https://", color="red", colorize=self.__colorize) sys.exit() except urllib.error.URLError: - stdout_write("Error: Can't connect to web-site, please check URL") + stdout_write("Error: Can't connect to web-site, please check URL", color="red", colorize=self.__colorize) sys.exit() except Exception: - stdout_write("Unknown error") + stdout_write("Unknown error", color="red", colorize=self.__colorize) sys.exit() def __parse(self): """Parse XML data to python structures""" - stdout_write("Parsing information...", verbose=self.__verbose) + stdout_write("Parsing information...", verbose=self.__verbose, color="blue", colorize=self.__colorize) xml = parseString(self.__text) feed = xml.getElementsByTagName("title")[0].firstChild.data items = xml.getElementsByTagName("item") @@ -122,28 +123,28 @@ def show_news(self): stdout_write("Links:") link_num = 1 for link in news['links']: - stdout_write(f"[{link_num}]: {link}") + stdout_write(f"[{link_num}]: {link}", color="blue", colorize=self.__colorize) link_num += 1 stdout_write("\n\n") def show_json(self): """Read data, convert into json and print info in stdout""" feed, column = self.__read() - json_text = Converter.to_json(feed, column, self.__verbose) + json_text = Converter.to_json(feed, column, self.__verbose, color=self.__colorize) stdout_write(json_text) def save_fb2(self): """Read data, convert to fb2 & save it as file""" feed, column = self.__read() if self.__sv_path: - Converter().to_fb2(feed, column, self.__source, self.__sv_path, verbose=self.__verbose) + Converter().to_fb2(feed, column, self.__source, self.__sv_path, verbose=self.__verbose, color=self.__colorize) else: - Converter().to_fb2(feed, column, self.__source, verbose=self.__verbose) + Converter().to_fb2(feed, column, self.__source, verbose=self.__verbose, color=self.__colorize) def save_html(self): """Read data, convert to fb2 & save it into files""" feed, column = self.__read() if self.__sv_path: - Converter().to_html(feed, column, self.__sv_path, verbose=self.__verbose) + Converter().to_html(feed, column, self.__sv_path, verbose=self.__verbose, color=self.__colorize) else: - Converter().to_html(feed, column, verbose=self.__verbose) + Converter().to_html(feed, column, verbose=self.__verbose, color=self.__colorize) diff --git a/project/rss_reader.py b/project/rss_reader.py index add51b0..c5ed011 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -11,6 +11,7 @@ def parse_arguments(): parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") parser.add_argument("--to_fb2", action="store_true", help="Save as fb2 file") parser.add_argument("--to_html", action="store_true", help="Save as html file") + parser.add_argument("--colorize",action="store_true", help="Add colors to console output") parser.add_argument("--path", type=str, help="Save news to file at entered path.") parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") parser.add_argument("--date", type=int, help="Start work with cached data. Format YYYYMMDD") @@ -19,7 +20,7 @@ def parse_arguments(): def main(): args = parse_arguments() - rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path) + rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path, args.colorize) used = False if args.json: rss.show_json() From c116d2e60571b0536db91ed989fe2b1372bc091a Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 17:45:24 +0300 Subject: [PATCH 33/35] 5th Iteration --- project/reader.py | 2 ++ project/rss_reader.py | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/project/reader.py b/project/reader.py index 18c6497..a7b230e 100644 --- a/project/reader.py +++ b/project/reader.py @@ -14,6 +14,8 @@ class RSSReader(): Methods: show_news() - print news to stdout show_json() - print news to stdout in json format + save_fb2() - save news as fb2 file + save-html() - save news as html file """ def __init__(self, source, limit, verbose, date, sv_path, colorize): diff --git a/project/rss_reader.py b/project/rss_reader.py index c5ed011..8b45eb8 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -5,7 +5,7 @@ def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--version", action='version', help="Print version info", version="Version 0.4") + parser.add_argument("--version", action='version', help="Print version info", version="Version 0.5") parser.add_argument("source", type=str, help="RSS URL") parser.add_argument("--json", action="store_true", help="Print result as JSON in stdout") parser.add_argument("--verbose", action="store_true", help="Outputs verbose status messages") diff --git a/setup.py b/setup.py index 9901c4b..3ddf13a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='RSSReader_Kiryl', - version='0.4', + version='0.5', url='https://github.com/KirylDv/PythonHomework/tree/FinalTask', packages=find_packages(), python_requires='>=3.6', From 330002becf4cbd3efce11edc7666c9b515070714 Mon Sep 17 00:00:00 2001 From: KirylDv Date: Sun, 1 Dec 2019 18:02:34 +0300 Subject: [PATCH 34/35] 5th Iteration. Strings of more than 120 characters removed --- project/log_helper.py | 2 +- project/reader.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/project/log_helper.py b/project/log_helper.py index 207b675..ed2dfbe 100644 --- a/project/log_helper.py +++ b/project/log_helper.py @@ -5,7 +5,7 @@ def stdout_write(string, sep=' ', end='\n', flush=False, verbose=True, color="", BLUE = '\033[34m' GREEN = '\033[92m' RESET = '\033[0m' - + if color == "red": color = RED elif color == "blue": diff --git a/project/reader.py b/project/reader.py index a7b230e..dab4318 100644 --- a/project/reader.py +++ b/project/reader.py @@ -63,11 +63,12 @@ def __cache_data(self, column, feed): def __read_news(self): """Read data from link""" try: - stdout_write(f"Reading information from {self.__source}", end='...\n', verbose=self.__verbose, color="blue", colorize=self.__colorize) + stdout_write(f"Reading information from {self.__source}", end='...\n', + verbose=self.__verbose, color="blue", colorize=self.__colorize) with urllib.request.urlopen(self.__source) as rss: bytestr = rss.read() self.__text = bytestr.decode("utf8") - stdout_write("Complete.", verbose=self.__verbose,, color="green", colorize=self.__colorize) + stdout_write("Complete.", verbose=self.__verbose, color="green", colorize=self.__colorize) except ValueError: stdout_write("Error: Can't connect, please try with https://", color="red", colorize=self.__colorize) sys.exit() @@ -139,7 +140,8 @@ def save_fb2(self): """Read data, convert to fb2 & save it as file""" feed, column = self.__read() if self.__sv_path: - Converter().to_fb2(feed, column, self.__source, self.__sv_path, verbose=self.__verbose, color=self.__colorize) + Converter().to_fb2(feed, column, self.__source, self.__sv_path, + verbose=self.__verbose, color=self.__colorize) else: Converter().to_fb2(feed, column, self.__source, verbose=self.__verbose, color=self.__colorize) From 2d5292c4f10973ebd1202c402fcb2c5b54e65fab Mon Sep 17 00:00:00 2001 From: KirylDv Date: Tue, 10 Dec 2019 19:31:48 +0300 Subject: [PATCH 35/35] Fix errors added with colorize --- project/SQL_cache.py | 6 +++--- project/converter.py | 10 +++++----- project/log_helper.py | 18 +++++++++--------- project/reader.py | 6 +++--- project/rss_reader.py | 12 ++++++++++++ 5 files changed, 32 insertions(+), 20 deletions(-) diff --git a/project/SQL_cache.py b/project/SQL_cache.py index 831264c..d46ba65 100644 --- a/project/SQL_cache.py +++ b/project/SQL_cache.py @@ -59,15 +59,15 @@ def write_data(self, data, feed, url, verbose, color): VALUES (?,?) """, (url, feed)) self.conn.commit() - counter += 1 - if verbose: - write_progressbar(len(data)+1, counter) except sqlite3.IntegrityError: pass except sqlite3.DatabaseError: stdout_write("Database error", color="red", colorize=color) finally: self._close() + counter = len(data)+1 + if verbose: + write_progressbar(len(data)+1, counter) def read_data(self, url, date, color): """Get url & date diff --git a/project/converter.py b/project/converter.py index 6c2de6a..10e9292 100644 --- a/project/converter.py +++ b/project/converter.py @@ -7,7 +7,7 @@ import urllib.error -def _download_image(url, verbose, color): +def _download_image(url, verbose, sv_path, color=False): """download image from Internet to your PC""" stdout_write("Downloading image", verbose=verbose, color="blue", colorize=color) try: @@ -26,7 +26,7 @@ def _download_image(url, verbose, color): class Converter(): """Converter class. Convert data to some format""" - def to_json(self, feed, column, verbose): + def to_json(self, feed, column, verbose, color): """Take data and return it in json""" stdout_write("Convert to json...", verbose=verbose, color="blue", colorize=color) counter = 0 @@ -42,7 +42,7 @@ def to_json(self, feed, column, verbose): if 'date' in news: json_text += '\n "date": "' + news['date'] + '",' json_text += '\n "link": "' + news['link'] + '",' - json_text += '\n "description": "' + news['links'] + '",' + json_text += '\n "description": "' + (news['text']) + '",' json_text += '\n "links": [' links = "" for lin in news['links']: @@ -125,7 +125,7 @@ def next_article(id, title, images, description, feed, date="Unknown"): text_links += [link[:-7]] images = [] for link in image_links: - img_path = _download_image(link, verbose) + img_path = _download_image(link, verbose, sv_path, color) try: with open(img_path, 'rb') as binfile: images += [b64encode(binfile.read()).decode()] @@ -200,7 +200,7 @@ def create_html(feed, main_part): text_links += [link[:-7]] images = [] for link in image_links: - img_path = _download_image(link, verbose) + img_path = _download_image(link, verbose, sv_path, color) images += [img_path] html_text += next_article(links=text_links, title=news["title"], diff --git a/project/log_helper.py b/project/log_helper.py index ed2dfbe..79d6249 100644 --- a/project/log_helper.py +++ b/project/log_helper.py @@ -5,18 +5,18 @@ def stdout_write(string, sep=' ', end='\n', flush=False, verbose=True, color="", BLUE = '\033[34m' GREEN = '\033[92m' RESET = '\033[0m' - - if color == "red": - color = RED - elif color == "blue": - color = BLUE - elif color == "green": - color = GREEN - else: - color, RESET = "", "" else: RED, BLUE, GREEN, RESET = "", "", "", "" + if color == "red": + color = RED + elif color == "blue": + color = BLUE + elif color == "green": + color = GREEN + else: + color, RESET = "", "" + if verbose: string = string.replace("'", "'") print(color+string+RESET, sep=sep, end=end, flush=flush) diff --git a/project/reader.py b/project/reader.py index dab4318..4ec4ed1 100644 --- a/project/reader.py +++ b/project/reader.py @@ -87,7 +87,7 @@ def __parse(self): items = xml.getElementsByTagName("item") counter = 0 if self.__verbose: - progress(self.__limit, counter) + write_progressbar(self.__limit, counter) column = [] for item in items: if counter == self.__limit: @@ -101,7 +101,7 @@ def __parse(self): "text": text, "links": links}] if self.__verbose: - progress(self.__limit, counter) + write_progressbar(self.__limit, counter) self.__cache_data(column, feed) return feed, column @@ -133,7 +133,7 @@ def show_news(self): def show_json(self): """Read data, convert into json and print info in stdout""" feed, column = self.__read() - json_text = Converter.to_json(feed, column, self.__verbose, color=self.__colorize) + json_text = Converter().to_json(feed, column, self.__verbose, color=self.__colorize) stdout_write(json_text) def save_fb2(self): diff --git a/project/rss_reader.py b/project/rss_reader.py index 8b45eb8..259537f 100755 --- a/project/rss_reader.py +++ b/project/rss_reader.py @@ -20,6 +20,17 @@ def parse_arguments(): def main(): args = parse_arguments() + if ' ' in args.path: + args.path = None + if args.colorize: + print('\033[31m' + 'Error: path cannot contain spaces.' + '\033[0m') + else: + print('Error: path cannot contain spaces.') + working_dir = input("Input Y to use working directory") + if 'y' in working_dir or 'Y' in working_dir: + pass + else: + return 0 rss = RSSReader(args.source, args.limit, args.verbose, args.date, args.path, args.colorize) used = False if args.json: @@ -33,6 +44,7 @@ def main(): used = True if not used: rss.show_news() + return 0 if __name__ == "__main__":