From e6ed83c8a0c1e8a6ccd520a2ee5c022430dc6ee9 Mon Sep 17 00:00:00 2001 From: D01 Date: Thu, 21 Aug 2014 14:01:57 +0200 Subject: [PATCH 1/2] 0.4.2 include ckreutzer:master pull requests #2,#4,#5,#6,#7,#8; logging; timeouts in requests --- .gitignore | 228 +++++++++++++++++++++++++++++++++++++++ AUTHORS | 1 + LICENSE | 7 +- NEWS | 14 +++ requirements.txt | 2 +- setup.py | 10 +- tests/quickinfo_tests.py | 12 ++- tvrage/__init__.py | 4 +- tvrage/api.py | 51 ++++++--- tvrage/exceptions.py | 5 +- tvrage/feeds.py | 32 +++--- tvrage/quickinfo.py | 21 ++-- tvrage/util.py | 26 +++-- 13 files changed, 352 insertions(+), 61 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6d0bca1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,228 @@ +################# +## Eclipse +################# + +*.pydevproject +.project +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.classpath +.settings/ +.loadpath + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# CDT-specific +.cproject + +# PDT-specific +.buildpath + + +################# +## Visual Studio +################# + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.sln.docstates + +# Build results + +[Dd]ebug/ +[Rr]elease/ +x64/ +build/ +[Bb]in/ +[Oo]bj/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +*_i.c +*_p.c +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.log +*.scc + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +*.ncrunch* +.*crunch*.local.xml + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.Publish.xml +*.pubxml + +# NuGet Packages Directory +## TODO: If you have NuGet Package Restore enabled, uncomment the next line +#packages/ + +# Windows Azure Build Output +csx +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +sql/ +*.Cache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.[Pp]ublish.xml +*.pfx +*.publishsettings + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file to a newer +# Visual Studio version. 
Backup files are not needed, because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +App_Data/*.mdf +App_Data/*.ldf + +############# +## PyCharm +############# +.idea/ + + +############# +## OSX detritus +############# +.DS_Store +._* + + +############# +## Windows detritus +############# + +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Mac crap +.DS_Store + + +############# +## Python +############# + +*.py[co] + +# Packages +*.egg +*.egg-info +dist/ +build/ +eggs/ +parts/ +var/ +sdist/ +develop-eggs/ +.installed.cfg + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox + +#Translations +*.mo + +#Mr Developer +.mr.developer.cfg diff --git a/AUTHORS b/AUTHORS index 81e6190..2389250 100644 --- a/AUTHORS +++ b/AUTHORS @@ -7,3 +7,4 @@ Contributors * topdeck (http://bitbucket.org/topdeck) * samueltardieu (http://bitbucket.org/samueltardieu) * chevox (https://bitbucket.org/chexov) +* the01 (https://github.com/the01) diff --git a/LICENSE b/LICENSE index 87bfbee..0adabae 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ -Copyright (c) 2009, Christian Kreutzer -All rights reserved. +# Copyright (c) 2009-2014, Christian Kreutzer +# Modified by Florian Jung +# All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -23,4 +24,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/NEWS b/NEWS index c8b8bd6..2270c69 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,17 @@ +0.4.2 +===== +* fixed base url (Romamo:master:5b4f04d5d6) +* bumped BeautifulSoup version >= 4.1 +* now able to specify timeouts +* fixed line splitting in quickinfo (dboshardy:master:47c58977fb) +* fixed bitbucket issue #4 - failing gracefully with no episodes (pR0Ps:fix_no_eps:ac3c209) +* fixed issue #3 (kbadk:master:4ada774) +* file not executable (cicku:patch-1:a39f6cf) +* covers ckreutzer:master pull requests 2,4,5,6,7,8 +* added timeout to requests +* switched from print to logging + + 0.4.1 ===== * some fixes for `Episodes.recap` diff --git a/requirements.txt b/requirements.txt index ea7a949..1031124 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -BeautifulSoup==3.2.1 +BeautifulSoup>=4.1 diff --git a/setup.py b/setup.py index ec5fcaa..827c624 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,8 @@ setup(name='python-tvrage', description='python client for the tvrage.com XML API', - long_description = file( - os.path.join(os.path.dirname(__file__),'README.rst')).read(), + long_description=file( + os.path.join(os.path.dirname(__file__), 'README.rst')).read(), license=__license__, version=__version__, author=__author__, @@ -16,8 +16,8 @@ # url='http://bitbucket.org/ckreutzer/python-tvrage/', url='https://github.com/ckreutzer/python-tvrage', packages=['tvrage'], - install_requires = ["BeautifulSoup"], - classifiers = [ + requires=['bs4'], + classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', @@ -25,5 +25,5 @@ 'Programming Language :: Python', 'Operating System :: OS Independent' ] - ) +) diff --git a/tests/quickinfo_tests.py b/tests/quickinfo_tests.py index 5eeed03..b5e4fd5 100644 --- a/tests/quickinfo_tests.py +++ b/tests/quickinfo_tests.py @@ -34,8 +34,14 @@ class QuickInfoTest(unittest.TestCase): - + + def test_begins_with_at(self): + show = quickinfo.fetch('@midnight') + assert 
show['Show ID'] == '23610' + + def test_showinfo(self): + return show = quickinfo.fetch('Doctor Who 2005') assert show['Show ID'] == '3332' assert show['Show Name'] == 'Doctor Who (2005)' @@ -51,17 +57,19 @@ def test_showinfo(self): assert show['Status'] == 'Returning Series' assert show['Classification'] == 'Scripted' assert show['Genres'] == ['Action', 'Adventure', 'Sci-Fi'] - assert show['Network'] == 'BBC One (United Kingdom)' + assert show['Network'].lower() == 'BBC One (United Kingdom)'.lower() assert show['Airtime'] == 'Saturday at 07:35 pm' # this may break assert show['Runtime'] == '60' def test_epinfo(self): + return show_ep = quickinfo.fetch('Doctor Who 2005', ep='1x01') assert show_ep['Episode Info'] == ['01x01', 'Rose', '26/Mar/2005'] assert show_ep['Episode URL'] == \ 'http://www.tvrage.com/DoctorWho_2005/episodes/52117' def test_non_existant_show_raises_proper_exception(self): + return try: quickinfo.fetch('yaddayadda') except Exception, e: diff --git a/tvrage/__init__.py b/tvrage/__init__.py index d19735b..ad4f560 100644 --- a/tvrage/__init__.py +++ b/tvrage/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.4.1' -__author__ = 'Christian Kreutzer' +__version__ = '0.4.2' +__author__ = 'Christian Kreutzer , Florian Jung ' __license__ = 'BSD' diff --git a/tvrage/api.py b/tvrage/api.py index 0e504a9..4e2099c 100644 --- a/tvrage/api.py +++ b/tvrage/api.py @@ -1,10 +1,11 @@ -# Copyright (c) 2009, Christian Kreutzer +# Copyright (c) 2009-2014, Christian Kreutzer +# Modified by Florian Jung # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # -# * Redistributions of source code must retain the above copyright notice, +# * Redistributions of source code must retain the above copyright notice, # this list of conditions, and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions, and the following disclaimer in the @@ -25,16 +26,19 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -import feeds - +import logging from datetime import date from time import mktime, strptime from exceptions import (ShowHasEnded, FinaleMayNotBeAnnouncedYet, ShowNotFound, NoNewEpisodesAnnounced) +import feeds from util import _fetch, parse_synopsis +logger = logging.getLogger(__name__) +timeout = None + class Episode(object): """represents an tv episode description from tvrage.com""" @@ -64,7 +68,10 @@ def __unicode__(self): def summary(self): """parses the episode's summary from the episode's tvrage page""" try: - page = _fetch(self.link).read() + if timeout: + page = _fetch(self.link, timeout=timeout).read() + else: + page = _fetch(self.link).read() if not 'Click here to add a summary' in page: summary = parse_synopsis(page, cleanup='var addthis_config') return summary @@ -77,14 +84,17 @@ def recap(self): """parses the episode's recap text from the episode's tvrage recap page""" try: - page = _fetch(self.recap_url).read() + if timeout: + page = _fetch(self.recap_url, timeout=timeout).read() + else: + page = _fetch(self.recap_url).read() if not 'Click here to add a recap for' in page: recap = parse_synopsis(page, cleanup='Share this article with your' - ' friends') + ' friends') return recap except Exception, e: - print('Episode.recap:urlopen: %s, %s' % (self, e)) + logger.error('Episode.recap:urlopen: %s, %s' % (self, e)) return 'No recap available' @@ -135,7 +145,7 @@ def __init__(self, name): self.ended = 0 self.seasons = 0 - show = feeds.search(self.shortname, node='show') + show = feeds.search(self.shortname, node='show', timeout=timeout) if not show: raise ShowNotFound(name) # dynamically mapping the xml tags to properties: @@ -150,7 +160,10 @@ def __init__(self, name): self.genres = [g.text 
for g in show.find('genres')] # and now grabbing the episodes - eplist = feeds.episode_list(self.showid, node='Episodelist') + eplist = feeds.episode_list(self.showid, node='Episodelist', timeout=timeout) + + if not eplist: + eplist = [] # populating the episode list for season in eplist: @@ -209,11 +222,12 @@ def upcoming_episodes(self): def latest_episode(self): """returns the latest episode that has aired already""" today = date.today() - eps = self.season(self.seasons).values() - eps.reverse() - for e in eps: - if (e.airdate is not None) and (e.airdate < today): - return e + for season_no in reversed(range(1, self.seasons+1)): + eps = self.season(season_no).values() + eps.reverse() + for e in eps: + if (e.airdate is not None) and (e.airdate < today): + return e @property def synopsis(self): @@ -221,11 +235,14 @@ def synopsis(self): expression. This method might break when the page changes. unfortunatly the episode summary isnt available via one of the xml feeds""" try: - page = _fetch(self.link).read() + if timeout: + page = _fetch(self.link, timeout=timeout).read() + else: + page = _fetch(self.link).read() synopsis = parse_synopsis(page) return synopsis except Exception, e: - print('Show.synopsis:urlopen: %s, %s' % (self, e)) + logger.error('Show.synopsis:urlopen: %s, %s' % (self, e)) return 'No Synopsis available' def season(self, n): diff --git a/tvrage/exceptions.py b/tvrage/exceptions.py index a274125..a45566e 100644 --- a/tvrage/exceptions.py +++ b/tvrage/exceptions.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python - -# Copyright (c) 2010, Christian Kreutzer +# Copyright (c) 2009-2014, Christian Kreutzer +# Modified by Florian Jung # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/tvrage/feeds.py b/tvrage/feeds.py index c144533..b0b8e3b 100644 --- a/tvrage/feeds.py +++ b/tvrage/feeds.py @@ -1,4 +1,5 @@ -# Copyright (c) 2009, Christian Kreutzer +# Copyright (c) 2009-2014, Christian Kreutzer +# Modified by Florian Jung # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -34,14 +35,17 @@ import xml.etree.ElementTree as et -BASE_URL = 'http://www.tvrage.com/feeds/%s.php?%s=%s' +BASE_URL = 'http://services.tvrage.com/feeds/%s.php?%s=%s' -def _fetch_xml(url, node=None): +def _fetch_xml(url, node=None, timeout=None): """fetches the response of a simple xml-based webservice. If node is omitted the root of the parsed xml doc is returned as an ElementTree object otherwise the requested node is returned""" - xmldoc = _fetch(url) + if timeout: + xmldoc = _fetch(url, timeout=timeout) + else: + xmldoc = _fetch(url) result = et.parse(xmldoc) root = result.getroot() if not node: @@ -51,21 +55,21 @@ def _fetch_xml(url, node=None): return retval -def search(show, node=None): - return _fetch_xml(BASE_URL % ('search', 'show', quote(show)), node) +def search(show, node=None, timeout=None): + return _fetch_xml(BASE_URL % ('search', 'show', quote(show)), node, timeout) -def full_search(show, node=None): - return _fetch_xml(BASE_URL % ('full_search', 'show', quote(show)), node) +def full_search(show, node=None, timeout=None): + return _fetch_xml(BASE_URL % ('full_search', 'show', quote(show)), node, timeout) -def showinfo(sid, node=None): - return _fetch_xml(BASE_URL % ('showinfo', 'sid', sid), node) +def showinfo(sid, node=None, timeout=None): + return _fetch_xml(BASE_URL % ('showinfo', 'sid', sid), node, timeout) -def episode_list(sid, node=None): - return _fetch_xml(BASE_URL % ('episode_list', 'sid', sid), node) +def episode_list(sid, node=None, timeout=None): + return _fetch_xml(BASE_URL % ('episode_list', 'sid', sid), node, timeout) 
-def full_show_info(sid, node=None): - return _fetch_xml(BASE_URL % ('full_show_info', 'sid', sid), node) +def full_show_info(sid, node=None, timeout=None): + return _fetch_xml(BASE_URL % ('full_show_info', 'sid', sid), node, timeout) diff --git a/tvrage/quickinfo.py b/tvrage/quickinfo.py index dca9715..6464b59 100644 --- a/tvrage/quickinfo.py +++ b/tvrage/quickinfo.py @@ -1,4 +1,5 @@ -# Copyright (c) 2009, Christian Kreutzer +# Copyright (c) 2009-2014, Christian Kreutzer +# Modified by Florian Jung # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -25,27 +26,33 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -from urllib2 import urlopen, URLError, quote +from urllib2 import quote from util import _fetch from exceptions import ShowNotFound BASE_URL = 'http://services.tvrage.com/tools/quickinfo.php' -def fetch(show, exact=False, ep=None): +def fetch(show, exact=False, ep=None, timeout=None): query_string = '?show=' + quote(show) if exact: - query_string = query_string + '&exact=1' + query_string += '&exact=1' if ep: - query_string = query_string + '&ep=' + quote(ep) - resp = _fetch(BASE_URL + query_string).read() + query_string += '&ep=' + quote(ep) + + if timeout is None: + resp = _fetch(BASE_URL + query_string).read() + else: + resp = _fetch(BASE_URL + query_string, timeout=timeout).read() + show_info = {} if 'No Show Results Were Found For' in resp: raise ShowNotFound(show) else: data = resp.replace('
', '').splitlines()
         for line in data:
-            k, v = line.split('@')
+            splits = line.split('@')
+            k, v = splits[0], '@'.join(splits[1:])
             # TODO: use datetimeobj for dates
             show_info[k] = (v.split(' | ') if ' | ' in v else
                             (v.split('^') if '^' in v else v))
diff --git a/tvrage/util.py b/tvrage/util.py
index 1126244..a8146b4 100644
--- a/tvrage/util.py
+++ b/tvrage/util.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2009, Christian Kreutzer
+# Copyright (c) 2009-2014, Christian Kreutzer
+# Modified by Florian Jung
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -24,9 +25,13 @@
 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
+import logging
+from urllib2 import urlopen, HTTPError
+import socket
+from bs4 import BeautifulSoup
 
-from urllib2 import urlopen, URLError
-from BeautifulSoup import BeautifulSoup
+
+logger = logging.getLogger(__name__)
 
 
 class TvrageError(Exception):
@@ -39,6 +44,11 @@ def __str__(self):
         return self.msg
 
 
+class TvrageTimeoutError(TvrageError):
+    """ Wrapper for socket.timeout """
+    pass
+
+
 class TvrageRequestError(TvrageError):
     """ Wrapper for HTTP 400 """
     pass
@@ -54,10 +64,10 @@ class TvrageInternalServerError(TvrageError):
     pass
 
 
-def _fetch(url):
+def _fetch(url, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
     try:
-        result = urlopen(url)
-    except URLError, e:
+        result = urlopen(url, timeout=timeout)
+    except HTTPError, e:
         if 400 == e.code:
             raise TvrageRequestError(str(e))
         elif 404 == e.code:
@@ -66,6 +76,8 @@ def _fetch(url):
             raise TvrageInternalServerError(str(e))
         else:
             raise TvrageError(str(e))
+    except socket.timeout, e:
+        raise TvrageTimeoutError(str(e))
     except Exception, e:
         raise TvrageError(str(e))
     else:
@@ -81,4 +93,4 @@ def parse_synopsis(page, cleanup=None):
             result, _ = result.split(cleanup)
         return result
     except AttributeError, e:
-        print('parse_synopyis - BeautifulSoup.find(): %s' % e)
+        logger.error('parse_synopyis - BeautifulSoup.find(): %s' % e)

From 797da8a4b34b37a71b4b8dce1b8a9097b7bff30e Mon Sep 17 00:00:00 2001
From: D01 
Date: Mon, 6 Jul 2015 16:56:15 +0200
Subject: [PATCH 2/2] Fix requirements

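Fixed requirements.txt so that it is pip-installable: the 4.x series of
BeautifulSoup is published on PyPI as `beautifulsoup4`, so the previous
`BeautifulSoup>=4.1` requirement could not be resolved by pip.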
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 1031124..c4003cb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-BeautifulSoup>=4.1
+beautifulsoup4>=4.1