diff --git a/html2text.py b/html2text.py index 17528901..a3c48064 100755 --- a/html2text.py +++ b/html2text.py @@ -1,6 +1,6 @@ #!/usr/bin/env python """html2text: Turn HTML into equivalent Markdown-structured text.""" -__version__ = "3.200.3" +__version__ = "3.200.4" __author__ = "Aaron Swartz (me@aaronsw.com)" __copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3." __contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"] @@ -238,6 +238,7 @@ def __init__(self, out=None, baseurl=''): self.abbr_data = None # last inner HTML (for abbr being defined) self.abbr_list = {} # stack of abbreviations to write later self.baseurl = baseurl + self.in_span = False try: del unifiable_n[name2cp('nbsp')] except KeyError: pass @@ -413,7 +414,8 @@ def handle_tag(self, tag, attrs, start): else: self.soft_br() else: - self.p() + if start == 1 or not self.in_span: + self.p() if tag == "br" and start: self.o(" \n") @@ -492,12 +494,14 @@ def handle_tag(self, tag, attrs, start): a['outcount'] = self.outcount self.a.append(a) self.o("][" + str(a['count']) + "]") + self.in_span = False if tag == "img" and start and not self.ignore_images: if has_key(attrs, 'src'): attrs['href'] = attrs['src'] alt = attrs.get('alt', '') self.o("![" + escape_md(alt) + "]") + self.in_span = False if self.inline_links: self.o("(" + escape_md(attrs['href']) + ")") @@ -511,6 +515,7 @@ def handle_tag(self, tag, attrs, start): attrs['outcount'] = self.outcount self.a.append(attrs) self.o("[" + str(attrs['count']) + "]") + self.in_span = False if tag == 'dl' and start: self.p() if tag == 'dt' and not start: self.pbr() @@ -670,6 +675,7 @@ def handle_data(self, data): return else: self.o("[") + self.in_span = True self.maybe_automatic_link = None if not self.code and not self.pre: diff --git a/setup.py b/setup.py index dd3d9bc2..581a4f04 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = "html2text", - version = "3.200.3", + version = "3.200.4", description = "Turn HTML into equivalent Markdown-structured text.", author = "Aaron Swartz", author_email = "me@aaronsw.com",