diff --git a/html2text.py b/html2text.py index 9947586..bcbe186 100755 --- a/html2text.py +++ b/html2text.py @@ -259,6 +259,7 @@ def __init__(self, out=None, baseurl='', bodywidth=BODY_WIDTH): self.abbr_data = None # last inner HTML (for abbr being defined) self.abbr_list = {} # stack of abbreviations to write later self.baseurl = baseurl + self.in_span = False try: del unifiable_n[name2cp('nbsp')] @@ -450,7 +451,8 @@ def handle_tag(self, tag, attrs, start): else: self.soft_br() else: - self.p() + if start == 1 or not self.in_span: + self.p() if tag == "br" and start: self.o(" \n") @@ -541,12 +543,14 @@ def handle_tag(self, tag, attrs, start): a['outcount'] = self.outcount self.a.append(a) self.o("][" + str(a['count']) + "]") + self.in_span = False if tag == "img" and start and not self.ignore_images: if ('src' in attrs): attrs['href'] = attrs['src'] alt = attrs.get('alt', '') self.o("![" + escape_md(alt) + "]") + self.in_span = False if self.inline_links: self.o("(" + escape_md(attrs['href']) + ")") @@ -560,6 +564,7 @@ def handle_tag(self, tag, attrs, start): attrs['outcount'] = self.outcount self.a.append(attrs) self.o("[" + str(attrs['count']) + "]") + self.in_span = False if tag == 'dl' and start: self.p() @@ -748,6 +753,7 @@ def handle_data(self, data): return else: self.o("[") + self.in_span = True self.maybe_automatic_link = None if not self.code and not self.pre: