From b0bfcb17a80a9f6d0daf5a35a784d006b0a6a1ea Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Sat, 14 Jul 2018 21:38:30 -0400 Subject: [PATCH 1/4] Drop support for Node 4 Add Node 10 --- .travis.yml | 2 +- README.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 27c73e2..ae0f7f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,6 @@ before_install: - $CXX --version node_js: - - "4" - "6" - "8" + - "10" diff --git a/README.md b/README.md index ca36f2f..8305938 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,10 @@ You can also check out this nice [working implementation](https://github.com/scr See the [`examples`](examples/) directory. +## Changes in v3 + +- dropped support for Node 4 + ## API ### Transform Stream From 6a88e6a34f027e91ed52ff09a3e061889c95dfa2 Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Sun, 10 Dec 2017 10:01:31 -0500 Subject: [PATCH 2/4] Fix html stripping to get entire html-ish substring --- lib/utils.js | 2 +- test/feeds/title-with-angle-brackets.xml | 11 +++++++++++ test/strip-html.js | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test/feeds/title-with-angle-brackets.xml create mode 100644 test/strip-html.js diff --git a/lib/utils.js b/lib/utils.js index a396ea8..a97cd09 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -169,7 +169,7 @@ exports.reresolve = reresolve; * @private */ function stripHtml (str) { - return str.replace(/<.*?>/g, ''); + return str.replace(/<+[^>]+?>+/g, ''); } exports.stripHtml = stripHtml; diff --git a/test/feeds/title-with-angle-brackets.xml b/test/feeds/title-with-angle-brackets.xml new file mode 100644 index 0000000..a6ccd67 --- /dev/null +++ b/test/feeds/title-with-angle-brackets.xml @@ -0,0 +1,11 @@ + + + + Channel title + http://example.com/ + Channel + + RSS <<< Title >>> + + + diff --git a/test/strip-html.js b/test/strip-html.js new file mode 100644 index 0000000..bf46254 --- /dev/null +++ 
b/test/strip-html.js @@ -0,0 +1,18 @@ +describe('strip html', function () { + + var feed = __dirname + '/feeds/title-with-angle-brackets.xml'; + + it('should aggressively strip html', function (done) { + fs.createReadStream(feed).pipe(new FeedParser()) + .once('readable', function () { + var stream = this; + assert.equal(stream.read().title, 'RSS '); + done(); + }) + .on('error', function (err) { + assert.ifError(err); + done(err); + }); + }); + +}); From 3403ee1099aa77e9c1978c8822f3b792210e994a Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Sun, 10 Dec 2017 10:21:48 -0500 Subject: [PATCH 3/4] Change default behavior to not strip html by default Added option `strip_html` to restore old behavior. Resolves #165, #243 --- README.md | 8 ++++++++ lib/feedparser/index.js | 13 ++++++++++--- test/strip-html.js | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8305938..a8a0416 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,14 @@ You can also check out this nice [working implementation](https://github.com/scr behavior. If you want total control over handling these errors and optionally aborting parsing the feed, use this option. +- `strip_html` - Set to `true` to override Feedparser's default behavior, which is + to pass through all substrings that look like html. In older versions, we always + stripped these html-like substrings to help users avoid inadvertently creating + XSS vulnerabilities by reflecting the value of these elements without properly + escaping them. We decided that wasn't particularly helpful because the simple + sanitization we were performing didn't address all cases and did a poor job. However, + if you were relying on the legacy behavior, you can set this option to `true`. + ## Examples See the [`examples`](examples/) directory.
diff --git a/lib/feedparser/index.js b/lib/feedparser/index.js index 7915725..5574519 100644 --- a/lib/feedparser/index.js +++ b/lib/feedparser/index.js @@ -72,6 +72,7 @@ function FeedParser (options) { if (!('normalize' in this.options)) this.options.normalize = true; if (!('addmeta' in this.options)) this.options.addmeta = true; if (!('resume_saxerror' in this.options)) this.options.resume_saxerror = true; + if (!('strip_html' in this.options)) this.options.strip_html = false; if ('MAX_BUFFER_LENGTH' in this.options) { sax.MAX_BUFFER_LENGTH = this.options.MAX_BUFFER_LENGTH; // set to Infinity to have unlimited buffers } else { @@ -430,6 +431,7 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { var meta = {} , normalize = !options || (options && options.normalize) + , stripHtml = !options || (options && options.strip_html) ; if (normalize) { @@ -765,8 +767,10 @@ FeedParser.prototype.handleMeta = function handleMeta (node, type, options) { if (!meta.xmlurl && this.options.feedurl) { meta.xmlurl = meta.xmlUrl = this.options.feedurl; } - meta.title = meta.title && _.stripHtml(meta.title); - meta.description = meta.description && _.stripHtml(meta.description); + if (stripHtml) { + meta.title = meta.title && _.stripHtml(meta.title); + meta.description = meta.description && _.stripHtml(meta.description); + } } return meta; @@ -777,6 +781,7 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ var item = {} , normalize = !options || (options && options.normalize) + , stripHtml = !options || (options && options.strip_html) ; if (normalize) { @@ -1106,7 +1111,9 @@ FeedParser.prototype.handleItem = function handleItem (node, type, options){ item.link = item.guid; } } - item.title = item.title && _.stripHtml(item.title); + if (stripHtml) { + item.title = item.title && _.stripHtml(item.title); + } } return item; }; diff --git a/test/strip-html.js b/test/strip-html.js index bf46254..21ba623 100644 --- a/test/strip-html.js 
+++ b/test/strip-html.js @@ -2,8 +2,21 @@ describe('strip html', function () { var feed = __dirname + '/feeds/title-with-angle-brackets.xml'; - it('should aggressively strip html', function (done) { + it('should NOT aggressively strip html by default', function (done) { fs.createReadStream(feed).pipe(new FeedParser()) + .once('readable', function () { + var stream = this; + assert.equal(stream.read().title, 'RSS <<< Title >>>'); + done(); + }) + .on('error', function (err) { + assert.ifError(err); + done(err); + }); + }); + + it('should aggressively strip html with option `strip_html`', function (done) { + fs.createReadStream(feed).pipe(new FeedParser({ strip_html: true })) .once('readable', function () { var stream = this; assert.equal(stream.read().title, 'RSS '); From 88db9c71d1ec7369a1056983baf4db86ca799caf Mon Sep 17 00:00:00 2001 From: Dan MacTough Date: Sun, 15 Jul 2018 16:45:20 -0400 Subject: [PATCH 4/4] Update changes in README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a8a0416..2003ce9 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,8 @@ See the [`examples`](examples/) directory. ## Changes in v3 -- dropped support for Node 4 +- Dropped support for Node 4 +- Changed the default behavior to no longer strip html; set the `strip_html` option to `true` to restore the old behavior [#264](https://github.com/danmactough/node-feedparser/pull/264) ## API