From f321fc4a686d185c8895fa5219fc7207d8f803c8 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 21 Apr 2018 16:42:54 +1200 Subject: [PATCH 01/12] Initial cut of the es2015 lexer - Provide support of the spread operator and arrow function, along with naive support of the template literal. - The template literal currently does NOT have awareness of available identifiers and the like, it's just a dumb string with a funny quote for this initial implementation. - Contains fixes to the regex rules introduced in commits authored later than the initial author date of this commit. --- .flake8 | 1 + src/calmjs/parse/lexers/es2015.py | 51 ++++++++++++++++++ src/calmjs/parse/tests/lexer.py | 30 +++++++++++ src/calmjs/parse/tests/test_es2015_lexer.py | 57 +++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 src/calmjs/parse/lexers/es2015.py create mode 100644 src/calmjs/parse/tests/test_es2015_lexer.py diff --git a/.flake8 b/.flake8 index e41fc18..7653d00 100644 --- a/.flake8 +++ b/.flake8 @@ -2,6 +2,7 @@ per-file-ignores = src/calmjs/parse/tests/lexer.py:E501 src/calmjs/parse/lexers/es5.py:E221 + src/calmjs/parse/lexers/es2015.py:E221 include = src/ diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py new file mode 100644 index 0000000..cab33b6 --- /dev/null +++ b/src/calmjs/parse/lexers/es2015.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +""" +ES2015 (ECMAScript 6th Edition/ES6) lexer. +""" + +import ply + +from calmjs.parse.lexers.es5 import Lexer as ES5Lexer + + +class Lexer(ES5Lexer): + """ + ES2015 lexer. + """ + + # Punctuators (ES6) + t_ARROW = r'=>' + t_SPREAD = r'\.\.\.' + # this is now a right brace operator... + # t_RBRACE = r'}' + + # TODO verify that the standard string rule will work. + # TODO complete the actual implementation to make this actually + # usable. 
+ template = r""" + (?:` # opening backquote + (?: [^`\\] # not `, \; allow + | \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # line continuation + | \\[a-tvwyzA-TVWYZ!-\/:-@\[-`{-~] # escaped chars + | \\x[0-9a-fA-F]{2} # hex_escape_sequence + | \\u[0-9a-fA-F]{4} # unicode_escape_sequence + | \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # octal_escape_sequence + | \\0 # (ECMA-262 6.0 21.2.2.11) + )*? # zero or many times + `) # closing backquote + """ + + tokens = ES5Lexer.tokens + ( + # ES2015 punctuators + 'ARROW', 'SPREAD', # => ... + + # ES2015 terminal types + 'TEMPLATE', + ) + + @ply.lex.TOKEN(template) + def t_TEMPLATE(self, token): + # remove escape + new line sequence used for strings + # written across multiple lines of code + token.value = token.value.replace('\\\n', '') + return token diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index 6bb39dc..c6b6616 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -545,6 +545,36 @@ ) ] +es2015_cases = [ + ( + 'const_keyword', + ('const c', + ['CONST const', 'ID c']), + ), ( + 'arrow_punctuator', + ('=>', + ['ARROW =>']), + ), ( + 'arrow_functions', + ('const c = (name) => { return name; }', + ['CONST const', 'ID c', 'EQ =', 'LPAREN (', 'ID name', 'RPAREN )', + 'ARROW =>', 'LBRACE {', 'RETURN return', 'ID name', 'SEMI ;', + 'RBRACE }']), + ), ( + 'template_literal', + ('`foo`', + ['TEMPLATE `foo`']), + ), ( + 'template_literal_escape', + (r'`f\`o`', + [r'TEMPLATE `f\`o`']), + ), ( + 'template_literal_assignment', + ('s = `hello world`', + ['ID s', 'EQ =', 'TEMPLATE `hello world`']), + ) +] + def run_lexer(value, lexer_cls): lexer = lexer_cls() diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py new file mode 100644 index 0000000..aa1ed64 --- /dev/null +++ b/src/calmjs/parse/tests/test_es2015_lexer.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +import unittest +from functools import partial + +from 
calmjs.parse.lexers.es2015 import Lexer +from calmjs.parse.exceptions import ECMASyntaxError + +from calmjs.parse.testing.util import build_equality_testcase +from calmjs.parse.tests.lexer import ( + run_lexer, + run_lexer_pos, + es5_cases, + es5_pos_cases, + es5_all_cases, + es2015_cases, +) + + +class LexerFailureTestCase(unittest.TestCase): + + def test_initial_template_character(self): + lexer = Lexer() + lexer.input('`') + with self.assertRaises(ECMASyntaxError) as e: + [token for token in lexer] + self.assertEqual(str(e.exception), "Illegal character '`' at 1:1") + + +LexerKeywordTestCase = build_equality_testcase( + 'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), ( + (label, data[0], data[1],) for label, data in [( + # Keywords + # ('break case ...', ['BREAK break', 'CASE case', ...]) + 'keywords_all', + (' '.join(kw.lower() for kw in Lexer.keywords), + ['%s %s' % (kw, kw.lower()) for kw in Lexer.keywords] + ), + )] + ) +) + +LexerES5TestCase = build_equality_testcase( + 'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), ( + (label, data[0], data[1],) for label, data in es5_cases)) + +LexerES5PosTestCase = build_equality_testcase( + 'LexerPosTestCase', partial( + run_lexer_pos, lexer_cls=Lexer), es5_pos_cases) + +LexerES5AllTestCase = build_equality_testcase( + 'LexerES5AllTestCase', partial(run_lexer, lexer_cls=partial( + Lexer, yield_comments=True + )), ((label, data[0], data[1],) for label, data in es5_all_cases)) + +LexerES2015TestCase = build_equality_testcase( + 'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), ( + (label, data[0], data[1],) for label, data in es2015_cases)) From cc224488bad3f20a730afc711db32184fe7c2aa8 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 22 Apr 2018 00:11:58 +1200 Subject: [PATCH 02/12] Provide actual support for template literals. - Implement the lookahead and properly check the newlines. 
- However, that whole escape sequence check might have to be done _inside_ the token formation as this is still going to fail valid input templates such as `foo \ bar` when it will reduce to effectively `foo bar`, rather than a syntax error. Likewise for string literals. --- src/calmjs/parse/lexers/es2015.py | 30 ++++++++++----------- src/calmjs/parse/tests/lexer.py | 29 ++++++++++++++++++++ src/calmjs/parse/tests/test_es2015_lexer.py | 2 +- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index cab33b6..a7da65c 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -14,16 +14,22 @@ class Lexer(ES5Lexer): """ # Punctuators (ES6) - t_ARROW = r'=>' - t_SPREAD = r'\.\.\.' - # this is now a right brace operator... + # t_DOLLAR_LBRACE = r'${' + # this is also a right brace punctuator... # t_RBRACE = r'}' + t_ARROW = r'=>' + t_SPREAD = r'\.\.\.' + + tokens = ES5Lexer.tokens + ( + # ES2015 punctuators + 'ARROW', 'SPREAD', # => ... + + # ES2015 terminal types + 'TEMPLATE', + ) - # TODO verify that the standard string rule will work. - # TODO complete the actual implementation to make this actually - # usable. template = r""" - (?:` # opening backquote + (?:(?:`|}) # opening ` or } (?: [^`\\] # not `, \; allow | \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # line continuation | \\[a-tvwyzA-TVWYZ!-\/:-@\[-`{-~] # escaped chars @@ -32,17 +38,9 @@ class Lexer(ES5Lexer): | \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # octal_escape_sequence | \\0 # (ECMA-262 6.0 21.2.2.11) )*? # zero or many times - `) # closing backquote + (?:`|\${)) # closing ` or ${ """ - tokens = ES5Lexer.tokens + ( - # ES2015 punctuators - 'ARROW', 'SPREAD', # => ... 
- - # ES2015 terminal types - 'TEMPLATE', - ) - @ply.lex.TOKEN(template) def t_TEMPLATE(self, token): # remove escape + new line sequence used for strings diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index c6b6616..0ab2c25 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -564,6 +564,35 @@ 'template_literal', ('`foo`', ['TEMPLATE `foo`']), + ), ( + 'template_multiline', + ('`foo\nbar\r\nfoo`', + ['TEMPLATE `foo\nbar\r\nfoo`']), + ), ( + 'template_other_newlines', + ('`foo\u2028\u2029foo`', + ['TEMPLATE `foo\u2028\u2029foo`']), + ), ( + 'template_literal_with_dollar', + ('`foo$`', + ['TEMPLATE `foo$`']), + ), ( + 'template_head_tail', + (r'`hello ${name} while this`', + ['TEMPLATE `hello ${', 'ID name', 'TEMPLATE } while this`']), + ), ( + 'template_empty_head_tail', + (r'`${name}`', + ['TEMPLATE `${', 'ID name', 'TEMPLATE }`']), + ), ( + 'template_nested', + (r'`${`${a * 2}`} ${b}`', + ['TEMPLATE `${', 'TEMPLATE `${', 'ID a', 'MULT *', 'NUMBER 2', + 'TEMPLATE }`', 'TEMPLATE } ${', 'ID b', 'TEMPLATE }`']), + ), ( + 'template_some_keywords', + (r'`this -> ${this}.`', + ['TEMPLATE `this -> ${', 'THIS this', 'TEMPLATE }.`']), ), ( 'template_literal_escape', (r'`f\`o`', diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py index aa1ed64..6fd915b 100644 --- a/src/calmjs/parse/tests/test_es2015_lexer.py +++ b/src/calmjs/parse/tests/test_es2015_lexer.py @@ -53,5 +53,5 @@ def test_initial_template_character(self): )), ((label, data[0], data[1],) for label, data in es5_all_cases)) LexerES2015TestCase = build_equality_testcase( - 'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), ( + 'LexerES2015TestCase', partial(run_lexer, lexer_cls=Lexer), ( (label, data[0], data[1],) for label, data in es2015_cases)) From cab2d68b8ffd5a8fa667a3c8f8bfda066278cf7a Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 22 Apr 2018 15:57:31 +1200 Subject: [PATCH 03/12] Provide 
actual template literal lexical components - Mark the generated tokens with the appropriate types. --- src/calmjs/parse/lexers/es2015.py | 22 ++++++++++--- src/calmjs/parse/tests/lexer.py | 34 ++++++++++++--------- src/calmjs/parse/tests/test_es2015_lexer.py | 4 +-- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index a7da65c..d355008 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -3,10 +3,22 @@ ES2015 (ECMAScript 6th Edition/ES6) lexer. """ +import re import ply from calmjs.parse.lexers.es5 import Lexer as ES5Lexer +template_token_types = ( + (re.compile(r'`.*`', re.S), + 'TEMPLATE_NOSUB'), + (re.compile(r'`.*\${', re.S), + 'TEMPLATE_HEAD'), + (re.compile(r'}.*\${', re.S), + 'TEMPLATE_MIDDLE'), + (re.compile(r'}.*`', re.S), + 'TEMPLATE_TAIL'), +) + class Lexer(ES5Lexer): """ @@ -25,7 +37,7 @@ class Lexer(ES5Lexer): 'ARROW', 'SPREAD', # => ... # ES2015 terminal types - 'TEMPLATE', + 'TEMPLATE_NOSUB', 'TEMPLATE_HEAD', 'TEMPLATE_MIDDLE', 'TEMPLATE_TAIL', ) template = r""" @@ -42,8 +54,8 @@ class Lexer(ES5Lexer): """ @ply.lex.TOKEN(template) - def t_TEMPLATE(self, token): - # remove escape + new line sequence used for strings - # written across multiple lines of code - token.value = token.value.replace('\\\n', '') + def t_TEMPLATE_RAW(self, token): + for patt, token_type in template_token_types: + if patt.match(token.value): + token.type = token_type return token diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index 0ab2c25..21469f8 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -551,56 +551,62 @@ ('const c', ['CONST const', 'ID c']), ), ( - 'arrow_punctuator', - ('=>', - ['ARROW =>']), + 'punctuators', + ('=> ...', + ['ARROW =>', 'SPREAD ...']), ), ( 'arrow_functions', ('const c = (name) => { return name; }', ['CONST const', 'ID c', 'EQ =', 'LPAREN (', 'ID name', 'RPAREN 
)', 'ARROW =>', 'LBRACE {', 'RETURN return', 'ID name', 'SEMI ;', 'RBRACE }']), + ), ( + 'spread', + ('[...spring, ...summer]', + ['LBRACKET [', 'SPREAD ...', 'ID spring', 'COMMA ,', 'SPREAD ...', + 'ID summer', 'RBRACKET ]']), ), ( 'template_literal', ('`foo`', - ['TEMPLATE `foo`']), + ['TEMPLATE_NOSUB `foo`']), ), ( 'template_multiline', ('`foo\nbar\r\nfoo`', - ['TEMPLATE `foo\nbar\r\nfoo`']), + ['TEMPLATE_NOSUB `foo\nbar\r\nfoo`']), ), ( 'template_other_newlines', ('`foo\u2028\u2029foo`', - ['TEMPLATE `foo\u2028\u2029foo`']), + ['TEMPLATE_NOSUB `foo\u2028\u2029foo`']), ), ( 'template_literal_with_dollar', ('`foo$`', - ['TEMPLATE `foo$`']), + ['TEMPLATE_NOSUB `foo$`']), ), ( 'template_head_tail', (r'`hello ${name} while this`', - ['TEMPLATE `hello ${', 'ID name', 'TEMPLATE } while this`']), + ['TEMPLATE_HEAD `hello ${', 'ID name', 'TEMPLATE_TAIL } while this`']), ), ( 'template_empty_head_tail', (r'`${name}`', - ['TEMPLATE `${', 'ID name', 'TEMPLATE }`']), + ['TEMPLATE_HEAD `${', 'ID name', 'TEMPLATE_TAIL }`']), ), ( 'template_nested', (r'`${`${a * 2}`} ${b}`', - ['TEMPLATE `${', 'TEMPLATE `${', 'ID a', 'MULT *', 'NUMBER 2', - 'TEMPLATE }`', 'TEMPLATE } ${', 'ID b', 'TEMPLATE }`']), + ['TEMPLATE_HEAD `${', 'TEMPLATE_HEAD `${', 'ID a', 'MULT *', + 'NUMBER 2', 'TEMPLATE_TAIL }`', 'TEMPLATE_MIDDLE } ${', 'ID b', + 'TEMPLATE_TAIL }`']), ), ( 'template_some_keywords', (r'`this -> ${this}.`', - ['TEMPLATE `this -> ${', 'THIS this', 'TEMPLATE }.`']), + ['TEMPLATE_HEAD `this -> ${', 'THIS this', 'TEMPLATE_TAIL }.`']), ), ( 'template_literal_escape', (r'`f\`o`', - [r'TEMPLATE `f\`o`']), + [r'TEMPLATE_NOSUB `f\`o`']), ), ( 'template_literal_assignment', ('s = `hello world`', - ['ID s', 'EQ =', 'TEMPLATE `hello world`']), + ['ID s', 'EQ =', 'TEMPLATE_NOSUB `hello world`']), ) ] diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py index 6fd915b..1a4f391 100644 --- a/src/calmjs/parse/tests/test_es2015_lexer.py +++ 
b/src/calmjs/parse/tests/test_es2015_lexer.py @@ -40,11 +40,11 @@ def test_initial_template_character(self): ) LexerES5TestCase = build_equality_testcase( - 'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), ( + 'LexerES5TestCase', partial(run_lexer, lexer_cls=Lexer), ( (label, data[0], data[1],) for label, data in es5_cases)) LexerES5PosTestCase = build_equality_testcase( - 'LexerPosTestCase', partial( + 'LexerES5PosTestCase', partial( run_lexer_pos, lexer_cls=Lexer), es5_pos_cases) LexerES5AllTestCase = build_equality_testcase( From b5ddb4752b47965765f2e2f53e2581eccfe5f31e Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 22 Apr 2018 15:58:59 +1200 Subject: [PATCH 04/12] Adding position checking tests --- src/calmjs/parse/tests/lexer.py | 37 +++++++++++++++++++++ src/calmjs/parse/tests/test_es2015_lexer.py | 5 +++ 2 files changed, 42 insertions(+) diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index 21469f8..f83c3e5 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -610,6 +610,43 @@ ) ] +es2015_pos_cases = [ + ( + 'single_line_template', + """ + var foo = `bar`; + """, ([ + 'var 1:0', 'foo 1:4', '= 1:8', '`bar` 1:10', '; 1:15' + ], [ + 'var 1:1', 'foo 1:5', '= 1:9', '`bar` 1:11', '; 1:16', + ]) + ), ( + 'multi_line', + """ + var foo = `bar + ${1}`; + """, ([ + 'var 1:0', 'foo 1:4', '= 1:8', '`bar\n${ 1:10', + '1 2:17', '}` 2:18', '; 2:20', + ], [ + 'var 1:1', 'foo 1:5', '= 1:9', '`bar\n${ 1:11', + '1 2:3', '}` 2:4', '; 2:6', + ]) + ), ( + 'multi_line_joined', + r""" + var foo = `bar\ + ${1}`; + """, ([ + 'var 1:0', 'foo 1:4', '= 1:8', '`bar\\\n${ 1:10', + '1 2:18', '}` 2:19', '; 2:21', + ], [ + 'var 1:1', 'foo 1:5', '= 1:9', '`bar\\\n${ 1:11', + '1 2:3', '}` 2:4', '; 2:6', + ]) + ) +] + def run_lexer(value, lexer_cls): lexer = lexer_cls() diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py index 1a4f391..6fbea12 100644 --- 
a/src/calmjs/parse/tests/test_es2015_lexer.py +++ b/src/calmjs/parse/tests/test_es2015_lexer.py @@ -13,6 +13,7 @@ es5_pos_cases, es5_all_cases, es2015_cases, + es2015_pos_cases, ) @@ -55,3 +56,7 @@ def test_initial_template_character(self): LexerES2015TestCase = build_equality_testcase( 'LexerES2015TestCase', partial(run_lexer, lexer_cls=Lexer), ( (label, data[0], data[1],) for label, data in es2015_cases)) + +LexerES2015PosTestCase = build_equality_testcase( + 'LexerES2015PosTestCase', partial( + run_lexer_pos, lexer_cls=Lexer), es2015_pos_cases) From 1f47b823b8be7c54b1e2050e2d002874a2634e23 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 5 May 2018 17:39:38 +1200 Subject: [PATCH 05/12] Add the LET keyword to lexer. --- src/calmjs/parse/lexers/es2015.py | 10 +++++++++- src/calmjs/parse/tests/lexer.py | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index d355008..06e1a47 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -20,6 +20,11 @@ ) +es2015_keywords = ( + 'LET', +) + + class Lexer(ES5Lexer): """ ES2015 lexer. @@ -32,7 +37,10 @@ class Lexer(ES5Lexer): t_ARROW = r'=>' t_SPREAD = r'\.\.\.' - tokens = ES5Lexer.tokens + ( + keywords = ES5Lexer.keywords + es2015_keywords + keywords_dict = dict((key.lower(), key) for key in keywords) + + tokens = ES5Lexer.tokens + es2015_keywords + ( # ES2015 punctuators 'ARROW', 'SPREAD', # => ... 
diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index f83c3e5..241bb37 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -550,6 +550,14 @@ 'const_keyword', ('const c', ['CONST const', 'ID c']), + ), ( + 'let_keyword', + ('let c', + ['LET let', 'ID c']), + ), ( + 'var_let_keyword', + ('var let = 1', + ['VAR var', 'LET let', 'EQ =', 'NUMBER 1']), ), ( 'punctuators', ('=> ...', From c8641038fc4437e5f6b3fcb0cbd34bf545ee6e43 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 10 Nov 2018 16:51:03 +1300 Subject: [PATCH 06/12] Add static keyword to lexer. --- src/calmjs/parse/lexers/es2015.py | 1 + src/calmjs/parse/tests/lexer.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index 06e1a47..66161ee 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -22,6 +22,7 @@ es2015_keywords = ( 'LET', + 'STATIC', ) diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index 241bb37..f9dd204 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -558,6 +558,15 @@ 'var_let_keyword', ('var let = 1', ['VAR var', 'LET let', 'EQ =', 'NUMBER 1']), + ), ( + 'static_keyword', + ('class Foo { static foo() {} }', + ['CLASS class', 'ID Foo', 'LBRACE {', 'STATIC static', 'ID foo', + 'LPAREN (', 'RPAREN )', 'LBRACE {', 'RBRACE }', 'RBRACE }']), + ), ( + 'var_static_keyword', + ('var static = 1', + ['VAR var', 'STATIC static', 'EQ =', 'NUMBER 1']), ), ( 'punctuators', ('=> ...', From 8b349e8ea401850fb619dd2c31633e04ff6f0927 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 10 Nov 2018 17:05:07 +1300 Subject: [PATCH 07/12] Yield keyword - Use an excess of extra tokens relevant to this particular syntax. 
--- src/calmjs/parse/lexers/es2015.py | 1 + src/calmjs/parse/tests/lexer.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index 66161ee..8558c18 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -23,6 +23,7 @@ es2015_keywords = ( 'LET', 'STATIC', + 'YIELD', ) diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index f9dd204..2f4a5a3 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -567,6 +567,11 @@ 'var_static_keyword', ('var static = 1', ['VAR var', 'STATIC static', 'EQ =', 'NUMBER 1']), + ), ( + 'yield_keyword', + ('function *gen() { yield 1 }', + ['FUNCTION function', 'MULT *', 'ID gen', 'LPAREN (', 'RPAREN )', + 'LBRACE {', 'YIELD yield', 'NUMBER 1', 'RBRACE }']), ), ( 'punctuators', ('=> ...', From d9c2e89b372f5ead42be5640008e943823ac511c Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 10 Nov 2018 17:43:57 +1300 Subject: [PATCH 08/12] Include binary and octal number literals. --- src/calmjs/parse/lexers/es2015.py | 21 +++++++++++++++++++++ src/calmjs/parse/tests/lexer.py | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index 8558c18..84038a4 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -50,6 +50,27 @@ class Lexer(ES5Lexer): 'TEMPLATE_NOSUB', 'TEMPLATE_HEAD', 'TEMPLATE_MIDDLE', 'TEMPLATE_TAIL', ) + t_NUMBER = r""" + (?: 0[bB][01]+ # binary_integer_literal + | 0[oO][0-7]+ # or octal_integer_literal + | 0[xX][0-9a-fA-F]+ # or hex_integer_literal + | 0[0-7]+ # or legacy_octal_integer_literal + | (?: # or decimal_literal + (?:0|[1-9][0-9]*) # decimal_integer_literal + \. # dot + [0-9]* # decimal_digits_opt + (?:[eE][+-]?[0-9]+)? # exponent_part_opt + | + \. # dot + [0-9]+ # decimal_digits + (?:[eE][+-]?[0-9]+)? 
# exponent_part_opt + | + (?:0|[1-9][0-9]*) # decimal_integer_literal + (?:[eE][+-]?[0-9]+)? # exponent_part_opt + ) + ) + """ + template = r""" (?:(?:`|}) # opening ` or } (?: [^`\\] # not `, \; allow diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index 2f4a5a3..b53eb02 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -572,6 +572,10 @@ ('function *gen() { yield 1 }', ['FUNCTION function', 'MULT *', 'ID gen', 'LPAREN (', 'RPAREN )', 'LBRACE {', 'YIELD yield', 'NUMBER 1', 'RBRACE }']), + ), ( + 'es2015_numbers', + (('0b1011 0B1101 0o755 0O644'), + ['NUMBER 0b1011', 'NUMBER 0B1101', 'NUMBER 0o755', 'NUMBER 0O644']), ), ( 'punctuators', ('=> ...', From 37c2b6a84519ce67f9479274504f2cce4271afc9 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Fri, 26 Jul 2019 15:22:10 -0700 Subject: [PATCH 09/12] Ensure errors in template literals also dealt with --- src/calmjs/parse/lexers/es2015.py | 83 +++++++++++++++++++++ src/calmjs/parse/tests/lexer.py | 46 ++++++++++++ src/calmjs/parse/tests/test_es2015_lexer.py | 9 ++- 3 files changed, 137 insertions(+), 1 deletion(-) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index 84038a4..6244e8b 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -5,7 +5,10 @@ import re import ply +from itertools import chain +from calmjs.parse.utils import repr_compat +from calmjs.parse.exceptions import ECMASyntaxError from calmjs.parse.lexers.es5 import Lexer as ES5Lexer template_token_types = ( @@ -26,12 +29,74 @@ 'YIELD', ) +PATT_BROKEN_TEMPLATE = re.compile(r""" +(?:(?:`|}) # opening ` or } + (?: [^`\\] # not `, \; allow + | \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # line continuation + | \\[a-tvwyzA-TVWYZ!-\/:-@\[-`{-~] # escaped chars + | \\x[0-9a-fA-F]{2} # hex_escape_sequence + | \\u[0-9a-fA-F]{4} # unicode_escape_sequence + | \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # octal_escape_sequence + | \\0 # (ECMA-262 6.0 
21.2.2.11) + )* # zero or many times +) # omit closing ` or ${ +""", flags=re.VERBOSE) + + +def broken_template_token_handler(lexer, token): + match = PATT_BROKEN_TEMPLATE.match(token.value) + if not match: + return + + # update the error token value to only include what was matched here + # as this will be the actual token that "failed" + token.value = match.group() + # calculate colno for current token colno before... + colno = lexer._get_colno(token) + # updating the newline indexes for the error reporting for raw + # lexpos + lexer._update_newline_idx(token) + # probe for the next values (which no valid rules will match) + position = lexer.lexer.lexpos + len(token.value) + failure = lexer.lexer.lexdata[position:position + 2] + if failure and failure[0] == '\\': + type_ = {'x': 'hexadecimal', 'u': 'unicode'}[failure[1]] + seq = re.match( + r'\\[xu][0-9-a-f-A-F]*', lexer.lexer.lexdata[position:] + ).group() + raise ECMASyntaxError( + "Invalid %s escape sequence '%s' at %s:%s" % ( + type_, seq, lexer.lineno, + lexer._get_colno_lexpos(position) + ) + ) + tl = 16 # truncate length + + if lexer.current_template_tokens: + # join all tokens together + tmpl = '...'.join( + t.value for t in chain(lexer.current_template_tokens, [token])) + lineno = lexer.current_template_tokens[0].lineno + colno = lexer.current_template_tokens[0].colno + else: + tmpl = token.value + lineno = token.lineno + + raise ECMASyntaxError('Unterminated template literal %s at %s:%s' % ( + repr_compat(tmpl[:tl].strip() + (tmpl[tl:] and '...')), lineno, colno)) + class Lexer(ES5Lexer): """ ES2015 lexer. """ + def __init__(self, with_comments=False, yield_comments=False): + super(Lexer, self).__init__( + with_comments=with_comments, yield_comments=yield_comments) + self.error_token_handlers.append(broken_template_token_handler) + self.current_template_tokens = [] + # Punctuators (ES6) # t_DOLLAR_LBRACE = r'${' # this is also a right brace punctuator... 
@@ -84,9 +149,27 @@ class Lexer(ES5Lexer): (?:`|\${)) # closing ` or ${ """ + RBRACE = r'}' + @ply.lex.TOKEN(template) def t_TEMPLATE_RAW(self, token): for patt, token_type in template_token_types: if patt.match(token.value): token.type = token_type + break + if token.type == 'TEMPLATE_HEAD': + self.current_template_tokens = [token] + elif token.type == 'TEMPLATE_MIDDLE': + self.current_template_tokens.append(token) + else: + self.current_template_tokens = [] + + return token + + @ply.lex.TOKEN(RBRACE) + def t_RBRACE(self, token): + if self.current_template_tokens: + self.lexer.lexpos = self.lexer.lexpos - 1 + token.value = self.lexer.lexdata[self.lexer.lexpos:] + broken_template_token_handler(self, token) return token diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index b53eb02..fdbadd7 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -673,6 +673,52 @@ ) ] +# various template related syntax errors +es2015_error_cases_tmpl = [ + ( + 'unterminated_template_eof', + "var foo = `test", + "Unterminated template literal '`test' at 1:11", + ), ( + 'unterminated_template_middle_eof', + "var foo = `${foo}bar${baz}fail", + # the specific identifiers are not tracked, thus ... + "Unterminated template literal '`${...}bar${...}...' 
at 1:11", + ), ( + 'invalid_hex_sequence', + "var foo = `fail\\x1`", + # backticks are converted to single quotes + "Invalid hexadecimal escape sequence '\\x1' at 1:16", + ), ( + 'invalid_unicode_sequence', + "var foo = `fail\\u12`", + "Invalid unicode escape sequence '\\u12' at 1:16", + ), ( + 'invalid_hex_sequence_multiline', + "var foo = `foobar\r\nfail\\x1`", + # backticks are converted to single quotes + "Invalid hexadecimal escape sequence '\\x1' at 2:5", + ), ( + 'invalid_unicode_sequence_multiline', + "var foo = `foobar\nfail\\u12`", + "Invalid unicode escape sequence '\\u12' at 2:5", + ), ( + 'invalid_hex_sequence_middle', + "var foo = `fail${wat}blah\\x1`", + # backticks are converted to single quotes + "Invalid hexadecimal escape sequence '\\x1' at 1:26", + ), ( + 'invalid_hex_sequence_middle_multiline', + "var foo = `foobar${lolwat}\r\nfailure${failure}wat\r\nwat\\x1`", + # backticks are converted to single quotes + "Invalid hexadecimal escape sequence '\\x1' at 3:4", + ), ( + 'long_invalid_template_truncated', + "var foo = `1234567890abcdetruncated", + "Unterminated template literal '`1234567890abcde...' 
at 1:11", + ) +] + def run_lexer(value, lexer_cls): lexer = lexer_cls() diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py index 6fbea12..3085ec8 100644 --- a/src/calmjs/parse/tests/test_es2015_lexer.py +++ b/src/calmjs/parse/tests/test_es2015_lexer.py @@ -6,6 +6,7 @@ from calmjs.parse.exceptions import ECMASyntaxError from calmjs.parse.testing.util import build_equality_testcase +from calmjs.parse.testing.util import build_exception_testcase from calmjs.parse.tests.lexer import ( run_lexer, run_lexer_pos, @@ -14,6 +15,7 @@ es5_all_cases, es2015_cases, es2015_pos_cases, + es2015_error_cases_tmpl, ) @@ -24,7 +26,8 @@ def test_initial_template_character(self): lexer.input('`') with self.assertRaises(ECMASyntaxError) as e: [token for token in lexer] - self.assertEqual(str(e.exception), "Illegal character '`' at 1:1") + self.assertEqual( + str(e.exception), "Unterminated template literal '`' at 1:1") LexerKeywordTestCase = build_equality_testcase( @@ -60,3 +63,7 @@ def test_initial_template_character(self): LexerES2015PosTestCase = build_equality_testcase( 'LexerES2015PosTestCase', partial( run_lexer_pos, lexer_cls=Lexer), es2015_pos_cases) + +LexerES2015ErrorTemplateTestCase = build_exception_testcase( + 'LexerES2015ErrorTemplateTestCase', partial( + run_lexer, lexer_cls=Lexer), es2015_error_cases_tmpl, ECMASyntaxError) From 055b5d87ffbabf40126ab12c4e8a491fce79cb70 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 27 Jul 2019 11:20:59 -0700 Subject: [PATCH 10/12] Initial cut of the ES2015 parser - This is basically identical to the ES5 parser at this point, with the references modified to reflect the ES2015 intent that this parser will handle. - Additional changes since the initial cut have been squashed into this commit to match the current implementation and tests as closely as possible, including: - Fix to disambiguation of DIV vs. REGEX lexer tokens. 
--- src/calmjs/parse/asttypes.py | 4 + src/calmjs/parse/parsers/es2015.py | 1463 ++++++++++++++++++ src/calmjs/parse/parsers/optimize.py | 7 +- src/calmjs/parse/tests/test_es2015_parser.py | 75 + 4 files changed, 1547 insertions(+), 2 deletions(-) create mode 100644 src/calmjs/parse/parsers/es2015.py create mode 100644 src/calmjs/parse/tests/test_es2015_parser.py diff --git a/src/calmjs/parse/asttypes.py b/src/calmjs/parse/asttypes.py index efc606a..2c73e41 100644 --- a/src/calmjs/parse/asttypes.py +++ b/src/calmjs/parse/asttypes.py @@ -157,6 +157,10 @@ class ES5Program(Program): pass +class ES2015Program(Program): + pass + + class Block(Node): pass diff --git a/src/calmjs/parse/parsers/es2015.py b/src/calmjs/parse/parsers/es2015.py new file mode 100644 index 0000000..1d49e3b --- /dev/null +++ b/src/calmjs/parse/parsers/es2015.py @@ -0,0 +1,1463 @@ +# -*- coding: utf-8 -*- +""" +ES2015 (ECMAScript Edition 6/ES6) parser. +""" + +from functools import partial + +import ply.yacc + +from calmjs.parse.exceptions import ECMASyntaxError +from calmjs.parse.exceptions import ProductionError +from calmjs.parse.lexers.tokens import AutoLexToken +from calmjs.parse.lexers.es2015 import Lexer +from calmjs.parse.factory import AstTypesFactory +# XXX this is still the es5 version +from calmjs.parse.unparsers.es5 import pretty_print +from calmjs.parse.walkers import ReprWalker +from calmjs.parse.utils import generate_tab_names +from calmjs.parse.utils import format_lex_token +from calmjs.parse.utils import str +from calmjs.parse.io import read as io_read + +asttypes = AstTypesFactory(pretty_print, ReprWalker()) + +# The default values for the `Parser` constructor, passed on to ply; they must +# be strings +lextab, yacctab = generate_tab_names(__name__) + + +class Parser(object): + """ECMAScript 2015 parser (ECMA-262 6th edition grammar). + + The '*noin' variants are needed to avoid confusing the `in` operator in + a relational expression with the `in` operator in a `for` statement. 
+ + '*nobf' stands for 'no brace or function' + + This is a stateful, low level parser. Please use the parse function + instead for general, higher level usage. + """ + + def __init__(self, lex_optimize=True, lextab=lextab, + yacc_optimize=True, yacctab=yacctab, yacc_debug=False, + yacc_tracking=True, with_comments=False, asttypes=asttypes): + # A warning: in order for line numbers and column numbers be + # tracked correctly, ``yacc_tracking`` MUST be turned ON. As + # this parser was initially implemented with a number of manual + # tracking features that was also added to the lexer, + # construction of the Node subclasses may require the calling of + # the `setpos` method with an index to the YaccProduction slice + # index that contain the tracked token. The indexes were + # generally determined with yacc_tracking OFF, through the + # manual tracking that got added, before turning it back ON for + # standard usage. + + self.lex_optimize = lex_optimize + self.lextab = lextab + self.yacc_optimize = yacc_optimize + self.yacctab = yacctab + self.yacc_debug = yacc_debug + self.yacc_tracking = yacc_tracking + + self.lexer = Lexer(with_comments=with_comments) + self.lexer.build(optimize=lex_optimize, lextab=lextab) + self.tokens = self.lexer.tokens + + self.parser = ply.yacc.yacc( + module=self, optimize=yacc_optimize, + debug=yacc_debug, tabmodule=yacctab, start='program') + + self.asttypes = asttypes + + def _raise_syntax_error(self, token): + tokens = [format_lex_token(t) for t in [ + self.lexer.valid_prev_token, + None if isinstance(token, AutoLexToken) else token, + self.lexer.token() + ] if t is not None] + msg = ( + 'Unexpected end of input', + 'Unexpected end of input after {0}', + 'Unexpected {1} after {0}', + 'Unexpected {1} between {0} and {2}', + ) + raise ECMASyntaxError(msg[len(tokens)].format(*tokens)) + + def parse(self, text, debug=False): + if not isinstance(text, str): + raise TypeError("'%s' argument expected, got '%s'" % ( + str.__name__, 
type(text).__name__)) + + try: + return self.parser.parse( + text, lexer=self.lexer, debug=debug, + tracking=self.yacc_tracking) + except ProductionError as e: + raise e.args[0] + + def p_empty(self, p): + """empty :""" + + def p_error(self, token): + next_token = self.lexer.auto_semi(token) + if next_token is not None: + self.parser.errok() + return next_token + # try to use the token in the actual lexer over the token that + # got passed in. + cur_token = self.lexer.cur_token or token + if (cur_token.type == 'DIV' and self.lexer.valid_prev_token.type in ( + 'RBRACE', 'PLUSPLUS', 'MINUSMINUS')): + # this is the most pathological case in JavaScript; given + # the usage of the LRParser there is no way to use the rules + # below to signal the specific "safe" cases, so we have to + # wait until such an error to occur for specific tokens and + # attempt to backtrack here + regex_token = self.lexer.backtracked_token(pos=1) + if regex_token.type == 'REGEX': + self.parser.errok() + return regex_token + self._raise_syntax_error(token) + + # Main rules + + def p_program(self, p): + """program : source_elements""" + p[0] = self.asttypes.ES2015Program(p[1]) + p[0].setpos(p) # require yacc_tracking + + def p_source_elements(self, p): + """source_elements : empty + | source_element_list + """ + p[0] = p[1] + + def p_source_element_list(self, p): + """source_element_list : source_element + | source_element_list source_element + """ + if len(p) == 2: # single source element + p[0] = [p[1]] + else: + p[1].append(p[2]) + p[0] = p[1] + + def p_source_element(self, p): + """source_element : statement + | function_declaration + """ + p[0] = p[1] + + def p_statement(self, p): + """statement : block + | variable_statement + | empty_statement + | expr_statement + | if_statement + | iteration_statement + | continue_statement + | break_statement + | return_statement + | with_statement + | switch_statement + | labelled_statement + | throw_statement + | try_statement + | debugger_statement + 
| function_declaration + """ + p[0] = p[1] + + # By having source_elements in the production we support + # also function_declaration inside blocks + def p_block(self, p): + """block : LBRACE source_elements RBRACE""" + p[0] = self.asttypes.Block(p[2]) + p[0].setpos(p) + + def p_literal(self, p): + """literal : null_literal + | boolean_literal + | numeric_literal + | string_literal + | regex_literal + """ + p[0] = p[1] + + def p_boolean_literal(self, p): + """boolean_literal : TRUE + | FALSE + """ + p[0] = self.asttypes.Boolean(p[1]) + p[0].setpos(p) + + def p_null_literal(self, p): + """null_literal : NULL""" + p[0] = self.asttypes.Null(p[1]) + p[0].setpos(p) + + def p_numeric_literal(self, p): + """numeric_literal : NUMBER""" + p[0] = self.asttypes.Number(p[1]) + p[0].setpos(p) + + def p_string_literal(self, p): + """string_literal : STRING""" + p[0] = self.asttypes.String(p[1]) + p[0].setpos(p) + + def p_regex_literal(self, p): + """regex_literal : REGEX""" + p[0] = self.asttypes.Regex(p[1]) + p[0].setpos(p) + + def p_identifier(self, p): + """identifier : ID""" + p[0] = self.asttypes.Identifier(p[1]) + p[0].setpos(p) + + # Because reserved words can be used as identifiers under certain + # conditions... 
+ def p_reserved_word(self, p): + """reserved_word : BREAK + | CASE + | CATCH + | CONTINUE + | DEBUGGER + | DEFAULT + | DELETE + | DO + | ELSE + | FINALLY + | FOR + | FUNCTION + | IF + | IN + | INSTANCEOF + | LET + | NEW + | RETURN + | STATIC + | SWITCH + | THIS + | THROW + | TRY + | TYPEOF + | VAR + | VOID + | WHILE + | WITH + | YIELD + | NULL + | TRUE + | FALSE + | CLASS + | CONST + | ENUM + | EXPORT + | EXTENDS + | IMPORT + | SUPER + """ + p[0] = self.asttypes.Identifier(p[1]) + p[0].setpos(p) + + def p_identifier_name(self, p): + """identifier_name : identifier + | reserved_word + """ + p[0] = p[1] + + ########################################### + # Expressions + ########################################### + def p_primary_expr(self, p): + """primary_expr : primary_expr_no_brace + | object_literal + """ + p[0] = p[1] + + def p_primary_expr_no_brace_1(self, p): + """primary_expr_no_brace : identifier""" + p[0] = p[1] + + def p_primary_expr_no_brace_2(self, p): + """primary_expr_no_brace : THIS""" + p[0] = self.asttypes.This() + p[0].setpos(p) + + def p_primary_expr_no_brace_3(self, p): + """primary_expr_no_brace : literal + | array_literal + """ + p[0] = p[1] + + def p_primary_expr_no_brace_4(self, p): + """primary_expr_no_brace : LPAREN expr RPAREN""" + if isinstance(p[2], self.asttypes.GroupingOp): + # this reduces the grouping operator to one. 
+ p[0] = p[2] + else: + p[0] = self.asttypes.GroupingOp(expr=p[2]) + p[0].setpos(p) + + def p_array_literal_1(self, p): + """array_literal : LBRACKET elision_opt RBRACKET""" + p[0] = self.asttypes.Array(items=p[2]) + p[0].setpos(p) + + def p_array_literal_2(self, p): + """array_literal : LBRACKET element_list RBRACKET + | LBRACKET element_list COMMA elision_opt RBRACKET + """ + items = p[2] + if len(p) == 6: + items.extend(p[4]) + p[0] = self.asttypes.Array(items=items) + p[0].setpos(p) + + def p_element_list(self, p): + """element_list : elision_opt assignment_expr + | element_list COMMA elision_opt assignment_expr + """ + if len(p) == 3: + p[0] = p[1] + [p[2]] + else: + p[1].extend(p[3]) + p[1].append(p[4]) + p[0] = p[1] + + def p_elision_opt_1(self, p): + """elision_opt : empty""" + p[0] = [] + + def p_elision_opt_2(self, p): + """elision_opt : elision""" + p[0] = p[1] + + def p_elision(self, p): + """elision : COMMA + | elision COMMA + """ + if len(p) == 2: + p[0] = [self.asttypes.Elision(1)] + p[0][0].setpos(p) + else: + # increment the Elision value. + p[1][-1].value += 1 + p[0] = p[1] + # TODO there should be a cleaner API for the lexer and their + # token types for ensuring that the mappings are available. 
+ p[0][0]._token_map = {(',' * p[0][0].value): [ + p[0][0].findpos(p, 0)]} + return + + # 12.2.6 object initializer + def p_object_literal(self, p): + """object_literal : LBRACE RBRACE + | LBRACE property_list RBRACE + | LBRACE property_list COMMA RBRACE + """ + if len(p) == 3: + p[0] = self.asttypes.Object() + else: + p[0] = self.asttypes.Object(properties=p[2]) + p[0].setpos(p) + + def p_property_list(self, p): + """property_list : property_assignment + | property_list COMMA property_assignment + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[3]) + p[0] = p[1] + + def p_property_assignment(self, p): + """property_assignment \ + : property_name COLON assignment_expr + | GETPROP property_name LPAREN RPAREN LBRACE function_body RBRACE + | SETPROP property_name LPAREN property_set_parameter_list RPAREN\ + LBRACE function_body RBRACE + """ + if len(p) == 4: + p[0] = self.asttypes.Assign(left=p[1], op=p[2], right=p[3]) + p[0].setpos(p, 2) + elif len(p) == 8: + p[0] = self.asttypes.GetPropAssign(prop_name=p[2], elements=p[6]) + p[0].setpos(p) + else: + p[0] = self.asttypes.SetPropAssign( + prop_name=p[2], parameter=p[4], elements=p[7]) + p[0].setpos(p) + + # For the evaluation of Object Initialiser as described in 12.2.6 + # and property accessors as described in 12.3.2, the IdentifierName + # is evaluated to a String value, thus they are not to be treated as + # standard Identifier types. In this case, it can be marked as a + # PropIdentifier to identify this specific case. + def p_identifier_name_string(self, p): + """identifier_name_string : identifier_name + """ + p[0] = asttypes.PropIdentifier(p[1].value) + # manually clone the position attributes. 
+ for k in ('_token_map', 'lexpos', 'lineno', 'colno'): + setattr(p[0], k, getattr(p[1], k)) + + # identifier_name_string ~= identifier_name + def p_property_name(self, p): + """property_name : identifier_name_string + | string_literal + | numeric_literal + """ + p[0] = p[1] + + def p_property_set_parameter_list(self, p): + """property_set_parameter_list : identifier + """ + p[0] = p[1] + + # 12.3 Left-Hand-Side Expressions + + # identifier_name_string ~= identifier_name, as specified in section + # 12.3.2; same for the further cases. + def p_member_expr(self, p): + """member_expr : primary_expr + | function_expr + | member_expr LBRACKET expr RBRACKET + | member_expr PERIOD identifier_name_string + | NEW member_expr arguments + """ + if len(p) == 2: + p[0] = p[1] + return + + if p[1] == 'new': + p[0] = self.asttypes.NewExpr(p[2], p[3]) + p[0].setpos(p) + elif p[2] == '.': + p[0] = self.asttypes.DotAccessor(p[1], p[3]) + p[0].setpos(p, 2) + else: + p[0] = self.asttypes.BracketAccessor(p[1], p[3]) + p[0].setpos(p, 2) + + def p_member_expr_nobf(self, p): + """member_expr_nobf : primary_expr_no_brace + | function_expr + | member_expr_nobf LBRACKET expr RBRACKET + | member_expr_nobf PERIOD identifier_name_string + | NEW member_expr arguments + """ + if len(p) == 2: + p[0] = p[1] + return + + if p[1] == 'new': + p[0] = self.asttypes.NewExpr(p[2], p[3]) + p[0].setpos(p, 1) + elif p[2] == '.': + p[0] = self.asttypes.DotAccessor(p[1], p[3]) + p[0].setpos(p, 2) + else: + p[0] = self.asttypes.BracketAccessor(p[1], p[3]) + p[0].setpos(p, 2) + + def p_new_expr(self, p): + """new_expr : member_expr + | NEW new_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.NewExpr(p[2]) + p[0].setpos(p) + + def p_new_expr_nobf(self, p): + """new_expr_nobf : member_expr_nobf + | NEW new_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.NewExpr(p[2]) + p[0].setpos(p) + + def p_call_expr(self, p): + """call_expr : member_expr arguments + | 
call_expr arguments + | call_expr LBRACKET expr RBRACKET + | call_expr PERIOD identifier_name_string + """ + if len(p) == 3: + p[0] = self.asttypes.FunctionCall(p[1], p[2]) + p[0].setpos(p) # require yacc_tracking + elif len(p) == 4: + p[0] = self.asttypes.DotAccessor(p[1], p[3]) + p[0].setpos(p, 2) + else: + p[0] = self.asttypes.BracketAccessor(p[1], p[3]) + p[0].setpos(p, 2) + + def p_call_expr_nobf(self, p): + """call_expr_nobf : member_expr_nobf arguments + | call_expr_nobf arguments + | call_expr_nobf LBRACKET expr RBRACKET + | call_expr_nobf PERIOD identifier_name_string + """ + if len(p) == 3: + p[0] = self.asttypes.FunctionCall(p[1], p[2]) + p[0].setpos(p) # require yacc_tracking + elif len(p) == 4: + p[0] = self.asttypes.DotAccessor(p[1], p[3]) + p[0].setpos(p, 2) + else: + p[0] = self.asttypes.BracketAccessor(p[1], p[3]) + p[0].setpos(p, 2) + + def p_arguments(self, p): + """arguments : LPAREN RPAREN + | LPAREN argument_list RPAREN + """ + if len(p) == 4: + p[0] = self.asttypes.Arguments(p[2]) + else: + p[0] = self.asttypes.Arguments([]) + p[0].setpos(p) + + def p_argument_list(self, p): + """argument_list : assignment_expr + | argument_list COMMA assignment_expr + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[3]) + p[0] = p[1] + + def p_lef_hand_side_expr(self, p): + """left_hand_side_expr : new_expr + | call_expr + """ + p[0] = p[1] + + def p_lef_hand_side_expr_nobf(self, p): + """left_hand_side_expr_nobf : new_expr_nobf + | call_expr_nobf + """ + p[0] = p[1] + + # 12.4 Postfix Expressions + def p_postfix_expr(self, p): + """postfix_expr : left_hand_side_expr + | left_hand_side_expr PLUSPLUS + | left_hand_side_expr MINUSMINUS + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.PostfixExpr(op=p[2], value=p[1]) + p[0].setpos(p, 2) + + def p_postfix_expr_nobf(self, p): + """postfix_expr_nobf : left_hand_side_expr_nobf + | left_hand_side_expr_nobf PLUSPLUS + | left_hand_side_expr_nobf MINUSMINUS + """ + if len(p) == 2: + 
p[0] = p[1] + else: + p[0] = self.asttypes.PostfixExpr(op=p[2], value=p[1]) + p[0].setpos(p, 2) + + # 12.5 Unary Operators + def p_unary_expr(self, p): + """unary_expr : postfix_expr + | unary_expr_common + """ + p[0] = p[1] + + def p_unary_expr_nobf(self, p): + """unary_expr_nobf : postfix_expr_nobf + | unary_expr_common + """ + p[0] = p[1] + + def p_unary_expr_common(self, p): + """unary_expr_common : DELETE unary_expr + | VOID unary_expr + | TYPEOF unary_expr + | PLUSPLUS unary_expr + | MINUSMINUS unary_expr + | PLUS unary_expr + | MINUS unary_expr + | BNOT unary_expr + | NOT unary_expr + """ + p[0] = self.asttypes.UnaryExpr(p[1], p[2]) + p[0].setpos(p) + + # 12.6 Multiplicative Operators + def p_multiplicative_expr(self, p): + """multiplicative_expr : unary_expr + | multiplicative_expr MULT unary_expr + | multiplicative_expr DIV unary_expr + | multiplicative_expr MOD unary_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_multiplicative_expr_nobf(self, p): + """multiplicative_expr_nobf : unary_expr_nobf + | multiplicative_expr_nobf MULT unary_expr + | multiplicative_expr_nobf DIV unary_expr + | multiplicative_expr_nobf MOD unary_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 12.7 Additive Operators + def p_additive_expr(self, p): + """additive_expr : multiplicative_expr + | additive_expr PLUS multiplicative_expr + | additive_expr MINUS multiplicative_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_additive_expr_nobf(self, p): + """additive_expr_nobf : multiplicative_expr_nobf + | additive_expr_nobf PLUS multiplicative_expr + | additive_expr_nobf MINUS multiplicative_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + 
p[0].setpos(p, 2) + + # 12.8 Bitwise Shift Operators + def p_shift_expr(self, p): + """shift_expr : additive_expr + | shift_expr LSHIFT additive_expr + | shift_expr RSHIFT additive_expr + | shift_expr URSHIFT additive_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_shift_expr_nobf(self, p): + """shift_expr_nobf : additive_expr_nobf + | shift_expr_nobf LSHIFT additive_expr + | shift_expr_nobf RSHIFT additive_expr + | shift_expr_nobf URSHIFT additive_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 12.9 Relational Operators + def p_relational_expr(self, p): + """relational_expr : shift_expr + | relational_expr LT shift_expr + | relational_expr GT shift_expr + | relational_expr LE shift_expr + | relational_expr GE shift_expr + | relational_expr INSTANCEOF shift_expr + | relational_expr IN shift_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_relational_expr_noin(self, p): + """relational_expr_noin : shift_expr + | relational_expr_noin LT shift_expr + | relational_expr_noin GT shift_expr + | relational_expr_noin LE shift_expr + | relational_expr_noin GE shift_expr + | relational_expr_noin INSTANCEOF shift_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_relational_expr_nobf(self, p): + """relational_expr_nobf : shift_expr_nobf + | relational_expr_nobf LT shift_expr + | relational_expr_nobf GT shift_expr + | relational_expr_nobf LE shift_expr + | relational_expr_nobf GE shift_expr + | relational_expr_nobf INSTANCEOF shift_expr + | relational_expr_nobf IN shift_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 12.10 
Equality Operators + def p_equality_expr(self, p): + """equality_expr : relational_expr + | equality_expr EQEQ relational_expr + | equality_expr NE relational_expr + | equality_expr STREQ relational_expr + | equality_expr STRNEQ relational_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_equality_expr_noin(self, p): + """equality_expr_noin : relational_expr_noin + | equality_expr_noin EQEQ relational_expr + | equality_expr_noin NE relational_expr + | equality_expr_noin STREQ relational_expr + | equality_expr_noin STRNEQ relational_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_equality_expr_nobf(self, p): + """equality_expr_nobf : relational_expr_nobf + | equality_expr_nobf EQEQ relational_expr + | equality_expr_nobf NE relational_expr + | equality_expr_nobf STREQ relational_expr + | equality_expr_nobf STRNEQ relational_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 12.11 Binary Bitwise Operators + def p_bitwise_and_expr(self, p): + """bitwise_and_expr : equality_expr + | bitwise_and_expr BAND equality_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_and_expr_noin(self, p): + """bitwise_and_expr_noin \ + : equality_expr_noin + | bitwise_and_expr_noin BAND equality_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_and_expr_nobf(self, p): + """bitwise_and_expr_nobf \ + : equality_expr_nobf + | bitwise_and_expr_nobf BAND equality_expr_nobf + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def 
p_bitwise_xor_expr(self, p): + """bitwise_xor_expr : bitwise_and_expr + | bitwise_xor_expr BXOR bitwise_and_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_xor_expr_noin(self, p): + """ + bitwise_xor_expr_noin \ + : bitwise_and_expr_noin + | bitwise_xor_expr_noin BXOR bitwise_and_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_xor_expr_nobf(self, p): + """ + bitwise_xor_expr_nobf \ + : bitwise_and_expr_nobf + | bitwise_xor_expr_nobf BXOR bitwise_and_expr_nobf + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_or_expr(self, p): + """bitwise_or_expr : bitwise_xor_expr + | bitwise_or_expr BOR bitwise_xor_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_or_expr_noin(self, p): + """ + bitwise_or_expr_noin \ + : bitwise_xor_expr_noin + | bitwise_or_expr_noin BOR bitwise_xor_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_bitwise_or_expr_nobf(self, p): + """ + bitwise_or_expr_nobf \ + : bitwise_xor_expr_nobf + | bitwise_or_expr_nobf BOR bitwise_xor_expr_nobf + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 12.12 Binary Logical Operators + def p_logical_and_expr(self, p): + """logical_and_expr : bitwise_or_expr + | logical_and_expr AND bitwise_or_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_logical_and_expr_noin(self, p): + """ + logical_and_expr_noin : bitwise_or_expr_noin + | logical_and_expr_noin 
AND bitwise_or_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_logical_and_expr_nobf(self, p): + """ + logical_and_expr_nobf : bitwise_or_expr_nobf + | logical_and_expr_nobf AND bitwise_or_expr_nobf + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_logical_or_expr(self, p): + """logical_or_expr : logical_and_expr + | logical_or_expr OR logical_and_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_logical_or_expr_noin(self, p): + """logical_or_expr_noin : logical_and_expr_noin + | logical_or_expr_noin OR logical_and_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_logical_or_expr_nobf(self, p): + """logical_or_expr_nobf : logical_and_expr_nobf + | logical_or_expr_nobf OR logical_and_expr_nobf + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.BinOp(op=p[2], left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 12.13 Conditional Operator ( ? 
: ) + def p_conditional_expr(self, p): + """ + conditional_expr \ + : logical_or_expr + | logical_or_expr CONDOP assignment_expr COLON assignment_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Conditional( + predicate=p[1], consequent=p[3], alternative=p[5]) + p[0].setpos(p, 2) + + def p_conditional_expr_noin(self, p): + """ + conditional_expr_noin \ + : logical_or_expr_noin + | logical_or_expr_noin CONDOP assignment_expr_noin COLON \ + assignment_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Conditional( + predicate=p[1], consequent=p[3], alternative=p[5]) + p[0].setpos(p, 2) + + def p_conditional_expr_nobf(self, p): + """ + conditional_expr_nobf \ + : logical_or_expr_nobf + | logical_or_expr_nobf CONDOP assignment_expr COLON assignment_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Conditional( + predicate=p[1], consequent=p[3], alternative=p[5]) + p[0].setpos(p, 2) + + # 12.14 Assignment Operators + def p_assignment_expr(self, p): + """ + assignment_expr \ + : conditional_expr + | left_hand_side_expr assignment_operator assignment_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Assign(left=p[1], op=p[2], right=p[3]) + p[0].setpos(p, 2) # require yacc_tracking + + def p_assignment_expr_noin(self, p): + """ + assignment_expr_noin \ + : conditional_expr_noin + | left_hand_side_expr assignment_operator assignment_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Assign(left=p[1], op=p[2], right=p[3]) + p[0].setpos(p, 2) # require yacc_tracking + + def p_assignment_expr_nobf(self, p): + """ + assignment_expr_nobf \ + : conditional_expr_nobf + | left_hand_side_expr_nobf assignment_operator assignment_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Assign(left=p[1], op=p[2], right=p[3]) + p[0].setpos(p, 2) # require yacc_tracking + + def p_assignment_operator(self, p): + """assignment_operator : EQ + | 
MULTEQUAL + | DIVEQUAL + | MODEQUAL + | PLUSEQUAL + | MINUSEQUAL + | LSHIFTEQUAL + | RSHIFTEQUAL + | URSHIFTEQUAL + | ANDEQUAL + | XOREQUAL + | OREQUAL + """ + p[0] = p[1] + + # 12.15 Comma Operator + def p_expr(self, p): + """expr : assignment_expr + | expr COMMA assignment_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Comma(left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_expr_noin(self, p): + """expr_noin : assignment_expr_noin + | expr_noin COMMA assignment_expr_noin + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Comma(left=p[1], right=p[3]) + p[0].setpos(p, 2) + + def p_expr_nobf(self, p): + """expr_nobf : assignment_expr_nobf + | expr_nobf COMMA assignment_expr + """ + if len(p) == 2: + p[0] = p[1] + else: + p[0] = self.asttypes.Comma(left=p[1], right=p[3]) + p[0].setpos(p, 2) + + # 13.3.2 Variable Statement + def p_variable_statement(self, p): + """variable_statement : VAR variable_declaration_list SEMI + | VAR variable_declaration_list AUTOSEMI + """ + p[0] = self.asttypes.VarStatement(p[2]) + p[0].setpos(p) + + def p_variable_declaration_list(self, p): + """ + variable_declaration_list \ + : variable_declaration + | variable_declaration_list COMMA variable_declaration + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[3]) + p[0] = p[1] + + def p_variable_declaration_list_noin(self, p): + """ + variable_declaration_list_noin \ + : variable_declaration_noin + | variable_declaration_list_noin COMMA variable_declaration_noin + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[3]) + p[0] = p[1] + + def p_variable_declaration(self, p): + """variable_declaration : identifier + | identifier initializer + """ + if len(p) == 2: + p[0] = self.asttypes.VarDecl(p[1]) + p[0].setpos(p) # require yacc_tracking + else: + p[0] = self.asttypes.VarDecl(p[1], p[2]) + p[0].setpos(p, additional=(('=', 2),)) # require yacc_tracking + + def p_variable_declaration_noin(self, p): + 
"""variable_declaration_noin : identifier + | identifier initializer_noin + """ + if len(p) == 2: + p[0] = self.asttypes.VarDecl(p[1]) + p[0].setpos(p) # require yacc_tracking + else: + p[0] = self.asttypes.VarDecl(p[1], p[2]) + p[0].setpos(p, additional=(('=', 2),)) # require yacc_tracking + + def p_initializer(self, p): + """initializer : EQ assignment_expr""" + p[0] = p[2] + + def p_initializer_noin(self, p): + """initializer_noin : EQ assignment_expr_noin""" + p[0] = p[2] + + # 13.4 Empty Statement + def p_empty_statement(self, p): + """empty_statement : SEMI""" + p[0] = self.asttypes.EmptyStatement(p[1]) + p[0].setpos(p) + + # 13.5 Expression Statement + def p_expr_statement(self, p): + """expr_statement : expr_nobf SEMI + | expr_nobf AUTOSEMI + """ + # In 13.5, expression statements cannot start with either the + # 'function' keyword or '{'. However, the lexing and production + # of the FuncExpr nodes can be done through further rules have + # been done, so flag this as an exception, but must be raised + # like so due to avoid the SyntaxError being flagged by ply and + # which would result in an infinite loop in this case. + + if isinstance(p[1], self.asttypes.FuncExpr): + _, line, col = p[1].getpos('(', 0) + raise ProductionError(ECMASyntaxError( + 'Function statement requires a name at %s:%s' % (line, col))) + + # The most bare 'block' rule is defined as part of 'statement' + # and there are no other bare rules that would result in the + # production of such like for 'function_expr'. 
+ + p[0] = self.asttypes.ExprStatement(p[1]) + p[0].setpos(p) # require yacc_tracking + + # 13.6 The if Statement + def p_if_statement_1(self, p): + """if_statement : IF LPAREN expr RPAREN statement""" + p[0] = self.asttypes.If(predicate=p[3], consequent=p[5]) + p[0].setpos(p) + + def p_if_statement_2(self, p): + """if_statement : IF LPAREN expr RPAREN statement ELSE statement""" + p[0] = self.asttypes.If( + predicate=p[3], consequent=p[5], alternative=p[7]) + p[0].setpos(p) + + # 13.7 Iteration Statements + def p_iteration_statement_1(self, p): + """ + iteration_statement \ + : DO statement WHILE LPAREN expr RPAREN SEMI + | DO statement WHILE LPAREN expr RPAREN AUTOSEMI + """ + p[0] = self.asttypes.DoWhile(predicate=p[5], statement=p[2]) + p[0].setpos(p) + + def p_iteration_statement_2(self, p): + """iteration_statement : WHILE LPAREN expr RPAREN statement""" + p[0] = self.asttypes.While(predicate=p[3], statement=p[5]) + p[0].setpos(p) + + def p_iteration_statement_3(self, p): + """ + iteration_statement \ + : FOR LPAREN expr_noin_opt SEMI expr_opt SEMI expr_opt RPAREN \ + statement + | FOR LPAREN VAR variable_declaration_list_noin SEMI expr_opt SEMI\ + expr_opt RPAREN statement + """ + def wrap(node, key): + if node is None: + # work around bug with yacc tracking of empty elements + # by using the previous token, and increment the + # positions + node = self.asttypes.EmptyStatement(';') + node.setpos(p, key - 1) + node.lexpos += 1 + node.colno += 1 + else: + node = self.asttypes.ExprStatement(expr=node) + node.setpos(p, key) + return node + + if len(p) == 10: + p[0] = self.asttypes.For( + init=wrap(p[3], 3), cond=wrap(p[5], 5), + count=p[7], statement=p[9]) + else: + init = self.asttypes.VarStatement(p[4]) + init.setpos(p, 3) + p[0] = self.asttypes.For( + init=init, cond=wrap(p[6], 6), count=p[8], statement=p[10]) + p[0].setpos(p) + + def p_iteration_statement_4(self, p): + """ + iteration_statement \ + : FOR LPAREN left_hand_side_expr IN expr RPAREN statement + 
""" + p[0] = self.asttypes.ForIn(item=p[3], iterable=p[5], statement=p[7]) + p[0].setpos(p) + + def p_iteration_statement_5(self, p): + """ + iteration_statement : \ + FOR LPAREN VAR identifier IN expr RPAREN statement + """ + vardecl = self.asttypes.VarDeclNoIn(identifier=p[4]) + vardecl.setpos(p, 3) + p[0] = self.asttypes.ForIn(item=vardecl, iterable=p[6], statement=p[8]) + p[0].setpos(p) + + def p_iteration_statement_6(self, p): + """ + iteration_statement \ + : FOR LPAREN VAR identifier initializer_noin IN expr RPAREN statement + """ + vardecl = self.asttypes.VarDeclNoIn( + identifier=p[4], initializer=p[5]) + vardecl.setpos(p, 3) + p[0] = self.asttypes.ForIn(item=vardecl, iterable=p[7], statement=p[9]) + p[0].setpos(p) + + def p_expr_opt(self, p): + """expr_opt : empty + | expr + """ + p[0] = p[1] + + def p_expr_noin_opt(self, p): + """expr_noin_opt : empty + | expr_noin + """ + p[0] = p[1] + + # 13.8 The continue Statement + def p_continue_statement_1(self, p): + """continue_statement : CONTINUE SEMI + | CONTINUE AUTOSEMI + """ + p[0] = self.asttypes.Continue() + p[0].setpos(p) + + def p_continue_statement_2(self, p): + """continue_statement : CONTINUE identifier SEMI + | CONTINUE identifier AUTOSEMI + """ + p[0] = self.asttypes.Continue(p[2]) + p[0].setpos(p) + + # 13.9 The break Statement + def p_break_statement_1(self, p): + """break_statement : BREAK SEMI + | BREAK AUTOSEMI + """ + p[0] = self.asttypes.Break() + p[0].setpos(p) + + def p_break_statement_2(self, p): + """break_statement : BREAK identifier SEMI + | BREAK identifier AUTOSEMI + """ + p[0] = self.asttypes.Break(p[2]) + p[0].setpos(p) + + # 13.10 The return Statement + def p_return_statement_1(self, p): + """return_statement : RETURN SEMI + | RETURN AUTOSEMI + """ + p[0] = self.asttypes.Return() + p[0].setpos(p) + + def p_return_statement_2(self, p): + """return_statement : RETURN expr SEMI + | RETURN expr AUTOSEMI + """ + p[0] = self.asttypes.Return(expr=p[2]) + p[0].setpos(p) + + # 13.11 The 
with Statement + def p_with_statement(self, p): + """with_statement : WITH LPAREN expr RPAREN statement""" + p[0] = self.asttypes.With(expr=p[3], statement=p[5]) + p[0].setpos(p) + + # 13.12 The switch Statement + def p_switch_statement(self, p): + """switch_statement : SWITCH LPAREN expr RPAREN case_block""" + # this uses a completely different type that corrects a + # subtly wrong interpretation of this construct. + # see: https://github.com/rspivak/slimit/issues/94 + p[0] = self.asttypes.Switch(expr=p[3], case_block=p[5]) + p[0].setpos(p) + return + + def p_case_block(self, p): + """ + case_block \ + : LBRACE case_clauses_opt RBRACE + | LBRACE case_clauses_opt default_clause case_clauses_opt RBRACE + """ + statements = [] + for s in p[2:-1]: + if isinstance(s, list): + for i in s: + statements.append(i) + elif isinstance(s, self.asttypes.Default): + statements.append(s) + p[0] = self.asttypes.CaseBlock(statements) + p[0].setpos(p) + + def p_case_clauses_opt(self, p): + """case_clauses_opt : empty + | case_clauses + """ + p[0] = p[1] + + def p_case_clauses(self, p): + """case_clauses : case_clause + | case_clauses case_clause + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[2]) + p[0] = p[1] + + def p_case_clause(self, p): + """case_clause : CASE expr COLON source_elements""" + p[0] = self.asttypes.Case(expr=p[2], elements=p[4]) + p[0].setpos(p) + + def p_default_clause(self, p): + """default_clause : DEFAULT COLON source_elements""" + p[0] = self.asttypes.Default(elements=p[3]) + p[0].setpos(p) + + # 13.13 Labelled Statements + def p_labelled_statement(self, p): + """labelled_statement : identifier COLON statement""" + p[0] = self.asttypes.Label(identifier=p[1], statement=p[3]) + p[0].setpos(p, 2) + + # 13.14 The throw Statement + def p_throw_statement(self, p): + """throw_statement : THROW expr SEMI + | THROW expr AUTOSEMI + """ + p[0] = self.asttypes.Throw(expr=p[2]) + p[0].setpos(p) + + # 13.15 The try Statement + def p_try_statement_1(self, 
p): + """try_statement : TRY block catch""" + p[0] = self.asttypes.Try(statements=p[2], catch=p[3]) + p[0].setpos(p) + + def p_try_statement_2(self, p): + """try_statement : TRY block finally""" + p[0] = self.asttypes.Try(statements=p[2], fin=p[3]) + p[0].setpos(p) + + def p_try_statement_3(self, p): + """try_statement : TRY block catch finally""" + p[0] = self.asttypes.Try(statements=p[2], catch=p[3], fin=p[4]) + p[0].setpos(p) + + def p_catch(self, p): + """catch : CATCH LPAREN identifier RPAREN block""" + p[0] = self.asttypes.Catch(identifier=p[3], elements=p[5]) + p[0].setpos(p) + + def p_finally(self, p): + """finally : FINALLY block""" + p[0] = self.asttypes.Finally(elements=p[2]) + p[0].setpos(p) + + # 13.16 The debugger statement + def p_debugger_statement(self, p): + """debugger_statement : DEBUGGER SEMI + | DEBUGGER AUTOSEMI + """ + p[0] = self.asttypes.Debugger(p[1]) + p[0].setpos(p) + + # 13 Function Definition + def p_function_declaration(self, p): + """ + function_declaration \ + : FUNCTION identifier LPAREN RPAREN LBRACE function_body RBRACE + | FUNCTION identifier LPAREN formal_parameter_list RPAREN LBRACE \ + function_body RBRACE + """ + if len(p) == 8: + p[0] = self.asttypes.FuncDecl( + identifier=p[2], parameters=None, elements=p[6]) + else: + p[0] = self.asttypes.FuncDecl( + identifier=p[2], parameters=p[4], elements=p[7]) + p[0].setpos(p) + + def p_function_expr_1(self, p): + """ + function_expr \ + : FUNCTION LPAREN RPAREN LBRACE function_body RBRACE + | FUNCTION LPAREN formal_parameter_list RPAREN \ + LBRACE function_body RBRACE + """ + if len(p) == 7: + p[0] = self.asttypes.FuncExpr( + identifier=None, parameters=None, elements=p[5]) + else: + p[0] = self.asttypes.FuncExpr( + identifier=None, parameters=p[3], elements=p[6]) + p[0].setpos(p) + + def p_function_expr_2(self, p): + """ + function_expr \ + : FUNCTION identifier LPAREN RPAREN LBRACE function_body RBRACE + | FUNCTION identifier LPAREN formal_parameter_list RPAREN \ + LBRACE 
function_body RBRACE + """ + if len(p) == 8: + p[0] = self.asttypes.FuncExpr( + identifier=p[2], parameters=None, elements=p[6]) + else: + p[0] = self.asttypes.FuncExpr( + identifier=p[2], parameters=p[4], elements=p[7]) + p[0].setpos(p) + + def p_formal_parameter_list(self, p): + """formal_parameter_list : identifier + | formal_parameter_list COMMA identifier + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[3]) + p[0] = p[1] + + def p_function_body(self, p): + """function_body : source_elements""" + p[0] = p[1] + + +def parse(source, with_comments=False): + """ + Return an AST from the input ES2015 source. + """ + + parser = Parser(with_comments=with_comments) + return parser.parse(source) + + +read = partial(io_read, parse) diff --git a/src/calmjs/parse/parsers/optimize.py b/src/calmjs/parse/parsers/optimize.py index af01282..76bc92e 100644 --- a/src/calmjs/parse/parsers/optimize.py +++ b/src/calmjs/parse/parsers/optimize.py @@ -16,7 +16,10 @@ from importlib import import_module # have to do this for every parser modules -from calmjs.parse.parsers import es5 +from calmjs.parse.parsers import ( + es5, + es2015, +) def purge_tabs(module): @@ -67,7 +70,7 @@ def reoptimize(module): def reoptimize_all(monkey_patch=False): if monkey_patch: lex.open = partial(codecs.open, encoding='utf8') - modules = (es5,) + modules = (es5, es2015,) for module in modules: reoptimize(module) diff --git a/src/calmjs/parse/tests/test_es2015_parser.py b/src/calmjs/parse/tests/test_es2015_parser.py new file mode 100644 index 0000000..3e51a0a --- /dev/null +++ b/src/calmjs/parse/tests/test_es2015_parser.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +# import textwrap +import unittest +from io import StringIO + +from calmjs.parse import asttypes +# from calmjs.parse.parsers.es2015 import Parser +from calmjs.parse.parsers.es2015 import parse +from calmjs.parse.parsers.es2015 import read +# from calmjs.parse.unparsers.es2015 import 
pretty_print +# from calmjs.parse.walkers import walk + +from calmjs.parse.tests.parser import ( + ParserCaseMixin, + build_node_repr_test_cases, + # build_asi_test_cases, + build_syntax_error_test_cases, + build_regex_syntax_error_test_cases, + build_comments_test_cases, +) + + +class ParserTestCase(unittest.TestCase, ParserCaseMixin): + + parse = staticmethod(parse) + + # XXX Commented until pretty_print is implemented + # def test_modify_tree(self): + # text = """ + # for (var i = 0; i < 10; i++) { + # var x = 5 + i; + # } + # """ + # parser = Parser() + # tree = parser.parse(text) + # for node in walk(tree): + # if isinstance(node, asttypes.Identifier) and node.value == 'i': + # node.value = 'hello' + # self.assertMultiLineEqual( + # str(tree), + # textwrap.dedent(""" + # for (var hello = 0; hello < 10; hello++) { + # var x = 5 + hello; + # } + # """).lstrip() + # ) + + def test_read(self): + stream = StringIO('var foo = "bar";') + node = read(stream) + self.assertTrue(isinstance(node, asttypes.ES2015Program)) + self.assertIsNone(node.sourcepath) + + stream.name = 'somefile.js' + node = read(stream) + self.assertEqual(node.sourcepath, 'somefile.js') + + +ParsedNodeTypeTestCase = build_node_repr_test_cases( + 'ParsedNodeTypeTestCase', parse, 'ES2015Program') + +# ASI - Automatic Semicolon Insertion +# ParserToECMAASITestCase = build_asi_test_cases( +# 'ParserToECMAASITestCase', parse, pretty_print) + +ECMASyntaxErrorsTestCase = build_syntax_error_test_cases( + 'ECMASyntaxErrorsTestCase', parse) + +ECMARegexSyntaxErrorsTestCase = build_regex_syntax_error_test_cases( + 'ECMARegexSyntaxErrorsTestCase', parse) + +ParsedNodeTypesWithCommentsTestCase = build_comments_test_cases( + 'ParsedNodeTypeWithCommentsTestCase', parse, 'ES2015Program') From eba7ddf1a40507e0b7a231dadda577e480c0cb90 Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sun, 28 Jul 2019 11:52:00 -0700 Subject: [PATCH 11/12] Initial implementation of TemplateLiteral rules - The parser can now parse template 
literals from tokens produced by the lexer and produce the ast with the new asttypes for templates. - Initial set of test cases included, however there are certain limitations which have been found that needed correction in the lexer itself. --- src/calmjs/parse/asttypes.py | 37 ++++ src/calmjs/parse/parsers/es2015.py | 58 +++++- src/calmjs/parse/tests/parser.py | 185 +++++++++++++++++++ src/calmjs/parse/tests/test_es2015_parser.py | 8 + 4 files changed, 287 insertions(+), 1 deletion(-) diff --git a/src/calmjs/parse/asttypes.py b/src/calmjs/parse/asttypes.py index 2c73e41..5332fc2 100644 --- a/src/calmjs/parse/asttypes.py +++ b/src/calmjs/parse/asttypes.py @@ -200,6 +200,43 @@ def __init__(self, value): self.value = value +class Template(Node): + """ + All template subclasses. + """ + + +class TemplateLiteral(Template): + """ + The top level template literal object + """ + + +class TemplateFragment(Template): + """ + All template fragments + """ + + def __init__(self, value): + self.value = value + + +class TemplateNoSub(TemplateFragment): + pass + + +class TemplateHead(TemplateFragment): + pass + + +class TemplateMiddle(TemplateFragment): + pass + + +class TemplateTail(TemplateFragment): + pass + + class Regex(Node): def __init__(self, value): self.value = value diff --git a/src/calmjs/parse/parsers/es2015.py b/src/calmjs/parse/parsers/es2015.py index 1d49e3b..7e42401 100644 --- a/src/calmjs/parse/parsers/es2015.py +++ b/src/calmjs/parse/parsers/es2015.py @@ -181,7 +181,6 @@ def p_literal(self, p): | boolean_literal | numeric_literal | string_literal - | regex_literal """ p[0] = p[1] @@ -212,6 +211,61 @@ def p_regex_literal(self, p): p[0] = self.asttypes.Regex(p[1]) p[0].setpos(p) + # 11.8.6 Template + def p_template_nosub(self, p): + """template_nosub : TEMPLATE_NOSUB""" + # no_sub_template is called as such here for consistency + p[0] = self.asttypes.TemplateNoSub(p[1]) + p[0].setpos(p) + + def p_template_head(self, p): + """template_head : TEMPLATE_HEAD""" + 
p[0] = self.asttypes.TemplateHead(p[1]) + p[0].setpos(p) + + def p_template_middle(self, p): + """template_middle : TEMPLATE_MIDDLE""" + p[0] = self.asttypes.TemplateMiddle(p[1]) + p[0].setpos(p) + + def p_template_tail(self, p): + """template_tail : TEMPLATE_TAIL""" + p[0] = self.asttypes.TemplateTail(p[1]) + p[0].setpos(p) + + def p_template_literal(self, p): + """template_literal : template_nosub + | template_head expr template_spans + """ + literals = [p[1]] + if len(p) > 2: + # append the expression and extend with template spans + literals.append(p[2]) + literals.extend(p[3]) + p[0] = self.asttypes.TemplateLiteral(literals) + p[0].setpos(p) + + def p_template_spans(self, p): + """template_spans : template_tail + | template_middle_list template_tail + """ + if len(p) == 2: + p[0] = [p[1]] + else: + p[1].append(p[2]) + p[0] = p[1] + + def p_template_middle_list(self, p): + """template_middle_list : template_middle expr + | template_middle_list template_middle \ + expr + """ + if len(p) == 3: + p[0] = [p[1], p[2]] + else: + p[1].extend([p[2], p[3]]) + p[0] = p[1] + def p_identifier(self, p): """identifier : ID""" p[0] = self.asttypes.Identifier(p[1]) @@ -290,6 +344,8 @@ def p_primary_expr_no_brace_2(self, p): def p_primary_expr_no_brace_3(self, p): """primary_expr_no_brace : literal | array_literal + | regex_literal + | template_literal """ p[0] = p[1] diff --git a/src/calmjs/parse/tests/parser.py b/src/calmjs/parse/tests/parser.py index 41d773b..f776e60 100644 --- a/src/calmjs/parse/tests/parser.py +++ b/src/calmjs/parse/tests/parser.py @@ -2729,6 +2729,175 @@ def regenerate(value): )])) +def build_es2015_node_repr_test_cases(clsname, parse, program_type): + + def parse_to_repr(value): + return repr_walker.walk(parse(value), pos=True) + + return build_equality_testcase(clsname, parse_to_repr, (( + label, + textwrap.dedent(argument).strip(), + singleline(result).replace(', + initializer= + ]> + > + ]> + ]> + """, + ), ( + 'template_literal_sub', + """ + var t = 
`some_template${value}tail` + """, + """ + , + initializer=, + , + + ]> + > + ]> + ]> + """, + ), ( + 'template_literal_sub_once', + """ + var t = `some_template${value}middle${value}tail` + """, + """ + , + initializer=, + , + , + , + + ]> + > + ]> + ]> + """, + ), ( + 'template_literal_sub_multiple', + """ + var t = `some_template${value}middle${value}another${tail}tail` + """, + """ + , + initializer=, + , + , + , + , + , + + ]> + > + ]> + ]> + """, + ), ( + 'template_multiline_between_expressions', + """ + t = `tt${ + s + s + }ttttt` + """, + """ + , + op='=', + right=, + , + op='+', right=>, + + ]> + >> + ]> + """, + ), ( + 'template_with_regex', + """ + value = `regex is ${/wat/}` + """, + """ + , + op='=', + right=, + , + + ]> + >> + ]> + """, + ), ( + 'template_with_string', + """ + value = `string is ${'wat'}` + """, + """ + , + op='=', + right=, + , + + ]> + >> + ]> + """, + ), ( + 'template_in_template', + """ + value = `template embed ${`another${`template`}`} inside` + """, + """ + , + op='=', + right=, + , + + ]>, + + ]>, + + ]> + >> + ]> + """, + )])) + + def build_syntax_error_test_cases(clsname, parse): return build_exception_testcase(clsname, parse, (( label, @@ -2801,6 +2970,22 @@ def build_syntax_error_test_cases(clsname, parse): )]), ECMASyntaxError) +def build_es2015_syntax_error_test_cases(clsname, parse): + return build_exception_testcase(clsname, parse, (( + label, + textwrap.dedent(argument).strip(), + msg, + ) for label, argument, msg in [( + 'unexpected_if_in_template', + '`${if (wat)}`', + "Unexpected 'if' at 1:4 between '`${' at 1:1 and '(' at 1:7", + ), ( + 'empty_expression_in_template', + '`head${}tail`', + "Unexpected '}tail`' at 1:8 after '`head${' at 1:1", + )]), ECMASyntaxError) + + def build_regex_syntax_error_test_cases(clsname, parse): return build_exception_testcase(clsname, parse, (( label, diff --git a/src/calmjs/parse/tests/test_es2015_parser.py b/src/calmjs/parse/tests/test_es2015_parser.py index 3e51a0a..b0fccf5 100644 
--- a/src/calmjs/parse/tests/test_es2015_parser.py +++ b/src/calmjs/parse/tests/test_es2015_parser.py @@ -15,8 +15,10 @@ from calmjs.parse.tests.parser import ( ParserCaseMixin, build_node_repr_test_cases, + build_es2015_node_repr_test_cases, # build_asi_test_cases, build_syntax_error_test_cases, + build_es2015_syntax_error_test_cases, build_regex_syntax_error_test_cases, build_comments_test_cases, ) @@ -61,6 +63,9 @@ def test_read(self): ParsedNodeTypeTestCase = build_node_repr_test_cases( 'ParsedNodeTypeTestCase', parse, 'ES2015Program') +ParsedES2015NodeTypeTestCase = build_es2015_node_repr_test_cases( + 'ParsedES2015NodeTypeTestCase', parse, 'ES2015Program') + # ASI - Automatic Semicolon Insertion # ParserToECMAASITestCase = build_asi_test_cases( # 'ParserToECMAASITestCase', parse, pretty_print) @@ -68,6 +73,9 @@ def test_read(self): ECMASyntaxErrorsTestCase = build_syntax_error_test_cases( 'ECMASyntaxErrorsTestCase', parse) +ECMA2015SyntaxErrorsTestCase = build_es2015_syntax_error_test_cases( + 'ECMA2015SyntaxErrorsTestCase', parse) + ECMARegexSyntaxErrorsTestCase = build_regex_syntax_error_test_cases( 'ECMARegexSyntaxErrorsTestCase', parse) From a4bb2c64c6b89fc42ee71f6cde3e9ef7ec9c3bab Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Mon, 19 Aug 2019 13:45:01 +1200 Subject: [PATCH 12/12] Correct tracking of nested template states - This must be done as templates can nest templates - Not to mention the fact that objects can be provided as values. Given they share the RBRACE symbol, there needs to be a way to disambiguate that symbol for objects and the opening of the template middle/tail fragments. 
--- src/calmjs/parse/lexers/es2015.py | 45 ++++++-- src/calmjs/parse/tests/lexer.py | 21 ++++ src/calmjs/parse/tests/parser.py | 116 ++++++++++++++++++++ src/calmjs/parse/tests/test_es2015_lexer.py | 8 ++ 4 files changed, 182 insertions(+), 8 deletions(-) diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py index 6244e8b..f01d7e1 100644 --- a/src/calmjs/parse/lexers/es2015.py +++ b/src/calmjs/parse/lexers/es2015.py @@ -3,6 +3,8 @@ ES2015 (ECMAScript 6th Edition/ES6) lexer. """ +from __future__ import unicode_literals + import re import ply from itertools import chain @@ -75,9 +77,9 @@ def broken_template_token_handler(lexer, token): if lexer.current_template_tokens: # join all tokens together tmpl = '...'.join( - t.value for t in chain(lexer.current_template_tokens, [token])) - lineno = lexer.current_template_tokens[0].lineno - colno = lexer.current_template_tokens[0].colno + t.value for t in chain(lexer.current_template_tokens[-1], [token])) + lineno = lexer.current_template_tokens[-1][0].lineno + colno = lexer.current_template_tokens[-1][0].colno else: tmpl = token.value lineno = token.lineno @@ -96,6 +98,7 @@ def __init__(self, with_comments=False, yield_comments=False): with_comments=with_comments, yield_comments=yield_comments) self.error_token_handlers.append(broken_template_token_handler) self.current_template_tokens = [] + self.current_template_tokens_braces = [] # Punctuators (ES6) # t_DOLLAR_LBRACE = r'${' @@ -149,6 +152,7 @@ def __init__(self, with_comments=False, yield_comments=False): (?:`|\${)) # closing ` or ${ """ + LBRACE = r'{' RBRACE = r'}' @ply.lex.TOKEN(template) @@ -157,13 +161,38 @@ def t_TEMPLATE_RAW(self, token): if patt.match(token.value): token.type = token_type break - if token.type == 'TEMPLATE_HEAD': - self.current_template_tokens = [token] - elif token.type == 'TEMPLATE_MIDDLE': - self.current_template_tokens.append(token) else: - self.current_template_tokens = [] + raise ValueError("invalid token %r" % 
token) + + if token.type == 'TEMPLATE_HEAD': + self.current_template_tokens.append([token]) + self.current_template_tokens_braces.append(0) + return token + elif token.type == 'TEMPLATE_NOSUB': + return token + + if not self.current_template_tokens_braces: + raise ECMASyntaxError('Unexpected %s at %s:%s' % ( + repr_compat('}'), token.lineno, self._get_colno(token))) + if self.current_template_tokens_braces[-1] > 0: + # produce a RBRACE token instead + self.current_template_tokens_braces[-1] -= 1 + self.lexer.lexpos = self.lexer.lexpos - len(token.value) + 1 + token.value = token.value[0] + token.type = 'RBRACE' + return token + + if token.type == 'TEMPLATE_MIDDLE': + self.current_template_tokens[-1].append(token) + elif token.type == 'TEMPLATE_TAIL': + self.current_template_tokens_braces.pop() + self.current_template_tokens.pop() + return token + @ply.lex.TOKEN(LBRACE) + def t_LBRACE(self, token): + if self.current_template_tokens_braces: + self.current_template_tokens_braces[-1] += 1 return token @ply.lex.TOKEN(RBRACE) diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py index fdbadd7..23028cb 100644 --- a/src/calmjs/parse/tests/lexer.py +++ b/src/calmjs/parse/tests/lexer.py @@ -629,6 +629,18 @@ 'template_literal_escape', (r'`f\`o`', [r'TEMPLATE_NOSUB `f\`o`']), + ), ( + 'template_middle_with_object', + ('`object${{1:1}} ${foo}`', + ['TEMPLATE_HEAD `object${', + 'LBRACE {', 'NUMBER 1', 'COLON :', 'NUMBER 1', 'RBRACE }', + 'TEMPLATE_MIDDLE } ${', 'ID foo', 'TEMPLATE_TAIL }`']), + ), ( + 'template_tail_with_object', + ('`object${{1:1}}`', + ['TEMPLATE_HEAD `object${', + 'LBRACE {', 'NUMBER 1', 'COLON :', 'NUMBER 1', 'RBRACE }', + 'TEMPLATE_TAIL }`']), ), ( 'template_literal_assignment', ('s = `hello world`', @@ -684,6 +696,15 @@ "var foo = `${foo}bar${baz}fail", # the specific identifiers are not tracked, thus ... "Unterminated template literal '`${...}bar${...}...' 
at 1:11", + ), ( + 'unterminated_template_nested', + "var foo = `${`${foo}bar${baz}fail`}", + # the specific identifiers are not tracked, thus ... + "Unterminated template literal '`${...}' at 1:11", + ), ( + 'unexpected_template_tail', + "var foo = `${value}`}`", + "Unexpected '}' at 1:21", ), ( 'invalid_hex_sequence', "var foo = `fail\\x1`", diff --git a/src/calmjs/parse/tests/parser.py b/src/calmjs/parse/tests/parser.py index f776e60..477796b 100644 --- a/src/calmjs/parse/tests/parser.py +++ b/src/calmjs/parse/tests/parser.py @@ -2871,6 +2871,24 @@ def parse_to_repr(value): >> ]> """, + ), ( + 'template_with_many_rbrace', + """ + value = `string is ${wat}}}}` + """, + """ + , + op='=', + right=, + , + + ]> + >> + ]> + """, ), ( 'template_in_template', """ @@ -2895,6 +2913,88 @@ def parse_to_repr(value): >> ]> """, + ), ( + 'template_tail_with_object', + """ + value = `object${{1:1}}}` + """, + """ + , op='=', + right=, + , + op=':', + right=> + ]>, + + ]> + >> + ]> + """, + ), ( + 'template_middle_with_object', + """ + value = `object${{1:1}}middle${tail}` + """, + """ + , + op='=', right=, + , + op=':', right=> + ]>, + , + , + + ]> + >> + ]> + """, + ), ( + 'template_with_object_with_template_with_object', + """ + value = `object + ${{1:`${{ + 2:`${{3:3}}` + }}`}} + ` + """, + r""" + , + op='=', right=, + , op=':', + right=, + , op=':', + right=, + , + op=':', right=> + ]>, + + ]>> + ]>, + + ]> + > + ]>, + + ]> + >> + ]> + """, )])) @@ -2983,6 +3083,22 @@ def build_es2015_syntax_error_test_cases(clsname, parse): 'empty_expression_in_template', '`head${}tail`', "Unexpected '}tail`' at 1:8 after '`head${' at 1:1", + ), ( + 'mismatched_template_termination_eof', + "var foo = `${`${foo}bar${baz}fail`", + "Unexpected end of input after '}fail`' at 1:29", + ), ( + 'mismatched_template_termination', + "var foo = `head${`${foo}bar${baz}fail`}", + "Unterminated template literal '`head${...}' at 1:11", + ), ( + 'unexpected_block', + "var foo = `${{11}}`", + "Unexpected 
'}' at 1:17 after '11' at 1:15", + ), ( + 'object_no_template_keys', + "var foo = {`foo`: `foo`}", + "Unexpected '`foo`' at 1:12 between '{' at 1:11 and ':' at 1:17", )]), ECMASyntaxError) diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py index 3085ec8..c5d7a76 100644 --- a/src/calmjs/parse/tests/test_es2015_lexer.py +++ b/src/calmjs/parse/tests/test_es2015_lexer.py @@ -29,6 +29,14 @@ def test_initial_template_character(self): self.assertEqual( str(e.exception), "Unterminated template literal '`' at 1:1") + def test_invalid_template_token(self): + lexer = Lexer() + lexer.input('""') + token = lexer.next() + # force an invalid token into the statement + with self.assertRaises(ValueError): + lexer.t_TEMPLATE_RAW(token) + LexerKeywordTestCase = build_equality_testcase( 'LexerTestCase', partial(run_lexer, lexer_cls=Lexer), (