-
Notifications
You must be signed in to change notification settings - Fork 104
Open
Description
It would be good for array assignments to be flagged as unimplemented when the new proceedonerror flag is enabled. This way, a complete AST can still be generated.
Currently, array assignment leads to a parsing error:
$ snippet='num=2 arr=(1 2 3)'
$ python -c "import bashlex; print(''.join(p.dump() for p in bashlex.parse('$snippet', proceedonerror=0)))"
Traceback (most recent call last):
...
File "/usr/local/misc/programs/python/bashlex/bashlex/parser.py", line 587, in p_error
raise errors.ParsingError('unexpected token %r' % p.value,
bashlex.errors.ParsingError: unexpected token '(' (position 10)
It would be better to add an unimplemented node to the AST:
$ python -c "import bashlex; print(''.join(p.dump() for p in bashlex.parse('$snippet', proceedonerror=1)))"
CommandNode(pos=(0, 17), parts=[
AssignmentNode(pos=(0, 5), word='num=2'),
UnimplementedNode(pos=(6, 17), word='arr=(1 2 3)'),
])
This can be implemented as follows (see attachment for complete diff):
--- a/bashlex/flags.py
+++ b/bashlex/flags.py
@@ -52,4 +52,5 @@ word = enum.Enum('wordflags', [
+ 'UNIMPLEMENTED', # word uses unimplemented feature (e.g., array)
--- a/bashlex/parser.py
+++ b/bashlex/parser.py
@@ -173,6 +173,8 @@ def p_simple_command_element(p):
+ if (p.slice[1].flags & flags.word.UNIMPLEMENTED):
+ p[0][0].kind = 'unimplemented'
@@ -720,6 +722,7 @@ class _parser(object):
+ proceedonerror=proceedonerror,
--- a/bashlex/tokenizer.py
+++ b/bashlex/tokenizer.py
@@ -199,7 +199,8 @@ eoftoken = token(tokentype.EOF, None)
- lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
+ lastreadtoken=None, tokenbeforethat=None, twotokensago=None,
+ proceedonerror=None):
@@ -232,6 +233,7 @@ class tokenizer(object):
+ self._proceedonerror = proceedonerror
@@ -391,7 +393,7 @@ class tokenizer(object):
- d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False
+ d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = d['unimplemented'] = False
@@ -467,6 +469,19 @@ class tokenizer(object):
+ def handlecompoundassignment():
+ # note: only finds matching parenthesis, so parsing can proceed
+ handled = False
+ if self._proceedonerror:
+ ttok = self._parse_matched_pair(None, '(', ')')
+ if ttok:
+ tokenword.append(c)
+ tokenword.extend(ttok)
+ d['compound_assignment'] = True
+ d['unimplemented'] = True
+ handled = True
+ return handled
+
@@ -512,6 +527,8 @@ class tokenizer(object):
+ elif c == '(' and handlecompoundassignment():
+ gotonext = True
@@ -573,7 +590,7 @@ class tokenizer(object):
- if d['compound_assignment'] and tokenword[-1] == ')':
+ if d['compound_assignment'] and tokenword.value[-1] == ')':
@@ -581,6 +598,10 @@ class tokenizer(object):
+ if d['compound_assignment']:
+ tokenword.flags.add(wordflags.ASSIGNARRAY)
+ if d['unimplemented']:
+ tokenword.flags.add(wordflags.UNIMPLEMENTED)
unimplemented-array-node-diff.txt
I can work this into a pull request if desired. I wasn't quite sure of the best way to handle the flags, so suggestions would be welcome. For example, I was going to use parser flags, but they seemed more related to internal parser state than to a final node attribute.
Metadata
Metadata
Assignees
Labels
No labels