3 # This module is needed to run generated parsers.
class SyntaxError(Exception):
    """Raised when the scanner or parser runs into an unexpected token.

    The class keeps its historical name, so inside this module it shadows
    the built-in ``SyntaxError``.

    Attributes:
        pos: Character offset of the error in the input; a negative value
            means the position is unknown.
        msg: Human-readable description of the problem.
    """

    def __init__(self, pos=-1, msg="Bad Token"):
        self.pos = pos
        self.msg = msg

    def __repr__(self):
        """Return a short description that includes the position if known."""
        if self.pos < 0:
            return "#<syntax-error>"
        # %-formatting instead of backticks and "+": backticks are not valid
        # syntax on Python 3, and "+" fails when msg is not a string.
        return "SyntaxError[@ char %r: %s]" % (self.pos, self.msg)
class NoMoreTokens(Exception):
    """Signals that the token stream has been exhausted."""
class Scanner:
    """Regex-driven tokenizer that scans its input lazily, one token at a time.

    Each scanned token is a ``(start, end, terminal, text)`` tuple stored in
    ``self.tokens``; ``self.restrictions[i]`` holds the restriction that was
    in force when ``self.tokens[i]`` was scanned.
    """

    def __init__(self, patterns, ignore, input):
        """Set up a scanner over *input*.

        Args:
            patterns: ``[(terminal, regex), ...]`` with regexes given as
                source strings, or ``None`` when ``self.patterns`` is
                already provided (for example as a class attribute).
            ignore: ``[terminal, ...]``; matches of these terminals are
                skipped instead of being returned as tokens.
            input: The string to tokenize.
        """
        self.tokens = []
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        # The stored patterns are (terminal, compiled regex) pairs.  If the
        # patterns argument is None, we assume that a proper .patterns list
        # of such pairs already exists on the class or instance.
        if patterns is not None:
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def token(self, i, restrict=0):
        """Return the i'th token, scanning one more token if needed.

        Args:
            i: Index of the wanted token.  If it is one past the last
                scanned token, another token is scanned first.
            restrict: Collection of terminals that are allowed, or 0 (any
                false value) to allow every terminal.

        Raises:
            NotImplementedError: If token *i* was scanned under a
                restriction and is now requested with a terminal outside
                of that restriction.
            NoMoreTokens: If token *i* is not available after scanning.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the new restriction is at least as narrow as the
            # one the token was originally scanned with.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        # A real exception class: raising a plain string
                        # is itself a TypeError on Python 2.6+ and 3.
                        raise NotImplementedError(
                            "Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def __repr__(self):
        """Return a listing of the last 10 tokens that have been scanned."""
        return ''.join('\n (@%s) %s = %s' % (t[0], t[2], repr(t[3]))
                       for t in self.tokens[-10:])

    def scan(self, restrict):
        """Scan one more token and record it together with its restriction.

        The token is appended to ``self.tokens`` and *restrict* to
        ``self.restrictions``.  Matches of ignored terminals are skipped.

        NOTE: an ignored pattern that matches the empty string never
        advances ``self.pos``, so this loop would not terminate.

        Raises:
            SyntaxError: If no candidate pattern matches at ``self.pos``.
        """
        # Keep looking for a token, skipping any that are in self.ignore.
        while True:
            # Search the patterns for the longest match.  Only a strictly
            # longer match replaces the current best, so on a tie the
            # pattern that comes earlier in the list wins.
            best_match = -1
            best_pat = '(error)'
            for p, regexp in self.patterns:
                # Under a restriction, only terminals that are allowed or
                # ignorable are candidates.
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error.
            if best_pat == '(error)' and best_match < 0:
                msg = "Bad Token"
                if restrict:
                    msg = "Trying to find one of " + ", ".join(restrict)
                raise SyntaxError(self.pos, msg)

            if best_pat in self.ignore:
                # This token should be ignored: step over it and rescan.
                self.pos = self.pos + best_match
                continue

            # We found something that isn't to be ignored.
            token = (self.pos, self.pos + best_match, best_pat,
                     self.input[self.pos:self.pos + best_match])
            self.pos = self.pos + best_match
            # Only add this token if it differs from the last one in the
            # list (to prevent looping).
            if not self.tokens or token != self.tokens[-1]:
                self.tokens.append(token)
                self.restrictions.append(restrict)
            return
class Parser:
    """Walks the token stream of a scanner, one token at a time.

    Presumably the base class of generated parsers -- confirm against the
    generator's output.
    """

    def __init__(self, scanner):
        """Start at the first token produced by *scanner*."""
        self._pos = 0
        self._scanner = scanner

    def _peek(self, *types):
        """Return the type of the lookahead token without consuming it.

        Any arguments given are passed to the scanner as the set of token
        types that are allowed at this position.
        """
        return self._scanner.token(self._pos, types)[2]

    def _scan(self, type):
        """Consume the next token, which must be of *type*; return its text.

        Raises:
            SyntaxError: If the next token is of a different type.
        """
        found = self._scanner.token(self._pos, [type])
        if found[2] == type:
            self._pos += 1
            return found[3]
        raise SyntaxError(found[0], 'Trying to find ' + type)
def print_error(input, err, scanner):
    """Print a syntax error together with the offending line and a caret.

    Args:
        input: The complete text that was being parsed.
        err: The error to report; its ``pos`` (character offset into
            *input*) and ``msg`` attributes are used.
        scanner: Printed after the excerpt as the list of nearby tokens.
    """
    p = err.pos
    # Figure out the line number
    line = input[:p].count('\n')
    print(err.msg + " on line " + repr(line + 1) + ":")

    # Work on a window of at most 80 characters on each side of the error
    # and make p relative to that window.
    start = max(p - 80, 0)
    text = input[start:p + 80]
    p = p - start

    # Strip to the left: cut just after the line break *nearest* to the
    # error, i.e. the larger of the two indices.  Taking the smaller one
    # would leave the '\n' of a '\r\n' pair in the excerpt and misplace
    # the caret.
    head = text[:p]
    i = max(head.rfind('\n'), head.rfind('\r'))
    if 0 <= i < p:
        p = p - i - 1
        text = text[i + 1:]

    # Strip to the right: cut at the first line break at or after p,
    # i.e. the smaller of the two indices that were found.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or 0 <= j < i:
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text: every round drops 10 leading characters and
    # puts a 3-character ellipsis in front, moving the caret 7 columns left.
    while len(text) > 70 and p > 60:
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator.  Each call passes a
    # single string so that it behaves the same as a print statement and
    # as a print function.
    print('>  ' + text)
    print('>  ' + ' ' * p + '^')
    print('List of nearby tokens: %s' % (scanner,))
def wrap_error_reporter(parser, rule):
    """Run one parsing rule and print scanner/parser errors instead of raising.

    Args:
        parser: Object whose method named *rule* is called without
            arguments; its ``_scanner`` attribute is used for reporting.
        rule: Name of the method to call on *parser*.

    Returns:
        Whatever the rule returns, or ``None`` if a ``SyntaxError`` or
        ``NoMoreTokens`` was reported.
    """
    try:
        return getattr(parser, rule)()
    except SyntaxError as s:
        source = parser._scanner.input
        try:
            print_error(source, s, parser._scanner)
        # NOTE(review): the exception type guarding print_error is not
        # visible in this listing; ImportError is assumed -- confirm
        # against the complete file.
        except ImportError:
            # Fall back to a one-line report without the excerpt.
            print('Syntax Error %s on line %s'
                  % (s.msg, 1 + source[:s.pos].count('\n')))
    except NoMoreTokens:
        print('Could not complete parsing; stopped around here:')
        print(parser._scanner)