2 // System.Xml.XPath.Tokenizer.cs / Mono.Xml.Xsl/PatternTokenizer.cs
5 // Piers Haken (piersh@friskit.com)
6 // Atsushi Enomoto (atsushi@ximian.com)
8 // (C) 2002 Piers Haken
9 // (C) 2005 Novell Inc,
13 // Do not edit PatternTokenizer.cs. It is autogenerated.
17 // Permission is hereby granted, free of charge, to any person obtaining
18 // a copy of this software and associated documentation files (the
19 // "Software"), to deal in the Software without restriction, including
20 // without limitation the rights to use, copy, modify, merge, publish,
21 // distribute, sublicense, and/or sell copies of the Software, and to
22 // permit persons to whom the Software is furnished to do so, subject to
23 // the following conditions:
25 // The above copyright notice and this permission notice shall be
26 // included in all copies or substantial portions of the Software.
28 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
32 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
33 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
34 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 using System.Globalization;
40 using System.Collections;
42 using System.Xml.XPath;
46 namespace Mono.Xml.Xsl
48 namespace Mono.Xml.XPath
// Tokenizer for XPath expression text; it implements yyParser.yyInput
// (advance / value are visible further down in this class).
51 internal class Tokenizer : yyParser.yyInput
// The expression text being tokenized; assigned in the constructor.
53 private string m_rgchInput;
// Token produced by the previous advance() call. It starts as Token.EOF,
// which IsFirstToken uses to detect that no token has been read yet.
57 private int m_iTokenPrev = Token.EOF;
// Value attached to the current token: a Double, a string, or an
// XmlQualifiedName, depending on which Parse* method stored it.
58 private Object m_objToken;
// Whether the previous / current token counts as an operator. advance()
// copies "this" into "prev"; the flags drive the '*' and operator-name
// disambiguation.
59 private bool m_fPrevWasOperator = false;
60 private bool m_fThisIsOperator = false;
// Keyword text -> token id lookup, filled from s_rgTokenMap.
61 private static readonly Hashtable s_mapTokens = new Hashtable ();
// Flat list of alternating (token id, keyword text) entries.
// NOTE(review): only some of the entries are visible in this listing.
62 private static readonly Object [] s_rgTokenMap =
68 Token.ANCESTOR, "ancestor",
69 Token.ANCESTOR_OR_SELF, "ancestor-or-self",
70 Token.ATTRIBUTE, "attribute",
72 Token.DESCENDANT, "descendant",
73 Token.DESCENDANT_OR_SELF, "descendant-or-self",
74 Token.FOLLOWING, "following",
75 Token.FOLLOWING_SIBLING, "following-sibling",
76 Token.NAMESPACE, "namespace",
77 Token.PARENT, "parent",
78 Token.PRECEDING, "preceding",
79 Token.PRECEDING_SIBLING, "preceding-sibling",
81 Token.COMMENT, "comment",
83 Token.PROCESSING_INSTRUCTION, "processing-instruction",
// NUL character constant. NOTE(review): presumably the end-of-input
// sentinel returned by Peek/GetChar; the lines that use it are not
// visible in this listing, so confirm against the full file.
86 private const char EOL = '\0';
// Walk s_rgTokenMap two slots at a time: slot i holds the token id and
// slot i + 1 the keyword text, which becomes the key in s_mapTokens.
// NOTE(review): the enclosing member's header is not visible in this listing.
90 for (int i = 0; i < s_rgTokenMap.Length; i += 2)
91 s_mapTokens.Add (s_rgTokenMap [i + 1], s_rgTokenMap [i]);
// Creates a tokenizer over strInput. strInput must not be null: its Length
// is read below.
94 public Tokenizer (string strInput)
96 //Console.WriteLine ("Tokenizing: " + strInput);
97 m_rgchInput = strInput;
// Cached input length; Peek compares the cursor against it.
99 m_cch = strInput.Length;
// Looks ahead without consuming: returns the character iOffset positions
// past the cursor m_ich.
103 private char Peek (int iOffset)
// Guard for look-ahead at or beyond the end of the input.
// NOTE(review): the statement this condition guards is not visible in this listing.
105 if (m_ich + iOffset>= m_cch)
107 return m_rgchInput [m_ich + iOffset];
// Consumes one character: returns the character under the cursor and then
// advances m_ich by one (post-increment).
// NOTE(review): any end-of-input check that precedes the return is not
// visible in this listing.
115 private char GetChar ()
119 return m_rgchInput [m_ich++];
// Un-reads one character: moves the cursor back by one and returns the
// character now under it.
122 private char PutBack ()
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably it checks that the cursor is not already at the
// start of the input. Confirm against the full file.
125 throw new XPathException ("XPath parser returned an error status: invalid tokenizer state.");
// Pre-decrement: step back first, then read.
126 return m_rgchInput [--m_ich];
// Moves past whitespace at the cursor, using IsWhitespace as the test.
129 private bool SkipWhitespace () // returns true if any whitespace was skipped
// Quick exit check: the next character is not whitespace.
131 if (!IsWhitespace (Peek ()))
// Repeat for as long as the next character is whitespace.
// NOTE(review): the loop body and the return statements are not visible in this listing.
134 while (IsWhitespace (Peek ()))
// Scans a numeric token at the cursor and stores its value in m_objToken
// as a Double.
// NOTE(review): the return statement is not visible in this listing.
140 private int ParseNumber ()
142 StringBuilder sb = new StringBuilder ();
// Integer part: a run of ASCII digits.
144 while (IsDigit (Peek ()))
145 sb.Append ((char) GetChar ());
147 // don't handle '3.' as an error case (it is not. XPath 3.7 syntax [30])
// Fractional part: one more character is consumed, then a run of digits.
// NOTE(review): the test guarding this section (presumably for '.') is
// not visible in this listing.
150 sb.Append ((char) GetChar ());
151 while (IsDigit (Peek ()))
152 sb.Append ((char) GetChar ());
// Invariant culture keeps '.' as the decimal separator regardless of the
// current locale.
154 m_objToken = Double.Parse (sb.ToString (), NumberFormatInfo.InvariantInfo);
// Scans a delimited string literal. The first character consumed is the
// delimiter, and the same character must appear again to close the literal.
// Stores the literal text in m_objToken and returns Token.LITERAL.
158 private int ParseLiteral ()
160 StringBuilder sb = new StringBuilder ();
// chInit is the opening delimiter.
162 char chInit = GetChar ();
// Collect characters until the next one is the delimiter again.
164 while ((ch = Peek ()) != chInit)
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably it detects end of input before the closing
// delimiter. Confirm against the full file.
167 throw new XPathException ("unmatched "+chInit+" in expression");
168 sb.Append ((char) GetChar ());
// The token value is the text between the delimiters.
171 m_objToken = sb.ToString ();
172 return Token.LITERAL;
// Reads a name at the cursor: a first character that is a letter or '_',
// followed by any run of '_', '-', '.', letters or digits.
175 private string ReadIdentifier ()
177 StringBuilder sb = new StringBuilder ();
// The first character must be a letter or an underscore.
// NOTE(review): the statement taken when this test fails is not visible
// in this listing; ParseIdentifier checks the result for null, so
// presumably null is returned. Confirm against the full file.
180 if (!Char.IsLetter (ch) && ch != '_')
183 sb.Append ((char) GetChar ());
// Remaining name characters.
185 while ((ch = Peek ()) == '_' || ch == '-' || ch == '.' || Char.IsLetterOrDigit (ch))
186 sb.Append ((char) GetChar ());
189 return sb.ToString ();
// Reads a name and classifies it following the disambiguation rules quoted
// in the comments below. Stores the token value in m_objToken and returns
// the token id; throws XPathException when the name is not valid in its
// position.
// NOTE(review): the look-ahead tests that select each section, and some
// return statements, are not visible in this listing.
192 private int ParseIdentifier ()
194 string strToken = ReadIdentifier ();
// Keyword lookup; null means the name is not in the keyword table.
195 Object objToken = s_mapTokens [strToken];
// Names that are not keywords default to Token.QName.
197 int iToken = (objToken != null) ? (int) objToken : Token.QName;
198 m_objToken = strToken;
205 // If the two characters following an NCName (possibly
206 // after intervening ExprWhitespace) are ::, then the
207 // token must be recognized as an AxisName.
208 if (objToken == null || !IsAxisName (iToken))
209 throw new XPathException ("invalid axis name: '"+strToken+"'");
// XmlQualifiedName takes (name, namespace): here the local name is empty
// and strToken is carried in the namespace slot.
// NOTE(review): presumably this is the wildcard form after a prefix; confirm.
220 m_objToken = new XmlQualifiedName ("", strToken);
// Second name read after the first one; ReadIdentifier's null result
// signals that no valid name starts here.
223 string strToken2 = ReadIdentifier ();
224 if (strToken2 == null)
225 throw new XPathException ("invalid QName: "+strToken+":"+(char)ch);
// Local name is strToken2; strToken travels in the namespace slot.
228 m_objToken = new XmlQualifiedName (strToken2, strToken);
230 return Token.FUNCTION_NAME;
234 // If there is a preceding token and the preceding
235 // token is not one of @, ::, (, [, , or an Operator,
236 // then a * must be recognized as a MultiplyOperator
237 // and an NCName must be recognized as an OperatorName.
238 if (!IsFirstToken && !m_fPrevWasOperator)
// In operator position only names known as operator names are accepted.
240 if (objToken == null || !IsOperatorName (iToken))
241 throw new XPathException ("invalid operator name: '"+strToken+"'");
247 // If the character following an NCName (possibly
248 // after intervening ExprWhitespace) is (, then the
249 // token must be recognized as a NodeType or a FunctionName.
// Not a keyword: treat it as a function name with an empty namespace.
250 if (objToken == null)
252 m_objToken = new XmlQualifiedName (strToken, "");
253 return Token.FUNCTION_NAME;
// A keyword in this position must be a node type; any other keyword is
// rejected as a function name.
255 if (IsNodeType (iToken))
257 throw new XPathException ("invalid function name: '"+strToken+"'");
// Plain name with an empty namespace.
260 m_objToken = new XmlQualifiedName (strToken, "");
// True for exactly four characters: space, tab, line feed and carriage
// return. This is narrower than Char.IsWhiteSpace, which is left commented
// out below.
264 private static bool IsWhitespace (char ch)
266 // return Char.IsWhiteSpace (ch);
267 return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
// True only for the ASCII digits '0' through '9'. Char.IsDigit (left
// commented out below) would also accept decimal digits from other scripts.
270 private static bool IsDigit (char ch)
272 // return Char.IsDigit (ch);
273 return ch >= '0' && ch <= '9';
// NOTE(review): fragment of the character-dispatch routine, presumably
// ParseToken (the method advance() calls). Its header, the case labels and
// most return statements are not visible in this listing, so the comments
// below describe only what the remaining lines show.
// Setting m_fThisIsOperator marks the token being produced as an operator;
// advance() hands that flag to the next token's disambiguation.
286 m_fThisIsOperator = true;
// A digit at the next position starts a number.
302 else if (IsDigit (Peek ()))
305 return ParseNumber ();
313 m_fThisIsOperator = true;
320 m_fThisIsOperator = true;
325 m_fThisIsOperator = true;
// The operator flag is set right before the opening bracket token is
// returned; no flag assignment is visible before the closing one.
330 m_fThisIsOperator = true;
332 return Token.BRACKET_OPEN;
336 return Token.BRACKET_CLOSE;
// Same pattern for parentheses.
339 m_fThisIsOperator = true;
341 return Token.PAREN_OPEN;
345 return Token.PAREN_CLOSE;
348 m_fThisIsOperator = true;
353 m_fThisIsOperator = true;
// With a preceding token that is not an operator the result is
// Token.MULTIPLY (and counts as an operator); otherwise Token.ASTERISK.
359 if (!IsFirstToken && !m_fPrevWasOperator)
361 m_fThisIsOperator = true;
362 return Token.MULTIPLY;
364 return Token.ASTERISK;
368 m_fThisIsOperator = true;
372 m_fThisIsOperator = true;
377 m_fThisIsOperator = true;
385 m_fThisIsOperator = true;
392 m_fThisIsOperator = true;
402 m_fThisIsOperator = true;
// Literal and number tokens are delegated to their scanners.
412 return ParseLiteral ();
415 return ParseLiteral ();
420 return ParseNumber ();
// Names: ParseIdentifier classifies the token; an operator name also
// counts as an operator for the next token.
422 else if (Char.IsLetter (ch) || ch == '_') // NCName
424 int iToken = ParseIdentifier ();
425 if (IsOperatorName (iToken))
426 m_fThisIsOperator = true;
// Reports the offending character.
431 throw new XPathException ("invalid token: '"+ch+"'");
434 ///////////////////////////
435 // yyParser.yyInput methods
436 ///////////////////////////
438 /** move on to next token.
439 @return false if positioned beyond tokens.
440 @throws IOException on input error.
442 public bool advance ()
// Reads the next token into m_iToken and records what the following call
// needs for disambiguation.
// NOTE(review): some statements of this method are not visible in this listing.
// Reset first; the token rules set this flag when they produce an operator.
444 m_fThisIsOperator = false;
446 m_iToken = ParseToken ();
// Remember this token and whether it was an operator for the next call.
448 m_iTokenPrev = m_iToken;
449 m_fPrevWasOperator = m_fThisIsOperator;
// false once the token just read is Token.EOF.
450 return (m_iToken != Token.EOF);
453 /** classifies current token.
454 Should not be called if advance() returned false.
455 @return current %token or single character.
462 /** associated with current token.
463 Should not be called if advance() returned false.
464 @return value for token().
466 public Object value ()
// NOTE(review): the body is not visible in this listing; presumably it
// returns m_objToken, the value stored by the Parse* methods. Confirm
// against the full file.
// True while m_iTokenPrev still holds Token.EOF, its initial value; advance()
// overwrites it with each token it reads.
470 private bool IsFirstToken { get { return m_iTokenPrev == Token.EOF; } }
// Tells whether iToken is one of the node-type tokens.
// NOTE(review): only one case label is visible in this listing; the other
// labels and the return statements are omitted.
472 private bool IsNodeType (int iToken)
478 case Token.PROCESSING_INSTRUCTION:
// Tells whether iToken is an operator-name token. ParseIdentifier uses it
// to validate names in operator position, and the token dispatch uses it to
// set m_fThisIsOperator.
// NOTE(review): the body is not visible in this listing.
485 private bool IsOperatorName (int iToken)
// Tells whether iToken is one of the axis-name tokens; ParseIdentifier uses
// it to reject a name that is not a valid axis.
// NOTE(review): some case labels and the return statements are not visible
// in this listing.
498 private bool IsAxisName (int iToken)
502 case Token.ATTRIBUTE:
504 case Token.ANCESTOR_OR_SELF:
506 case Token.DESCENDANT:
507 case Token.DESCENDANT_OR_SELF:
508 case Token.FOLLOWING:
509 case Token.FOLLOWING_SIBLING:
510 case Token.NAMESPACE:
512 case Token.PRECEDING:
513 case Token.PRECEDING_SIBLING: