2 // System.Xml.XPath.Tokenizer
5 // Piers Haken (piersh@friskit.com)
7 // (C) 2002 Piers Haken
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System.Globalization;
34 using System.Collections;
36 using Mono.Xml.XPath.yyParser;
38 namespace System.Xml.XPath
// Hand-written lexer for XPath expressions; it hands tokens to the parser through the yyInput interface.
40 internal class Tokenizer : Mono.Xml.XPath.yyParser.yyInput
// The expression text being tokenized (assigned in the constructor).
42 private string m_rgchInput;
// Token code recorded by the previous advance() call; starts out as Token.EOF.
46 private int m_iTokenPrev = Token.EOF;
// Semantic value of the current token, set by the Parse* helpers (double, string or XmlQualifiedName).
47 private Object m_objToken;
// Whether the previous token counted as an operator; copied from m_fThisIsOperator at the end of advance().
48 private bool m_fPrevWasOperator = false;
// Whether the token currently being scanned counts as an operator; cleared at the start of every advance().
49 private bool m_fThisIsOperator = false;
// Lookup from reserved word (string key) to its token code; filled from s_rgTokenMap by the loop below.
50 private static readonly Hashtable s_mapTokens = new Hashtable ();
// Flat list of (token code, reserved word) pairs: even slots hold the code, odd slots hold the spelling.
51 private static readonly Object [] s_rgTokenMap =
57 Token.ANCESTOR, "ancestor",
58 Token.ANCESTOR_OR_SELF, "ancestor-or-self",
59 Token.ATTRIBUTE, "attribute",
61 Token.DESCENDANT, "descendant",
62 Token.DESCENDANT_OR_SELF, "descendant-or-self",
63 Token.FOLLOWING, "following",
64 Token.FOLLOWING_SIBLING, "following-sibling",
65 Token.NAMESPACE, "namespace",
66 Token.PARENT, "parent",
67 Token.PRECEDING, "preceding",
68 Token.PRECEDING_SIBLING, "preceding-sibling",
70 Token.COMMENT, "comment",
72 Token.PROCESSING_INSTRUCTION, "processing-instruction",
// NOTE(review): presumably the sentinel character reported once the input is exhausted -- confirm against Peek/GetChar.
75 private const char EOL = '\0';
// Index the pairs by spelling: the key is the string at [i + 1], the value is the token code at [i].
79 for (int i = 0; i < s_rgTokenMap.Length; i += 2)
80 s_mapTokens.Add (s_rgTokenMap [i + 1], s_rgTokenMap [i]);
// Creates a tokenizer over strInput, recording the text and its length for the scanning helpers.
// strInput must not be null: its Length is read here.
83 public Tokenizer (string strInput)
85 //Console.WriteLine ("Tokenizing: " + strInput);
86 m_rgchInput = strInput;
88 m_cch = strInput.Length;
// Returns the character iOffset positions past the current read index without consuming anything.
// Positions at or beyond the end of the input are caught by the bounds check before the string is indexed.
92 private char Peek (int iOffset)
94 if (m_ich + iOffset>= m_cch)
96 return m_rgchInput [m_ich + iOffset];
// Consumes one character: returns the character at the read index, then moves the index forward by one.
104 private char GetChar ()
108 return m_rgchInput [m_ich++];
// Un-reads one character: moves the read index back by one and returns the character now under it.
// Raises XPathException when the tokenizer is in a state where stepping back is invalid.
111 private char PutBack ()
114 throw new XPathException ("XPath parser returned an error status: invalid tokenizer state.");
115 return m_rgchInput [--m_ich];
// Consumes a run of whitespace characters (as defined by IsWhitespace) at the read position.
118 private bool SkipWhitespace () // returns true if any whitespace was skipped
120 if (!IsWhitespace (Peek ()))
123 while (IsWhitespace (Peek ()))
// Scans a numeric token: a (possibly empty) run of digits, then a '.' and any fraction digits when present.
// The collected text is converted with the invariant culture and the resulting double is stored in m_objToken.
129 private int ParseNumber ()
131 StringBuilder sb = new StringBuilder ();
133 while (IsDigit (Peek ()))
134 sb.Append ((char) GetChar ());
136 // A trailing '.' with no fraction digits (as in '3.') is not treated as an error: the Number production [30] in XPath 1.0 section 3.7 allows it.
139 sb.Append ((char) GetChar ());
140 while (IsDigit (Peek ()))
141 sb.Append ((char) GetChar ());
143 m_objToken = Double.Parse (sb.ToString (), NumberFormatInfo.InvariantInfo);
// Scans a quoted string literal. The first character consumed is the opening quote; characters are then
// collected unchanged until the same quote character is seen again. The collected text is stored in
// m_objToken and Token.LITERAL is returned. A literal with no matching quote raises XPathException.
147 private int ParseLiteral ()
149 StringBuilder sb = new StringBuilder ();
151 char chInit = GetChar ();
153 while ((ch = Peek ()) != chInit)
156 throw new XPathException ("unmatched "+chInit+" in expression");
157 sb.Append ((char) GetChar ());
160 m_objToken = sb.ToString ();
161 return Token.LITERAL;
// Reads a name at the read position. The first character must be a letter or '_'; the characters that
// follow may be letters, digits, '_', '-' or '.'. Returns the characters consumed as a string.
// NOTE(review): ParseIdentifier compares the result with null, so the not-a-name case presumably returns null -- confirm.
164 private string ReadIdentifier ()
166 StringBuilder sb = new StringBuilder ();
169 if (!Char.IsLetter (ch) && ch != '_')
172 sb.Append ((char) GetChar ());
174 while ((ch = Peek ()) == '_' || ch == '-' || ch == '.' || Char.IsLetterOrDigit (ch))
175 sb.Append ((char) GetChar ());
178 return sb.ToString ();
// Scans a name and classifies it from its spelling, the characters that follow it, and the previous token.
// Reserved words are looked up in s_mapTokens; any other name starts out as Token.QName. The checks below
// apply the disambiguation rules of the XPath grammar quoted in the comments. m_objToken receives either
// the raw name or an XmlQualifiedName built from it. Invalid axis, operator, function or qualified names
// raise XPathException.
181 private int ParseIdentifier ()
183 string strToken = ReadIdentifier ();
184 Object objToken = s_mapTokens [strToken];
186 int iToken = (objToken != null) ? (int) objToken : Token.QName;
187 m_objToken = strToken;
194 // If the two characters following an NCName (possibly
195 // after intervening ExprWhitespace) are ::, then the
196 // token must be recognized as an AxisName.
197 if (objToken == null || !IsAxisName (iToken))
198 throw new XPathException ("invalid axis name: '"+strToken+"'");
// Empty local name with strToken in the namespace slot -- presumably the 'prefix:*' wildcard form; confirm.
209 m_objToken = new XmlQualifiedName ("", strToken);
212 string strToken2 = ReadIdentifier ();
213 if (strToken2 == null)
214 throw new XPathException ("invalid QName: "+strToken+":"+(char)ch);
// Qualified name: strToken2 is the local part; strToken (the text before ':') goes in the namespace slot.
217 m_objToken = new XmlQualifiedName (strToken2, strToken);
219 return Token.FUNCTION_NAME;
223 // If there is a preceding token and the preceding
224 // token is not one of @, ::, (, [, ',' (comma) or an Operator,
225 // then a * must be recognized as a MultiplyOperator
226 // and an NCName must be recognized as an OperatorName.
227 if (!IsFirstToken && !m_fPrevWasOperator)
229 if (objToken == null || !IsOperatorName (iToken))
230 throw new XPathException ("invalid operator name: '"+strToken+"'");
236 // If the character following an NCName (possibly
237 // after intervening ExprWhitespace) is (, then the
238 // token must be recognized as a NodeType or a FunctionName.
// A name that is not a reserved word is a function name.
239 if (objToken == null)
241 m_objToken = new XmlQualifiedName (strToken, "");
242 return Token.FUNCTION_NAME;
// A reserved word followed by '(' is only acceptable when it is a node type.
244 if (IsNodeType (iToken))
246 throw new XPathException ("invalid function name: '"+strToken+"'");
249 m_objToken = new XmlQualifiedName (strToken, "");
// True for the four characters treated as whitespace here: space, tab, line feed and carriage return.
253 private static bool IsWhitespace (char ch)
255 // Char.IsWhiteSpace (ch) is not used; only the characters listed below count.
256 return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
// True only for the characters '0' through '9'.
259 private static bool IsDigit (char ch)
261 // Char.IsDigit (ch) is not used; only the range below counts as a digit.
262 return ch >= '0' && ch <= '9';
275 m_fThisIsOperator = true;
291 else if (IsDigit (Peek ()))
294 return ParseNumber ();
302 m_fThisIsOperator = true;
309 m_fThisIsOperator = true;
314 m_fThisIsOperator = true;
319 m_fThisIsOperator = true;
321 return Token.BRACKET_OPEN;
325 return Token.BRACKET_CLOSE;
328 m_fThisIsOperator = true;
330 return Token.PAREN_OPEN;
334 return Token.PAREN_CLOSE;
337 m_fThisIsOperator = true;
342 m_fThisIsOperator = true;
// With a preceding token that was not an operator this is the MULTIPLY operator; otherwise it is the ASTERISK token.
348 if (!IsFirstToken && !m_fPrevWasOperator)
350 m_fThisIsOperator = true;
351 return Token.MULTIPLY;
353 return Token.ASTERISK;
357 m_fThisIsOperator = true;
361 m_fThisIsOperator = true;
366 m_fThisIsOperator = true;
374 m_fThisIsOperator = true;
381 m_fThisIsOperator = true;
391 m_fThisIsOperator = true;
401 return ParseLiteral ();
404 return ParseLiteral ();
409 return ParseNumber ();
411 else if (Char.IsLetter (ch) || ch == '_') // NCName
413 int iToken = ParseIdentifier ();
// Operator names set the operator flag too, so the next token is disambiguated correctly.
414 if (IsOperatorName (iToken))
415 m_fThisIsOperator = true;
420 throw new XPathException ("invalid token: '"+ch+"'");
423 ///////////////////////////
424 // yyParser.yyInput methods
425 ///////////////////////////
427 /** move on to next token.
428 @return false if positioned beyond tokens.
429 @throws IOException on input error.
431 public bool advance ()
// Start each token with the operator flag cleared; scanning sets it when the token counts as an operator.
433 m_fThisIsOperator = false;
435 m_iToken = ParseToken ();
// Hand this token's code and operator status to the next call; IsFirstToken and the operator checks consult them.
437 m_iTokenPrev = m_iToken;
438 m_fPrevWasOperator = m_fThisIsOperator;
439 return (m_iToken != Token.EOF);
442 /** classifies current token.
443 Should not be called if advance() returned false.
444 @return current %token or single character.
451 /** value associated with current token.
452 Should not be called if advance() returned false.
453 @return value for token().
455 public Object value ()
// True while m_iTokenPrev is Token.EOF (its initial value), i.e. advance() has not yet recorded an earlier token.
459 private bool IsFirstToken { get { return m_iTokenPrev == Token.EOF; } }
// Reports whether iToken is one of the node-type tokens (PROCESSING_INSTRUCTION is among the cases).
// ParseIdentifier uses this to accept a reserved word that is followed by '('.
461 private bool IsNodeType (int iToken)
467 case Token.PROCESSING_INSTRUCTION:
// Reports whether iToken is an operator-name token (the OperatorName category referred to in ParseIdentifier).
474 private bool IsOperatorName (int iToken)
// Reports whether iToken is one of the axis-name tokens listed in the cases below.
// ParseIdentifier requires this of any name that is followed by '::'.
487 private bool IsAxisName (int iToken)
491 case Token.ATTRIBUTE:
493 case Token.ANCESTOR_OR_SELF:
495 case Token.DESCENDANT:
496 case Token.DESCENDANT_OR_SELF:
497 case Token.FOLLOWING:
498 case Token.FOLLOWING_SIBLING:
499 case Token.NAMESPACE:
501 case Token.PRECEDING:
502 case Token.PRECEDING_SIBLING: