2 // System.Xml.XPath.Tokenizer
5 // Piers Haken (piersh@friskit.com)
7 // (C) 2002 Piers Haken
12 using System.Collections;
14 using Mono.Xml.XPath.yyParser;
16 namespace System.Xml.XPath
// Tokenizer for XPath expression text. It is declared against the yyInput
// contract of the generated parser (Mono.Xml.XPath.yyParser).
18 internal class Tokenizer : Mono.Xml.XPath.yyParser.yyInput
// Characters of the expression being tokenized; filled by the constructor.
20 private char [] m_rgchInput;
// Value attached to the most recently parsed token: a double for numbers,
// a string for literals and names.
24 private Object m_objToken;
// Recomputed by advance () after every token: true when that token is listed
// in s_mapfPrevWasSpecial and no whitespace followed it. ParseIdentifier
// consults it before returning a keyword token.
25 private bool m_fPrevWasSpecial = false;
// Keyword text -> token code; populated from s_rgTokenMap.
26 private static readonly Hashtable s_mapTokens = new Hashtable ();
// Flat list of (token code, keyword text) pairs; consumed two entries at a time.
// Some entries are not visible in this excerpt.
27 private static readonly Object [] s_rgTokenMap =
33 Token.ANCESTOR, "ancestor",
34 Token.ANCESTOR_OR_SELF, "ancestor-or-self",
35 Token.ATTRIBUTE, "attribute",
37 Token.DESCENDANT, "descendant",
38 Token.DESCENDANT_OR_SELF, "descendant-or-self",
39 Token.FOLLOWING, "following",
40 Token.FOLLOWING_SIBLING, "following-sibling",
41 Token.NAMESPACE, "namespace",
42 Token.PARENT, "parent",
43 Token.PRECEDING, "preceding",
44 Token.PRECEDING_SIBLING, "preceding-sibling",
46 Token.COMMENT, "comment",
48 Token.PROCESSING_INSTRUCTION, "processing-instruction",
// Used as a set: keys are token codes, values are always null.
51 private static readonly Hashtable s_mapfPrevWasSpecial = new Hashtable ();
// Token codes loaded into s_mapfPrevWasSpecial (the entries are not visible in this excerpt).
52 private static readonly int [] s_rgfPrevWasSpecial =
// NUL character. Presumably the end-of-input sentinel handed back by Peek/GetChar -- TODO confirm.
79 private const char EOL = '\0';
// One-time table setup (the enclosing declaration is not visible in this excerpt).
// s_rgTokenMap stores (token, name) pairs, so step by two and key the lookup
// table by the name at i + 1.
83 for (int i = 0; i < s_rgTokenMap.Length; i += 2)
84 s_mapTokens.Add (s_rgTokenMap [i + 1], s_rgTokenMap [i]);
// NOTE(review): objTmp is not referenced by any line visible here -- confirm it is unused.
85 object objTmp = new Object ();
// Only the keys matter: the table is later queried with Contains ().
86 for (int i = 0; i < s_rgfPrevWasSpecial.Length; i++)
87 s_mapfPrevWasSpecial.Add (s_rgfPrevWasSpecial [i], null);
// Creates a tokenizer over strInput: the text is copied into a char array and
// its length is cached in m_cch.
// NOTE(review): no null guard is visible; a null strInput would fail at ToCharArray ().
90 public Tokenizer (string strInput)
92 m_rgchInput = strInput.ToCharArray ();
94 m_cch = strInput.Length;
// Returns the character iOffset positions past the current read index (m_ich)
// without consuming it.
98 private char Peek (int iOffset)
// Bounds check against the cached input length. The statement taken when this
// is true is not visible in this excerpt (presumably it yields EOL -- TODO confirm).
100 if (m_ich + iOffset>= m_cch)
102 return m_rgchInput [m_ich + iOffset];
// Returns the character at the current read index and then advances the index by one.
// Any end-of-input guard sits on lines that are not visible in this excerpt.
110 private char GetChar ()
114 return m_rgchInput [m_ich++];
// Steps the read index back by one and returns the character now at that index.
117 private char PutBack ()
// Signals an invalid tokenizer state; the condition guarding this throw is not
// visible in this excerpt.
120 throw new XPathException ("invalid tokenizer state"); // TODO: better description
// Pre-decrement: the index moves first, then the character is read.
121 return m_rgchInput [--m_ich];
// Skips a run of whitespace starting at the current position.
124 private bool SkipWhitespace () // returns true if any whitespace was skipped
// Fast path: nothing to skip when the next character is not whitespace.
126 if (!IsWhitespace (Peek ()))
// The loop body is not visible in this excerpt; presumably it consumes one
// character per iteration -- TODO confirm.
129 while (IsWhitespace (Peek ()))
// Reads a numeric token and stores its value as a double in m_objToken.
136 private int ParseNumber ()
138 StringBuilder sb = new StringBuilder ();
// Leading run of ASCII digits.
140 while (IsDigit (Peek ()))
141 sb.Append ((char) GetChar ());
143 // TODO: doesn't handle '3.' error case
// Consumes one more character and then the digits after it. The condition
// guarding this step is not visible here (presumably a test for '.').
146 sb.Append ((char) GetChar ());
147 while (IsDigit (Peek ()))
148 sb.Append ((char) GetChar ());
// NOTE(review): Double.Parse (string) uses the current culture's number format.
// Under a culture whose decimal separator is not '.', text such as "1.5" may be
// rejected or misread; consider passing CultureInfo.InvariantCulture.
150 m_objToken = Double.Parse (sb.ToString ());
// Reads a quoted literal and stores the text between the quotes in m_objToken.
// Returns Token.LITERAL.
154 private int ParseLiteral ()
156 StringBuilder sb = new StringBuilder ();
// The opening quote is consumed here and doubles as the terminator to look for.
158 char chInit = GetChar ();
// Collect characters until the matching quote is next. Lines between the loop
// header and the append are not visible in this excerpt; they may deal with
// input that ends before the closing quote -- TODO confirm.
160 while ((ch = Peek ()) != chInit)
164 sb.Append ((char) GetChar ());
167 m_objToken = sb.ToString ();
168 return Token.LITERAL;
// Reads a name and classifies it: a keyword token from s_mapTokens,
// Token.FUNCTION_NAME, or a fallback whose return statement is not visible
// in this excerpt.
171 private int ParseIdentifier ()
173 StringBuilder sb = new StringBuilder ();
// Name characters accepted here: '_', '-', or anything Char.IsLetterOrDigit allows.
176 while ((ch = Peek ()) == '_' || ch == '-' || Char.IsLetterOrDigit (ch))
177 sb.Append ((char) GetChar ());
179 String strToken = sb.ToString ();
// null when the text is not one of the keywords registered in s_mapTokens.
180 Object objToken = s_mapTokens [strToken];
// A keyword is returned as its own token only when the previous token was
// not flagged as "special" by advance ().
182 if (!m_fPrevWasSpecial && objToken != null)
183 return (int) objToken;
// The condition opening this branch is not visible here. It ends in
// FUNCTION_NAME, so presumably it tests for a following '(' -- TODO confirm.
190 if (objToken != null)
191 return (int) objToken;
192 m_objToken = strToken;
193 return Token.FUNCTION_NAME;
// Name immediately followed by "::".
195 else if (ch == ':' && Peek (1) == ':')
197 if (objToken != null)
198 return (int) objToken;
// Fallback: keep the raw text as the token value. The token code returned
// after this assignment is not visible in this excerpt.
201 m_objToken = strToken;
// True for exactly four characters: space, tab, line feed and carriage return.
205 private static bool IsWhitespace (char ch)
// Char.IsWhiteSpace is left disabled; it accepts more characters than the
// explicit list below.
207 // return Char.IsWhiteSpace (ch);
208 return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
// True only for the ASCII digits '0' through '9'.
211 private static bool IsDigit (char ch)
// Char.IsDigit is left disabled; it also accepts decimal digits outside ASCII.
213 // return Char.IsDigit (ch);
214 return ch >= '0' && ch <= '9';
// Fragments of the token dispatch logic (presumably the body of ParseToken (),
// which advance () calls). The method header and the tests that select each
// return below are not visible in this excerpt, so only the outcomes are shown.
// A digit at the look-ahead position leads to a number token.
242 else if (IsDigit (Peek ()))
245 return ParseNumber ();
// Fixed token codes returned for punctuation.
268 return Token.BRACKET_OPEN;
272 return Token.BRACKET_CLOSE;
276 return Token.PAREN_OPEN;
280 return Token.PAREN_CLOSE;
292 return Token.ASTERISK;
// Two separate paths hand over to ParseLiteral (presumably one per quote
// character -- TODO confirm).
334 return ParseLiteral ();
337 return ParseLiteral ();
343 return ParseNumber ();
347 return ParseIdentifier ();
354 ///////////////////////////
355 // yyParser.yyInput methods
356 ///////////////////////////
358 /** move on to next token.
359 @return false if positioned beyond tokens.
360 @throws IOException on input error.
// Advances to the next token; returns false when that token is Token.EOF.
362 public bool advance ()
// Scan the next token, then consume any whitespace that follows it.
365 m_iToken = ParseToken ();
366 bool fWhitespace = SkipWhitespace ();
// Remembered for the next ParseIdentifier call: set only when this token is in
// s_mapfPrevWasSpecial and no whitespace separated it from what follows.
367 m_fPrevWasSpecial = (!fWhitespace && s_mapfPrevWasSpecial.Contains (m_iToken));
368 return (m_iToken != Token.EOF);
371 /** classifies current token.
372 Should not be called if advance() returned false.
373 @return current %token or single character.
380 /** associated with current token.
381 Should not be called if advance() returned false.
382 @return value for token().
384 public Object value ()