2 // System.Xml.XPath.Tokenizer
5 // Piers Haken (piersh@friskit.com)
7 // (C) 2002 Piers Haken
12 using System.Collections;
14 using Mono.Xml.XPath.yyParser;
16 namespace System.Xml.XPath
// Tokenizer for XPath expression strings; implements the yyInput interface of the generated yyParser.
18 internal class Tokenizer : Mono.Xml.XPath.yyParser.yyInput
// The expression text as a character array (filled from the constructor's string argument).
20 private char [] m_rgchInput;
23 // private System.IO.StreamReader m_input;
// Value attached to the most recently parsed token: the visible parse methods store a
// double for numbers and a string for literals and names.
25 private Object m_objToken;
// Lookup table from keyword text to its Token value; entries are added from rgTokenMap.
26 private static Hashtable m_mapTokens = new Hashtable ();
// Flat list of (token value, keyword text) pairs. It is read two elements at a time when
// m_mapTokens is filled: the text becomes the key and the token value the stored value.
27 private static readonly Object [] rgTokenMap =
// Axis-style keywords.
33 Token.ANCESTOR, "ancestor",
34 Token.ANCESTOR_OR_SELF, "ancestor-or-self",
35 Token.ATTRIBUTE, "attribute",
37 Token.DESCENDANT, "descendant",
38 Token.DESCENDANT_OR_SELF, "descendant-or-self",
39 Token.FOLLOWING, "following",
40 Token.FOLLOWING_SIBLING, "following-sibling",
41 Token.NAMESPACE, "namespace",
42 Token.PARENT, "parent",
43 Token.PRECEDING, "preceding",
44 Token.PRECEDING_SIBLING, "preceding-sibling",
// Node-type keywords.
46 Token.COMMENT, "comment",
48 Token.PROCESSING_INSTRUCTION, "processing-instruction",
// Fill m_mapTokens from the flat pair list: the string at i + 1 is the key and the
// token value at i is the stored value.
// NOTE(review): the enclosing member (presumably a static constructor) is not visible in this excerpt.
54 for (int i = 0; i < rgTokenMap.Length; i += 2)
55 m_mapTokens.Add (rgTokenMap [i + 1], rgTokenMap [i]);
// Creates a tokenizer over strInput: copies its characters into m_rgchInput and
// records its length in m_cch.
58 public Tokenizer (string strInput)
60 m_rgchInput = strInput.ToCharArray ();
62 m_cch = strInput.Length;
// Returns the character at the current index m_ich without changing the index.
// NOTE(review): presumably the tail of Peek (); its header and any bounds check are not visible in this excerpt.
70 return m_rgchInput [m_ich];
// Consumes one character: returns the character at m_ich, then advances m_ich by one.
// NOTE(review): any end-of-input check before the return is not visible in this excerpt.
73 private int GetChar ()
77 return m_rgchInput [m_ich++];
// Un-reads one character: moves m_ich back by one and returns the character now at m_ich.
80 private int PutBack ()
// Raised when the tokenizer cannot step back; the guarding condition is not visible in this excerpt.
83 throw new XPathException ("invalid tokenizer state"); // TODO: better description
84 return m_rgchInput [--m_ich];
// Loops for as long as the next character reported by Peek () is whitespace per IsWhitespace.
// NOTE(review): the loop body is not visible in this excerpt.
87 private void SkipWhitespace ()
89 while (IsWhitespace (Peek ()))
// Reads a number from the input and stores its value as a double in m_objToken.
// The text is collected as a run of digits followed by one further character and a
// second run of digits.
94 private int ParseNumber ()
96 StringBuilder sb = new StringBuilder ();
// First run: consume consecutive digits.
98 while (IsDigit (Peek ()))
99 sb.Append ((char) GetChar ());
101 // TODO: doesn't handle '3.' error case
// Consume one more character (presumably the '.' separator; the guarding test is not
// visible in this excerpt), then the digits that follow it.
104 sb.Append ((char) GetChar ());
105 while (IsDigit (Peek ()))
106 sb.Append ((char) GetChar ());
// XPath numbers always use '.' as the decimal separator, so parse with the invariant
// number format. The culture-sensitive overload misreads or rejects "1.5" when the
// current culture's decimal separator is ','.
108 m_objToken = Double.Parse (sb.ToString (), System.Globalization.NumberFormatInfo.InvariantInfo);
// Reads a delimited string literal and returns Token.LITERAL; the text between the
// delimiters is stored in m_objToken as a string.
112 private int ParseLiteral ()
114 StringBuilder sb = new StringBuilder ();
// The first character is the opening delimiter; the same character ends the literal.
116 int chInit = GetChar ();
// Copy characters until the next one equals the opening delimiter.
// NOTE(review): any end-of-input handling inside the loop is not visible in this excerpt.
118 while ((ch = Peek ()) != chInit)
122 sb.Append ((char) GetChar ());
125 m_objToken = sb.ToString ();
126 return Token.LITERAL;
// Reads a name and classifies it: a name found in m_mapTokens yields that keyword's
// token value; otherwise the name itself is stored in m_objToken as a string.
129 private int ParseIdentifier ()
131 StringBuilder sb = new StringBuilder ();
// Characters accepted by this visible test: '_', '-', and the ASCII letters a-z / A-Z.
136 if (ch == '_' || ch == '-' ||
137 (ch >= 'a' && ch <= 'z') ||
138 (ch >= 'A' && ch <= 'Z'))
140 sb.Append ((char) GetChar ());
145 String strToken = sb.ToString ();
// Keyword lookup: the table maps keyword text to its Token value.
146 Object objToken = m_mapTokens [strToken];
147 if (objToken != null)
149 return (int) objToken;
153 m_objToken = strToken;
// NOTE(review): the conditions that select this return are not visible in this excerpt.
157 return Token.FUNCTION_NAME;
// True when ch is a space, tab, line feed or carriage return.
162 private static bool IsWhitespace (int ch)
164 return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
// True when ch is an ASCII decimal digit, '0' through '9'.
167 private static bool IsDigit (int ch)
169 return ch >= '0' && ch <= '9';
// NOTE(review): the lines below are non-contiguous pieces of the token dispatch
// (presumably ParseToken, which advance () calls). The tests that select each
// return are not visible in this excerpt.
// When the next character is a digit, the number parser takes over.
197 else if (IsDigit (Peek ()))
200 return ParseNumber ();
// Token results for brackets, parentheses and the asterisk.
223 return Token.BRACKET_OPEN;
227 return Token.BRACKET_CLOSE;
231 return Token.PAREN_OPEN;
235 return Token.PAREN_CLOSE;
247 return Token.ASTERISK;
// Remaining visible branches hand off to the literal, number and identifier parsers.
289 return ParseLiteral ();
292 return ParseLiteral ();
298 return ParseNumber ();
302 return ParseIdentifier ();
309 ///////////////////////////
310 // yyParser.yyInput methods
311 ///////////////////////////
313 /** move on to next token.
314 @return false if positioned beyond tokens.
315 @throws IOException on input error.
317 public bool advance ()
// Parse the next token and remember its type in m_iToken.
320 m_iToken = ParseToken ();
// False once the token just read is Token.EOF.
322 return (m_iToken != Token.EOF);
325 /** classifies current token.
326 Should not be called if advance() returned false.
327 @return current %token or single character.
334 /** associated with current token.
335 Should not be called if advance() returned false.
336 @return value for token().
338 public Object value ()