2 // RELAX NG Compact Syntax parser
\r
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
\r
7 // (C)2003 Atsushi Enomoto
\r
8 // (C)2004 Novell Inc.
\r
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Collections;
\r
36 using Commons.Xml.Relaxng;
\r
38 namespace Commons.Xml.Relaxng.Rnc
\r
40 internal class RncTokenizer : Commons.Xml.Relaxng.Rnc.yyParser.yyInput
\r
49 bool isLiteralNsUri;
\r
53 int savedLineNumber = 1;
\r
54 int savedLinePosition;
\r
55 bool nextIncrementLine;
\r
57 public RncTokenizer (TextReader source)
\r
59 this.source = source;
\r
62 public bool IsElement {
\r
63 get { return isElement; }
\r
67 get { return savedLineNumber; }
\r
71 get { return savedLinePosition; }
\r
74 // jay interface implementation
\r
78 return currentToken;
\r
81 public bool advance ()
\r
84 currentToken = ParseToken (false);
\r
85 savedLineNumber = line;
\r
86 savedLinePosition = column;
\r
87 return currentToken != Token.EOF;
\r
90 public object value ()
\r
97 private int ReadEscapedHexNumber (int current)
\r
99 int i = source.Read ();
\r
111 current = current * 16 + (i - '0');
\r
112 return ReadEscapedHexNumber (current);
\r
119 current = current * 16 + (i - 'A') + 10;
\r
120 return ReadEscapedHexNumber (current);
\r
127 current = current * 16 + (i - 'a' + 10);
\r
128 return ReadEscapedHexNumber (current);
\r
134 private int ReadFromStream ()
\r
136 int ret = source.Read ();
\r
139 ret = source.Read ();
\r
146 tmp = source.Read ();
\r
147 } while (tmp == 'x');
\r
149 peekString = new string ('x', xcount);
\r
151 peekString += (char) tmp;
\r
154 ret = ReadEscapedHexNumber (0);
\r
155 if (peekChar != '}')
\r
160 peekString = new string ((char) ret, 1);
\r
164 private int PeekChar ()
\r
166 if (peekChar == 0) {
\r
167 if (peekString != null) {
\r
168 peekChar = peekString [0];
\r
169 peekString = peekString.Length == 1 ?
\r
170 null : peekString.Substring (1);
\r
173 peekChar = ReadFromStream ();
\r
179 private int ReadChar ()
\r
182 if (peekChar != 0) {
\r
186 else if (peekString != null) {
\r
187 ret = peekString [0];
\r
188 peekString = peekString.Length == 1 ?
\r
189 null : peekString.Substring (1);
\r
192 ret = ReadFromStream ();
\r
194 if (nextIncrementLine) {
\r
197 nextIncrementLine = false;
\r
203 nextIncrementLine = true;
\r
213 private void SkipWhitespaces ()
\r
216 switch (PeekChar ()) {
\r
229 char [] nameBuffer = new char [30];
\r
231 private string ReadQuoted (char quoteChar)
\r
236 int c = ReadChar ();
\r
241 if (quoteChar != c)
\r
247 throw new RelaxngException ("Unterminated quoted literal.");
\r
248 if (XmlChar.IsInvalid (c))
\r
249 throw new RelaxngException ("Invalid character in literal.");
\r
250 AppendNameChar (c, ref index);
\r
255 return new string (nameBuffer, 0, index);
\r
258 private void AppendNameChar (int c, ref int index)
\r
260 if (nameBuffer.Length == index) {
\r
261 char [] arr = new char [index * 2];
\r
262 Array.Copy (nameBuffer, arr, index);
\r
266 AppendNameChar ((c - 0x10000) / 0x400 + 0xD800, ref index);
\r
267 AppendNameChar ((c - 0x10000) % 0x400 + 0xDC00, ref index);
\r
270 nameBuffer [index++] = (char) c;
\r
273 private string ReadTripleQuoted (char quoteChar)
\r
278 int c = ReadChar ();
\r
284 if (quoteChar != c)
\r
287 if ((c = PeekChar ()) != quoteChar) {
\r
288 AppendNameChar (quoteChar, ref index);
\r
293 if ((c = PeekChar ()) == quoteChar) {
\r
298 AppendNameChar (quoteChar, ref index);
\r
299 AppendNameChar (quoteChar, ref index);
\r
303 throw new RelaxngException ("Unterminated triple-quoted literal.");
\r
304 if (XmlChar.IsInvalid (c))
\r
305 throw new RelaxngException ("Invalid character in literal.");
\r
306 AppendNameChar (c, ref index);
\r
311 return new string (nameBuffer, 0, index);
\r
314 private string ReadOneName ()
\r
318 int c = PeekChar ();
\r
319 if (!XmlChar.IsFirstNameChar (c) || !XmlChar.IsNCNameChar (c))
\r
320 throw new RelaxngException (String.Format ("Invalid NCName start character: {0}", c));
\r
333 if (!XmlChar.IsNCNameChar (c)) {
\r
339 if (nameBuffer.Length == index) {
\r
340 char [] arr = new char [index * 2];
\r
341 Array.Copy (nameBuffer, arr, index);
\r
344 nameBuffer [index++] = (char) c;
\r
349 return new string (nameBuffer, 0, index);
\r
352 private string ReadLine ()
\r
354 string s = source.ReadLine ();
\r
360 private int ParseToken (bool backslashed)
\r
362 SkipWhitespaces ();
\r
363 int c = ReadChar ();
\r
369 return Token.Equal;
\r
371 return Token.Tilde;
\r
373 return Token.Comma;
\r
375 return Token.OpenCurly;
\r
377 return Token.CloseCurly;
\r
379 return Token.OpenParen;
\r
381 return Token.CloseParen;
\r
383 return Token.OpenBracket;
\r
385 return Token.CloseBracket;
\r
387 if (PeekChar () != '=')
\r
390 return Token.AndEquals;
\r
392 if (PeekChar () != '=')
\r
395 return Token.OrEquals;
\r
397 return Token.Question;
\r
399 // See also ':' for NsName
\r
400 return Token.Asterisk;
\r
403 return Token.ERROR;
\r
404 return ParseToken (true);
\r
408 return Token.Minus;
\r
410 if (PeekChar () == '>') {
\r
412 return Token.TwoGreaters;
\r
417 // tokenValue = ReadLine ();
\r
418 // return Token.Documentation;
\r
420 return ParseToken (false);
\r
423 if (PeekChar () != c)
\r
424 name = ReadQuoted ((char) c);
\r
427 if (PeekChar () == c) {
\r
429 name = ReadTripleQuoted ((char) c);
\r
431 name = String.Empty;
\r
433 int invidx = XmlChar.IndexOfInvalid (name, true) ;
\r
435 throw new RelaxngException (String.Format ("Invalid XML character in compact syntax literal segment at {0:X}", (int) name [invidx]));
\r
437 return Token.LiteralSegment;
\r
439 if (!XmlChar.IsNCNameChar (c))
\r
440 throw new RelaxngException ("Invalid NCName character.");
\r
442 name = ReadOneName ();
\r
443 if (PeekChar () == ':') {
\r
445 if (PeekChar () == '*') {
\r
448 return Token.NsName;
\r
450 tokenValue = name + ":" + ReadOneName ();
\r
451 return Token.CName;
\r
456 return Token.QuotedIdentifier;
\r
460 return Token.KeywordAttribute;
\r
463 return Token.KeywordElement;
\r
465 return Token.KeywordDatatypes;
\r
467 return Token.KeywordDefault;
\r
469 return Token.KeywordDiv;
\r
471 return Token.KeywordEmpty;
\r
473 return Token.KeywordExternal;
\r
475 return Token.KeywordGrammar;
\r
477 return Token.KeywordInclude;
\r
479 return Token.KeywordInherit;
\r
481 return Token.KeywordList;
\r
483 return Token.KeywordMixed;
\r
485 return Token.KeywordNamespace;
\r
487 return Token.KeywordNotAllowed;
\r
489 return Token.KeywordParent;
\r
491 return Token.KeywordStart;
\r
493 return Token.KeywordString;
\r
495 return Token.KeywordText;
\r
497 return Token.KeywordToken;
\r
499 return Token.NCName;
\r