2 // RELAX NG Compact Syntax parser
\r
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
\r
7 // (C)2003 Atsushi Enomoto
\r
8 // (C)2004 Novell Inc.
\r
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Collections;
\r
36 using Commons.Xml.Relaxng;
\r
38 namespace Commons.Xml.Relaxng.Rnc
\r
40 internal class RncTokenizer : Commons.Xml.Relaxng.Rnc.yyParser.yyInput
\r
49 bool isLiteralNsUri;
\r
// Line/column snapshot: advance() copies `line` and `column` into these after each token is parsed.
53 int savedLineNumber = 1;
\r
54 int savedLinePosition;
\r
// Tested and cleared in ReadChar(), then set again further down the same method.
// NOTE(review): presumably defers the line-counter bump to the read after a newline — confirm.
55 bool nextIncrementLine;
\r
// Creates a tokenizer that reads from `source`.
//   source:  reader the tokenizer pulls characters from (stored in this.source).
//   baseUri: stored as-is and handed back by the BaseUri property.
58 public RncTokenizer (TextReader source, string baseUri)
\r
60 this.source = source;
\r
61 this.baseUri = baseUri;
\r
// Returns the current value of the isElement flag.
64 public bool IsElement {
\r
65 get { return isElement; }
\r
// Line number recorded by the most recent advance() call (the field starts at 1).
69 get { return savedLineNumber; }
\r
// Column recorded by the most recent advance() call.
73 get { return savedLinePosition; }
\r
// Returns the base URI string that was passed to the constructor.
76 public string BaseUri {
\r
77 get { return baseUri; }
\r
80 // jay interface implementation
\r
// Hands back the token code cached in currentToken; advance() is what refreshes it.
84 return currentToken;
\r
// jay input interface: moves to the next token.
// Parses one token with ParseToken (false), caches it in currentToken, then
// snapshots the reader position for the line/column getters.
// Returns: false once the parsed token is Token.EOF, true otherwise.
87 public bool advance ()
\r
90 currentToken = ParseToken (false);
\r
// Capture the position as it stands after the token was read.
91 savedLineNumber = line;
\r
92 savedLinePosition = column;
\r
93 return currentToken != Token.EOF;
\r
// jay input interface: yields the value object that accompanies the current token.
// NOTE(review): presumably this is tokenValue, which ParseToken assigns — confirm.
96 public object value ()
\r
// Accumulates hexadecimal digits read from `source` into `current`, recursing once per digit.
//   current: value accumulated so far (ReadFromStream starts the chain with 0).
// Each digit branch multiplies the running value by 16 and adds the digit's value.
103 private int ReadEscapedHexNumber (int current)
\r
105 int i = source.Read ();
\r
// Decimal digit: value is the offset from '0'.
117 current = current * 16 + (i - '0');
\r
118 return ReadEscapedHexNumber (current);
\r
// Upper-case hex letter: offset from 'A' plus 10.
125 current = current * 16 + (i - 'A') + 10;
\r
126 return ReadEscapedHexNumber (current);
\r
// Lower-case hex letter: offset from 'a' plus 10.
133 current = current * 16 + (i - 'a' + 10);
\r
134 return ReadEscapedHexNumber (current);
\r
// Pulls the next character straight from `source`, decoding an escape sequence when one is found.
// NOTE(review): the 'x' run, the hex decoding and the '}' check look like the compact-syntax
// `\x{HEX}` escape — confirm against the RELAX NG compact syntax specification.
140 private int ReadFromStream ()
\r
142 int ret = source.Read ();
\r
145 ret = source.Read ();
\r
// Consume the whole run of consecutive 'x' characters; tmp ends up holding the first non-'x'.
152 tmp = source.Read ();
\r
153 } while (tmp == 'x');
\r
// Queue the consumed 'x' characters (and the character that ended the run) in peekString
// so later reads return them.
155 peekString = new string ('x', xcount);
\r
157 peekString += (char) tmp;
\r
// Decode the hexadecimal digits, starting the accumulator at 0.
160 ret = ReadEscapedHexNumber (0);
\r
161 if (peekChar != '}')
\r
// Queue the decoded character in peekString.
166 peekString = new string ((char) ret, 1);
\r
// Fills the one-character lookahead cache (peekChar) when it is empty; 0 means "nothing cached".
// Queued characters in peekString are used first; ReadFromStream () is the stream-backed source.
170 private int PeekChar ()
\r
172 if (peekChar == 0) {
\r
173 if (peekString != null) {
\r
// Pop the head of the queued string; drop the string once its last character is taken.
174 peekChar = peekString [0];
\r
175 peekString = peekString.Length == 1 ?
\r
176 null : peekString.Substring (1);
\r
179 peekChar = ReadFromStream ();
\r
// Consumes the next character into `ret`, choosing the source in priority order:
// the cached peekChar, then the head of peekString, then ReadFromStream ().
185 private int ReadChar ()
\r
188 if (peekChar != 0) {
\r
192 else if (peekString != null) {
\r
// Pop the head of the queued string; drop the string once its last character is taken.
193 ret = peekString [0];
\r
194 peekString = peekString.Length == 1 ?
\r
195 null : peekString.Substring (1);
\r
198 ret = ReadFromStream ();
\r
// NOTE(review): nextIncrementLine presumably postpones the line-number bump until the
// character after a line break is read — confirm.
200 if (nextIncrementLine) {
\r
203 nextIncrementLine = false;
\r
209 nextIncrementLine = true;
\r
// Dispatches on the next unread character, obtained with PeekChar () so the test itself
// does not consume it.
219 private void SkipWhitespaces ()
\r
222 switch (PeekChar ()) {
\r
// Scratch buffer used by the literal and name readers; starts at 30 chars. When it fills,
// AppendNameChar and ReadOneName allocate an array twice as large and copy the contents over.
235 char [] nameBuffer = new char [30];
\r
// Reads a quoted literal into nameBuffer and returns the collected characters as a string.
//   quoteChar: the delimiter character each read character is compared against.
// Throws RelaxngException for an unterminated literal or a character rejected by XmlChar.IsInvalid.
237 private string ReadQuoted (char quoteChar)
\r
242 int c = ReadChar ();
\r
247 if (quoteChar != c)
\r
253 throw new RelaxngException ("Unterminated quoted literal.");
\r
254 if (XmlChar.IsInvalid (c))
\r
255 throw new RelaxngException ("Invalid character in literal.");
\r
256 AppendNameChar (c, ref index);
\r
// Only the first `index` chars of the shared buffer belong to this literal.
261 return new string (nameBuffer, 0, index);
\r
// Appends character `c` to nameBuffer at position `index` and advances `index`.
//   c:     character (code point) to store.
//   index: write position in nameBuffer, passed by reference and incremented per stored char.
264 private void AppendNameChar (int c, ref int index)
\r
// Buffer full: allocate one twice as large and copy the existing contents.
266 if (nameBuffer.Length == index) {
\r
267 char [] arr = new char [index * 2];
\r
268 Array.Copy (nameBuffer, arr, index);
\r
// Supplementary code point: store it as a UTF-16 surrogate pair (high surrogate, then low).
272 AppendNameChar ((c - 0x10000) / 0x400 + 0xD800, ref index);
\r
273 AppendNameChar ((c - 0x10000) % 0x400 + 0xDC00, ref index);
\r
276 nameBuffer [index++] = (char) c;
\r
// Reads a triple-quoted literal into nameBuffer and returns the collected characters as a string.
//   quoteChar: the quote character that makes up the delimiter.
// Throws RelaxngException for an unterminated literal or a character rejected by XmlChar.IsInvalid.
// NOTE(review): quote characters that turn out not to belong to the closing delimiter appear
// to be appended literally (one or two, depending on how many were seen) — confirm.
279 private string ReadTripleQuoted (char quoteChar)
\r
284 int c = ReadChar ();
\r
290 if (quoteChar != c)
\r
// Look ahead without consuming: is the next character another quote?
293 if ((c = PeekChar ()) != quoteChar) {
\r
294 AppendNameChar (quoteChar, ref index);
\r
299 if ((c = PeekChar ()) == quoteChar) {
\r
304 AppendNameChar (quoteChar, ref index);
\r
305 AppendNameChar (quoteChar, ref index);
\r
309 throw new RelaxngException ("Unterminated triple-quoted literal.");
\r
310 if (XmlChar.IsInvalid (c))
\r
311 throw new RelaxngException ("Invalid character in literal.");
\r
312 AppendNameChar (c, ref index);
\r
// Only the first `index` chars of the shared buffer belong to this literal.
317 return new string (nameBuffer, 0, index);
\r
// Reads one NCName into nameBuffer and returns it as a string.
// Throws RelaxngException when the first character fails XmlChar.IsFirstNameChar or
// XmlChar.IsNCNameChar.
320 private string ReadOneName ()
\r
// Peek rather than read, so the start character is validated before being consumed.
324 int c = PeekChar ();
\r
325 if (!XmlChar.IsFirstNameChar (c) || !XmlChar.IsNCNameChar (c))
\r
326 throw new RelaxngException (String.Format ("Invalid NCName start character: {0}", c));
\r
339 if (!XmlChar.IsNCNameChar (c)) {
\r
// Buffer full: allocate one twice as large and copy the existing contents
// (the same growth step AppendNameChar performs).
345 if (nameBuffer.Length == index) {
\r
346 char [] arr = new char [index * 2];
\r
347 Array.Copy (nameBuffer, arr, index);
\r
350 nameBuffer [index++] = (char) c;
\r
355 return new string (nameBuffer, 0, index);
\r
// Reads a line from `source` via source.ReadLine ().
358 private string ReadLine ()
\r
360 string s = source.ReadLine ();
\r
// Core lexer step: skips whitespace, reads one character and maps it (or the literal/name
// that starts with it) to a Token.* code.
//   backslashed: false from advance(); true only on the recursive ParseToken (true) call below.
// Returns: the int token code (punctuation, operators, literal segments, names, keywords).
// Throws RelaxngException for an invalid XML character in a literal segment or an invalid
// NCName character.
366 private int ParseToken (bool backslashed)
\r
368 SkipWhitespaces ();
\r
369 int c = ReadChar ();
\r
// Single-character punctuation tokens.
375 return Token.Equal;
\r
377 return Token.Tilde;
\r
379 return Token.Comma;
\r
381 return Token.OpenCurly;
\r
383 return Token.CloseCurly;
\r
385 return Token.OpenParen;
\r
387 return Token.CloseParen;
\r
389 return Token.OpenBracket;
\r
391 return Token.CloseBracket;
\r
// Two-character operators: a following '=' (peeked, not yet consumed) selects the
// compound-assignment token.
393 if (PeekChar () != '=')
\r
396 return Token.AndEquals;
\r
398 if (PeekChar () != '=')
\r
401 return Token.OrEquals;
\r
403 return Token.Question;
\r
405 // See also ':' for NsName
\r
406 return Token.Asterisk;
\r
// NOTE(review): presumably the ERROR return guards a call that is already backslashed,
// and otherwise the method re-enters with backslashed = true — confirm.
409 return Token.ERROR;
\r
410 return ParseToken (true);
\r
414 return Token.Minus;
\r
// A following '>' (peeked) selects Token.TwoGreaters.
416 if (PeekChar () == '>') {
\r
418 return Token.TwoGreaters;
\r
// The documentation-token path is disabled (commented out); the tokenizer re-enters
// ParseToken instead.
423 // tokenValue = ReadLine ();
\r
424 // return Token.Documentation;
\r
426 return ParseToken (false);
\r
// Quoted literal: `c` is the opening quote. A different next character means a plain
// quoted literal; further matching quotes lead to the triple-quoted reader or the empty string.
429 if (PeekChar () != c)
\r
430 name = ReadQuoted ((char) c);
\r
433 if (PeekChar () == c) {
\r
435 name = ReadTripleQuoted ((char) c);
\r
437 name = String.Empty;
\r
// Validate the finished literal; the error message reports the offending char in hex.
439 int invidx = XmlChar.IndexOfInvalid (name, true) ;
\r
441 throw new RelaxngException (String.Format ("Invalid XML character in compact syntax literal segment at {0:X}", (int) name [invidx]));
\r
443 return Token.LiteralSegment;
\r
// Anything else must start a name.
445 if (!XmlChar.IsNCNameChar (c))
\r
446 throw new RelaxngException ("Invalid NCName character.");
\r
448 name = ReadOneName ();
\r
// Name followed by ':' — "prefix:*" is an NsName, "prefix:local" is a CName whose
// value is the two parts rejoined with ':'.
449 if (PeekChar () == ':') {
\r
451 if (PeekChar () == '*') {
\r
454 return Token.NsName;
\r
456 tokenValue = name + ":" + ReadOneName ();
\r
457 return Token.CName;
\r
462 return Token.QuotedIdentifier;
\r
// Each reserved word has its own Token.Keyword* code; any other name is a plain NCName.
466 return Token.KeywordAttribute;
\r
469 return Token.KeywordElement;
\r
471 return Token.KeywordDatatypes;
\r
473 return Token.KeywordDefault;
\r
475 return Token.KeywordDiv;
\r
477 return Token.KeywordEmpty;
\r
479 return Token.KeywordExternal;
\r
481 return Token.KeywordGrammar;
\r
483 return Token.KeywordInclude;
\r
485 return Token.KeywordInherit;
\r
487 return Token.KeywordList;
\r
489 return Token.KeywordMixed;
\r
491 return Token.KeywordNamespace;
\r
493 return Token.KeywordNotAllowed;
\r
495 return Token.KeywordParent;
\r
497 return Token.KeywordStart;
\r
499 return Token.KeywordString;
\r
501 return Token.KeywordText;
\r
503 return Token.KeywordToken;
\r
505 return Token.NCName;
\r