// (C)2003 Atsushi Enomoto\r
// (C)2004 Novell Inc.\r
//\r
-
-//
-// Permission is hereby granted, free of charge, to any person obtaining
-// a copy of this software and associated documentation files (the
-// "Software"), to deal in the Software without restriction, including
-// without limitation the rights to use, copy, modify, merge, publish,
-// distribute, sublicense, and/or sell copies of the Software, and to
-// permit persons to whom the Software is furnished to do so, subject to
-// the following conditions:
-//
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
+\r
+//\r
+// Permission is hereby granted, free of charge, to any person obtaining\r
+// a copy of this software and associated documentation files (the\r
+// "Software"), to deal in the Software without restriction, including\r
+// without limitation the rights to use, copy, modify, merge, publish,\r
+// distribute, sublicense, and/or sell copies of the Software, and to\r
+// permit persons to whom the Software is furnished to do so, subject to\r
+// the following conditions:\r
+// \r
+// The above copyright notice and this permission notice shall be\r
+// included in all copies or substantial portions of the Software.\r
+// \r
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE\r
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\r
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\r
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r
+//\r
\r
using System;\r
using System.Collections;\r
int currentToken;\r
object tokenValue;\r
int peekChar;\r
+ string peekString;\r
bool isElement;\r
bool isLiteralNsUri;\r
\r
int line = 1;\r
int column;\r
+ int savedLineNumber = 1;\r
+ int savedLinePosition;\r
bool nextIncrementLine;\r
- string prefixName;\r
+ string baseUri;\r
\r
- public RncTokenizer (TextReader source)\r
+ // Creates a tokenizer that reads compact syntax text from 'source'.\r
+ // 'baseUri' is only stored here and exposed through the BaseUri property.\r
+ public RncTokenizer (TextReader source, string baseUri)\r
{\r
this.source = source;\r
+ this.baseUri = baseUri;\r
}\r
\r
public bool IsElement {\r
}\r
\r
+ // Line number captured by advance () right after the most recent token\r
+ // was parsed (savedLineNumber), not the tokenizer's live 'line' counter.\r
public int Line {\r
- get { return line; }\r
+ get { return savedLineNumber; }\r
}\r
\r
+ // Column captured by advance () right after the most recent token was\r
+ // parsed (savedLinePosition), not the tokenizer's live 'column' counter.\r
public int Column {\r
- get { return column; }\r
+ get { return savedLinePosition; }\r
+ }\r
+\r
+ // The base URI string that was passed to the constructor, returned as is.\r
+ public string BaseUri {\r
+ get { return baseUri; }\r
}\r
\r
// jay interface implementation\r
\r
+ // Moves to the next token: clears tokenValue, parses one token into\r
+ // currentToken, then snapshots the current line/column so that the Line\r
+ // and Column properties report the position reached after that token.\r
+ // Returns false once the parsed token is Token.EOF.\r
public bool advance ()\r
{\r
- if (prefixName != null)\r
- throw new RelaxngException ("Invalid prefix was found.");\r
tokenValue = null;\r
- currentToken = ParseToken ();\r
+ currentToken = ParseToken (false);\r
+ savedLineNumber = line;\r
+ savedLinePosition = column;\r
return currentToken != Token.EOF;\r
}\r
\r
\r
// private methods\r
\r
+ // Accumulates consecutive hexadecimal digits read from 'source' onto\r
+ // 'current' (most significant digit first) and returns the total.\r
+ // The first character that is not a hex digit (or -1 at end of input)\r
+ // is stored in peekChar so the caller can inspect it.\r
+ private int ReadEscapedHexNumber (int current)\r
+ {\r
+ 	for (;;) {\r
+ 		int ch = source.Read ();\r
+ 		int digit;\r
+ 		if (ch >= '0' && ch <= '9')\r
+ 			digit = ch - '0';\r
+ 		else if (ch >= 'A' && ch <= 'F')\r
+ 			digit = ch - 'A' + 10;\r
+ 		else if (ch >= 'a' && ch <= 'f')\r
+ 			digit = ch - 'a' + 10;\r
+ 		else {\r
+ 			// Not a hex digit: hand it back through peekChar and stop.\r
+ 			peekChar = ch;\r
+ 			return current;\r
+ 		}\r
+ 		current = current * 16 + digit;\r
+ 	}\r
+ }\r
+\r
+ // Reads the next character from 'source', decoding the compact syntax\r
+ // escape form backslash, one or more 'x', '{', hex digits, '}'.\r
+ //\r
+ // Returns the decoded value for a well-formed escape, the raw character\r
+ // when it is not a backslash, or the backslash itself when what follows\r
+ // is not an escape; in that last case the characters already consumed\r
+ // after the backslash are queued in peekString so they are re-read.\r
+ // Throws RelaxngException when an escape is opened with '{' but is not\r
+ // closed by '}'.\r
+ private int ReadFromStream ()\r
+ {\r
+ 	int ret = source.Read ();\r
+ 	if (ret != '\\')\r
+ 		return ret;\r
+ 	ret = source.Read ();\r
+ 	if (ret < 0)\r
+ 		// Backslash was the last character. Queue nothing: casting -1\r
+ 		// to char would inject U+FFFF and hide the end of input.\r
+ 		return '\\';\r
+ 	if (ret != 'x') {\r
+ 		peekString = new string ((char) ret, 1);\r
+ 		return '\\';\r
+ 	}\r
+ 	// Count the run of 'x' (the one just read included) and find what\r
+ 	// follows it.\r
+ 	int tmp;\r
+ 	int xcount = 0;\r
+ 	do {\r
+ 		xcount++;\r
+ 		tmp = source.Read ();\r
+ 	} while (tmp == 'x');\r
+ 	if (tmp != '{') {\r
+ 		// Not an escape after all: replay the x's and the follower.\r
+ 		peekString = new string ('x', xcount);\r
+ 		if (tmp >= 0)\r
+ 			peekString += (char) tmp;\r
+ 		return '\\';\r
+ 	}\r
+ 	ret = ReadEscapedHexNumber (0);\r
+ 	// ReadEscapedHexNumber leaves the first non-hex character in peekChar.\r
+ 	if (peekChar != '}')\r
+ 		throw new RelaxngException ("Invalid escape sequence: closing '}' was not found.");\r
+ 	peekChar = 0;\r
+ 	return ret;\r
+ }\r
+\r
+ // Returns the next character without consuming it. The value is cached\r
+ // in peekChar, where 0 means "nothing cached". When the cache is empty\r
+ // the character is taken from the front of peekString if one is queued,\r
+ // otherwise from ReadFromStream ().\r
private int PeekChar ()\r
{\r
- if (peekChar == 0)\r
- peekChar = source.Read ();\r
+ if (peekChar == 0) {\r
+ if (peekString != null) {\r
+ peekChar = peekString [0];\r
+ peekString = peekString.Length == 1 ?\r
+ null : peekString.Substring (1);\r
+ }\r
+ else\r
+ peekChar = ReadFromStream ();\r
+ }\r
+\r
return peekChar;\r
}\r
\r
ret = peekChar;\r
peekChar = 0;\r
}\r
+ else if (peekString != null) {\r
+ ret = peekString [0];\r
+ peekString = peekString.Length == 1 ?\r
+ null : peekString.Substring (1);\r
+ }\r
else\r
- ret = source.Read ();\r
+ ret = ReadFromStream ();\r
\r
if (nextIncrementLine) {\r
line++;\r
\r
char [] nameBuffer = new char [30];\r
\r
- // TODO: parse three quoted\r
+ // Reads characters with ReadChar () up to and including the next\r
+ // occurrence of 'quoteChar' and returns the characters before it.\r
+ // A quote of the other kind is ordinary content. End of input (-1) never\r
+ // equals quoteChar, so it falls through to the default branch and throws\r
+ // RelaxngException, as does a character XmlChar.IsInvalid rejects.\r
private string ReadQuoted (char quoteChar)\r
{\r
int index = 0;\r
bool loop = true;\r
- do {\r
+ while (loop) {\r
int c = ReadChar ();\r
switch (c) {\r
case -1:\r
+ case '\'':\r
case '\"':\r
+ // Only the quote kind that opened the literal terminates it.\r
+ if (quoteChar != c)\r
+ goto default;\r
loop = false;\r
break;\r
default:\r
- if (nameBuffer.Length == index) {\r
- char [] arr = new char [index * 2];\r
- Array.Copy (nameBuffer, arr, index);\r
- nameBuffer = arr;\r
+ if (c < 0)\r
+ throw new RelaxngException ("Unterminated quoted literal.");\r
+ if (XmlChar.IsInvalid (c))\r
+ throw new RelaxngException ("Invalid character in literal.");\r
+ AppendNameChar (c, ref index);\r
+ break;\r
+ }\r
+ }\r
+\r
+ return new string (nameBuffer, 0, index);\r
+ }\r
+\r
+ // Appends 'c' to nameBuffer at 'index' and advances 'index', doubling\r
+ // the buffer when it is full. A value of 0x10000 or above is stored as\r
+ // two UTF-16 code units (high surrogate from 0xD800, low surrogate from\r
+ // 0xDC00), so 'index' advances by two in that case.\r
+ private void AppendNameChar (int c, ref int index)\r
+ {\r
+ 	// 0x10000 itself is the first value that does not fit in one char;\r
+ 	// comparing with '>' would truncate it to U+0000.\r
+ 	if (c >= 0x10000) {\r
+ 		int offset = c - 0x10000;\r
+ 		AppendNameChar (offset / 0x400 + 0xD800, ref index);\r
+ 		AppendNameChar (offset % 0x400 + 0xDC00, ref index);\r
+ 		return;\r
+ 	}\r
+ 	if (nameBuffer.Length == index) {\r
+ 		char [] arr = new char [index * 2];\r
+ 		Array.Copy (nameBuffer, arr, index);\r
+ 		nameBuffer = arr;\r
+ 	}\r
+ 	nameBuffer [index++] = (char) c;\r
+ }\r
+\r
+ // Reads the content of a triple-quoted literal up to and including the\r
+ // closing run of three 'quoteChar' characters and returns the content.\r
+ // One or two consecutive quote characters that are not followed by a\r
+ // further quote are kept as content. Throws RelaxngException at end of\r
+ // input or on a character XmlChar.IsInvalid rejects.\r
+ private string ReadTripleQuoted (char quoteChar)\r
+ {\r
+ int index = 0;\r
+ bool loop = true;\r
+ do {\r
+ int c = ReadChar ();\r
+ switch (c) {\r
+ case -1:\r
+ case '\'':\r
+ case '\"':\r
+ // End of input or the other quote kind: handled by default.\r
+ if (quoteChar != c)\r
+ goto default;\r
+ // First quote seen. If the next character is not a quote, only\r
+ // the quote is content; the peeked character has not been\r
+ // consumed and must not be appended here, or it would be\r
+ // appended again when it is actually read.\r
+ if (PeekChar () != quoteChar) {\r
+ AppendNameChar (quoteChar, ref index);\r
+ break;\r
}\r
- nameBuffer [index++] = (char) c;\r
+ ReadChar ();\r
+ // Second quote consumed. A third one closes the literal.\r
+ if (PeekChar () == quoteChar) {\r
+ ReadChar ();\r
+ loop = false;\r
+ break;\r
+ }\r
+ // Exactly two quotes: both are content.\r
+ AppendNameChar (quoteChar, ref index);\r
+ AppendNameChar (quoteChar, ref index);\r
+ break;\r
+ default:\r
+ if (c < 0)\r
+ throw new RelaxngException ("Unterminated triple-quoted literal.");\r
+ if (XmlChar.IsInvalid (c))\r
+ throw new RelaxngException ("Invalid character in literal.");\r
+ AppendNameChar (c, ref index);\r
break;\r
}\r
} while (loop);\r
return new string (nameBuffer, 0, index);\r
}\r
\r
- private string ReadOneToken ()\r
+ private string ReadOneName ()\r
{\r
int index = 0;\r
bool loop = true;\r
+ int c = PeekChar ();\r
+ if (!XmlChar.IsFirstNameChar (c) || !XmlChar.IsNCNameChar (c))\r
+ throw new RelaxngException (String.Format ("Invalid NCName start character: {0}", c));\r
do {\r
- int c = PeekChar ();\r
+ c = PeekChar ();\r
switch (c) {\r
case -1:\r
case ' ':\r
loop = false;\r
break;\r
default:\r
- if (!IsTokenContinuable (c)) {\r
- if (c == ':') {\r
- if (prefixName != null)\r
- throw new RelaxngException ("Invalid colon was found.");\r
- prefixName = new string (nameBuffer, 0, index);\r
- }\r
+ if (!XmlChar.IsNCNameChar (c)) {\r
loop = false;\r
break;\r
}\r
return s;\r
}\r
\r
- private bool IsTokenContinuable (int c)\r
- {\r
- switch (c) {\r
- case '=':\r
- case ':':\r
- case ',':\r
- case '{':\r
- case '}':\r
- case '(':\r
- case ')':\r
- case '[':\r
- case ']':\r
- case '&':\r
- case '|':\r
- case '?':\r
- case '*':\r
- case '\\':\r
- case '+':\r
- case '-':\r
- case '>':\r
- case '#':\r
- case '\'':\r
- case '\"':\r
- return false;\r
- }\r
- return true;\r
- }\r
-\r
- private int ParseToken ()\r
+ private int ParseToken (bool backslashed)\r
{\r
SkipWhitespaces ();\r
int c = ReadChar ();\r
return Token.EOF;\r
case '=':\r
return Token.Equal;\r
- case ':':\r
- // return CName\r
- if (prefixName == null)\r
- throw new RelaxngException ("Invalid character ':' was found.");\r
- if (PeekChar () == '*') {\r
- ReadChar ();\r
- tokenValue = prefixName;\r
- prefixName = null;\r
- return Token.NsName;\r
- }\r
- tokenValue = prefixName + ":" + ReadOneToken ();\r
- prefixName = null;\r
- return Token.CName;\r
+ case '~':\r
+ return Token.Tilde;\r
case ',':\r
return Token.Comma;\r
case '{':\r
// See also ':' for NsName\r
return Token.Asterisk;\r
case '\\':\r
- return Token.BackSlash;\r
+ if (backslashed)\r
+ return Token.ERROR;\r
+ return ParseToken (true);\r
case '+':\r
return Token.Plus;\r
case '-':\r
peekChar = '>';\r
goto default;\r
case '#':\r
- // NOTE: This interpretation is expanded against the spec\r
-// if (ReadChar () != '#')\r
-// throw new RelaxngException ("Invalid character after '#'.");\r
- tokenValue = ReadLine ();\r
- return Token.Documentation;\r
+// tokenValue = ReadLine ();\r
+// return Token.Documentation;\r
+ ReadLine ();\r
+ return ParseToken (false);\r
case '\'':\r
case '\"':\r
- name = ReadQuoted ((char) c);\r
+ if (PeekChar () != c)\r
+ name = ReadQuoted ((char) c);\r
+ else {\r
+ ReadChar ();\r
+ if (PeekChar () == c) {\r
+ ReadChar ();\r
+ name = ReadTripleQuoted ((char) c);\r
+ } // else '' or ""\r
+ name = String.Empty;\r
+ }\r
+ int invidx = XmlChar.IndexOfInvalid (name, true) ;\r
+ if (invidx >= 0)\r
+ throw new RelaxngException (String.Format ("Invalid XML character in compact syntax literal segment at {0:X}", (int) name [invidx]));\r
tokenValue = name;\r
return Token.LiteralSegment;\r
default:\r
+ if (!XmlChar.IsNCNameChar (c))\r
+ throw new RelaxngException ("Invalid NCName character.");\r
peekChar = c;\r
- name = ReadOneToken ();\r
- if (prefixName != null)\r
- return ParseToken ();\r
+ name = ReadOneName ();\r
+ if (PeekChar () == ':') {\r
+ ReadChar ();\r
+ if (PeekChar () == '*') {\r
+ ReadChar ();\r
+ tokenValue = name;\r
+ return Token.NsName;\r
+ }\r
+ tokenValue = name + ":" + ReadOneName ();\r
+ return Token.CName;\r
+\r
+ }\r
tokenValue = name;\r
+ if (backslashed)\r
+ return Token.QuotedIdentifier;\r
switch (name) {\r
case "attribute":\r
isElement = false;\r
case "token":\r
return Token.KeywordToken;\r
default:\r
- return Token.NCNameButKeyword;\r
+ return Token.NCName;\r
}\r
}\r
}\r
\r
}\r
-}
+}\r