int currentToken;
object tokenValue;
int peekChar;
+ string peekString; // pushed-back characters; PeekChar and ReadChar consume these before reading the source again
bool isElement;
bool isLiteralNsUri;

int line = 1;
int column;
+ int savedLineNumber = 1; // copy of `line` taken at the end of advance (); reported by Line
+ int savedLinePosition; // copy of `column` taken at the end of advance (); reported by Column
bool nextIncrementLine;
- string prefixName;
\r
public RncTokenizer (TextReader source)\r
{\r
}\r
\r
public int Line {
- get { return line; }
+ get { return savedLineNumber; } // value of `line` captured when advance () last finished
}
\r
public int Column {
- get { return column; }
+ get { return savedLinePosition; } // value of `column` captured when advance () last finished
}
\r
// jay interface implementation\r
\r
public bool advance ()
{
- if (prefixName != null)
- throw new RelaxngException ("Invalid prefix was found.");
tokenValue = null;
- currentToken = ParseToken ();
+ currentToken = ParseToken (false); // false: this token is not preceded by a backslash
+ savedLineNumber = line; // snapshot taken after the token was read; exposed through Line
+ savedLinePosition = column; // snapshot taken after the token was read; exposed through Column
return currentToken != Token.EOF;
}
\r
\r
// private methods\r
\r
+ // Reads hexadecimal digits (0-9, A-F, a-f) from the source and folds them
+ // into `current`, most significant digit first. The first character that
+ // is not a hex digit (or -1 at end of input) is left in peekChar for the
+ // caller to inspect. Returns the accumulated value.
+ private int ReadEscapedHexNumber (int current)
+ {
+     while (true) {
+         int ch = source.Read ();
+         int digit;
+         if (ch >= '0' && ch <= '9')
+             digit = ch - '0';
+         else if (ch >= 'A' && ch <= 'F')
+             digit = ch - 'A' + 10;
+         else if (ch >= 'a' && ch <= 'f')
+             digit = ch - 'a' + 10;
+         else {
+             // Not a hex digit: stop and hand the character back.
+             peekChar = ch;
+             return current;
+         }
+         current = current * 16 + digit;
+     }
+ }
+\r
+ // Reads one character from the source, decoding escapes of the form
+ // \x{HEX} (any number of 'x' may precede the '{').
+ // Returns the decoded value for a well-formed escape, the raw character
+ // when it is not a backslash, or -1 at end of input. When a backslash
+ // does not start an escape, '\\' is returned and the characters already
+ // consumed after it are queued in peekString so they are read next.
+ // Throws RelaxngException when "\x{" and its hexadecimal digits are not
+ // closed by '}'.
+ private int ReadFromStream ()
+ {
+     int ret = source.Read ();
+     if (ret != '\\')
+         return ret;
+     ret = source.Read ();
+     if (ret == 'x') {
+         int tmp;
+         int xcount = 0;
+         do {
+             xcount++;
+             tmp = source.Read ();
+         } while (tmp == 'x');
+         if (tmp != '{') {
+             // "\x..." without '{' is plain text: replay what was consumed.
+             peekString = new string ('x', xcount);
+             if (tmp >= 0)
+                 peekString += (char) tmp;
+             return '\\';
+         }
+         ret = ReadEscapedHexNumber (0);
+         // ReadEscapedHexNumber leaves the first non-hex character in
+         // peekChar. Anything but '}' is a malformed escape; falling
+         // through to the plain-backslash path would queue the partial
+         // hex value as if it were a source character.
+         if (peekChar != '}')
+             throw new RelaxngException ("Invalid escape sequence: '}' was expected after the hexadecimal digits.");
+         peekChar = 0;
+         return ret;
+     }
+     // Plain backslash. Queue the character that followed it, unless the
+     // input ended: casting -1 to char would fabricate U+FFFF.
+     if (ret >= 0)
+         peekString = new string ((char) ret, 1);
+     return '\\';
+ }
+\r
private int PeekChar ()
{
- if (peekChar == 0)
- peekChar = source.Read ();
+ // A character is already buffered: report it again without reading.
+ if (peekChar != 0)
+ return peekChar;
+
+ if (peekString == null)
+ peekChar = ReadFromStream ();
+ else {
+ // Hand out the pushed-back characters first, one at a time.
+ peekChar = peekString [0];
+ if (peekString.Length == 1)
+ peekString = null;
+ else
+ peekString = peekString.Substring (1);
+ }
+
return peekChar;
}
\r
ret = peekChar;\r
peekChar = 0;\r
}\r
+ else if (peekString != null) {\r
+ ret = peekString [0];\r
+ peekString = peekString.Length == 1 ?\r
+ null : peekString.Substring (1);\r
+ }\r
else\r
- ret = source.Read ();\r
+ ret = ReadFromStream ();\r
\r
if (nextIncrementLine) {\r
line++;\r
\r
char [] nameBuffer = new char [30];\r
\r
- // TODO: parse three quoted\r
private string ReadQuoted (char quoteChar)
{
int index = 0;
bool loop = true;
- do {
+ while (loop) { // read until the matching quoteChar ends the literal
int c = ReadChar ();
switch (c) {
case -1:
+ case '\'':
case '\"':
+ if (quoteChar != c) // end of input or the other quote kind: not a terminator here
+ goto default;
loop = false;
break;
default:
- if (nameBuffer.Length == index) {
- char [] arr = new char [index * 2];
- Array.Copy (nameBuffer, arr, index);
- nameBuffer = arr;
+ if (c < 0) // reached from the -1 case: input ended before the closing quote
+ throw new RelaxngException ("Unterminated quoted literal.");
+ if (XmlChar.IsInvalid (c))
+ throw new RelaxngException ("Invalid character in literal.");
+ AppendNameChar (c, ref index); // stores c in nameBuffer and advances index
+ break;
+ }
+ }
+
+ return new string (nameBuffer, 0, index); // the characters read, closing quote excluded
+ }
+\r
+ // Stores c (narrowed to char) at nameBuffer [index] and advances index,
+ // doubling nameBuffer first when it is already full.
+ private void AppendNameChar (int c, ref int index)
+ {
+     if (index == nameBuffer.Length) {
+         char [] grown = new char [index * 2];
+         Array.Copy (nameBuffer, grown, index);
+         nameBuffer = grown;
+     }
+     nameBuffer [index] = (char) c;
+     index++;
+ }
+\r
+ // Reads the body of a triple-quoted literal up to and including the
+ // closing run of three quoteChar characters; the caller has already
+ // consumed the opening three. One or two consecutive quoteChar
+ // characters inside the body are kept as content.
+ // Returns the content without the closing delimiter.
+ // Throws RelaxngException on end of input or on an invalid character.
+ private string ReadTripleQuoted (char quoteChar)
+ {
+ int index = 0;
+ bool loop = true;
+ do {
+ int c = ReadChar ();
+ switch (c) {
+ case -1:
+ case '\'':
+ case '\"':
+ // 1: end of input or the other quote kind is handled as ordinary input
+ if (quoteChar != c)
+ goto default;
+ // 2: a lone quote is content. The following character is only
+ // peeked, so it is left for the next ReadChar (); appending it
+ // here as well would store it twice.
+ if (PeekChar () != quoteChar) {
+ AppendNameChar (quoteChar, ref index);
+ break;
}
- nameBuffer [index++] = (char) c;
+ ReadChar ();
+ // 3: a third quote closes the literal; otherwise keep both quotes
+ if (PeekChar () == quoteChar) {
+ ReadChar ();
+ loop = false;
+ break;
+ }
+ AppendNameChar (quoteChar, ref index);
+ AppendNameChar (quoteChar, ref index);
+ break;
+ default:
+ if (c < 0)
+ throw new RelaxngException ("Unterminated triple-quoted literal.");
+ if (XmlChar.IsInvalid (c))
+ throw new RelaxngException ("Invalid character in literal.");
+ AppendNameChar (c, ref index);
break;
}
} while (loop);
return new string (nameBuffer, 0, index);
}
\r
- private string ReadOneToken ()\r
+ private string ReadOneName ()\r
{\r
int index = 0;\r
bool loop = true;\r
+ int c = PeekChar ();\r
+ if (!XmlChar.IsFirstNameChar (c) || !XmlChar.IsNCNameChar (c))\r
+ throw new RelaxngException (String.Format ("Invalid NCName start character: {0}", c));\r
do {\r
- int c = PeekChar ();\r
+ c = PeekChar ();\r
switch (c) {\r
case -1:\r
case ' ':\r
loop = false;\r
break;\r
default:\r
- if (!IsTokenContinuable (c)) {\r
- if (c == ':') {\r
- if (prefixName != null)\r
- throw new RelaxngException ("Invalid colon was found.");\r
- prefixName = new string (nameBuffer, 0, index);\r
- }\r
+ if (!XmlChar.IsNCNameChar (c)) {\r
loop = false;\r
break;\r
}\r
return s;\r
}\r
\r
- private bool IsTokenContinuable (int c)\r
- {\r
- switch (c) {\r
- case '=':\r
- case ':':\r
- case ',':\r
- case '{':\r
- case '}':\r
- case '(':\r
- case ')':\r
- case '[':\r
- case ']':\r
- case '&':\r
- case '|':\r
- case '?':\r
- case '*':\r
- case '\\':\r
- case '+':\r
- case '-':\r
- case '>':\r
- case '#':\r
- case '\'':\r
- case '\"':\r
- return false;\r
- }\r
- return true;\r
- }\r
-\r
- private int ParseToken ()\r
+ private int ParseToken (bool backslashed)\r
{\r
SkipWhitespaces ();\r
int c = ReadChar ();\r
return Token.EOF;\r
case '=':\r
return Token.Equal;\r
- case ':':\r
- // return CName\r
- if (prefixName == null)\r
- throw new RelaxngException ("Invalid character ':' was found.");\r
- if (PeekChar () == '*') {\r
- ReadChar ();\r
- tokenValue = prefixName;\r
- prefixName = null;\r
- return Token.NsName;\r
- }\r
- tokenValue = prefixName + ":" + ReadOneToken ();\r
- prefixName = null;\r
- return Token.CName;\r
case '~':\r
return Token.Tilde;\r
case ',':\r
// See also ':' for NsName\r
return Token.Asterisk;\r
case '\\':\r
- return Token.BackSlash;\r
+ if (backslashed)\r
+ return Token.BackSlash;\r
+ return ParseToken (true);\r
case '+':\r
return Token.Plus;\r
case '-':\r
// if (ReadChar () != '#')\r
// throw new RelaxngException ("Invalid character after '#'.");\r
tokenValue = ReadLine ();\r
- return Token.Documentation;\r
+// return Token.Documentation;\r
+ return ParseToken (false);\r
case '\'':
case '\"':
- name = ReadQuoted ((char) c);
+ // One quote opens a normal literal, three open a triple-quoted one,
+ // and exactly two form an empty literal.
+ if (PeekChar () != c)
+ name = ReadQuoted ((char) c);
+ else {
+ ReadChar ();
+ if (PeekChar () == c) {
+ ReadChar ();
+ name = ReadTripleQuoted ((char) c);
+ }
+ else // '' or "": keep the triple-quoted result above from being overwritten
+ name = String.Empty;
+ }
tokenValue = name;
return Token.LiteralSegment;
default:\r
+ if (!XmlChar.IsNCNameChar (c))\r
+ throw new RelaxngException ("Invalid NCName character.");\r
peekChar = c;\r
- name = ReadOneToken ();\r
- if (prefixName != null)\r
- return ParseToken ();\r
+ name = ReadOneName ();\r
+ if (PeekChar () == ':') {\r
+ ReadChar ();\r
+ if (PeekChar () == '*') {\r
+ ReadChar ();\r
+ tokenValue = name;\r
+ return Token.NsName;\r
+ }\r
+ tokenValue = name + ":" + ReadOneName ();\r
+ return Token.CName;\r
+\r
+ }\r
tokenValue = name;\r
+ if (backslashed)\r
+ return Token.NCName;\r
switch (name) {\r
case "attribute":\r
isElement = false;\r
case "token":\r
return Token.KeywordToken;\r
default:\r
- return Token.NCNameButKeyword;\r
+ return Token.NCName;\r
}\r
}\r
}\r