updating to the latest module.
[mono.git] / mcs / class / Commons.Xml.Relaxng / Commons.Xml.Relaxng.Rnc / RncTokenizer.cs
index 59a269c8de6b05650501150264e3d2b4e29a8f2d..747364a5935f424f750ff00e3ec8a68439fe590d 100755 (executable)
@@ -44,13 +44,15 @@ namespace Commons.Xml.Relaxng.Rnc
                int currentToken;\r
                object tokenValue;\r
                int peekChar;\r
+               string peekString;\r
                bool isElement;\r
                bool isLiteralNsUri;\r
 \r
                int line = 1;\r
                int column;\r
+               int savedLineNumber = 1;\r
+               int savedLinePosition;\r
                bool nextIncrementLine;\r
-               string prefixName;\r
 \r
                public RncTokenizer (TextReader source)\r
                {\r
@@ -62,11 +64,11 @@ namespace Commons.Xml.Relaxng.Rnc
                }\r
 \r
                public int Line {\r
-                       get { return line; }\r
+                       get { return savedLineNumber; }\r
                }\r
 \r
                public int Column {\r
-                       get { return column; }\r
+                       get { return savedLinePosition; }\r
                }\r
 \r
                // jay interface implementation\r
@@ -78,10 +80,10 @@ namespace Commons.Xml.Relaxng.Rnc
 \r
                public bool advance ()\r
                {\r
-                       if (prefixName != null)\r
-                               throw new RelaxngException ("Invalid prefix was found.");\r
                        tokenValue = null;\r
-                       currentToken = ParseToken ();\r
+                       currentToken = ParseToken (false);\r
+                       savedLineNumber = line;\r
+                       savedLinePosition = column;\r
                        return currentToken != Token.EOF;\r
                }\r
 \r
@@ -92,10 +94,85 @@ namespace Commons.Xml.Relaxng.Rnc
 \r
                // private methods\r
 \r
+               private int ReadEscapedHexNumber (int current)\r
+               {\r
+                       int i = source.Read ();\r
+                       switch (i) {\r
+                       case '0':\r
+                       case '1':\r
+                       case '2':\r
+                       case '3':\r
+                       case '4':\r
+                       case '5':\r
+                       case '6':\r
+                       case '7':\r
+                       case '8':\r
+                       case '9':\r
+                               current = current * 16 + (i - '0');\r
+                               return ReadEscapedHexNumber (current);\r
+                       case 'A':\r
+                       case 'B':\r
+                       case 'C':\r
+                       case 'D':\r
+                       case 'E':\r
+                       case 'F':\r
+                               current = current * 16 + (i - 'A') + 10;\r
+                               return ReadEscapedHexNumber (current);\r
+                       case 'a':\r
+                       case 'b':\r
+                       case 'c':\r
+                       case 'd':\r
+                       case 'e':\r
+                       case 'f':\r
+                               current = current * 16 + (i - 'a' + 10);\r
+                               return ReadEscapedHexNumber (current);\r
+                       }\r
+                       peekChar = i;\r
+                       return current;\r
+               }\r
+\r
+               private int ReadFromStream ()\r
+               {\r
+                       int ret = source.Read ();\r
+                       if (ret != '\\')\r
+                               return ret;\r
+                       ret = source.Read ();\r
+                       switch (ret) {\r
+                       case 'x':\r
+                               int tmp;\r
+                               int xcount = 0;\r
+                               do {\r
+                                       xcount++;\r
+                                       tmp = source.Read ();\r
+                               } while (tmp == 'x');\r
+                               if (tmp != '{') {\r
+                                       peekString = new string ('x', xcount);\r
+                                       if (tmp >= 0)\r
+                                               peekString += (char) tmp;\r
+                                       return '\\';\r
+                               }\r
+                               ret = ReadEscapedHexNumber (0);\r
+                               if (peekChar != '}')\r
+                                       break;\r
+                               peekChar = 0;\r
+                               return ret;\r
+                       }\r
+                       peekString = new string ((char) ret, 1);\r
+                       return '\\';\r
+               }\r
+\r
                private int PeekChar ()\r
                {\r
-                       if (peekChar == 0)\r
-                               peekChar = source.Read ();\r
+                       if (peekChar == 0) {\r
+                               if (peekString != null) {\r
+                                       peekChar = peekString [0];\r
+                                       peekString = peekString.Length == 1 ?\r
+                                               null : peekString.Substring (1);\r
+                               }\r
+                               else\r
+                                       peekChar = ReadFromStream ();\r
+                       }\r
+\r
                        return peekChar;\r
                }\r
 \r
@@ -106,8 +183,13 @@ namespace Commons.Xml.Relaxng.Rnc
                                ret = peekChar;\r
                                peekChar = 0;\r
                        }\r
+                       else if (peekString != null) {\r
+                               ret = peekString [0];\r
+                               peekString = peekString.Length == 1 ?\r
+                                       null : peekString.Substring (1);\r
+                       }\r
                        else\r
-                               ret = source.Read ();\r
+                               ret = ReadFromStream ();\r
 \r
                        if (nextIncrementLine) {\r
                                line++;\r
@@ -146,25 +228,77 @@ namespace Commons.Xml.Relaxng.Rnc
 \r
                char [] nameBuffer = new char [30];\r
 \r
-               // TODO: parse three quoted\r
                private string ReadQuoted (char quoteChar)\r
                {\r
                        int index = 0;\r
                        bool loop = true;\r
-                       do {\r
+                       while (loop) {\r
                                int c = ReadChar ();\r
                                switch (c) {\r
                                case -1:\r
+                               case '\'':\r
                                case '\"':\r
+                                       if (quoteChar != c)\r
+                                               goto default;\r
                                        loop = false;\r
                                        break;\r
                                default:\r
-                                       if (nameBuffer.Length == index) {\r
-                                               char [] arr = new char [index * 2];\r
-                                               Array.Copy (nameBuffer, arr, index);\r
-                                               nameBuffer = arr;\r
+                                       if (c < 0)\r
+                                               throw new RelaxngException ("Unterminated quoted literal.");\r
+                                       if (XmlChar.IsInvalid (c))\r
+                                               throw new RelaxngException ("Invalid character in literal.");\r
+                                       AppendNameChar (c, ref index);\r
+                                       break;\r
+                               }\r
+                       }\r
+\r
+                       return new string (nameBuffer, 0, index);\r
+               }\r
+\r
+               private void AppendNameChar (int c, ref int index)\r
+               {\r
+                       if (nameBuffer.Length == index) {\r
+                               char [] arr = new char [index * 2];\r
+                               Array.Copy (nameBuffer, arr, index);\r
+                               nameBuffer = arr;\r
+                       }\r
+                       nameBuffer [index++] = (char) c;\r
+               }\r
+\r
+               private string ReadTripleQuoted (char quoteChar)\r
+               {\r
+                       int index = 0;\r
+                       bool loop = true;\r
+                       do {\r
+                               int c = ReadChar ();\r
+                               switch (c) {\r
+                               case -1:\r
+                               case '\'':\r
+                               case '\"':\r
+                                       // 1\r
+                                       if (quoteChar != c)\r
+                                               goto default;\r
+                                       // 2\r
+                                       if ((c = PeekChar ()) != quoteChar) {\r
+                                               AppendNameChar (quoteChar, ref index);\r
+                                               goto default;\r
                                        }\r
-                                       nameBuffer [index++] = (char) c;\r
+                                       ReadChar ();\r
+                                       // 3\r
+                                       if ((c = PeekChar ()) == quoteChar) {\r
+                                               ReadChar ();\r
+                                               loop = false;\r
+                                               break;\r
+                                       }\r
+                                       AppendNameChar (quoteChar, ref index);\r
+                                       AppendNameChar (quoteChar, ref index);\r
+                                       break;\r
+                               default:\r
+                                       if (c < 0)\r
+                                               throw new RelaxngException ("Unterminated triple-quoted literal.");\r
+                                       if (XmlChar.IsInvalid (c))\r
+                                               throw new RelaxngException ("Invalid character in literal.");\r
+                                       AppendNameChar (c, ref index);\r
                                        break;\r
                                }\r
                        } while (loop);\r
@@ -172,12 +306,15 @@ namespace Commons.Xml.Relaxng.Rnc
                        return new string (nameBuffer, 0, index);\r
                }\r
 \r
-               private string ReadOneToken ()\r
+               private string ReadOneName ()\r
                {\r
                        int index = 0;\r
                        bool loop = true;\r
+                       int c = PeekChar ();\r
+                       if (!XmlChar.IsFirstNameChar (c) || !XmlChar.IsNCNameChar (c))\r
+                               throw new RelaxngException (String.Format ("Invalid NCName start character: {0}", c));\r
                        do {\r
-                               int c = PeekChar ();\r
+                               c = PeekChar ();\r
                                switch (c) {\r
                                case -1:\r
                                case ' ':\r
@@ -188,12 +325,7 @@ namespace Commons.Xml.Relaxng.Rnc
                                        loop = false;\r
                                        break;\r
                                default:\r
-                                       if (!IsTokenContinuable (c)) {\r
-                                               if (c == ':') {\r
-                                                       if (prefixName != null)\r
-                                                               throw new RelaxngException ("Invalid colon was found.");\r
-                                                       prefixName = new string (nameBuffer, 0, index);\r
-                                               }\r
+                                       if (!XmlChar.IsNCNameChar (c)) {\r
                                                loop = false;\r
                                                break;\r
                                        }\r
@@ -220,35 +352,7 @@ namespace Commons.Xml.Relaxng.Rnc
                        return s;\r
                }\r
 \r
-               private bool IsTokenContinuable (int c)\r
-               {\r
-                       switch (c) {\r
-                       case '=':\r
-                       case ':':\r
-                       case ',':\r
-                       case '{':\r
-                       case '}':\r
-                       case '(':\r
-                       case ')':\r
-                       case '[':\r
-                       case ']':\r
-                       case '&':\r
-                       case '|':\r
-                       case '?':\r
-                       case '*':\r
-                       case '\\':\r
-                       case '+':\r
-                       case '-':\r
-                       case '>':\r
-                       case '#':\r
-                       case '\'':\r
-                       case '\"':\r
-                               return false;\r
-                       }\r
-                       return true;\r
-               }\r
-\r
-               private int ParseToken ()\r
+               private int ParseToken (bool backslashed)\r
                {\r
                        SkipWhitespaces ();\r
                        int c = ReadChar ();\r
@@ -258,19 +362,6 @@ namespace Commons.Xml.Relaxng.Rnc
                                return Token.EOF;\r
                        case '=':\r
                                return Token.Equal;\r
-                       case ':':\r
-                               // return CName\r
-                               if (prefixName == null)\r
-                                       throw new RelaxngException ("Invalid character ':' was found.");\r
-                               if (PeekChar () == '*') {\r
-                                       ReadChar ();\r
-                                       tokenValue = prefixName;\r
-                                       prefixName = null;\r
-                                       return Token.NsName;\r
-                               }\r
-                               tokenValue = prefixName + ":" + ReadOneToken ();\r
-                               prefixName = null;\r
-                               return Token.CName;\r
                        case '~':\r
                                return Token.Tilde;\r
                        case ',':\r
@@ -303,7 +394,9 @@ namespace Commons.Xml.Relaxng.Rnc
                                // See also ':' for NsName\r
                                return Token.Asterisk;\r
                        case '\\':\r
-                               return Token.BackSlash;\r
+                               if (backslashed)\r
+                                       return Token.BackSlash;\r
+                               return ParseToken (true);\r
                        case '+':\r
                                return Token.Plus;\r
                        case '-':\r
@@ -320,18 +413,41 @@ namespace Commons.Xml.Relaxng.Rnc
 //                             if (ReadChar () != '#')\r
 //                                     throw new RelaxngException ("Invalid character after '#'.");\r
                                tokenValue = ReadLine ();\r
-                               return Token.Documentation;\r
+//                             return Token.Documentation;\r
+                               return ParseToken (false);\r
                        case '\'':\r
                        case '\"':\r
-                               name = ReadQuoted ((char) c);\r
+                               if (PeekChar () != c)\r
+                                       name = ReadQuoted ((char) c);\r
+                               else {\r
+                                       ReadChar ();\r
+                                       if (PeekChar () == c) {\r
+                                               ReadChar ();\r
+                                               name = ReadTripleQuoted ((char) c);\r
+                                       } // else '' or ""\r
+                                       name = String.Empty;\r
+                               }\r
                                tokenValue = name;\r
                                return Token.LiteralSegment;\r
                        default:\r
+                               if (!XmlChar.IsNCNameChar (c))\r
+                                       throw new RelaxngException ("Invalid NCName character.");\r
                                peekChar = c;\r
-                               name = ReadOneToken ();\r
-                               if (prefixName != null)\r
-                                       return ParseToken ();\r
+                               name = ReadOneName ();\r
+                               if (PeekChar () == ':') {\r
+                                       ReadChar ();\r
+                                       if (PeekChar () == '*') {\r
+                                               ReadChar ();\r
+                                               tokenValue = name;\r
+                                               return Token.NsName;\r
+                                       }\r
+                                       tokenValue = name + ":" + ReadOneName ();\r
+                                       return Token.CName;\r
+\r
+                               }\r
                                tokenValue = name;\r
+                               if (backslashed)\r
+                                       return Token.NCName;\r
                                switch (name) {\r
                                case "attribute":\r
                                        isElement = false;\r
@@ -374,7 +490,7 @@ namespace Commons.Xml.Relaxng.Rnc
                                case "token":\r
                                        return Token.KeywordToken;\r
                                default:\r
-                                       return Token.NCNameButKeyword;\r
+                                       return Token.NCName;\r
                                }\r
                        }\r
                }\r