}
public static string Unescape (string str) {
+ if (str.IndexOf ('\\') == -1) // fast path: the input contains no backslash at all
+ return str; // hand back the same string instance without allocating a Parser
return new Parser ().ParseString (str);
}
}
}
- public IDictionary GetMapping () {
+ // Builds the table that maps each capturing group's name (or, for groups
+ // without a name, its number rendered as text) to the group's number.
+ // The key "0" is always present and maps to 0.
+ // Throws InvalidOperationException when one name is bound to two different numbers.
+ public IDictionary GetMapping ()
+ {
Hashtable mapping = new Hashtable ();
- Hashtable numbers = new Hashtable ();
int end = caps.Count;
mapping.Add ("0", 0);
for (int i = 0; i < end; i++) {
CapturingGroup group = (CapturingGroup) caps [i];
- if (group.Name != null && !mapping.Contains (group.Name)) {
+ if (group.Name != null) {
+ if (mapping.Contains (group.Name)) { // name already registered by an earlier group
+ if ((int) mapping [group.Name] != group.Number)
+ throw new InvalidOperationException ("invalid state"); // specific exception type instead of the SystemException base class
+ continue; // same name and same number: nothing new to record
+ }
mapping.Add (group.Name, group.Number);
- numbers.Add (group.Number, group.Number);
+ } else {
+ mapping.Add (group.Number.ToString (), group.Number); // unnamed group: keyed by its number as text
}
}
- for (int i = 1; i < end; i++) {
- if (numbers [i] == null)
- mapping.Add (i.ToString (), i);
- }
-
return mapping;
}
switch (k) {
case '?': min = 0; max = 1; break;
- case '*': min = 0; max = 0xffff; break;
- case '+': min = 1; max = 0xffff; break;
+ case '*': min = 0; max = 0x7fffffff; break;
+ case '+': min = 1; max = 0x7fffffff; break;
}
} else if (k == '{' && ptr + 1 < pattern.Length) {
int saved_ptr = ptr;
}
else { // capture test
++ ptr;
- asn = new CaptureAssertion ();
+ asn = new CaptureAssertion (new Literal (name, IsIgnoreCase (options)));
refs.Add (asn, name);
}
}
private Expression ParseCharacterClass (RegexOptions options) {
- bool negate, ecma;
+ bool negate = false;
if (pattern[ptr] == '^') {
negate = true;
++ ptr;
}
- else
- negate = false;
- ecma = IsECMAScript (options);
+ bool ecma = IsECMAScript (options);
CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));
if (pattern[ptr] == ']') {
closed = true;
break;
}
-
- if (c == '-') {
+
+ if (c == '-' && last >= 0 && !range) {
range = true;
continue;
}
if (c == '\\') {
c = ParseEscape ();
- if (c < 0) {
- // didn't recognize escape
-
- c = pattern[ptr ++];
- switch (c) {
- case 'b': c = '\b'; break;
-
- case 'd':
- cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, false);
- last = -1;
- continue;
-
- case 'w':
- cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, false);
- last = -1;
- continue;
-
- case 's':
- cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, false);
- last = -1;
- continue;
-
- case 'p':
- cls.AddCategory (ParseUnicodeCategory (), false); // ignore ecma
- last = -1;
- continue;
-
- case 'D':
- cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, true);
- last = -1;
- continue;
-
- case 'W':
- cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, true);
- last = -1;
- continue;
-
- case 'S':
- cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, true);
- last = -1;
- continue;
-
- case 'P':
- cls.AddCategory (ParseUnicodeCategory (), true);
- last = -1;
- continue;
-
- default: break; // add escaped character
- }
+ if (c >= 0)
+ goto char_recognized;
+
+ // didn't recognize escape
+ c = pattern [ptr ++];
+ switch (c) {
+ case 'b':
+ c = '\b';
+ goto char_recognized;
+
+ case 'd': case 'D':
+ cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
+ break;
+
+ case 'w': case 'W':
+ cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
+ break;
+
+ case 's': case 'S':
+ cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
+ break;
+
+ case 'p': case 'P':
+ cls.AddCategory (ParseUnicodeCategory (), c == 'P'); // ignore ecma
+ break;
+
+ default: // add escaped character
+ goto char_recognized;
}
+
+ // if the pattern looks like [a-\s] ...
+ if (range)
+ throw NewParseException ("character range cannot have category \\" + c);
+
+ last = -1;
+ continue;
}
+ char_recognized:
if (range) {
+ // if 'range' is true, we know that 'last >= 0'
if (c < last)
- throw NewParseException ("[x-y] range in reverse order.");
-
- if (last >=0 )
- cls.AddRange ((char)last, (char)c);
- else {
- cls.AddCharacter ((char)c);
- cls.AddCharacter ('-');
- }
-
- range = false;
+ throw NewParseException ("[" + last + "-" + c + "] range in reverse order.");
+ cls.AddRange ((char)last, (char)c);
last = -1;
+ range = false;
+ continue;
}
- else {
- cls.AddCharacter ((char)c);
- last = c;
- }
+
+ cls.AddCharacter ((char)c);
+ last = c;
}
if (!closed)
/* check bounds and ordering */
- if (n >= 0xffff || m >= 0xffff)
- throw NewParseException ("Illegal {x, y} - maximum of 65535.");
+ if (n > 0x7fffffff || m > 0x7fffffff)
+ throw NewParseException ("Illegal {x, y} - maximum of 2147483647.");
if (m >= 0 && m < n)
throw NewParseException ("Illegal {x, y} with x > y.");
if (m > 0)
max = m;
else
- max = 0xffff;
+ max = 0x7fffffff;
return true;
}
}
private void ConsumeWhitespace (bool ignore) {
- while (true) {
- if (ptr >= pattern.Length)
- break;
-
+ while (ptr < pattern.Length) {
if (pattern[ptr] == '(') {
if (ptr + 3 >= pattern.Length)
return;
return;
ptr += 3;
- while (pattern[ptr ++] != ')')
+ while (ptr < pattern.Length && pattern[ptr ++] != ')')
/* ignore */ ;
}
else if (ignore && pattern[ptr] == '#') {
foreach (Expression expr in refs.Keys) {
string name = (string)refs[expr];
if (!dict.Contains (name)) {
+ if (expr is CaptureAssertion && !Char.IsDigit (name [0]))
+ continue;
throw NewParseException ("Reference to undefined group " +
(Char.IsDigit (name[0]) ? "number " : "name ") +
name);