//
// This file is not part of the build. It is used to generate the character related
// opcodes in RxInterpreter.cs.
//

using System;

// Describes one character-matching opcode for which interpreter cases are generated.
class Op {
	// Base opcode name; the generator wraps it with the optional "No" prefix and the
	// "IgnoreCase"/"Reverse" suffixes to form the RxOp member named in the case label.
	public string name;

	// Text of a C# boolean expression that is pasted verbatim into the generated
	// test. The expressions in the table are written in terms of c, program and pc.
	public string cond;

	// Amount the generated code adds to pc once the instruction has been handled.
	public int len;

	public Op (string name, string cond, int len) {
		this.name = name;
		this.cond = cond;
		this.len = len;
	}
}

// Writes the generated "case RxOp.*:" bodies to standard output.
public class GenInterp {

	// Number of tabs prepended to every emitted line, in addition to the per-line indent.
	public static int base_indent;

	// Emits one line: (base_indent + indent) tabs followed by the formatted text.
	// format is a composite format string, so literal braces in the generated code
	// must be written doubled ("{{" and "}}").
	public static void write (int indent, string format, params string[] args) {
		for (int i = 0; i < base_indent + indent; ++i)
			Console.Write ("\t");
		Console.WriteLine (format, args);
	}

	// Entry point: emits every (reverse, negated, ignore-case) variant of every opcode
	// in the table, bracketed by the GENERATED/END marker comments.
	public static void Main () {
		base_indent = 4;
		write (0, "// GENERATED BY gen-interp.cs, DO NOT MODIFY");

		Op[] ops = new Op [] {
			new Op ("Char", "c == program [pc + 1]", 2),
			new Op ("Range", "(c >= program [pc + 1] && c <= program [pc + 2])", 3),
			new Op ("UnicodeRange", "(c >= (program [pc + 1] | (program [pc + 2] << 8))) && (c <= (program [pc + 3] | (program [pc + 4] << 8)))", 5),
			new Op ("UnicodeChar", "(c == (program [pc + 1] | (program [pc + 2] << 8)))", 3),
			new Op ("CategoryAny", @"(c != '\n')", 1),
			new Op ("CategoryAnySingleline", "true", 1),
			new Op ("CategoryWord", "(Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation)", 1),
			new Op ("CategoryDigit", "(Char.IsDigit (c))", 1),
			new Op ("CategoryWhiteSpace", "(Char.IsWhiteSpace (c))", 1),
			new Op ("CategoryEcmaWord", "('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_')", 1),
			new Op ("CategoryEcmaWhiteSpace", @"(c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v')", 1),
			new Op ("CategoryUnicodeSpecials", @"('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD')", 1),
			new Op ("CategoryUnicode", "(Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1])", 2),
			new Op ("CategoryGeneral", "(CategoryUtils.IsCategory ((Category)program [pc + 1], c))", 2),
		};

		for (int i1 = 0; i1 < 2; ++i1) {
			for (int i2 = 0; i2 < 2; ++i2) {
				for (int i3 = 0; i3 < 2; ++i3) {
					bool reverse = (i1 == 1);
					bool revert = (i2 == 1);
					bool ignore = (i3 == 1);
					// The "/* name */" banner is only emitted in front of the plain variants.
					bool plain = !reverse && !revert && !ignore;

					foreach (Op op in ops) {
						// These have no IgnoreCase versions.
						// Ordinal: this is an identifier prefix test, not linguistic text.
						if (ignore && op.name.StartsWith ("Category", StringComparison.Ordinal))
							continue;

						if (plain) {
							write (0, "");
							write (0, "/* {0} */", op.name);
							write (0, "");
						}

						write_case (op, reverse, revert, ignore);
					}
				}
			}
		}

		write (0, "");
		write (0, "// END OF GENERATED CODE");
	}

	// Emits the complete case body for one variant of one opcode.
	//
	//   reverse - read str [strpos - 1] and step strpos backwards instead of forwards
	//   revert  - negated ("No...") variant: the condition is wrapped in !(...)
	//   ignore  - IgnoreCase variant: the character is passed through Char.ToLower
	//
	// Shape of the generated code (forward direction shown):
	//
	//   non-negated:
	//       if (strpos < string_end) {
	//           char c = str [strpos];
	//           if ((COND)) {
	//               strpos ++;
	//               if (char_group_end != 0)
	//                   goto test_char_group_passed;
	//               pc += LEN;
	//               continue;
	//           }
	//       }
	//       if (char_group_end == 0)
	//           return false;
	//       pc += LEN;
	//       continue;
	//
	//   negated:
	//       if (strpos < string_end) {
	//           char c = str [strpos];
	//           if (!(COND)) {
	//               pc += LEN;
	//               if (char_group_end == 0 || (pc + 1 == char_group_end)) {
	//                   strpos ++;
	//                   if (pc + 1 == char_group_end)
	//                       goto test_char_group_passed;
	//               }
	//               continue;
	//           }
	//       }
	//       return false;
	//
	// The reverse variants are the same, but use:
	// - strpos > 0 instead of strpos < string_end
	// - COND (str [strpos - 1]) instead of COND (str [strpos])
	// - strpos -- instead of strpos ++
	//
	// FIXME: Optimize the negated case.
	static void write_case (Op op, bool reverse, bool revert, bool ignore) {
		string len = op.len.ToString ();
		string step = reverse ? "strpos --;" : "strpos ++;";
		string current = reverse ? "str [strpos - 1]" : "str [strpos]";

		write (0, "case RxOp.{0}{1}{2}{3}:", revert ? "No" : "", op.name, ignore ? "IgnoreCase" : "", reverse ? "Reverse" : "");

		if (reverse)
			write (1, "if (strpos > 0) {{");
		else
			write (1, "if (strpos < string_end) {{");

		if (ignore)
			write (2, "char c = Char.ToLower ({0});", current);
		else
			write (2, "char c = {0};", current);

		write (2, "if ({0}({1})) {{", revert ? "!" : "", op.cond);

		// TRUE case
		if (!revert) {
			write (3, "{0}", step);
			write (3, "if (char_group_end != 0)");
			write (4, "goto test_char_group_passed;");
			write (3, "pc += {0};", len);
			write (3, "continue;");
		} else {
			/*
			 * If we are inside a char group, the cases are ANDed
			 * together, so we have to continue checking the
			 * other cases, and we need to increase strpos after
			 * the final check.
			 * The char group is terminated by a True, hence the
			 * + 1 below.
			 */
			write (3, "pc += {0};", len);
			write (3, "if (char_group_end == 0 || (pc + 1 == char_group_end)) {{");
			write (4, "{0}", step);
			// The inner test lives inside the braces opened above, so it is
			// emitted one level deeper than the enclosing if.
			write (4, "if (pc + 1 == char_group_end)");
			write (5, "goto test_char_group_passed;");
			write (3, "}}");
			write (3, "continue;");
		}

		write (2, "}}");
		write (1, "}}");

		// FALSE case
		if (!revert) {
			write (1, "if (char_group_end == 0)");
			write (2, "return false;");
			write (1, "pc += {0};", len);
			write (1, "continue;");
		} else {
			/* Fail both inside and outside a char group */
			write (1, "return false;");
		}
	}
}