2 // This file is not part of the build. It is used to generate the character related
3 // opcodes in RxInterpreter.cs.
// Describes one character-matching opcode for which Main emits `case` blocks:
//   name - opcode name; Main combines it with the "No" / "IgnoreCase" / "Reverse"
//          affixes to form the RxOp case label.
//   body - optional setup statements as source text; the empty string means none.
//   cond - source text of the boolean test applied to the current character `c`.
//   len  - source text of the amount added to `pc` to step past the instruction.
9 public string name, body, cond, len;
// Takes one value per field above. The constructor body is not visible in this
// excerpt; presumably it stores each argument in the same-named field - confirm.
11 public Op (string name, string body, string cond, string len) {
// Generator program: prints the source text of the character-related opcode cases
// for RxInterpreter.cs to standard output.
19 public class GenInterp
// Added to the `indent` argument of every write () call to size its indentation loop.
21 public static int base_indent;
// Prints one line of generated code to standard output.
//   indent - indentation level relative to base_indent.
//   format - composite format string handed to Console.WriteLine.
//   args   - values substituted for the {0}, {1}, ... placeholders in format.
// Callers in Main write literal braces doubled ("{{"), as composite formatting requires.
23 public static void write (int indent, string format, params string[] args) {
// Runs base_indent + indent times. The loop body is not visible in this excerpt;
// presumably it prints one indentation unit per iteration - confirm.
24 for (int i = 0; i < base_indent + indent; ++i)
26 Console.WriteLine (format, args);
// Entry point. Prints a "GENERATED" banner, then one `case RxOp.<...>:` block per
// opcode/modifier combination, then an "END OF GENERATED CODE" marker.
// NOTE(review): this excerpt omits several original lines (else branches, closing
// braces, comment delimiters); the comments below describe only what is visible.
29 public static void Main () {
32 write (0, "// GENERATED BY gen-interp.cs, DO NOT MODIFY");
// Opcode table. Each entry is (name, setup statements, match condition, length).
// The condition and length texts read operands from `program` relative to `pc`.
34 Op[] ops = new Op [] {
36 new Op ("Char", "", "(c == program [pc + 1])", "2"),
37 new Op ("Range", "", "(c >= program [pc + 1] && c <= program [pc + 2])", "3"),
// The Unicode* entries build each operand from two consecutive program entries,
// low part first: program [pc + k] | (program [pc + k + 1] << 8).
38 new Op ("UnicodeRange", "", "(c >= (program [pc + 1] | ((int)program [pc + 2] << 8))) && (c <= (program [pc + 3] | ((int)program [pc + 4] << 8)))", "5"),
39 new Op ("UnicodeChar", "", "(c == (program [pc + 1] | ((int)program [pc + 2] << 8)))", "3"),
// Category* entries: most take no operand (length 1); CategoryUnicode and
// CategoryGeneral read one operand at pc + 1 (length 2).
40 new Op ("CategoryAny", "", @"(c != '\n')", "1"),
41 new Op ("CategoryAnySingleline", "", "true", "1"),
42 new Op ("CategoryWord", "", "(Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation)", "1"),
43 new Op ("CategoryDigit", "", "(Char.IsDigit (c))", "1"),
44 new Op ("CategoryWhiteSpace", "", "(Char.IsWhiteSpace (c))", "1"),
45 new Op ("CategoryEcmaWord", "", "('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_')", "1"),
46 new Op ("CategoryEcmaWhiteSpace", "", @"(c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v')", "1"),
47 new Op ("CategoryUnicodeSpecials", "", @"('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD')", "1"),
48 new Op ("CategoryUnicode", "", "(Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1])", "2"),
49 new Op ("CategoryGeneral", "", "(CategoryUtils.IsCategory ((Category)program [pc + 1], c))", "2"),
// Bitmap entries: the setup text computes c2 = c minus the base operand and loads
// `length`; the condition tests bit (c2 & 7) of program entry (c2 >> 3) past the
// header, after checking 0 <= c2 < length * 8. The instruction length is the
// header size plus the bitmap length operand.
50 new Op ("Bitmap", "int c2 = (int)c; c2 -= program [pc + 1]; length = program [pc + 2];", "(c2 >= 0 && c2 < (length << 3) && (program [pc + 3 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0)", "3 + program [pc + 2]"),
51 new Op ("UnicodeBitmap", "int c2 = (int)c; c2 -= (program [pc + 1] | ((int)program [pc + 2] << 8)); length = (program [pc + 3] | ((int)program [pc + 4] << 8));", "(c2 >= 0 && c2 < (length << 3) && (program [pc + 5 + (c2 >> 3)] & (1 << (c2 & 0x7))) != 0)", "5 + (program [pc + 3] | ((int)program [pc + 4] << 8))")
// Enumerate all 2 x 2 x 2 combinations of the three modifier flags.
54 for (int i1 = 0; i1 < 2; ++i1) {
55 for (int i2 = 0; i2 < 2; ++i2) {
56 for (int i3 = 0; i3 < 2; ++i3) {
// reverse -> "Reverse" suffix on the case label.
57 bool reverse = (i1 == 1);
// revert  -> "No" prefix on the case label and a '!' in front of the condition.
58 bool revert = (i2 == 1);
// ignore  -> "IgnoreCase" infix on the case label.
59 bool ignore = (i3 == 1);
61 foreach (Op op in ops) {
// Category ops combined with `ignore` get special handling; the statement guarded
// by this `if` is not visible in this excerpt (presumably it skips the op - confirm).
62 if (op.name.StartsWith ("Category") && ignore)
63 // These have no IgnoreCase versions
// Only for the first flag combination (all flags off): emit a banner comment
// naming the op.
66 if (i1 == 0 && i2 == 0 && i3 == 0) {
68 write (0, "/* {0} */", op.name);
// Case label layout: [No]<name>[IgnoreCase][Reverse].
72 write (0, "case RxOp.{0}{1}{2}{3}:", revert ? "No" : "", op.name, ignore ? "IgnoreCase" : "", reverse ? "Reverse" : "");
// Bounds check of the generated code. The code choosing between the two forms is
// not visible here; presumably `strpos > 0` is the Reverse form - confirm.
74 write (1, "if (strpos > 0) {{");
76 write (1, "if (strpos < string_end) {{");
// Load the character under test: str [strpos - 1] or str [strpos], either as-is
// or passed through Char.ToLower. The selecting conditions are not visible here
// (presumably `reverse` and `ignore` respectively - confirm).
79 write (2, "char c = str [strpos - 1];");
81 write (2, "char c = str [strpos];");
84 write (2, "char c = Char.ToLower (str [strpos - 1]);");
86 write (2, "char c = Char.ToLower (str [strpos]);");
// Ops with non-empty setup text get extra handling here; the guarded statement is
// not visible in this excerpt (presumably it emits op.body - confirm).
88 if (op.body != String.Empty)
// Emit the test. The condition text is wrapped in its own parentheses so that the
// optional '!' negates the whole expression.
90 write (2, "if ({0}({1})) {{", revert ? "!" : "", op.cond);
// First success path: move strpos by one (forward or backward), jump to
// test_char_group_passed when char_group_end != 0, otherwise step pc past the
// instruction and continue.
94 write (3, "strpos ++;");
96 write (3, "strpos --;");
97 write (3, "if (char_group_end != 0)");
98 write (4, "goto test_char_group_passed;");
99 write (3, "pc += {0};", "" + op.len);
100 write (3, "continue;");
// Second success path, explained by the original comment text that follows.
103 * If we are inside a char group, the cases are ANDed
104 * together, so we have to continue checking the
105 * other cases, and we need to increase strpos after
107 * The char group is termined by a True, hence the
// Here pc is advanced first; strpos only moves when not in a char group or when
// pc + 1 has reached char_group_end, in which case the group has passed.
110 write (3, "pc += {0};", "" + op.len);
111 write (3, "if (char_group_end == 0 || (pc + 1 == char_group_end)) {{");
113 write (4, "strpos ++;");
115 write (4, "strpos --;");
116 write (3, "if (pc + 1 == char_group_end)");
117 write (4, "goto test_char_group_passed;");
119 write (3, "continue;");
// First failure path: return false when char_group_end == 0, otherwise step pc
// past the instruction and continue.
126 write (1, "if (char_group_end == 0)");
127 write (2, "return false;");
128 write (1, "pc += {0};", "" + op.len);
129 write (1, "continue;");
// Second failure path: unconditional return false.
131 /* Fail both inside and outside a char group */
132 write (1, "return false;");
// NOTE(review): the remaining lines up to the final write () call use a COND
// placeholder rather than write () calls; they look like an illustrative template
// of the generated code kept in a comment whose delimiters are not visible in this
// excerpt - confirm against the full file.
136 if (strpos < string_end && (COND (str [strpos]))) {
139 if (char_group_end != 0)
140 goto test_char_group_passed;
145 * If we are inside a char group, the cases are ANDed
146 * together, so we have to continue checking the
147 * other cases, and we need to increase strpos after
149 * The char group is termined by a True, hence the
151 * FIXME: Optimize this.
154 if (char_group_end == 0 || (pc + 1 == char_group_end))
160 if (char_group_end == 0)
165 /* Fail both inside and outside a char group */
170 // Same as above, but use:
171 // - strpos > 0 instead of strpos < string_len
172 // - COND (str [strpos - 1]) instead of COND (str [strpos])
173 // - strpos -- instead of strpos ++
// Closing marker of the generated section.
182 write (0, "// END OF GENERATED CODE");