1 //------------------------------------------------------------------------------
2 // <copyright file="RegexCompiler.cs" company="Microsoft">
3 // Copyright (c) Microsoft Corporation. All rights reserved.
5 //------------------------------------------------------------------------------
7 // The RegexCompiler class is internal to the Regex package.
8 // It translates a block of RegexCode to MSIL, and creates a
9 // subclass of the RegexRunner type.
12 #if !SILVERLIGHT && !FULL_AOT_RUNTIME
14 namespace System.Text.RegularExpressions {
16 using System.Collections;
17 using System.Collections.Generic;
18 using System.Threading;
19 using System.Reflection;
20 using System.Reflection.Emit;
21 using System.Security;
22 using System.Security.Policy;
23 using System.Security.Permissions;
24 using System.Diagnostics;
25 using System.Diagnostics.CodeAnalysis;
26 using System.Globalization;
27 using System.Runtime.Versioning;
32 * Because dynamic modules are expensive and not thread-safe, we create
33 * one dynamic module per-thread, and cache as much information about it
36 * While we're at it, we just create one RegexCompiler per thread
37 * as well, and have RegexCompiler inherit from RegexDynamicModule.
39 internal abstract class RegexCompiler {
40 // fields that never change (making them static saves about 6% overall running time)
42 internal static FieldInfo _textbegF;
43 internal static FieldInfo _textendF;
44 internal static FieldInfo _textstartF;
45 internal static FieldInfo _textposF;
46 internal static FieldInfo _textF;
47 internal static FieldInfo _trackposF;
48 internal static FieldInfo _trackF;
49 internal static FieldInfo _stackposF;
50 internal static FieldInfo _stackF;
51 internal static FieldInfo _trackcountF;
55 internal static MethodInfo _ensurestorageM;
56 internal static MethodInfo _captureM;
57 internal static MethodInfo _transferM;
58 internal static MethodInfo _uncaptureM;
59 internal static MethodInfo _ismatchedM;
60 internal static MethodInfo _matchlengthM;
61 internal static MethodInfo _matchindexM;
62 internal static MethodInfo _isboundaryM;
63 internal static MethodInfo _isECMABoundaryM;
64 internal static MethodInfo _chartolowerM;
65 internal static MethodInfo _getcharM;
66 internal static MethodInfo _crawlposM;
67 internal static MethodInfo _charInSetM;
68 internal static MethodInfo _getCurrentCulture;
69 internal static MethodInfo _getInvariantCulture;
70 internal static MethodInfo _checkTimeoutM;
72 internal static MethodInfo _dumpstateM;
75 internal ILGenerator _ilg;
77 // tokens representing local variables
78 internal LocalBuilder _textstartV;
79 internal LocalBuilder _textbegV;
80 internal LocalBuilder _textendV;
81 internal LocalBuilder _textposV;
82 internal LocalBuilder _textV;
83 internal LocalBuilder _trackposV;
84 internal LocalBuilder _trackV;
85 internal LocalBuilder _stackposV;
86 internal LocalBuilder _stackV;
87 internal LocalBuilder _tempV;
88 internal LocalBuilder _temp2V;
89 internal LocalBuilder _temp3V;
92 internal RegexCode _code; // the RegexCode object (used for debugging only)
93 internal int[] _codes; // the RegexCodes being translated
94 internal String[] _strings; // the stringtable associated with the RegexCodes
95 internal RegexPrefix _fcPrefix; // the possible first chars computed by RegexFCD
96 internal RegexBoyerMoore _bmPrefix; // a prefix as a boyer-moore machine
97 internal int _anchors; // the set of anchors
99 internal Label[] _labels; // a label for every operation in _codes
100 internal BacktrackNote[] _notes; // a list of the backtracking states to be generated
101 internal int _notecount; // true count of _notes (allocation grows exponentially)
102 internal int _trackcount; // count of backtracking states (used to reduce allocations)
104 internal Label _backtrack; // label for backtracking
107 internal int _regexopcode; // the current opcode being processed
108 internal int _codepos; // the current code being translated
109 internal int _backpos; // the current backtrack-note being translated
111 internal RegexOptions _options; // options
113 // special code fragments
114 internal int[] _uniquenote; // _notes indices for code that should be emitted <= once
115 internal int[] _goto; // indices for forward-jumps-through-switch (for allocations)
117 // indices for unique code fragments
118 internal const int stackpop = 0; // pop one
119 internal const int stackpop2 = 1; // pop two
120 internal const int stackpop3 = 2; // pop three
121 internal const int capback = 3; // uncapture
122 internal const int capback2 = 4; // uncapture 2
123 internal const int branchmarkback2 = 5; // back2 part of branchmark
124 internal const int lazybranchmarkback2 = 6; // back2 part of lazybranchmark
125 internal const int branchcountback2 = 7; // back2 part of branchcount
126 internal const int lazybranchcountback2 = 8; // back2 part of lazybranchcount
127 internal const int forejumpback = 9; // back part of forejump
128 internal const int uniquecount = 10;
// Type initializer: resolves, once, the reflection handles for every RegexRunner
// field and method (plus the Char/String/CultureInfo helpers) that the emitted
// IL refers to, and stores them in the static fields above.
static RegexCompiler() {
    // SECREVIEW: Regex only generates string manipulation, so this is ok.
    new ReflectionPermission(PermissionState.Unrestricted).Assert();

    try {
        // RegexRunner fields read and written by generated code
        _textbegF = RegexRunnerField("runtextbeg");
        _textendF = RegexRunnerField("runtextend");
        _textstartF = RegexRunnerField("runtextstart");
        _textposF = RegexRunnerField("runtextpos");
        _textF = RegexRunnerField("runtext");
        _trackposF = RegexRunnerField("runtrackpos");
        _trackF = RegexRunnerField("runtrack");
        _stackposF = RegexRunnerField("runstackpos");
        _stackF = RegexRunnerField("runstack");
        _trackcountF = RegexRunnerField("runtrackcount");

        // RegexRunner methods called by generated code
        _ensurestorageM = RegexRunnerMethod("EnsureStorage");
        _captureM = RegexRunnerMethod("Capture");
        _transferM = RegexRunnerMethod("TransferCapture");
        _uncaptureM = RegexRunnerMethod("Uncapture");
        _ismatchedM = RegexRunnerMethod("IsMatched");
        _matchlengthM = RegexRunnerMethod("MatchLength");
        _matchindexM = RegexRunnerMethod("MatchIndex");
        _isboundaryM = RegexRunnerMethod("IsBoundary");
        _charInSetM = RegexRunnerMethod("CharInClass");
        _isECMABoundaryM = RegexRunnerMethod("IsECMABoundary");
        _crawlposM = RegexRunnerMethod("Crawlpos");
        _checkTimeoutM = RegexRunnerMethod("CheckTimeout");

        // Framework helpers called by generated code
        _chartolowerM = typeof(Char).GetMethod("ToLower", new Type[] {typeof(Char), typeof(CultureInfo)});
        _getcharM = typeof(String).GetMethod("get_Chars", new Type[] {typeof(int)});
        _getCurrentCulture = typeof(CultureInfo).GetMethod("get_CurrentCulture");
        _getInvariantCulture = typeof(CultureInfo).GetMethod("get_InvariantCulture");

        // NOTE(review): DumpState is only used when RegexOptions.Debug is set;
        // GetMethod yields null if RegexRunner does not define it. Confirm whether
        // this lookup is meant to be build-conditional.
        _dumpstateM = RegexRunnerMethod("DumpState");
    }
    finally {
        // Drop the asserted permission even if one of the lookups throws.
        CodeAccessPermission.RevertAssert();
    }
}
// Looks up a field of RegexRunner by name, searching public and non-public,
// instance and static members alike. Returns null when no such field exists.
private static FieldInfo RegexRunnerField(String fieldname) {
    const BindingFlags anyMember = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance;
    Type runnerType = typeof(RegexRunner);

    return runnerType.GetField(fieldname, anyMember);
}
// Looks up a method of RegexRunner by name, searching public and non-public,
// instance and static members alike. Returns null when no such method exists.
private static MethodInfo RegexRunnerMethod(String methname) {
    const BindingFlags anyMember = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance;
    Type runnerType = typeof(RegexRunner);

    return runnerType.GetMethod(methname, anyMember);
}
189 * Entry point to dynamically compile a regular expression. The expression is compiled to
190 * an in-memory assembly.
// Compiles the given RegexCode into a RegexRunnerFactory using a
// RegexLWCGCompiler. Reflection permission is asserted only for the duration of
// the code generation call and is always reverted afterwards.
internal static RegexRunnerFactory Compile(RegexCode code, RegexOptions options) {
    RegexLWCGCompiler c = new RegexLWCGCompiler();
    RegexRunnerFactory factory;

    // SECREVIEW: Regex only generates string manipulation, so this is ok.
    new ReflectionPermission(PermissionState.Unrestricted).Assert();

    try {
        factory = c.FactoryInstanceFromCode(code, options);
    }
    finally {
        // Revert even when code generation throws, so the assert never outlives this call.
        CodeAccessPermission.RevertAssert();
    }

    return factory;
}
211 * Compile regular expressions into an assembly on disk.
213 [ResourceExposure(ResourceScope.Machine)]
214 [ResourceConsumption(ResourceScope.Machine)]
215 [SuppressMessage("Microsoft.Security","CA2106:SecureAsserts", Justification="[....]: SECREVIEW : Regex only generates string manipulation, so this is OK")]
216 internal static void CompileToAssembly(RegexCompilationInfo[] regexes, AssemblyName an, CustomAttributeBuilder[] attribs, String resourceFile) {
217 RegexTypeCompiler c = new RegexTypeCompiler(an, attribs, resourceFile);
219 for (int i=0; i<regexes.Length; i++) {
220 if (regexes[i] == null) {
221 throw new ArgumentNullException("regexes", SR.GetString(SR.ArgumentNull_ArrayWithNullElements));
223 String pattern = regexes[i].Pattern;
224 RegexOptions options = regexes[i].Options;
226 if (regexes[i].Namespace.Length == 0)
227 fullname = regexes[i].Name;
229 fullname = regexes[i].Namespace + "." + regexes[i].Name;
231 TimeSpan mTimeout = regexes[i].MatchTimeout;
233 RegexTree tree = RegexParser.Parse(pattern, options);
234 RegexCode code = RegexWriter.Write(tree);
239 new ReflectionPermission(PermissionState.Unrestricted).Assert();
242 factory = c.FactoryTypeFromCode(code, options, fullname);
243 c.GenerateRegexType(pattern, options, fullname, regexes[i].IsPublic, code, tree, factory, mTimeout);
246 CodeAccessPermission.RevertAssert();
255 * Keeps track of an operation that needs to be referenced in the backtrack-jump
256 * switch table, and that needs backtracking code to be emitted (if flags != 0)
// One entry of the backtrack-jump switch table: the label to jump to, the regex
// code position it belongs to, and the flags that are OR-ed into the opcode when
// the backtracking code for it is generated (flags == 0 means no backtracking
// code is emitted for this entry).
internal sealed class BacktrackNote {
    internal BacktrackNote(int flags, Label label, int codepos) {
        _codepos = codepos;
        _flags = flags;
        _label = label;
    }

    internal int _codepos;   // position in the RegexCode array this note refers to
    internal int _flags;     // backtrack flags; 0 for a plain forward-jump entry
    internal Label _label;   // IL label used as the switch-table target
}
271 * Adds a backtrack note to the list of them, and returns the index of the new
272 * note (which is also the index for the jump used by the switch table)
// Appends a backtrack note and returns its index, which is also the index of
// its entry in the backtrack switch table. The backing array starts at 16
// slots and doubles whenever it is full.
internal int AddBacktrackNote(int flags, Label l, int codepos) {
    if (_notes == null || _notecount >= _notes.Length) {
        BacktrackNote[] newnotes = new BacktrackNote[_notes == null ? 16 : _notes.Length * 2];

        // Nothing to carry over on the very first allocation.
        if (_notes != null)
            System.Array.Copy(_notes, 0, newnotes, 0, _notecount);

        _notes = newnotes;
    }

    _notes[_notecount] = new BacktrackNote(flags, l, codepos);

    // Post-increment: hand back the slot just filled, then advance the count.
    return _notecount++;
}
288 * Adds a backtrack note for the current operation; creates a new label for
289 * where the code will be, and returns the switch index.
291 internal int AddTrack() {
292 return AddTrack(RegexCode.Back);
296 * Adds a backtrack note for the current operation; creates a new label for
297 * where the code will be, and returns the switch index.
299 internal int AddTrack(int flags) {
300 return AddBacktrackNote(flags, DefineLabel(), _codepos);
304 * Adds a switchtable entry for the specified position (for the forward
305 * logic; does not cause backtracking logic to be generated)
// Returns the switch-table index for a forward jump to destpos, creating the
// entry (with flags 0, so no backtracking code is generated) the first time
// that destination is requested.
internal int AddGoto(int destpos) {
    int slot = _goto[destpos];

    if (slot == -1) {
        slot = AddBacktrackNote(0, _labels[destpos], destpos);
        _goto[destpos] = slot;
    }

    return slot;
}
315 * Adds a note for backtracking code that only needs to be generated once;
316 * if it's already marked to be generated, returns the switch index
317 * for the unique piece of code.
319 internal int AddUniqueTrack(int i) {
320 return AddUniqueTrack(i, RegexCode.Back);
324 * Adds a note for backtracking code that only needs to be generated once;
325 * if it's already marked to be generated, returns the switch index
326 * for the unique piece of code.
// Returns the switch-table index for the i-th "emit at most once" backtracking
// fragment, registering a track note with the given flags on first use.
internal int AddUniqueTrack(int i, int flags) {
    int slot = _uniquenote[i];

    if (slot == -1) {
        slot = AddTrack(flags);
        _uniquenote[i] = slot;
    }

    return slot;
}
336 * A macro for _ilg.DefineLabel
338 internal Label DefineLabel() {
339 return _ilg.DefineLabel();
343 * A macro for _ilg.MarkLabel
// Marks the current position in the IL stream with the given label.
internal void MarkLabel(Label l) {
    _ilg.MarkLabel(l);
}
350 * Returns the ith operand of the current operation
352 internal int Operand(int i) {
353 return _codes[_codepos + i + 1];
357 * True if the current operation is marked for the leftward direction
359 internal bool IsRtl() {
360 return(_regexopcode & RegexCode.Rtl) != 0;
364 * True if the current operation is marked for case insensitive operation
366 internal bool IsCi() {
367 return(_regexopcode & RegexCode.Ci) != 0;
372 * True if we need to do the backtrack logic for the current operation
374 internal bool IsBack() {
375 return(_regexopcode & RegexCode.Back) != 0;
379 * True if we need to do the second-backtrack logic for the current operation
381 internal bool IsBack2() {
382 return(_regexopcode & RegexCode.Back2) != 0;
387 * Returns the raw regex opcode (masking out Back and Rtl)
389 internal int Code() {
390 return _regexopcode & RegexCode.Mask;
393 internal void Ldstr(string str) {
394 _ilg.Emit(OpCodes.Ldstr, str);
398 * A macro for the various forms of Ldc
// Emits an instruction that loads the int32 constant i, using the short
// (one-byte operand) form when the value fits in a signed byte.
internal void Ldc(int i) {
    if (i <= 127 && i >= -128) {
        // Ldc_I4_S takes a signed 8-bit operand. The sbyte overload expresses
        // that directly and, unlike a (byte) cast of a negative value, cannot
        // overflow if this file is ever compiled in a checked context.
        _ilg.Emit(OpCodes.Ldc_I4_S, (sbyte)i);
    }
    else {
        _ilg.Emit(OpCodes.Ldc_I4, i);
    }
}
// Emits code that leaves the int64 constant i on the operand stack. Values
// that fit in an int32 are loaded with the shorter int32 form and widened
// with Conv_I8; anything else uses Ldc_I8.
internal void LdcI8(long i) {
    if (i <= Int32.MaxValue && i >= Int32.MinValue) {
        Ldc((Int32)i);
        _ilg.Emit(OpCodes.Conv_I8);
    }
    else {
        _ilg.Emit(OpCodes.Ldc_I8, i);
    }
}
417 * A macro for _ilg.Emit(OpCodes.Dup)
419 internal void Dup() {
420 _ilg.Emit(OpCodes.Dup);
424 * A macro for _ilg.Emit(OpCodes.Ret)
426 internal void Ret() {
427 _ilg.Emit(OpCodes.Ret);
431 * A macro for _ilg.Emit(OpCodes.Pop)
433 internal void Pop() {
434 _ilg.Emit(OpCodes.Pop);
438 * A macro for _ilg.Emit(OpCodes.Add)
440 internal void Add() {
441 _ilg.Emit(OpCodes.Add);
445 * A macro for _ilg.Emit(OpCodes.Add); a true flag can turn it into a Sub
// Emits Add, or Sub when negate is true.
internal void Add(bool negate) {
    if (negate)
        _ilg.Emit(OpCodes.Sub);
    else
        _ilg.Emit(OpCodes.Add);
}
455 * A macro for _ilg.Emit(OpCodes.Sub)
457 internal void Sub() {
458 _ilg.Emit(OpCodes.Sub);
462 * A macro for _ilg.Emit(OpCodes.Sub); a true flag can turn it into an Add
// Emits Sub, or Add when negate is true.
internal void Sub(bool negate) {
    if (negate)
        _ilg.Emit(OpCodes.Add);
    else
        _ilg.Emit(OpCodes.Sub);
}
472 * A macro for _ilg.Emit(OpCodes.Ldloc);
474 internal void Ldloc(LocalBuilder lt) {
475 _ilg.Emit(OpCodes.Ldloc_S, lt);
479 * A macro for _ilg.Emit(OpCodes.Stloc);
481 internal void Stloc(LocalBuilder lt) {
482 _ilg.Emit(OpCodes.Stloc_S, lt);
486 * A macro for _ilg.Emit(OpCodes.Ldarg_0);
488 internal void Ldthis() {
489 _ilg.Emit(OpCodes.Ldarg_0);
493 * A macro for Ldthis(); Ldfld();
// Emits code that loads the value of instance field ft of "this".
internal void Ldthisfld(FieldInfo ft) {
    // Ldfld on an instance field needs the object reference on the stack first.
    Ldthis();
    _ilg.Emit(OpCodes.Ldfld, ft);
}
501 * A macro for Ldthis(); Ldfld(); Stloc();
// Emits code that copies instance field ft of "this" into local lt
// (Ldthis; Ldfld; Stloc).
internal void Mvfldloc(FieldInfo ft, LocalBuilder lt) {
    Ldthisfld(ft);
    Stloc(lt);
}
509 * A macro for Ldthis(); Ldloc(); Stfld();
// Emits code that copies local lt into instance field ft of "this"
// (Ldthis; Ldloc; Stfld).
internal void Mvlocfld(LocalBuilder lt, FieldInfo ft) {
    // Stfld expects the object reference below the value on the operand stack.
    Ldthis();
    Ldloc(lt);
    Stfld(ft);
}
518 * A macro for _ilg.Emit(OpCodes.Stfld);
520 internal void Stfld(FieldInfo ft) {
521 _ilg.Emit(OpCodes.Stfld, ft);
525 * A macro for _ilg.Emit(OpCodes.Callvirt);
527 internal void Callvirt(MethodInfo mt) {
528 _ilg.Emit(OpCodes.Callvirt, mt);
532 * A macro for _ilg.Emit(OpCodes.Call);
534 internal void Call(MethodInfo mt) {
535 _ilg.Emit(OpCodes.Call, mt);
539 * A macro for _ilg.Emit(OpCodes.Newobj);
541 internal void Newobj(ConstructorInfo ct) {
542 _ilg.Emit(OpCodes.Newobj, ct);
546 * A macro for _ilg.Emit(OpCodes.Brfalse) (long form)
548 internal void BrfalseFar(Label l) {
549 _ilg.Emit(OpCodes.Brfalse, l);
553 * A macro for _ilg.Emit(OpCodes.Brtrue) (long form)
555 internal void BrtrueFar(Label l) {
556 _ilg.Emit(OpCodes.Brtrue, l);
560 * A macro for _ilg.Emit(OpCodes.Br) (long form)
562 internal void BrFar(Label l) {
563 _ilg.Emit(OpCodes.Br, l);
567 * A macro for _ilg.Emit(OpCodes.Ble) (long form)
569 internal void BleFar(Label l) {
570 _ilg.Emit(OpCodes.Ble, l);
574 * A macro for _ilg.Emit(OpCodes.Blt) (long form)
576 internal void BltFar(Label l) {
577 _ilg.Emit(OpCodes.Blt, l);
581 * A macro for _ilg.Emit(OpCodes.Bge) (long form)
583 internal void BgeFar(Label l) {
584 _ilg.Emit(OpCodes.Bge, l);
588 * A macro for _ilg.Emit(OpCodes.Bgt) (long form)
590 internal void BgtFar(Label l) {
591 _ilg.Emit(OpCodes.Bgt, l);
595 * A macro for _ilg.Emit(OpCodes.Bne_Un) (long form)
597 internal void BneFar(Label l) {
598 _ilg.Emit(OpCodes.Bne_Un, l);
602 * A macro for _ilg.Emit(OpCodes.Beq) (long form)
604 internal void BeqFar(Label l) {
605 _ilg.Emit(OpCodes.Beq, l);
609 * A macro for _ilg.Emit(OpCodes.Brfalse_S) (short jump)
611 internal void Brfalse(Label l) {
612 _ilg.Emit(OpCodes.Brfalse_S, l);
616 * A macro for _ilg.Emit(OpCodes.Br_S) (short jump)
618 internal void Br(Label l) {
619 _ilg.Emit(OpCodes.Br_S, l);
623 * A macro for _ilg.Emit(OpCodes.Ble_S) (short jump)
625 internal void Ble(Label l) {
626 _ilg.Emit(OpCodes.Ble_S, l);
630 * A macro for _ilg.Emit(OpCodes.Blt_S) (short jump)
632 internal void Blt(Label l) {
633 _ilg.Emit(OpCodes.Blt_S, l);
637 * A macro for _ilg.Emit(OpCodes.Bge_S) (short jump)
639 internal void Bge(Label l) {
640 _ilg.Emit(OpCodes.Bge_S, l);
644 * A macro for _ilg.Emit(OpCodes.Bgt_S) (short jump)
646 internal void Bgt(Label l) {
647 _ilg.Emit(OpCodes.Bgt_S, l);
651 * A macro for _ilg.Emit(OpCodes.Bgt_Un_S) (short jump)
653 internal void Bgtun(Label l) {
654 _ilg.Emit(OpCodes.Bgt_Un_S, l);
658 * A macro for _ilg.Emit(OpCodes.Bne_Un_S) (short jump)
660 internal void Bne(Label l) {
661 _ilg.Emit(OpCodes.Bne_Un_S, l);
665 * A macro for _ilg.Emit(OpCodes.Beq_S) (short jump)
667 internal void Beq(Label l) {
668 _ilg.Emit(OpCodes.Beq_S, l);
672 * A macro for the Ldlen instruction
674 internal void Ldlen() {
675 _ilg.Emit(OpCodes.Ldlen);
679 * Loads the char to the right of the current position
681 internal void Rightchar() {
688 * Loads the char to the right of the current position and advances the current position
690 internal void Rightcharnext() {
701 * Loads the char to the left of the current position
703 internal void Leftchar() {
712 * Loads the char to the left of the current position and advances (leftward)
714 internal void Leftcharnext() {
725 * Creates a backtrack note and pushes its switch index on the tracking stack
// Registers a backtrack note for the current operation and emits code that
// pushes its switch index on the tracking stack.
internal void Track() {
    ReadyPushTrack();
    Ldc(AddTrack());
    DoPush();
}
734 * Pushes the current switch index on the tracking stack so the backtracking
735 * logic will be repeated again next time we backtrack here.
// Emits code that pushes the switch index of the backtrack note currently
// being translated, so the same backtracking logic runs again the next time
// we backtrack here.
internal void Trackagain() {
    ReadyPushTrack();
    Ldc(_backpos);
    DoPush();
}
747 * Saves the value of a local variable on the tracking stack
// Emits code that pushes the value of local lt on the tracking stack.
internal void PushTrack(LocalBuilder lt) {
    ReadyPushTrack();
    Ldloc(lt);
    DoPush();
}
756 * Creates a backtrack note for a piece of code that should only be generated once,
757 * and emits code that pushes the switch index on the backtracking stack.
// Registers (once) the i-th unique backtracking fragment and emits code that
// pushes its switch index on the tracking stack.
internal void TrackUnique(int i) {
    ReadyPushTrack();
    Ldc(AddUniqueTrack(i));
    DoPush();
}
766 * Creates a second-backtrack note for a piece of code that should only be
767 * generated once, and emits code that pushes the switch index on the
768 * backtracking stack.
// Registers (once) the i-th unique fragment as a second-backtrack (Back2) note
// and emits code that pushes its switch index on the tracking stack.
internal void TrackUnique2(int i) {
    ReadyPushTrack();
    Ldc(AddUniqueTrack(i, RegexCode.Back2));
    DoPush();
}
777 * Prologue to code that will push an element on the tracking stack
// Push prologue for the tracking stack: decrements the track position local
// and leaves the track array and the new position on the operand stack.
// Load the value to store next, then finish with DoPush().
internal void ReadyPushTrack() {
    Ldloc(_trackV);
    Ldloc(_trackposV);
    _ilg.Emit(OpCodes.Ldc_I4_1);
    Sub();
    Dup();              // one copy is stored back, the other stays as the element index
    Stloc(_trackposV);
}
789 * Pops an element off the tracking stack (leave it on the operand stack)
// Emits code that reads the element at the current track position, leaves it
// on the operand stack, and advances the track position local by one.
internal void PopTrack() {
    Ldloc(_trackV);
    Ldloc(_trackposV);
    Dup();              // keep the old position as the element index
    _ilg.Emit(OpCodes.Ldc_I4_1);
    Add();
    Stloc(_trackposV);
    _ilg.Emit(OpCodes.Ldelem_I4);
}
802 * Retrieves the top entry on the tracking stack without popping
// Emits code that loads the element at the current track position without
// changing the position.
internal void TopTrack() {
    Ldloc(_trackV);
    Ldloc(_trackposV);
    _ilg.Emit(OpCodes.Ldelem_I4);
}
811 * Saves the value of a local variable on the grouping stack
// Emits code that pushes the value of local lt on the grouping stack.
internal void PushStack(LocalBuilder lt) {
    ReadyPushStack();
    _ilg.Emit(OpCodes.Ldloc_S, lt);
    DoPush();
}
820 * Prologue to code that will replace the ith element on the grouping stack
// Replace prologue for the grouping stack: leaves the stack array and the
// index (stack position + i) on the operand stack. Load the new value next,
// then finish with DoReplace().
internal void ReadyReplaceStack(int i) {
    _ilg.Emit(OpCodes.Ldloc_S, _stackV);
    _ilg.Emit(OpCodes.Ldloc_S, _stackposV);

    // Adding zero would be a no-op, so the offset arithmetic is skipped for i == 0.
    if (i != 0) {
        Ldc(i);
        _ilg.Emit(OpCodes.Add);
    }
}
832 * Prologue to code that will push an element on the grouping stack
// Push prologue for the grouping stack: decrements the stack position local
// and leaves the stack array and the new position on the operand stack.
// Load the value to store next, then finish with DoPush().
internal void ReadyPushStack() {
    Ldloc(_stackV);
    Ldloc(_stackposV);
    _ilg.Emit(OpCodes.Ldc_I4_1);
    Sub();
    Dup();              // one copy is stored back, the other stays as the element index
    Stloc(_stackposV);
}
844 * Retrieves the top entry on the stack without popping
// Emits code that loads the element at the current grouping-stack position
// without changing the position.
internal void TopStack() {
    Ldloc(_stackV);
    Ldloc(_stackposV);
    _ilg.Emit(OpCodes.Ldelem_I4);
}
853 * Pops an element off the grouping stack (leave it on the operand stack)
// Emits code that reads the element at the current grouping-stack position,
// leaves it on the operand stack, and advances the stack position local by one.
internal void PopStack() {
    Ldloc(_stackV);
    Ldloc(_stackposV);
    Dup();              // keep the old position as the element index
    _ilg.Emit(OpCodes.Ldc_I4_1);
    Add();
    Stloc(_stackposV);
    _ilg.Emit(OpCodes.Ldelem_I4);
}
866 * Pops 1 element off the grouping stack and discards it
// Emits code that discards the top element of the grouping stack.
internal void PopDiscardStack() {
    PopDiscardStack(1);
}
873 * Pops i elements off the grouping stack and discards them
// Emits code that discards i elements from the grouping stack by advancing
// the stack position local by i; the elements themselves are not read.
internal void PopDiscardStack(int i) {
    _ilg.Emit(OpCodes.Ldloc_S, _stackposV);
    Ldc(i);
    _ilg.Emit(OpCodes.Add);
    _ilg.Emit(OpCodes.Stloc_S, _stackposV);
}
883 * Epilogue to code that will replace an element on a stack (use Ld* in between)
885 internal void DoReplace() {
886 _ilg.Emit(OpCodes.Stelem_I4);
890 * Epilogue to code that will push an element on a stack (use Ld* in between)
892 internal void DoPush() {
893 _ilg.Emit(OpCodes.Stelem_I4);
897 * Jump to the backtracking switch
899 internal void Back() {
900 _ilg.Emit(OpCodes.Br, _backtrack);
904 * Branch to the MSIL corresponding to the regex code at i
906 * A trick: since track and stack space is gobbled up unboundedly
907 * only as a result of branching backwards, this is where we check
908 * for sufficient space and trigger reallocations.
910 * If the "goto" is backwards, we generate code that checks
911 * available space against the amount of space that would be needed
912 * in the worst case by code that will only go forward; if there's
913 * not enough, we push the destination on the tracking stack, then
914 * we jump to the place where we invoke the allocator.
916 * Since forward gotos pose no threat, they just turn into a Br.
918 internal void Goto(int i) {
920 Label l1 = DefineLabel();
922 // When going backwards, ensure enough space.
924 Ldc(_trackcount * 4);
927 Ldc(_trackcount * 3);
941 * Returns the position of the next operation in the regex code, taking
942 * into account the different numbers of arguments taken by operations
944 internal int NextCodepos() {
945 return _codepos + RegexCode.OpcodeSize(_codes[_codepos]);
949 * The label for the next (forward) operation
951 internal Label AdvanceLabel() {
952 return _labels[NextCodepos()];
956 * Goto the next (forward) operation
958 internal void Advance() {
959 _ilg.Emit(OpCodes.Br, AdvanceLabel());
// Emits code that lower-cases the char on top of the operand stack via
// Char.ToLower(char, CultureInfo), using the invariant culture when
// RegexOptions.CultureInvariant is set and the current culture otherwise.
internal void CallToLower()
{
    // Exactly one culture must be loaded as the second argument.
    if ((_options & RegexOptions.CultureInvariant) != 0)
        Call(_getInvariantCulture);
    else
        Call(_getCurrentCulture);

    Call(_chartolowerM);
}
973 * Generates the first section of the MSIL. This section contains all
974 * the forward logic, and corresponds directly to the regex codes.
976 * In the absence of backtracking, this is all we would need.
978 internal void GenerateForwardSection() {
981 _labels = new Label[_codes.Length];
982 _goto = new int[_codes.Length];
986 for (codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) {
988 _labels[codepos] = _ilg.DefineLabel();
991 _uniquenote = new int[uniquecount];
992 for (int i = 0; i < uniquecount; i++)
995 // emit variable initializers
997 Mvfldloc(_textF, _textV);
998 Mvfldloc(_textstartF, _textstartV);
999 Mvfldloc(_textbegF, _textbegV);
1000 Mvfldloc(_textendF, _textendV);
1001 Mvfldloc(_textposF, _textposV);
1002 Mvfldloc(_trackF, _trackV);
1003 Mvfldloc(_trackposF, _trackposV);
1004 Mvfldloc(_stackF, _stackV);
1005 Mvfldloc(_stackposF, _stackposV);
1009 for (codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) {
1010 MarkLabel(_labels[codepos]);
1012 _regexopcode = _codes[codepos];
1018 * Generates the middle section of the MSIL. This section contains the
1019 * big switch jump that allows us to simulate a stack of addresses,
1020 * and it also contains the calls that expand the tracking and the
1021 * grouping stack when they get too full.
1023 internal void GenerateMiddleSection() {
1024 #pragma warning disable 219
1025 Label l1 = DefineLabel();
1026 #pragma warning restore 219
1031 MarkLabel(_backtrack);
1033 // first call EnsureStorage
1034 Mvlocfld(_trackposV, _trackposF);
1035 Mvlocfld(_stackposV, _stackposF);
1037 Callvirt(_ensurestorageM);
1038 Mvfldloc(_trackposF, _trackposV);
1039 Mvfldloc(_stackposF, _stackposV);
1040 Mvfldloc(_trackF, _trackV);
1041 Mvfldloc(_stackF, _stackV);
1046 table = new Label[_notecount];
1047 for (i = 0; i < _notecount; i++)
1048 table[i] = _notes[i]._label;
1050 _ilg.Emit(OpCodes.Switch, table);
1055 * Generates the last section of the MSIL. This section contains all of
1056 * the backtracking logic.
1058 internal void GenerateBacktrackSection() {
1061 for (i = 0; i < _notecount; i++) {
1062 BacktrackNote n = _notes[i];
1063 if (n._flags != 0) {
1064 _ilg.MarkLabel(n._label);
1065 _codepos = n._codepos;
1067 _regexopcode = _codes[n._codepos] | n._flags;
1074 * Generates FindFirstChar
1076 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1077 // !!!! This function must be kept synchronized with FindFirstChar in !!!!
1078 // !!!! RegexInterpreter.cs !!!!
1079 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1080 internal void GenerateFindFirstChar() {
1081 _textposV = DeclareInt();
1082 _textV = DeclareString();
1083 _tempV = DeclareInt();
1084 _temp2V = DeclareInt();
1086 if (0 != (_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End))) {
1087 if (!_code._rightToLeft) {
1088 if (0 != (_anchors & RegexFCD.Beginning)) {
1089 Label l1 = DefineLabel();
1090 Ldthisfld(_textposF);
1091 Ldthisfld(_textbegF);
1094 Ldthisfld(_textendF);
1101 if (0 != (_anchors & RegexFCD.Start)) {
1102 Label l1 = DefineLabel();
1103 Ldthisfld(_textposF);
1104 Ldthisfld(_textstartF);
1107 Ldthisfld(_textendF);
1114 if (0 != (_anchors & RegexFCD.EndZ)) {
1115 Label l1 = DefineLabel();
1116 Ldthisfld(_textposF);
1117 Ldthisfld(_textendF);
1122 Ldthisfld(_textendF);
1129 if (0 != (_anchors & RegexFCD.End)) {
1130 Label l1 = DefineLabel();
1131 Ldthisfld(_textposF);
1132 Ldthisfld(_textendF);
1135 Ldthisfld(_textendF);
1141 if (0 != (_anchors & RegexFCD.End)) {
1142 Label l1 = DefineLabel();
1143 Ldthisfld(_textposF);
1144 Ldthisfld(_textendF);
1147 Ldthisfld(_textbegF);
1154 if (0 != (_anchors & RegexFCD.EndZ)) {
1155 Label l1 = DefineLabel();
1156 Label l2 = DefineLabel();
1157 Ldthisfld(_textposF);
1158 Ldthisfld(_textendF);
1162 Ldthisfld(_textposF);
1163 Ldthisfld(_textendF);
1166 Ldthisfld(_textposF);
1167 Callvirt(_getcharM);
1172 Ldthisfld(_textbegF);
1179 if (0 != (_anchors & RegexFCD.Start)) {
1180 Label l1 = DefineLabel();
1181 Ldthisfld(_textposF);
1182 Ldthisfld(_textstartF);
1185 Ldthisfld(_textbegF);
1192 if (0 != (_anchors & RegexFCD.Beginning)) {
1193 Label l1 = DefineLabel();
1194 Ldthisfld(_textposF);
1195 Ldthisfld(_textbegF);
1198 Ldthisfld(_textbegF);
1210 else if (_bmPrefix != null && _bmPrefix._negativeUnicode == null) {
1211 // Compiled Boyer-Moore string matching
1215 LocalBuilder chV = _tempV;
1216 LocalBuilder testV = _tempV;
1217 LocalBuilder limitV = _temp2V;
1218 Label lDefaultAdvance = DefineLabel();
1219 Label lAdvance = DefineLabel();
1220 Label lFail = DefineLabel();
1221 Label lStart = DefineLabel();
1222 #pragma warning disable 219
1223 Label lOutOfRange = DefineLabel();
1224 #pragma warning restore 219
1225 Label lPartialMatch = DefineLabel();
1234 if (!_code._rightToLeft) {
1236 last = _bmPrefix._pattern.Length - 1;
1239 beforefirst = _bmPrefix._pattern.Length;
1243 chLast = _bmPrefix._pattern[last];
1245 Mvfldloc(_textF, _textV);
1246 if (!_code._rightToLeft)
1247 Ldthisfld(_textendF);
1249 Ldthisfld(_textbegF);
1252 Ldthisfld(_textposF);
1253 if (!_code._rightToLeft) {
1254 Ldc(_bmPrefix._pattern.Length - 1);
1258 Ldc(_bmPrefix._pattern.Length);
1264 MarkLabel(lDefaultAdvance);
1266 if (!_code._rightToLeft)
1267 Ldc(_bmPrefix._pattern.Length);
1269 Ldc(-_bmPrefix._pattern.Length);
1271 MarkLabel(lAdvance);
1281 if (!_code._rightToLeft)
1287 if (_bmPrefix._caseInsensitive)
1293 BeqFar(lPartialMatch);
1296 Ldc(_bmPrefix._lowASCII);
1300 Ldc(_bmPrefix._highASCII - _bmPrefix._lowASCII);
1301 Bgtun(lDefaultAdvance);
1303 table = new Label[_bmPrefix._highASCII - _bmPrefix._lowASCII + 1];
1305 for (i = _bmPrefix._lowASCII; i <= _bmPrefix._highASCII; i++) {
1306 if (_bmPrefix._negativeASCII[i] == beforefirst)
1307 table[i - _bmPrefix._lowASCII] = lDefaultAdvance;
1309 table[i - _bmPrefix._lowASCII] = DefineLabel();
1313 _ilg.Emit(OpCodes.Switch, table);
1315 for (i = _bmPrefix._lowASCII; i <= _bmPrefix._highASCII; i++) {
1316 if (_bmPrefix._negativeASCII[i] == beforefirst)
1319 MarkLabel(table[i - _bmPrefix._lowASCII]);
1321 Ldc(_bmPrefix._negativeASCII[i]);
1325 MarkLabel(lPartialMatch);
1330 for (i = _bmPrefix._pattern.Length - 2; i >= 0; i--) {
1331 Label lNext = DefineLabel();
1334 if (!_code._rightToLeft)
1337 charindex = _bmPrefix._pattern.Length - 1 - i;
1342 Sub(_code._rightToLeft);
1345 Callvirt(_getcharM);
1346 if (_bmPrefix._caseInsensitive)
1349 Ldc(_bmPrefix._pattern[charindex]);
1351 Ldc(_bmPrefix._positive[charindex]);
1360 if (_code._rightToLeft) {
1371 if (!_code._rightToLeft)
1372 Ldthisfld(_textendF);
1374 Ldthisfld(_textbegF);
1379 else if (_fcPrefix == null) {
1384 LocalBuilder cV = _temp2V;
1385 #pragma warning disable 219
1386 LocalBuilder chV = _tempV;
1387 #pragma warning restore 219
1388 Label l1 = DefineLabel();
1389 Label l2 = DefineLabel();
1390 Label l3 = DefineLabel();
1391 Label l4 = DefineLabel();
1392 Label l5 = DefineLabel();
1394 Mvfldloc(_textposF, _textposV);
1395 Mvfldloc(_textF, _textV);
1397 if (!_code._rightToLeft) {
1398 Ldthisfld(_textendF);
1403 Ldthisfld(_textbegF);
1419 if (_code._rightToLeft)
1424 if (_fcPrefix.CaseInsensitive)
1427 if (!RegexCharClass.IsSingleton(_fcPrefix.Prefix)) {
1428 Ldstr(_fcPrefix.Prefix);
1434 Ldc(RegexCharClass.SingletonChar(_fcPrefix.Prefix));
1442 if (!RegexCharClass.IsSingleton(_fcPrefix.Prefix))
1452 /* // CURRENTLY DISABLED
1453 // If for some reason we have a prefix we didn't use, use it now.
1455 if (_bmPrefix != null) {
1456 if (!_code._rightToLeft) {
1457 Ldthisfld(_textendF);
1462 Ldthisfld(_textbegF);
1465 Ldc(_bmPrefix._pattern.Length - 1);
1468 for (int i = 1; i < _bmPrefix._pattern.Length; i++) {
1471 if (!_code._rightToLeft) {
1479 Callvirt(_getcharM);
1480 if (!_code._rightToLeft)
1481 Ldc(_bmPrefix._pattern[i]);
1483 Ldc(_bmPrefix._pattern[_bmPrefix._pattern.Length - 1 - i]);
1491 Sub(_code._rightToLeft);
1497 Mvlocfld(_textposV, _textposF);
1508 * Generates a very simple method that sets the _trackcount field.
1510 internal void GenerateInitTrackCount() {
1513 Stfld(_trackcountF);
1518 * Declares a local int
1520 internal LocalBuilder DeclareInt() {
1521 return _ilg.DeclareLocal(typeof(int));
1525 * Declares a local int array
1527 internal LocalBuilder DeclareIntArray() {
1528 return _ilg.DeclareLocal(typeof(int[]));
1532 * Declares a local string
1534 internal LocalBuilder DeclareString() {
1535 return _ilg.DeclareLocal(typeof(string));
1539 * Generates the code for "RegexRunner.Go"
1541 internal void GenerateGo() {
1542 // declare some locals
1544 _textposV = DeclareInt();
1545 _textV = DeclareString();
1546 _trackposV = DeclareInt();
1547 _trackV = DeclareIntArray();
1548 _stackposV = DeclareInt();
1549 _stackV = DeclareIntArray();
1550 _tempV = DeclareInt();
1551 _temp2V = DeclareInt();
1552 _temp3V = DeclareInt();
1553 _textbegV = DeclareInt();
1554 _textendV = DeclareInt();
1555 _textstartV = DeclareInt();
1557 // clear some tables
1563 // globally used labels
1565 _backtrack = DefineLabel();
1569 GenerateForwardSection();
1570 GenerateMiddleSection();
1571 GenerateBacktrackSection();
1576 * Some simple debugging stuff
1578 internal static MethodInfo _debugWriteLine = typeof(Debug).GetMethod("WriteLine", new Type[] {typeof(string)});
1581 * Debug only: emit code to print out a message
// Debug aid: emits code that passes str to Debug.WriteLine(string) at run time.
internal void Message(String str) {
    Ldstr(str);
    Call(_debugWriteLine);
}
1591 * The main translation function. It translates the logic for a single opcode at
1592 * the current position. The structure of this function exactly mirrors
1593 * the structure of the inner loop of RegexInterpreter.Go().
1595 * The C# code from RegexInterpreter.Go() that corresponds to each case is
1596 * included as a comment.
1598 * Note that since we're generating code, we can collapse many cases that are
1599 * dealt with one-at-a-time in RegexInterpreter. We can also unroll loops that
1600 * iterate over constant strings or sets.
// Emits the IL for the single regex opcode currently held in _regexopcode.
// Layout of the emitted code:
//   1. With RegexOptions.Debug: flush locals to fields, dump state and print a trace line.
//   2. A call through _checkTimeoutM (match-timeout check).
//   3. The opcode-specific code, selected by the switch below. Each case group is preceded
//      by "//:" comments quoting the corresponding interpreter code.
// Opcodes combined with RegexCode.Back / RegexCode.Back2 have their own case labels; the quoted
// interpreter code for them restores state from the track ("Tracked(n)").
// An opcode that matches no case ends in NotImplementedException.
1602 internal void GenerateOneCode() {
// Debug tracing: copy the locals back to the runner fields so the state dump sees current values
1604 if ((_options & RegexOptions.Debug) != 0) {
1605 Mvlocfld(_textposV, _textposF);
1606 Mvlocfld(_trackposV, _trackposF);
1607 Mvlocfld(_stackposV, _stackposF);
1609 Callvirt(_dumpstateM);
// build the human-readable description of this opcode that is printed at run time
1610 StringBuilder sb = new StringBuilder();
1612 sb.AppendFormat("{0:D6} ", _backpos);
1615 sb.Append(_code.OpcodeDescription(_codepos));
1619 sb.Append(" Back2");
1620 Message(sb.ToString());
1624 // Before executing any RegEx code in the unrolled loop,
1625 // we try checking for the match timeout:
1628 Callvirt(_checkTimeoutM);
1630 // Now generate the IL for the RegEx code saved in _regexopcode.
1631 // We unroll the loop done by the RegexCompiler creating a very long method
1632 // that is longer if the pattern is longer:
1634 switch (_regexopcode) {
1635 case RegexCode.Stop:
1637 Mvlocfld(_textposV, _textposF); // update _textpos
1641 case RegexCode.Nothing:
1646 case RegexCode.Goto:
1647 //: Goto(Operand(0));
1651 case RegexCode.Testref:
1652 //: if (!_match.IsMatched(Operand(0)))
// backtrack when the IsMatched call returns false
1656 Callvirt(_ismatchedM);
1657 BrfalseFar(_backtrack);
1660 case RegexCode.Lazybranch:
1661 //: Track(Textpos());
1662 PushTrack(_textposV);
1666 case RegexCode.Lazybranch | RegexCode.Back:
1668 //: Textto(Tracked(0));
1669 //: Goto(Operand(0));
1675 case RegexCode.Nullmark:
1681 TrackUnique(stackpop);
1684 case RegexCode.Setmark:
1685 //: Stack(Textpos());
1687 PushStack(_textposV);
1688 TrackUnique(stackpop);
1691 case RegexCode.Nullmark | RegexCode.Back:
1692 case RegexCode.Setmark | RegexCode.Back:
1699 case RegexCode.Getmark:
1701 //: Track(Stacked(0));
1702 //: Textto(Stacked(0));
1712 case RegexCode.Getmark | RegexCode.Back:
1714 //: Stack(Tracked(0));
1722 case RegexCode.Capturemark:
1723 //: if (!IsMatched(Operand(1)))
1726 //: if (Operand(1) != -1)
1727 //: TransferCapture(Operand(0), Operand(1), Stacked(0), Textpos());
1729 //: Capture(Operand(0), Stacked(0), Textpos());
1730 //: Track(Stacked(0));
1733 //: Capture(Operand(0), Stacked(0), Textpos());
1734 //: Track(Stacked(0));
// when a second group is given (Operand(1) != -1) it must already be matched, otherwise backtrack
1736 if (Operand(1) != -1) {
1739 Callvirt(_ismatchedM);
1740 BrfalseFar(_backtrack);
// the operands are known at compile time, so only one of the transfer/capture calls is emitted
1746 if (Operand(1) != -1) {
1752 Callvirt(_transferM);
1759 Callvirt(_captureM);
1764 if (Operand(0) != -1 && Operand(1) != -1)
1765 TrackUnique(capback2);
1767 TrackUnique(capback);
1772 case RegexCode.Capturemark | RegexCode.Back:
1774 //: Stack(Tracked(0));
1776 //: if (Operand(0) != -1 && Operand(1) != -1)
1783 Callvirt(_uncaptureM);
// a second uncapture call is emitted only when both operands are real group numbers
1784 if (Operand(0) != -1 && Operand(1) != -1) {
1786 Callvirt(_uncaptureM);
1791 case RegexCode.Branchmark:
1794 //: if (Textpos() != Stacked(0))
1795 //: { // Nonempty match -> loop now
1796 //: Track(Stacked(0), Textpos()); // Save old mark, textpos
1797 //: Stack(Textpos()); // Make new mark
1798 //: Goto(Operand(0)); // Loop
1801 //: { // Empty match -> straight now
1802 //: Track2(Stacked(0)); // Save old mark
1803 //: Advance(1); // Straight
1805 //: continue Forward;
1807 LocalBuilder mark = _tempV;
1808 Label l1 = DefineLabel();
1812 Stloc(mark); // Stacked(0) -> temp
1815 Beq(l1); // mark == textpos -> branch
1819 PushTrack(_textposV);
1820 PushStack(_textposV);
1822 Goto(Operand(0)); // Goto(Operand(0))
1827 TrackUnique2(branchmarkback2);
1831 case RegexCode.Branchmark | RegexCode.Back:
1834 //: Textto(Tracked(1)); // Recall position
1835 //: Track2(Tracked(0)); // Save old mark
1841 // track spot 0 is already in place
1842 TrackUnique2(branchmarkback2);
1846 case RegexCode.Branchmark | RegexCode.Back2:
1848 //: Stack(Tracked(0)); // Recall old mark
1849 //: break Backward; // Backtrack
1857 case RegexCode.Lazybranchmark:
1859 //: int oldMarkPos = StackPeek();
1861 //: if (Textpos() != oldMarkPos) { // Nonempty match -> next loop
1862 //: { // Nonempty match -> next loop
1863 //: if (oldMarkPos != -1)
1864 //: Track(Stacked(0), Textpos()); // Save old mark, textpos
1866 //: TrackPush(Textpos(), Textpos());
1869 //: { // Empty match -> no loop
1870 //: Track2(Stacked(0)); // Save old mark
1873 //: continue Forward;
1875 LocalBuilder mark = _tempV;
1876 Label l1 = DefineLabel();
1877 Label l2 = DefineLabel();
1878 Label l3 = DefineLabel();
1882 Stloc(mark); // Stacked(0) -> temp
1884 // if (oldMarkPos != -1)
1887 Beq(l2); // mark == -1 -> branch
1892 PushTrack(_textposV);
1895 // if (Textpos() != mark)
1897 Beq(l1); // mark == textpos -> branch
1898 PushTrack(_textposV);
1900 Br(AdvanceLabel()); // Advance (near)
1903 ReadyPushStack(); // push the current textPos on the stack.
1904 // May be ignored by 'back2' or used by a true empty match.
1908 TrackUnique2(lazybranchmarkback2);
1913 case RegexCode.Lazybranchmark | RegexCode.Back:
1915 //: Track2(Tracked(0)); // Save old mark
1916 //: Stack(Textpos()); // Make new mark
1917 //: Textto(Tracked(1)); // Recall position
1918 //: Goto(Operand(0)); // Loop
1922 PushStack(_textposV);
1923 TrackUnique2(lazybranchmarkback2);
1927 case RegexCode.Lazybranchmark | RegexCode.Back2:
1930 //: Stack(Tracked(0)); // Recall old mark
1932 ReadyReplaceStack(0);
1938 case RegexCode.Nullcount:
1939 //: Stack(-1, Operand(0));
1947 TrackUnique(stackpop2);
1950 case RegexCode.Setcount:
1951 //: Stack(Textpos(), Operand(0));
1953 PushStack(_textposV);
1957 TrackUnique(stackpop2);
1961 case RegexCode.Nullcount | RegexCode.Back:
1962 case RegexCode.Setcount | RegexCode.Back:
1970 case RegexCode.Branchcount:
1972 //: int mark = Stacked(0);
1973 //: int count = Stacked(1);
1975 //: if (count >= Operand(1) || Textpos() == mark && count >= 0)
1976 //: { // Max loops or empty match -> straight now
1977 //: Track2(mark, count); // Save old mark, count
1978 //: Advance(2); // Straight
1981 //: { // Nonempty match -> count+loop now
1982 //: Track(mark); // remember mark
1983 //: Stack(Textpos(), count + 1); // Make new mark, incr count
1984 //: Goto(Operand(0)); // Loop
1986 //: continue Forward;
1988 LocalBuilder count = _tempV;
1989 LocalBuilder mark = _temp2V;
1990 Label l1 = DefineLabel();
1991 Label l2 = DefineLabel();
1994 Stloc(count); // count -> temp
1997 Stloc(mark); // mark -> temp2
2001 Bne(l1); // mark != textpos -> l1
2004 Bge(l2); // count >= 0 && mark == textpos -> l2
2009 Bge(l2); // count >= Operand(1) -> l2
2012 PushStack(_textposV);
2014 Ldloc(count); // mark already on track
2021 // if (count >= Operand(1) || Textpos() == mark)
2023 PushTrack(count); // mark already on track
2024 TrackUnique2(branchcountback2);
2028 case RegexCode.Branchcount | RegexCode.Back:
2031 //: if (Stacked(1) > 0) // Positive -> can go straight
2033 //: Textto(Stacked(0)); // Zap to mark
2034 //: Track2(Tracked(0), Stacked(1) - 1); // Save old mark, old count
2035 //: Advance(2); // Straight
2036 //: continue Forward;
2038 //: Stack(Tracked(0), Stacked(1) - 1); // recall old mark, old count
2042 LocalBuilder count = _tempV;
2043 Label l1 = DefineLabel();
2055 PushTrack(count); // Tracked(0) is already on the track
2056 TrackUnique2(branchcountback2);
2061 ReadyReplaceStack(0);
2069 case RegexCode.Branchcount | RegexCode.Back2:
2071 //: Stack(Tracked(0), Tracked(1)); // Recall old mark, old count
2072 //: break Backward; // Backtrack
2083 case RegexCode.Lazybranchcount:
2085 //: int mark = Stacked(0);
2086 //: int count = Stacked(1);
2089 //: { // Negative count -> loop now
2090 //: Track2(mark); // Save old mark
2091 //: Stack(Textpos(), count + 1); // Make new mark, incr count
2092 //: Goto(Operand(0)); // Loop
2095 //: { // Nonneg count or empty match -> straight now
2096 //: Track(mark, count, Textpos()); // Save mark, count, position
2099 LocalBuilder count = _tempV;
2100 LocalBuilder mark = _temp2V;
2101 Label l1 = DefineLabel();
// warning 219 ("assigned but its value is never used") is suppressed for the two labels below
2102 #pragma warning disable 219
2103 Label l2 = DefineLabel();
2104 Label l3 = _labels[NextCodepos()];
2105 #pragma warning restore 219
2108 Stloc(count); // count -> temp
2110 Stloc(mark); // mark -> temp2
2114 Bge(l1); // count >= 0 -> l1
2118 PushStack(_textposV);
2124 TrackUnique2(lazybranchcountback2);
2131 PushTrack(_textposV);
2136 case RegexCode.Lazybranchcount | RegexCode.Back:
2138 //: int mark = Tracked(0);
2139 //: int textpos = Tracked(2);
2140 //: if (Tracked(1) < Operand(1) && textpos != mark)
2141 //: { // Under limit and not empty match -> loop
2142 //: Textto(Tracked(2)); // Recall position
2143 //: Stack(Textpos(), Tracked(1) + 1); // Make new mark, incr count
2144 //: Track2(Tracked(0)); // Save old mark
2145 //: Goto(Operand(0)); // Loop
2146 //: continue Forward;
2150 //: Stack(Tracked(0), Tracked(1)); // Recall old mark, count
2151 //: break Backward; // backtrack
2154 Label l1 = DefineLabel();
2155 LocalBuilder cV = _tempV;
2162 Bge(l1); // Tracked(1) >= Operand(1) -> l1
2166 Beq(l1); // textpos == mark -> l1
2168 PushStack(_textposV);
2174 TrackUnique2(lazybranchcountback2);
2186 case RegexCode.Lazybranchcount | RegexCode.Back2:
2193 ReadyReplaceStack(1);
2196 ReadyReplaceStack(0);
2205 case RegexCode.Setjump:
2206 //: Stack(Trackpos(), Crawlpos());
2216 Callvirt(_crawlposM);
2218 TrackUnique(stackpop2);
2221 case RegexCode.Setjump | RegexCode.Back:
2228 case RegexCode.Backjump:
2230 //: Trackto(Stacked(0));
2231 //: while (Crawlpos() != Stacked(1))
2235 Label l1 = DefineLabel();
2236 Label l2 = DefineLabel();
2246 Callvirt(_crawlposM);
2251 Callvirt(_uncaptureM);
2254 Callvirt(_crawlposM);
2263 case RegexCode.Forejump:
2265 //: Trackto(Stacked(0));
2266 //: Track(Stacked(1));
2275 TrackUnique(forejumpback);
2278 case RegexCode.Forejump | RegexCode.Back:
2280 //: while (Crawlpos() != Tracked(0))
2284 Label l1 = DefineLabel();
2285 Label l2 = DefineLabel();
2291 Callvirt(_crawlposM);
2296 Callvirt(_uncaptureM);
2299 Callvirt(_crawlposM);
2309 //: if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
2312 Label l1 = _labels[NextCodepos()];
2323 //: if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
2326 Label l1 = _labels[NextCodepos()];
// Boundary and Nonboundary share the same emitted test; only the sense of the backtrack branch differs
2336 case RegexCode.Boundary:
2337 case RegexCode.Nonboundary:
2338 //: if (!IsBoundary(Textpos(), _textbeg, _textend))
2344 Callvirt(_isboundaryM);
2345 if (Code() == RegexCode.Boundary)
2346 BrfalseFar(_backtrack);
2348 BrtrueFar(_backtrack);
// same pattern as above, using the ECMA boundary test
2351 case RegexCode.ECMABoundary:
2352 case RegexCode.NonECMABoundary:
2353 //: if (!IsECMABoundary(Textpos(), _textbeg, _textend))
2359 Callvirt(_isECMABoundaryM);
2360 if (Code() == RegexCode.ECMABoundary)
2361 BrfalseFar(_backtrack);
2363 BrtrueFar(_backtrack);
2366 case RegexCode.Beginning:
2367 //: if (Leftchars() > 0)
2374 case RegexCode.Start:
2375 //: if (Textpos() != Textstart())
2378 Ldthisfld(_textstartF);
2382 case RegexCode.EndZ:
2383 //: if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
2392 Bge(_labels[NextCodepos()]);
2399 //: if (Rightchars() > 0)
// single-character opcodes: every Rtl / Ci combination is handled by one shared body
2407 case RegexCode.Notone:
2409 case RegexCode.One | RegexCode.Rtl:
2410 case RegexCode.Notone | RegexCode.Rtl:
2411 case RegexCode.Set | RegexCode.Rtl:
2412 case RegexCode.One | RegexCode.Ci:
2413 case RegexCode.Notone | RegexCode.Ci:
2414 case RegexCode.Set | RegexCode.Ci:
2415 case RegexCode.One | RegexCode.Ci | RegexCode.Rtl:
2416 case RegexCode.Notone | RegexCode.Ci | RegexCode.Rtl:
2417 case RegexCode.Set | RegexCode.Ci | RegexCode.Rtl:
2419 //: if (Rightchars() < 1 || Rightcharnext() != (char)Operand(0))
// for Set the operand indexes the string table (_strings) rather than being a character
2437 if (Code() == RegexCode.Set) {
2439 Ldstr(_strings[Operand(0)]);
2442 BrfalseFar(_backtrack);
2446 if (Code() == RegexCode.One)
2453 case RegexCode.Multi:
2454 case RegexCode.Multi | RegexCode.Ci:
2463 //: String Str = _strings[Operand(0)];
2465 //: if (Rightchars() < (c = Str.Length))
2467 //: for (i = 0; c > 0; i++, c--)
2468 //: if (Str[i] != Rightcharnext())
2474 str = _strings[Operand(0)];
2482 // unroll the string
2483 for (i = 0; i < str.Length; i++) {
2490 Callvirt(_getcharM);
2506 case RegexCode.Multi | RegexCode.Rtl:
2507 case RegexCode.Multi | RegexCode.Ci | RegexCode.Rtl:
2508 //: String Str = _strings[Operand(0)];
2510 //: if (Leftchars() < (c = Str.Length))
2513 //: if (Str[--c] != Leftcharnext())
2519 str = _strings[Operand(0)];
2527 // unroll the string
2528 for (i = str.Length; i > 0;) {
2532 Ldc(str.Length - i);
2534 Callvirt(_getcharM);
2552 case RegexCode.Ref | RegexCode.Rtl:
2553 case RegexCode.Ref | RegexCode.Ci:
2554 case RegexCode.Ref | RegexCode.Ci | RegexCode.Rtl:
2555 //: int capnum = Operand(0);
2557 //: if (!_match.IsMatched(capnum)) {
2558 //: if (!RegexOptions.ECMAScript)
2561 //: if (Rightchars() < (c = _match.MatchLength(capnum)))
2563 //: for (j = _match.MatchIndex(capnum); c > 0; j++, c--)
2564 //: if (CharAt(j) != Rightcharnext())
2568 LocalBuilder lenV = _tempV;
2569 LocalBuilder indexV = _temp2V;
2570 Label l1 = DefineLabel();
// unmatched group: with RegexOptions.ECMAScript fall through to the next opcode, otherwise backtrack
2574 Callvirt(_ismatchedM);
2575 if ((_options & RegexOptions.ECMAScript) != 0)
2576 Brfalse(AdvanceLabel());
2578 BrfalseFar(_backtrack); // !IsMatched() -> back
2582 Callvirt(_matchlengthM);
2594 BgtFar(_backtrack); // Matchlength() > Rightchars() -> back
2598 Callvirt(_matchindexM);
2603 Stloc(indexV); // index += len
2608 Stloc(_textposV); // textpos += len
2613 Ble(AdvanceLabel());
2624 Callvirt(_getcharM);
2638 Callvirt(_getcharM);
2648 case RegexCode.Onerep:
2649 case RegexCode.Notonerep:
2650 case RegexCode.Setrep:
2651 case RegexCode.Onerep | RegexCode.Rtl:
2652 case RegexCode.Notonerep | RegexCode.Rtl:
2653 case RegexCode.Setrep | RegexCode.Rtl:
2654 case RegexCode.Onerep | RegexCode.Ci:
2655 case RegexCode.Notonerep | RegexCode.Ci:
2656 case RegexCode.Setrep | RegexCode.Ci:
2657 case RegexCode.Onerep | RegexCode.Ci | RegexCode.Rtl:
2658 case RegexCode.Notonerep | RegexCode.Ci | RegexCode.Rtl:
2659 case RegexCode.Setrep | RegexCode.Ci | RegexCode.Rtl:
2660 //: int c = Operand(1);
2661 //: if (Rightchars() < c)
2663 //: char ch = (char)Operand(0);
2665 //: if (Rightcharnext() != ch)
2668 LocalBuilder lenV = _tempV;
2669 Label l1 = DefineLabel();
2686 BgtFar(_backtrack); // Matchlength() > Rightchars() -> back
2691 Stloc(_textposV); // textpos += len
2714 Callvirt(_getcharM);
2718 if (Code() == RegexCode.Setrep) {
2719 Ldstr(_strings[Operand(0)]);
2722 BrfalseFar(_backtrack);
2726 if (Code() == RegexCode.Onerep)
2733 if (Code() == RegexCode.Setrep)
2741 case RegexCode.Oneloop:
2742 case RegexCode.Notoneloop:
2743 case RegexCode.Setloop:
2744 case RegexCode.Oneloop | RegexCode.Rtl:
2745 case RegexCode.Notoneloop | RegexCode.Rtl:
2746 case RegexCode.Setloop | RegexCode.Rtl:
2747 case RegexCode.Oneloop | RegexCode.Ci:
2748 case RegexCode.Notoneloop | RegexCode.Ci:
2749 case RegexCode.Setloop | RegexCode.Ci:
2750 case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl:
2751 case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
2752 case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
2753 //: int c = Operand(1);
2754 //: if (c > Rightchars())
2755 //: c = Rightchars();
2756 //: char ch = (char)Operand(0);
2758 //: for (i = c; i > 0; i--)
2760 //: if (Rightcharnext() != ch)
2767 //: Track(c - i - 1, Textpos() - 1);
2770 LocalBuilder cV = _tempV;
2771 LocalBuilder lenV = _temp2V;
2772 Label l1 = DefineLabel();
2773 Label l2 = DefineLabel();
2788 if (c != Int32.MaxValue) {
2789 Label l4 = DefineLabel();
2810 if (Code() == RegexCode.Setloop)
2822 if (Code() == RegexCode.Setloop) {
2823 Ldstr(_strings[Operand(0)]);
2830 if (Code() == RegexCode.Oneloop)
2844 Ble(AdvanceLabel());
2864 case RegexCode.Oneloop | RegexCode.Back:
2865 case RegexCode.Notoneloop | RegexCode.Back:
2866 case RegexCode.Setloop | RegexCode.Back:
2867 case RegexCode.Oneloop | RegexCode.Rtl | RegexCode.Back:
2868 case RegexCode.Notoneloop | RegexCode.Rtl | RegexCode.Back:
2869 case RegexCode.Setloop | RegexCode.Rtl | RegexCode.Back:
2870 case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Back:
2871 case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Back:
2872 case RegexCode.Setloop | RegexCode.Ci | RegexCode.Back:
2873 case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
2874 case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
2875 case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
2877 //: int i = Tracked(0);
2878 //: int pos = Tracked(1);
2881 //: Track(i - 1, pos - 1);
2889 BleFar(AdvanceLabel());
2904 case RegexCode.Onelazy:
2905 case RegexCode.Notonelazy:
2906 case RegexCode.Setlazy:
2907 case RegexCode.Onelazy | RegexCode.Rtl:
2908 case RegexCode.Notonelazy | RegexCode.Rtl:
2909 case RegexCode.Setlazy | RegexCode.Rtl:
2910 case RegexCode.Onelazy | RegexCode.Ci:
2911 case RegexCode.Notonelazy | RegexCode.Ci:
2912 case RegexCode.Setlazy | RegexCode.Ci:
2913 case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl:
2914 case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl:
2915 case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl:
2916 //: int c = Operand(1);
2917 //: if (c > Rightchars())
2918 //: c = Rightchars();
2920 //: Track(c - 1, Textpos());
2922 LocalBuilder cV = _tempV;
2938 if (c != Int32.MaxValue) {
2939 Label l4 = DefineLabel();
2950 Ble(AdvanceLabel());
2956 PushTrack(_textposV);
2961 case RegexCode.Onelazy | RegexCode.Back:
2962 case RegexCode.Notonelazy | RegexCode.Back:
2963 case RegexCode.Setlazy | RegexCode.Back:
2964 case RegexCode.Onelazy | RegexCode.Rtl | RegexCode.Back:
2965 case RegexCode.Notonelazy | RegexCode.Rtl | RegexCode.Back:
2966 case RegexCode.Setlazy | RegexCode.Rtl | RegexCode.Back:
2967 case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Back:
2968 case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Back:
2969 case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Back:
2970 case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
2971 case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
2972 case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
2974 //: int pos = Tracked(1);
2976 //: if (Rightcharnext() != (char)Operand(0))
2978 //: int i = Tracked(0);
2980 //: Track(i - 1, pos + 1);
2995 if (Code() == RegexCode.Setlazy) {
2996 Ldstr(_strings[Operand(0)]);
2999 BrfalseFar(_backtrack);
3003 if (Code() == RegexCode.Onelazy)
3011 BleFar(AdvanceLabel());
3017 PushTrack(_textposV);
// NOTE(review): presumably the switch's default branch - an opcode with no case above is unsupported
3023 throw new NotImplementedException(SR.GetString(SR.UnimplementedState));
// RegexCompiler variant that emits the generated runner, factory and Regex types into a
// dynamic assembly/module through TypeBuilder and MethodBuilder.
3028 internal class RegexTypeCompiler : RegexCompiler {
// counter that gives each generated runner/factory type pair a distinct numeric suffix
3029 private static int _typeCount = 0;
// NOTE(review): no reader of this thread data slot is visible nearby - confirm it is still used
3030 private static LocalDataStoreSlot _moduleSlot = Thread.AllocateDataSlot();
// dynamic assembly and module that receive the generated types
3032 private AssemblyBuilder _assembly;
3033 private ModuleBuilder _module;
3035 // state of the type builder
// the type and method currently under construction; reset to null once baked
3036 private TypeBuilder _typebuilder;
3037 private MethodBuilder _methbuilder;
// Creates the dynamic assembly and module that will hold the compiled regex types.
//   an           - name of the assembly to define (must not be null; checked with Debug.Assert only)
//   attribs      - optional custom attributes to apply to the assembly; may be null
//   resourceFile - optional unmanaged resource file; may be null
3039 [ResourceExposure(ResourceScope.Machine)]
3040 [ResourceConsumption(ResourceScope.Machine)]
3041 [SuppressMessage("Microsoft.Security","CA2106:SecureAsserts", Justification="[....]: SECREVIEW : Regex only generates string manipulation, so this is OK")]
3042 internal RegexTypeCompiler(AssemblyName an, CustomAttributeBuilder[] attribs, String resourceFile) {
3043 // SECREVIEW : Regex only generates string manipulation, so this is
// when CAS is in use, assert unrestricted reflection permission; it is reverted at the end of the constructor
3046 #if !DISABLE_CAS_USE
3047 new ReflectionPermission(PermissionState.Unrestricted).Assert();
3050 Debug.Assert(an != null, "AssemblyName should not be null");
// attributes passed to DefineDynamicAssembly: SecurityTransparent, plus SecurityRules(Level2) when CAS is in use
3052 List<CustomAttributeBuilder> assemblyAttributes = new List<CustomAttributeBuilder>();
3054 ConstructorInfo transparencyCtor = typeof(SecurityTransparentAttribute).GetConstructor(Type.EmptyTypes);
3055 CustomAttributeBuilder transparencyAttribute = new CustomAttributeBuilder(transparencyCtor, new object[0]);
3056 assemblyAttributes.Add(transparencyAttribute);
3058 #if !DISABLE_CAS_USE
3059 ConstructorInfo securityRulesCtor = typeof(SecurityRulesAttribute).GetConstructor(new Type[] { typeof(SecurityRuleSet) });
3060 CustomAttributeBuilder securityRulesAttribute =
3061 new CustomAttributeBuilder(securityRulesCtor, new object[] { SecurityRuleSet.Level2 });
3062 assemblyAttributes.Add(securityRulesAttribute);
// RunAndSave so the assembly can later be written to disk by Save(); the module file is "<assembly name>.dll"
3065 _assembly = AppDomain.CurrentDomain.DefineDynamicAssembly(an, AssemblyBuilderAccess.RunAndSave, assemblyAttributes);
3066 _module = _assembly.DefineDynamicModule(an.Name + ".dll");
// apply the caller-supplied assembly attributes, if any
3068 if (attribs != null) {
3069 for (int i=0; i<attribs.Length; i++) {
3070 _assembly.SetCustomAttribute(attribs[i]);
// NOTE(review): the throw and the DefineUnmanagedResource call below are presumably alternative
// conditional-compilation branches - confirm against the build configuration
3074 if (resourceFile != null) {
3076 // unmanaged resources are not supported
3077 throw new ArgumentOutOfRangeException("resourceFile");
3079 _assembly.DefineUnmanagedResource(resourceFile);
3084 CodeAccessPermission.RevertAssert();
3089 * The top-level driver. Initializes everything then calls the Generate* methods.
// Generates a RegexRunner subclass and a matching RegexRunnerFactory subclass for the given code.
//   code       - compiled regex program whose tables are copied into the compiler state
//   options    - regex options for this compilation
//   typeprefix - prefix for the generated type names ("<prefix>Runner<n>" and "<prefix>Factory<n>")
// The baked factory type ends up in the local "factory"; presumably that is the return value.
3091 internal Type FactoryTypeFromCode(RegexCode code, RegexOptions options, String typeprefix) {
3092 String runnertypename;
3093 String runnerfactoryname;
// copy the pieces of the RegexCode that the Generate* methods read
3098 _codes = code._codes;
3099 _strings = code._strings;
3100 _fcPrefix = code._fcPrefix;
3101 _bmPrefix = code._bmPrefix;
3102 _anchors = code._anchors;
3103 _trackcount = code._trackcount;
3106 // pick a name for the class
// Interlocked.Increment keeps the suffix unique when several compilations run concurrently
3107 int typenum = Interlocked.Increment(ref _typeCount);
3108 string typenumString = typenum.ToString(CultureInfo.InvariantCulture);
3109 runnertypename = typeprefix + "Runner" + typenumString ;
3110 runnerfactoryname = typeprefix + "Factory" + typenumString;
3112 // Generate a RegexRunner class
3113 // (blocks are simply illustrative)
3115 DefineType(runnertypename, false, typeof(RegexRunner));
// a null return type means the method returns void
3117 DefineMethod("Go", null);
3123 DefineMethod("FindFirstChar", typeof(bool));
3125 GenerateFindFirstChar();
3129 DefineMethod("InitTrackCount", null);
3131 GenerateInitTrackCount();
3135 runnertype = BakeType();
3138 // Generate a RegexRunnerFactory class
3140 DefineType(runnerfactoryname, false, typeof(RegexRunnerFactory));
3142 DefineMethod("CreateInstance", typeof(RegexRunner));
// CreateInstance constructs the runner type that was baked above
3144 GenerateCreateInstance(runnertype);
3148 factory = BakeType();
// Generates a Regex subclass with two constructors: a default constructor that fills in the
// Regex fields from the supplied pattern/options/code/tree, and a constructor taking a
// TimeSpan that chains to the default one and then overrides the match timeout.
//   pattern, opts - the regex pattern and options for the generated type
//   name, ispublic - name and visibility of the generated type
//   code, tree     - sources of the capture tables (code._caps, tree._capnames, tree._capslist)
//   factory        - runner factory type produced earlier; instantiated by the constructor
//   matchTimeout   - default timeout baked into the default constructor
3154 internal void GenerateRegexType(String pattern, RegexOptions opts, String name, bool ispublic, RegexCode code, RegexTree tree, Type factory, TimeSpan matchTimeout) {
// Regex fields are looked up by name through reflection (see RegexField)
3155 FieldInfo patternF = RegexField("pattern");
3156 FieldInfo optionsF = RegexField("roptions");
3157 FieldInfo factoryF = RegexField("factory");
3158 FieldInfo capsF = RegexField("caps");
3159 FieldInfo capnamesF = RegexField("capnames");
3160 FieldInfo capslistF = RegexField("capslist");
3161 FieldInfo capsizeF = RegexField("capsize");
3162 FieldInfo internalMatchTimeoutF = RegexField("internalMatchTimeout");
3163 Type[] noTypeArray = new Type[0];
3164 ConstructorBuilder defCtorBuilder, tmoutCtorBuilder;
3166 DefineType(name, ispublic, typeof(Regex));
3168 // Define default constructor:
3169 _methbuilder = null;
3170 MethodAttributes ma = System.Reflection.MethodAttributes.Public;
3171 defCtorBuilder = _typebuilder.DefineConstructor(ma, CallingConventions.Standard, noTypeArray);
3172 _ilg = defCtorBuilder.GetILGenerator();
3174 // call base constructor
// the parameterless Regex constructor may be non-public, hence Public | NonPublic
3176 _ilg.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance,
3177 null, new Type[0], new ParameterModifier[0]));
3188 // Set timeout (no need to validate as it should have happened in RegexCompilationInfo):
// the timeout is embedded as a 64-bit tick count and rebuilt at run time with TimeSpan.FromTicks
3190 LdcI8(matchTimeout.Ticks);
3191 Call(typeof(TimeSpan).GetMethod("FromTicks", BindingFlags.Static | BindingFlags.Public));
3192 Stfld(internalMatchTimeoutF);
3196 Newobj(factory.GetConstructor(noTypeArray));
// NOTE(review): each pair below (GenerateCreateType / GenerateCreateHashtable) is presumably
// selected by conditional compilation - confirm
3200 if (code._caps != null)
3202 GenerateCreateType(typeof(Dictionary<Int32, Int32>), capsF, code._caps);
3204 GenerateCreateHashtable(capsF, code._caps);
3208 if (tree._capnames != null)
3210 GenerateCreateType(typeof(Dictionary<String, Int32>), capnamesF, tree._capnames);
3212 GenerateCreateHashtable(capnamesF, tree._capnames);
// emit code that allocates the capslist string array and stores each name into it
3217 if (tree._capslist != null) {
3219 Ldc(tree._capslist.Length);
3220 _ilg.Emit(OpCodes.Newarr, typeof(String)); // create new string array
3223 for (int i=0; i< tree._capslist.Length; i++) {
3224 Ldthisfld(capslistF);
3227 Ldstr(tree._capslist[i]);
3228 _ilg.Emit(OpCodes.Stelem_Ref);
3237 // set runnerref and replref by calling InitializeReferences()
3239 Call(typeof(Regex).GetMethod("InitializeReferences", BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic));
3245 // Constructor with the timeout parameter:
3246 _methbuilder = null;
3247 ma = System.Reflection.MethodAttributes.Public;
3248 tmoutCtorBuilder = _typebuilder.DefineConstructor(ma, CallingConventions.Standard, new Type[] { typeof(TimeSpan) });
3249 _ilg = tmoutCtorBuilder.GetILGenerator();
3251 // Call the default constructor:
3253 _ilg.Emit(OpCodes.Call, defCtorBuilder);
3255 // Validate timeout:
// Ldarg_1 is the TimeSpan argument (argument 0 is "this")
3256 _ilg.Emit(OpCodes.Ldarg_1);
3257 Call(typeof(Regex).GetMethod("ValidateMatchTimeout", BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic));
// store the caller-supplied timeout over the default set by the parameterless constructor
3261 _ilg.Emit(OpCodes.Ldarg_1);
3262 Stfld(internalMatchTimeoutF);
3268 // bake the constructor and type, then save the assembly
3269 defCtorBuilder = null;
3270 tmoutCtorBuilder = null;
3271 _typebuilder.CreateType();
3273 _typebuilder = null;
// Emits code that constructs a collection of type myCollectionType through its parameterless
// constructor; the collection's "Add" method is looked up by reflection for the emitted code.
//   myCollectionType - collection type to instantiate in the generated code
//   field            - Regex field associated with the new collection
//   ht               - compile-time dictionary supplying the entries
3277 internal void GenerateCreateType<TKey>(Type myCollectionType, FieldInfo field, Dictionary<TKey,int> ht) {
3278 MethodInfo addMethod = myCollectionType.GetMethod("Add", BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);
3281 Newobj(myCollectionType.GetConstructor(new Type[0]));
// Emits code that constructs a Hashtable and adds every entry of ht to it through Hashtable.Add.
//   field - Regex field associated with the new Hashtable
//   ht    - compile-time table; keys are either int or String, values are int
3283 internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht) {
3284 MethodInfo addMethod = typeof(Hashtable).GetMethod("Add", BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);
3287 Newobj(typeof(Hashtable).GetConstructor(new Type[0]));
3292 IDictionaryEnumerator en = ht.GetEnumerator();
3293 while (en.MoveNext()) {
// Hashtable.Add takes object arguments, so int keys and the int value are boxed in the emitted code
3296 if (en.Key is int) {
3299 _ilg.Emit(OpCodes.Box, typeof(Int32));
3303 Ldstr((String) en.Key);
3305 Ldc((int) en.Value);
3307 _ilg.Emit(OpCodes.Box, typeof(Int32));
3309 Callvirt(addMethod);
// Looks up a field of Regex by name through reflection, regardless of visibility and of whether
// it is an instance or static field. GetField returns null when no such field exists.
3313 private FieldInfo RegexField(String fieldname) {
3314 return typeof(Regex).GetField(fieldname, BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);
3317 // Note that we save the assembly to the current directory, and we believe this is not a
3318 // problem because this should only be used by tools, not at runtime.
// Writes the dynamic assembly to disk under the file name "<assembly name>.dll".
3319 [ResourceExposure(ResourceScope.None)]
3320 [ResourceConsumption(ResourceScope.Machine, ResourceScope.Machine)]
3321 internal void Save() {
3322 _assembly.Save(_assembly.GetName().Name + ".dll");
3326 * Generates a very simple factory method.
// Emits the body of the factory's CreateInstance method: constructs newtype through its
// parameterless constructor.
//   newtype - the generated runner type to instantiate
3328 internal void GenerateCreateInstance(Type newtype) {
3329 Newobj(newtype.GetConstructor(new Type[0]));
3334 * Begins the definition of a new type with a specified base class
// Starts a new class in the dynamic module and makes it the current _typebuilder.
//   typename         - name of the new type
//   ispublic         - presumably selects between the Public and NotPublic definitions below
//   inheritfromclass - base class of the new type
3336 internal void DefineType(String typename, bool ispublic, Type inheritfromclass) {
3338 _typebuilder = _module.DefineType(typename, TypeAttributes.Class | TypeAttributes.Public, inheritfromclass);
3340 _typebuilder = _module.DefineType(typename, TypeAttributes.Class | TypeAttributes.NotPublic, inheritfromclass);
3345 * Begins the definition of a new method (no args) with a specified return value
// Starts a new public virtual, parameterless method on the current type and points _ilg at its
// IL generator so that subsequent emit helpers write into it.
//   methname   - name of the method
//   returntype - return type; callers pass null for methods that return nothing
3347 internal void DefineMethod(String methname, Type returntype) {
3348 MethodAttributes ma = System.Reflection.MethodAttributes.Public | System.Reflection.MethodAttributes.Virtual;
// the final null argument means "no parameters"
3350 _methbuilder = _typebuilder.DefineMethod(methname, ma, returntype, null);
3351 _ilg = _methbuilder.GetILGenerator();
3355 * Ends the definition of a method
// Marks the current method as finished by dropping the reference to its MethodBuilder.
3357 internal void BakeMethod() {
3358 _methbuilder = null;
3362 * Ends the definition of a class and returns the type
// Finalizes the current type with TypeBuilder.CreateType and clears _typebuilder.
// The created Type is held in retval, presumably the value returned to the caller.
3364 internal Type BakeType() {
3365 Type retval = _typebuilder.CreateType();
3366 _typebuilder = null;
// RegexCompiler variant that emits the runner methods as DynamicMethod instances instead of
// defining types in a dynamic assembly.
3373 internal class RegexLWCGCompiler : RegexCompiler {
// counter that gives each compiled regex's dynamic methods a distinct numeric suffix
3374 private static int _regexCount = 0;
// parameter list shared by every generated dynamic method: a single RegexRunner argument
3375 private static Type[] _paramTypes = new Type[] {typeof(RegexRunner)};
3377 internal RegexLWCGCompiler() {
3381 * The top-level driver. Initializes everything then calls the Generate* methods.
// Compiles the given regex program into three dynamic methods (Go, FindFirstChar,
// InitTrackCount) and returns a CompiledRegexRunnerFactory wrapping them.
//   code    - compiled regex program whose tables are copied into the compiler state
//   options - regex options for this compilation
3383 internal RegexRunnerFactory FactoryInstanceFromCode(RegexCode code, RegexOptions options) {
// copy the pieces of the RegexCode that the Generate* methods read
3385 _codes = code._codes;
3386 _strings = code._strings;
3387 _fcPrefix = code._fcPrefix;
3388 _bmPrefix = code._bmPrefix;
3389 _anchors = code._anchors;
3390 _trackcount = code._trackcount;
3393 // pick a unique number for the methods we generate
3394 int regexnum = Interlocked.Increment(ref _regexCount);
3395 string regexnumString = regexnum.ToString(CultureInfo.InvariantCulture);
// each method is hosted on CompiledRegexRunner; a null return type means the method returns void
3397 DynamicMethod goMethod = DefineDynamicMethod("Go" + regexnumString, null, typeof(CompiledRegexRunner));
3400 DynamicMethod firstCharMethod = DefineDynamicMethod("FindFirstChar" + regexnumString, typeof(bool), typeof(CompiledRegexRunner));
3401 GenerateFindFirstChar();
3403 DynamicMethod trackCountMethod = DefineDynamicMethod("InitTrackCount" + regexnumString, null, typeof(CompiledRegexRunner));
3404 GenerateInitTrackCount();
3406 return new CompiledRegexRunnerFactory(goMethod, firstCharMethod, trackCountMethod);
3410 * Begins the definition of a new method (no args) with a specified return value
3412 internal DynamicMethod DefineDynamicMethod(String methname, Type returntype, Type hostType) {
3413 // We're claiming that these are static methods, but really they are instance methods.
3414 // By giving them a parameter which represents "this", we're tricking them into
3415 // being instance methods.
3417 MethodAttributes attribs = MethodAttributes.Public | MethodAttributes.Static;
3418 CallingConventions conventions = CallingConventions.Standard;
3420 DynamicMethod dm = new DynamicMethod(methname, attribs, conventions, returntype, _paramTypes, hostType, false /*skipVisibility*/);
3421 _ilg = dm.GetILGenerator();