Add a third AOT location, alongside ~/.mono/aot-cache and the assembly dir
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        interpreter.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32 using System.Diagnostics;
33 using System.Globalization;
34
35 namespace System.Text.RegularExpressions {
36
37         partial class Interpreter : BaseMachine {
38                 private int ReadProgramCount (int ptr)
39                 {
40                         int ret = program [ptr + 1];
41                         ret <<= 16;
42                         ret += program [ptr];
43                         return ret;
44                 }
45
46                 public Interpreter (ushort[] program) {
47                         this.program = program;
48                         this.qs = null;
49
50                         // process info block
51                         Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
52                         this.group_count = ReadProgramCount (1) + 1;
53                         this.match_min = ReadProgramCount (3);
54                         //this.match_max = ReadProgramCount (5);
55
56                         // setup
57
58                         this.program_start = 7;
59                         this.groups = new int [group_count];
60                 }
61
62                 // IMachine implementation
63
64                 public override Match Scan (Regex regex, string text, int start, int end, bool substring_mode) {
65                         this.regex_rtl = (regex.Options & RegexOptions.RightToLeft) != 0;
66
67                         if (!initialized)
68                         {
69                                 this.text_start = regex_rtl && substring_mode ? end : start;
70                                 this.text_end = regex_rtl ? substring_mode ? start : 0 : end;
71                                 this.initialized = true;
72                         }
73                         else
74                         {
75                                 this.text_start = start;
76                                 this.text_end = end;
77                         }
78                         this.text = text;
79                         this.scan_ptr = text_start;
80                         this.substring_mode = substring_mode;
81
82                         if (Eval (Mode.Match, ref scan_ptr, program_start))
83                                 return GenerateMatch (regex);
84
85                         return Match.Empty;
86                 }
87
88                 // private methods
89
90                 private void Reset () {
91                         ResetGroups ();
92                         fast = repeat = null;
93                 }
94
95                 private bool Eval (Mode mode, ref int ref_ptr, int pc) {
96                         int ptr = ref_ptr;
97                 Begin:
98                         for (;;) {
99                                 ushort word = program[pc];
100                                 OpCode op = (OpCode)(word & 0x00ff);
101                                 OpFlags flags = (OpFlags)(word & 0xff00);
102
103                                 switch (op) {
104                                 case OpCode.Anchor: {
105                                         int skip = program[pc + 1];
106                                         
107                                         int anch_offset = program[pc + 2];
108                                         bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; 
109                                         int anch_ptr = anch_reverse ?  ptr - anch_offset  : ptr + anch_offset;
110                                         int anch_end = (regex_rtl ? text_start : text_end) - match_min + anch_offset; // maximum anchor position  
111                                         
112                                         
113                                         int anch_begin =  0;
114
115
116                                         // the general case for an anchoring expression is at the bottom, however we
117                                         // do some checks for the common cases before to save processing time. the current
118                                         // optimizer only outputs three types of anchoring expressions: fixed position,
119                                         // fixed substring, and no anchor.
120
121                                         OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);                                    
122                                         if (anch_op == OpCode.Position && skip == 6) {                          // position anchor
123                                                 // Anchor
124                                                 //      Position
125                                                 //      True
126
127                                                 switch ((Position)program[pc + 4]) {
128                                                 case Position.StartOfString:
129                                                         if (anch_reverse || anch_offset == 0) {
130                                                                 if (anch_reverse)
131                                                                         ptr = anch_offset;
132                                                                 if (TryMatch (ref ptr, pc + skip))
133                                                                         goto Pass;
134                                                         }
135                                                         break;
136                                                 
137                                                 case Position.StartOfLine:
138                                                          if (anch_ptr == 0) {
139                                                                 ptr = 0;
140                                                                 if (TryMatch (ref ptr, pc + skip))
141                                                                         goto Pass;
142                                                                 
143                                                                 ++ anch_ptr;
144                                                         }
145
146                                                         while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {  
147                                                                 if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
148                                                                         if (anch_reverse)
149                                                                                 ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
150                                                                         else
151                                                                                 ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
152                                                                         if (TryMatch (ref ptr, pc + skip))
153                                                                                 goto Pass;
154                                                                 }
155                                                         
156                                                                 if (anch_reverse)
157                                                                         -- anch_ptr;
158                                                                 else
159                                                                         ++ anch_ptr;
160                                                         }
161                                                         break;
162                                                 
163                                                 case Position.StartOfScan:
164                                                         if (anch_ptr == scan_ptr) {                                                     
165                                                                 ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
166                                                                 if (TryMatch (ref ptr, pc + skip))
167                                                                         goto Pass;
168                                                         }
169                                                         break;
170
171                                                 default:
172                                                         // FIXME
173                                                         break;
174                                                 }
175                                         }
176                                         else if (qs != null ||
177                                                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor
178                                                 // Anchor
179                                                 //      String
180                                                 //      True
181                                  
182                                                 bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
183
184                                                 if (qs == null) {
185                                                         bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
186                                                         string substring = GetString (pc + 3);
187                                                         qs = new QuickSearch (substring, ignore, reverse);
188                                                 }
189                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
190                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
191
192                                                         if (reverse)    
193                                                         {
194                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_begin);
195                                                                 if (anch_ptr != -1)
196                                                                         anch_ptr += qs.Length ;
197                                                                 
198                                                         }
199                                                         else
200                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_end);
201                                                         if (anch_ptr < 0)
202                                                                 break;
203
204                                                         ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
205                                                         if (TryMatch (ref ptr, pc + skip))
206                                                                 goto Pass;
207
208                                                         if (reverse)
209                                                                 anch_ptr -= 2;
210                                                         else 
211                                                                 ++ anch_ptr;
212                                                 }
213                                         }
214                                         else if (anch_op == OpCode.True) {                                      // no anchor
215                                                 // Anchor
216                                                 //      True
217
218                                         
219                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
220                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
221
222                                                         ptr = anch_ptr;
223                                                         if (TryMatch (ref ptr, pc + skip))
224                                                                 goto Pass;
225                                                         if (anch_reverse)
226                                                                 -- anch_ptr;
227                                                         else 
228                                                                 ++ anch_ptr;
229                                                 }
230                                         }
231                                         else {                                                                  // general case
232                                                 // Anchor
233                                                 //      <expr>
234                                                 //      True
235
236                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
237                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
238
239                                                         ptr = anch_ptr;
240                                                         if (Eval (Mode.Match, ref ptr, pc + 3)) {
241                                                                 // anchor expression passed: try real expression at the correct offset
242
243                                                                 ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
244                                                                 if (TryMatch (ref ptr, pc + skip))
245                                                                         goto Pass;
246                                                         }
247
248                                                     if (anch_reverse)
249                                                                 -- anch_ptr;
250                                                         else 
251                                                                 ++ anch_ptr;
252                                                 }
253                                         }
254
255                                         goto Fail;
256                                 }
257                                 
258                                 case OpCode.False: {
259                                         goto Fail;
260                                 }
261
262                                 case OpCode.True: {
263                                         goto Pass;
264                                 }
265
266                                 case OpCode.Position: {
267                                         if (!IsPosition ((Position)program[pc + 1], ptr))
268                                                 goto Fail;
269                                         pc += 2;
270                                         break;
271                                 }
272
273                                 case OpCode.String: {
274                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
275                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
276                                         int len = program[pc + 1];
277
278                                         if (reverse) {
279                                                 ptr -= len;
280                                                 if ((!regex_rtl && ptr < 0) || (regex_rtl && ptr < text_end))
281                                                         goto Fail;
282                                         }
283                                         else 
284                                         if (ptr + len > text_end)
285                                                 goto Fail;
286
287                                         pc += 2;
288                                         for (int i = 0; i < len; ++ i) {
289                                                 char c = text[ptr + i];
290                                                 if (ignore)
291                                                         c = Char.ToLower (c);
292
293                                                 if (c != (char)program[pc ++])
294                                                         goto Fail;
295                                         }
296
297                                         if (!reverse)
298                                                 ptr += len;
299                                         break;
300                                 }
301
302                                 case OpCode.Reference: {
303                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
304                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
305                                         int m = GetLastDefined (program [pc + 1]);
306                                         if (m < 0)
307                                                 goto Fail;
308
309                                         int str = marks [m].Index;
310                                         int len = marks [m].Length;
311
312                                         if (reverse) {
313                                                 ptr -= len;
314                                                 if ((!regex_rtl && ptr < 0) || (regex_rtl && ptr < text_end))
315                                                         goto Fail;
316                                         }
317                                         else if (ptr + len > text_end)
318                                                 goto Fail;
319
320                                         pc += 2;
321                                         if (ignore) {
322                                                 for (int i = 0; i < len; ++ i) {
323                                                         if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
324                                                                 goto Fail;
325                                                 }
326                                         } else {
327                                                 for (int i = 0; i < len; ++ i) {
328                                                         if (text[ptr + i] != text[str + i])
329                                                                 goto Fail;
330                                                 }
331                                         }
332
333                                         if (!reverse)
334                                                 ptr += len;
335                                         break;
336                                 }
337
338                                 case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
339                                 case OpCode.Range: case OpCode.Set: {
340                                         if (!EvalChar (mode, ref ptr, ref pc, false))
341                                                 goto Fail;
342                                         break;
343                                 }
344
345                                 case OpCode.In: {
346                                         int target = pc + program[pc + 1];
347                                         pc += 2;
348                                         if (!EvalChar (mode, ref ptr, ref pc, true))
349                                                 goto Fail;
350
351                                         pc = target;
352                                         break;
353                                 }
354
355                                 case OpCode.Open: {
356                                         Open (program[pc + 1], ptr);
357                                         pc += 2;
358                                         break;
359                                 }
360
361                                 case OpCode.Close: {
362                                         Close (program[pc + 1], ptr);
363                                         pc += 2;
364                                         break;
365                                 }
366
367                                 case OpCode.BalanceStart: {
368
369                                         int start = ptr; //point before the balancing group
370                                         
371                                         if (!Eval (Mode.Match, ref ptr, pc + 5))
372                                                 goto Fail;
373                                         
374                                         
375                                         
376                                         if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
377                                                 goto Fail;
378                                         }
379
380                                         
381                                         pc += program[pc + 4];
382                                         break;
383                                 }
384
385                                 case OpCode.Balance: {
386                                         goto Pass;
387                                 }
388
389                                 case OpCode.IfDefined: {
390                                         int m = GetLastDefined (program [pc + 2]);
391                                         if (m < 0)
392                                                 pc += program[pc + 1];
393                                         else
394                                                 pc += 3;
395                                         break;
396                                 }
397
398                                 case OpCode.Sub: {
399                                         if (!Eval (Mode.Match, ref ptr, pc + 2))
400                                                 goto Fail;
401
402                                         pc += program[pc + 1];
403                                         break;
404                                 }
405
406                                 case OpCode.Test: {
407                                         int cp = Checkpoint ();
408                                         int test_ptr = ptr;
409                                         if (Eval (Mode.Match, ref test_ptr, pc + 3))
410                                                 pc += program[pc + 1];
411                                         else {
412                                                 Backtrack (cp);
413                                                 pc += program[pc + 2];
414                                         }
415                                         break;
416                                 }
417
418                                 case OpCode.Branch: {
419                                         OpCode branch_op;
420                                         do {
421                                                 int cp = Checkpoint ();
422                                                 if (Eval (Mode.Match, ref ptr, pc + 2))
423                                                         goto Pass;
424                                                 
425                                                 Backtrack (cp);
426                                                 
427                                                 pc += program[pc + 1];
428                                                 branch_op = (OpCode)(program[pc] & 0xff);
429                                         } while (branch_op != OpCode.False);
430
431                                         goto Fail;
432                                 }
433
434                                 case OpCode.Jump: {
435                                         pc += program[pc + 1];
436                                         break;
437                                 }
438
439                                 case OpCode.Repeat: {
440                                         this.repeat = new RepeatContext (
441                                                 this.repeat,                    // previous context
442                                                 ReadProgramCount (pc + 2),              // minimum
443                                                 ReadProgramCount (pc + 4),              // maximum
444                                                 (flags & OpFlags.Lazy) != 0,    // lazy
445                                                 pc + 6                          // subexpression
446                                         );
447
448                                         if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
449                                                 goto Pass;
450                                         else {
451                                                 this.repeat = this.repeat.Previous;
452                                                 goto Fail;
453                                         }
454                                 }
455
456                                 case OpCode.Until: {
457                                         RepeatContext current = this.repeat;
458
459                                         //
460                                         // Can we avoid recursion?
461                                         //
462                                         // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
463                                         // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
464                                         // quantifiers.
465                                         //
466                                         // If 'deep' was unmolested, that implies that there was no nested quantifiers.
467                                         // Thus, we can safely avoid recursion.
468                                         //
469                                         if (deep == current)
470                                                 goto Pass;
471
472                                         int start = current.Start;
473                                         int start_count = current.Count;
474
475                                         while (!current.IsMinimum) {
476                                                 ++ current.Count;
477                                                 current.Start = ptr;
478                                                 deep = current;
479                                                 if (!Eval (Mode.Match, ref ptr, current.Expression)) {
480                                                         current.Start = start;
481                                                         current.Count = start_count;
482                                                         goto Fail;
483                                                 }
484                                                 if (deep != current)    // recursive mode
485                                                         goto Pass;
486                                         }
487
488                                         if (ptr == current.Start) {
489                                                 // degenerate match ... match tail or fail
490                                                 this.repeat = current.Previous;
491                                                 deep = null;
492                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
493                                                         goto Pass;
494                                         
495                                                 this.repeat = current;
496                                                 goto Fail;
497                                         }
498
499                                         if (current.IsLazy) {
500                                                 for (;;) {
501                                                         // match tail first ...
502                                                         this.repeat = current.Previous;
503                                                         deep = null;
504                                                         int cp = Checkpoint ();
505                                                         if (Eval (Mode.Match, ref ptr, pc + 1))
506                                                                 goto Pass;
507
508                                                         Backtrack (cp);
509
510                                                         // ... then match more
511                                                         this.repeat = current;
512                                                         if (current.IsMaximum)
513                                                                 goto Fail;
514                                                         ++ current.Count;
515                                                         current.Start = ptr;
516                                                         deep = current;
517                                                         if (!Eval (Mode.Match, ref ptr, current.Expression)) {
518                                                                 current.Start = start;
519                                                                 current.Count = start_count;
520                                                                 goto Fail;
521                                                         }
522                                                         if (deep != current)    // recursive mode
523                                                                 goto Pass;
524                                                         // Degenerate match: ptr has not moved since the last (failed) tail match.
525                                                         // So, next and subsequent tail matches will fail.
526                                                         if (ptr == current.Start)
527                                                                 goto Fail;
528                                                 }
529                                         } else {
530                                                 int stack_size = stack.Count;
531
532                                                 // match greedily as much as possible
533                                                 while (!current.IsMaximum) {
534                                                         int cp = Checkpoint ();
535                                                         int old_ptr = ptr;
536                                                         int old_start = current.Start;
537
538                                                         ++ current.Count;
539                                                         current.Start = ptr;
540                                                         deep = current;
541                                                         if (!Eval (Mode.Match, ref ptr, current.Expression)) {
542                                                                 -- current.Count;
543                                                                 current.Start = old_start;
544                                                                 Backtrack (cp);
545                                                                 break;
546                                                         }
547                                                         if (deep != current) {
548                                                                 // recursive mode: no more backtracking, truncate the stack
549                                                                 stack.Count = stack_size;
550                                                                 goto Pass;
551                                                         }
552                                                         stack.Push (cp);
553                                                         stack.Push (old_ptr);
554
555                                                         // Degenerate match: no point going on
556                                                         if (ptr == current.Start)
557                                                                 break;
558                                                 }
559
560                                                 // then, match the tail, backtracking as necessary.
561                                                 this.repeat = current.Previous;
562                                                 for (;;) {
563                                                         deep = null;
564                                                         if (Eval (Mode.Match, ref ptr, pc + 1)) {
565                                                                 stack.Count = stack_size;
566                                                                 goto Pass;
567                                                         }
568                                                         if (stack.Count == stack_size) {
569                                                                 this.repeat = current;
570                                                                 goto Fail;
571                                                         }
572
573                                                         --current.Count;
574                                                         ptr = stack.Pop ();
575                                                         Backtrack (stack.Pop ());
576                                                 }
577                                         }
578                                 }
579
580                                 case OpCode.FastRepeat: {
581                                         this.fast = new RepeatContext (
582                                                 fast,
583                                                 ReadProgramCount (pc + 2),              // minimum
584                                                 ReadProgramCount (pc + 4),              // maximum
585                                                 (flags & OpFlags.Lazy) != 0,    // lazy
586                                                 pc + 6                          // subexpression
587                                         );
588
589                                         fast.Start = ptr;
590
591                                         int cp = Checkpoint ();
592
593                                         pc += program[pc + 1];          // tail expression
594                                         ushort tail_word = program[pc];
595
596                                         int c1 = -1;            // first character of tail operator
597                                         int c2 = -1;            // ... and the same character, in upper case if ignoring case
598                                         int coff = 0;           // 0 or -1 depending on direction
599
600                                         OpCode tail_op = (OpCode)(tail_word & 0xff);
601                                         if (tail_op == OpCode.Character || tail_op == OpCode.String) {
602                                                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
603
604                                                 if ((tail_flags & OpFlags.Negate) != 0)
605                                                         goto skip;
606
607                                                 if (tail_op == OpCode.String)
608                                                 {
609                                                         int offset = 0;
610                                                 
611                                                         if ((tail_flags & OpFlags.RightToLeft) != 0)
612                                                         {
613                                                                 offset = program[pc + 1] - 1 ;
614                                                         }
615                                                           
616                                                         c1 = program[pc + 2 + offset];                          // first char of string
617                                                 }
618                                                 else
619                                                         c1 = program[pc + 1];                           // character
620                                                 
621                                                 if ((tail_flags & OpFlags.IgnoreCase) != 0)
622                                                         c2 = Char.ToUpper ((char)c1);                   // ignore case
623                                                 else
624                                                         c2 = c1;
625
626                                                 if ((tail_flags & OpFlags.RightToLeft) != 0)
627                                                         coff = -1;                                      // reverse
628                                                 else
629                                                         coff = 0;
630                                         }
631
632                                 skip:
633                                         if (fast.IsLazy) {
634                                                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
635                                                         //Console.WriteLine ("lazy fast: failed mininum.");
636                                                         fast = fast.Previous;
637                                                         goto Fail;
638                                                 }
639                                                 
640                                                 while (true) {
641                                                         int p = ptr + coff;
642                                                         if (c1 < 0 || (p >= 0 && ((regex_rtl && p >= text_end) || (!regex_rtl && p < text_end)) && (c1 == text[p] || c2 == text[p]))) {
643                                                                 deep = null;
644                                                                 if (Eval (Mode.Match, ref ptr, pc))
645                                                                         break;
646                                                         }
647
648                                                         if (fast.IsMaximum) {
649                                                                 //Console.WriteLine ("lazy fast: failed with maximum.");
650                                                                 fast = fast.Previous;
651                                                                 goto Fail;
652                                                         }
653
654                                                         Backtrack (cp);
655                                                         if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
656                                                                 //Console.WriteLine ("lazy fast: no more.");
657                                                                 fast = fast.Previous;
658                                                                 goto Fail;
659                                                         }
660                                                 }
661                                                 fast = fast.Previous;
662                                                 goto Pass;
663                                         }
664                                         else {
665                                                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
666                                                         fast = fast.Previous;
667                                                         goto Fail;
668                                                 }
669                                         
670                                                 int width;
671                                                 if (fast.Count > 0)
672                                                         width = (ptr - fast.Start) / fast.Count;
673                                                 else
674                                                         width = 0;
675
676                                                 while (true) {
677                                                         int p = ptr + coff;
678                                                         if (c1 < 0 || (p >= 0 && ((regex_rtl && p >= text_end) || (!regex_rtl && p < text_end)) && (c1 == text[p] || c2 == text[p]))) {
679                                                                 deep = null;
680                                                                 if (Eval (Mode.Match, ref ptr, pc))
681                                                                         break;
682                                                         }
683
684                                                         -- fast.Count;
685                                                         if (!fast.IsMinimum) {
686                                                                 fast = fast.Previous;
687                                                                 goto Fail;
688                                                         }
689
690                                                         ptr -= width;
691                                                         Backtrack (cp);
692                                                 }
693                                                 fast = fast.Previous;
694                                                 goto Pass;
695                                         }
696                                 }
697
698                                 case OpCode.Info: {
699                                         Debug.Assert (false, "Regex", "Info block found in pattern");
700                                         goto Fail;
701                                 }
702                                 }
703                         }
704                 Pass:
705                         ref_ptr = ptr;
706
707                         switch (mode) {
708                         case Mode.Match:
709                                 return true;
710
711                         case Mode.Count: {
712                                 ++ fast.Count;
713                                 if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
714                                         return true;
715
716                                 pc = fast.Expression;
717                                 goto Begin;
718                         }
719                         }
720
721                 Fail:
722                         switch (mode) {
723                         case Mode.Match:
724                                 return false;
725
726                         case Mode.Count: {
727                                 if (!fast.IsLazy && fast.IsMinimum)
728                                         return true;
729
730                                 ref_ptr = fast.Start;
731                                 return false;
732                         }
733                         }
734
735                         return false;
736                 }
737
738                 private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
739                         bool consumed = false;
740                         char c = '\0';
741                         bool negate;
742                         bool ignore;
743                 
744                         do {
745                                 ushort word = program[pc];
746                                 OpCode op = (OpCode)(word & 0x00ff);
747                                 OpFlags flags = (OpFlags)(word & 0xff00);
748
749                                 ++ pc;
750
751                                 ignore = (flags & OpFlags.IgnoreCase) != 0;
752                                 
753                                 // consume character: the direction of an In construct is
754                                 // determined by the direction of its first op
755
756                                 if (!consumed) {
757                                         if ((flags & OpFlags.RightToLeft) != 0) {
758                                                 if ((substring_mode && ptr <= (regex_rtl ? text_end : text_start)) || (!substring_mode && ptr <= 0))
759                                                         return false;
760
761                                                 c = text[-- ptr];
762                                         }
763                                         else {
764                                                 if ((!regex_rtl && ptr >= text_end) || (regex_rtl && ptr >= text_start))
765                                                         return false;
766
767                                                 c = text[ptr ++];
768                                         }
769
770                                         if (ignore)
771                                                 c = Char.ToLower (c);
772
773                                         consumed = true;
774                                 }
775
776                                 // negate flag
777
778                                 negate = (flags & OpFlags.Negate) != 0;
779
780                                 // execute op
781                                 
782                                 switch (op) {
783                                 case OpCode.True:
784                                         return true;
785
786                                 case OpCode.False:
787                                         return false;
788                                 
789                                 case OpCode.Character: {
790                                         if (c == (char)program[pc ++])
791                                                 return !negate;
792                                         break;
793                                 }
794
795                                 case OpCode.Category: {
796                                         if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
797                                                 return !negate;
798                                         break;
799                                 }
800
801                                 case OpCode.NotCategory: {
802                                         if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
803                                                 return !negate;
804                                         break;
805                                 }
806                                 
807                                 case OpCode.Range: {
808                                         int lo = (char)program[pc ++];
809                                         int hi = (char)program[pc ++];
810                                         if (lo <= c && c <= hi)
811                                                 return !negate;
812                                         break;
813                                 }
814
815                                 case OpCode.Set: {
816                                         int lo = (char)program[pc ++];
817                                         int len = (char)program[pc ++];
818                                         int bits = pc;
819                                         pc += len;
820
821                                         int i = (int)c - lo;
822                                         if (i < 0 || i >= len << 4)
823                                                 break;
824                                                 
825
826                                         if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
827                                                 return !negate;
828                                         break;
829                                 }
830                                 }
831                         } while (multi);
832
833                         return negate;
834                 }
835
836                 private bool TryMatch (ref int ref_ptr, int pc) {
837                         Reset ();
838                         
839                         int ptr = ref_ptr;
840                         marks [groups [0]].Start = ptr;
841                         if (Eval (Mode.Match, ref ptr, pc)) {
842                                 marks [groups [0]].End = ptr;
843                                 ref_ptr = ptr;
844                                 return true;
845                         }
846
847                         return false;
848                 }
849                 
850                 private bool IsPosition (Position pos, int ptr) {
851                         switch (pos) {
852                         case Position.Start: case Position.StartOfString:
853                                 return ptr == 0 || (substring_mode && ((!regex_rtl && ptr == text_start) || (regex_rtl && ptr == text_end)));
854
855                         case Position.StartOfLine:
856                                 return ptr == 0 || text[ptr - 1] == '\n' || (substring_mode && ((!regex_rtl && ptr == text_start) || (regex_rtl && ptr == text_end)));
857                                 
858                         case Position.StartOfScan:
859                                 return ptr == scan_ptr;
860                         
861                         case Position.End:
862                                 return (!regex_rtl && ptr == text_end) || (regex_rtl && ptr == text_start) ||
863                                         (((!regex_rtl && ptr == text_end - 1) || (regex_rtl && ptr == text_start - 1)) && text[ptr] == '\n');
864
865                         case Position.EndOfLine:
866                                 return (!regex_rtl && ptr == text_end) || (regex_rtl && ptr == text_start) || text[ptr] == '\n';
867                                 
868                         case Position.EndOfString:
869                                 return (!regex_rtl && ptr == text_end) || (regex_rtl && ptr == text_start);
870                                 
871                         case Position.Boundary:
872                                 if ((!regex_rtl && text_end == 0) || (regex_rtl && text_start == 0))
873                                         return false;
874
875                                 if (ptr == 0)
876                                         return IsWordChar (text[ptr]);
877                                 else if ((!regex_rtl && ptr == text_end) || (regex_rtl && ptr == text_start))
878                                         return IsWordChar (text[ptr - 1]);
879                                 else
880                                         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
881
882                         case Position.NonBoundary:
883                                 if ((!regex_rtl && text_end == 0) || (regex_rtl && text_start == 0))
884                                         return false;
885
886                                 if (ptr == 0)
887                                         return !IsWordChar (text[ptr]);
888                                 else if ((!regex_rtl && ptr == text_end) || (regex_rtl && ptr == text_start))
889                                         return !IsWordChar (text[ptr - 1]);
890                                 else
891                                         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
892                         
893                         default:
894                                 return false;
895                         }
896                 }
897
898                 private bool IsWordChar (char c) {
899                         return CategoryUtils.IsCategory (Category.Word, c);
900                 }
901
902                 private string GetString (int pc) {
903                         int len = program[pc + 1];
904                         int str = pc + 2;
905
906                         char[] cs = new char[len];
907                         for (int i = 0; i < len; ++ i)
908                                 cs[i] = (char)program[str ++];
909
910                         return new string (cs);
911                 }
912
913                 // capture management
914
915                 private void Open (int gid, int ptr) {
916                         int m = groups [gid];
917                         if (m < mark_start || marks [m].IsDefined) {
918                                 m = CreateMark (m);
919                                 groups [gid] = m;
920                         }
921
922                         marks [m].Start = ptr;
923                 }
924
925                 private void Close (int gid, int ptr) {
926                         marks [groups [gid]].End = ptr;
927                 }
928
929                 private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
930                         int b = groups [balance_gid];
931
932                         if(b == -1 || marks[b].Index < 0) {
933                                 //Group not previously matched
934                                 return false;
935                         }
936                         Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
937                         if (gid > 0 && capture){ 
938                                 Open (gid, marks [b].Index + marks [b].Length);
939                                 Close (gid, ptr);
940                         }
941
942                         groups [balance_gid] = marks[b].Previous;
943
944                         return true;
945                 }
946
947                 private int Checkpoint () {
948                         mark_start = mark_end;
949                         return mark_start;
950                 }
951
952                 private void Backtrack (int cp) {
953                         Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
954                         for (int i = 0; i < groups.Length; ++ i) {
955                                 int m = groups [i];
956                                 while (cp <= m) {
957                                         marks [m].Start = -1;
958                                         m = marks [m].Previous;
959                                 }
960
961                                 groups [i] = m;
962                         }
963                         mark_start = cp;
964                 }
965
966                 private void ResetGroups () {
967                         int n = groups.Length;
968                         if (marks == null)
969                                 marks = new Mark [n * 10];
970
971                         for (int i = 0; i < n; ++ i) {
972                                 groups [i] = i;
973
974                                 marks [i].Start = -1;
975                                 marks [i].End = -1;
976                                 marks [i].Previous = -1;
977                         }
978                         
979                         mark_start = 0;
980                         mark_end = n;
981                 }
982
983                 private int GetLastDefined (int gid) {
984                         int m = groups [gid];
985                         while (m >= 0 && !marks [m].IsDefined)
986                                 m = marks [m].Previous;
987
988                         return m;
989                 }
990
991                 private int CreateMark (int previous) {
992                         if (mark_end == marks.Length) {
993                                 Mark [] dest = new Mark [marks.Length * 2];
994                                 marks.CopyTo (dest, 0);
995                                 marks = dest;
996                         }
997
998                         int m = mark_end ++;
999                         marks [m].Start = marks [m].End = -1;
1000                         marks [m].Previous = previous;
1001
1002                         return m;
1003                 }
1004
1005                 private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
1006                 {
1007                         first_mark_index = -1;
1008                         n_caps = 0;
1009                         for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
1010                                 if (!marks [m].IsDefined)
1011                                         continue;
1012                                 if (first_mark_index < 0)
1013                                         first_mark_index = m;
1014                                 ++n_caps;
1015                         }
1016                 }
1017
1018                 private void PopulateGroup (Group g, int first_mark_index, int n_caps)
1019                 {
1020                         int i = 1;
1021                         for (int m = marks [first_mark_index].Previous; m >= 0; m = marks [m].Previous) {
1022                                 if (!marks [m].IsDefined)
1023                                         continue;
1024                                 Capture cap = new Capture (text, marks [m].Index, marks [m].Length);
1025                                 g.Captures.SetValue (cap, n_caps - 1 - i);
1026                                 ++i;
1027                         }
1028                 }
1029
1030                 private Match GenerateMatch (Regex regex)
1031                 {
1032                         int n_caps, first_mark_index;
1033                         Group g;
1034                         GetGroupInfo (0, out first_mark_index, out n_caps);
1035
1036                         // Avoid fully populating the Match instance if not needed
1037                         if (!needs_groups_or_captures)
1038                                 return new Match (regex, this, text, text_end, 0, marks [first_mark_index].Index, marks [first_mark_index].Length);
1039
1040                         Match retval = new Match (regex, this, text, text_end, groups.Length, 
1041                                                   marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
1042                         PopulateGroup (retval, first_mark_index, n_caps);
1043
1044                         for (int gid = 1; gid < groups.Length; ++ gid) {
1045                                 GetGroupInfo (gid, out first_mark_index, out n_caps);
1046                                 if (first_mark_index < 0) {
1047                                         g = Group.Fail;
1048                                 } else {
1049                                         g = new Group (text, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
1050                                         PopulateGroup (g, first_mark_index, n_caps);
1051                                 }
1052                                 retval.Groups.SetValue (g, gid);
1053                         }
1054                         return retval;
1055                 }
1056
1057                 // interpreter attributes
1058
1059                 private ushort[] program;               // regex program
1060                 private int program_start;              // first instruction after info block
1061                 private string text;                    // input text
1062                 private int text_end;                   // end of input text (last character + 1)
1063                 private int group_count;                // number of capturing groups
1064                 private int match_min;//, match_max;    // match width information
1065                 private QuickSearch qs;                 // fast substring matcher
1066
1067                 private bool regex_rtl;
1068                 private int text_start;
1069                 private bool substring_mode;
1070                 private bool initialized;
1071
1072                 // match state
1073                 
1074                 private int scan_ptr;                   // start of scan
1075
1076                 private RepeatContext repeat;           // current repeat context
1077                 private RepeatContext fast;             // fast repeat context
1078
1079                 // Repeat/Until handling
1080                 private IntStack stack = new IntStack (); // utility stack
1081                 private RepeatContext deep;             // points to the most-nested repeat context
1082
1083                 private Mark[] marks = null;            // mark stack
1084                 private int mark_start;                 // start of current checkpoint
1085                 private int mark_end;                   // end of checkpoint/next free mark
1086
1087                 private int[] groups;                   // current group definitions
1088
1089                 // private classes
1090
1091                 private struct IntStack {
1092                         int [] values;
1093                         int count;
1094                         public int Pop ()
1095                         {
1096                                 return values [--count];
1097                         }
1098                         public void Push (int value)
1099                         {
1100                                 if (values == null) {
1101                                         values = new int [8];
1102                                 } else if (count == values.Length) {
1103                                         int new_size = values.Length;
1104                                         new_size += new_size >> 1;
1105                                         int [] new_values = new int [new_size];
1106                                         for (int i = 0; i < count; ++i)
1107                                                 new_values [i] = values [i];
1108                                         values = new_values;
1109                                 }
1110                                 values [count++] = value;
1111                         }
1112                         public int Top {
1113                                 get { return values [count - 1]; }
1114                         }
1115                         public int Count {
1116                                 get { return count; }
1117                                 set {
1118                                         if (value > count)
1119                                                 throw new SystemException ("can only truncate the stack");
1120                                         count = value;
1121                                 }
1122                         }
1123                 }
1124
1125                 private class RepeatContext {
1126                         public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
1127                                 this.previous = previous;
1128                                 this.min = min;
1129                                 this.max = max;
1130                                 this.lazy = lazy;
1131                                 this.expr_pc = expr_pc;
1132                                 
1133                                 this.start = -1;
1134                                 this.count = 0;
1135                         }
1136
1137                         public int Count {
1138                                 get { return count; }
1139                                 set { count = value; }
1140                         }
1141
1142                         public int Start {
1143                                 get { return start; }
1144                                 set { start = value; }
1145                         }
1146
1147                         public bool IsMinimum {
1148                                 get { return min <= count; }
1149                         }
1150
1151                         public bool IsMaximum {
1152                                 get { return max <= count; }
1153                         }
1154
1155                         public bool IsLazy {
1156                                 get { return lazy; }
1157                         }
1158
1159                         public int Expression {
1160                                 get { return expr_pc; }
1161                         }
1162
1163                         public RepeatContext Previous {
1164                                 get { return previous; }
1165                         }
1166                 
1167                         private int start;
1168                         private int min, max;
1169                         private bool lazy;
1170                         private int expr_pc;
1171                         private RepeatContext previous;
1172
1173                         private int count;
1174                 }
1175
1176                 private enum Mode {
1177                         Search,
1178                         Match,
1179                         Count
1180                 }
1181         }
1182 }