BindingFlags.Public needed here as Exception.HResult is now public in .NET 4.5. This...
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        interpreter.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32 using System.Diagnostics;
33 using System.Globalization;
34
35 namespace System.Text.RegularExpressions {
36
37         partial class Interpreter : BaseMachine {
38                 private int ReadProgramCount (int ptr)
39                 {
40                         int ret = program [ptr + 1];
41                         ret <<= 16;
42                         ret += program [ptr];
43                         return ret;
44                 }
45
46                 public Interpreter (ushort[] program) {
47                         this.program = program;
48                         this.qs = null;
49
50                         // process info block
51                         Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
52                         this.group_count = ReadProgramCount (1) + 1;
53                         this.match_min = ReadProgramCount (3);
54                         //this.match_max = ReadProgramCount (5);
55
56                         // setup
57
58                         this.program_start = 7;
59                         this.groups = new int [group_count];
60                 }
61
62                 // IMachine implementation
63
64                 public override Match Scan (Regex regex, string text, int start, int end) {
65                         this.text = text;
66                         this.text_end = end;
67                         this.scan_ptr = start;
68
69                         if (Eval (Mode.Match, ref scan_ptr, program_start))
70                                 return GenerateMatch (regex);
71
72                         return Match.Empty;
73                 }
74
75                 // private methods
76
77                 private void Reset () {
78                         ResetGroups ();
79                         fast = repeat = null;
80                 }
81
82                 private bool Eval (Mode mode, ref int ref_ptr, int pc) {
83                         int ptr = ref_ptr;
84                 Begin:
85                         for (;;) {
86                                 ushort word = program[pc];
87                                 OpCode op = (OpCode)(word & 0x00ff);
88                                 OpFlags flags = (OpFlags)(word & 0xff00);
89
90                                 switch (op) {
91                                 case OpCode.Anchor: {
92                                         int skip = program[pc + 1];
93                                         
94                                         int anch_offset = program[pc + 2];
95                                         bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; 
96                                         int anch_ptr = anch_reverse ?  ptr - anch_offset  : ptr + anch_offset;
97                                         int anch_end = text_end - match_min + anch_offset;      // maximum anchor position  
98                                         
99                                         
100                                         int anch_begin =  0;
101
102
103                                         // the general case for an anchoring expression is at the bottom, however we
104                                         // do some checks for the common cases before to save processing time. the current
105                                         // optimizer only outputs three types of anchoring expressions: fixed position,
106                                         // fixed substring, and no anchor.
107
108                                         OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);                                    
109                                         if (anch_op == OpCode.Position && skip == 6) {                          // position anchor
110                                                 // Anchor
111                                                 //      Position
112                                                 //      True
113
114                                                 switch ((Position)program[pc + 4]) {
115                                                 case Position.StartOfString:
116                                                         if (anch_reverse || anch_offset == 0) {
117                                                                 if (anch_reverse)
118                                                                         ptr = anch_offset;
119                                                                 if (TryMatch (ref ptr, pc + skip))
120                                                                         goto Pass;
121                                                         }
122                                                         break;
123                                                 
124                                                 case Position.StartOfLine:
125                                                          if (anch_ptr == 0) {
126                                                                 ptr = 0;
127                                                                 if (TryMatch (ref ptr, pc + skip))
128                                                                         goto Pass;
129                                                                 
130                                                                 ++ anch_ptr;
131                                                         }
132
133                                                         while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {  
134                                                                 if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
135                                                                         if (anch_reverse)
136                                                                                 ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
137                                                                         else
138                                                                                 ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
139                                                                         if (TryMatch (ref ptr, pc + skip))
140                                                                                 goto Pass;
141                                                                 }
142                                                         
143                                                                 if (anch_reverse)
144                                                                         -- anch_ptr;
145                                                                 else
146                                                                         ++ anch_ptr;
147                                                         }
148                                                         break;
149                                                 
150                                                 case Position.StartOfScan:
151                                                         if (anch_ptr == scan_ptr) {                                                     
152                                                                 ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
153                                                                 if (TryMatch (ref ptr, pc + skip))
154                                                                         goto Pass;
155                                                         }
156                                                         break;
157
158                                                 default:
159                                                         // FIXME
160                                                         break;
161                                                 }
162                                         }
163                                         else if (qs != null ||
164                                                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor
165                                                 // Anchor
166                                                 //      String
167                                                 //      True
168                                  
169                                                 bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
170
171                                                 if (qs == null) {
172                                                         bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
173                                                         string substring = GetString (pc + 3);
174                                                         qs = new QuickSearch (substring, ignore, reverse);
175                                                 }
176                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
177                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
178
179                                                         if (reverse)    
180                                                         {
181                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_begin);
182                                                                 if (anch_ptr != -1)
183                                                                         anch_ptr += qs.Length ;
184                                                                 
185                                                         }
186                                                         else
187                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_end);
188                                                         if (anch_ptr < 0)
189                                                                 break;
190
191                                                         ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
192                                                         if (TryMatch (ref ptr, pc + skip))
193                                                                 goto Pass;
194
195                                                         if (reverse)
196                                                                 anch_ptr -= 2;
197                                                         else 
198                                                                 ++ anch_ptr;
199                                                 }
200                                         }
201                                         else if (anch_op == OpCode.True) {                                      // no anchor
202                                                 // Anchor
203                                                 //      True
204
205                                         
206                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
207                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
208
209                                                         ptr = anch_ptr;
210                                                         if (TryMatch (ref ptr, pc + skip))
211                                                                 goto Pass;
212                                                         if (anch_reverse)
213                                                                 -- anch_ptr;
214                                                         else 
215                                                                 ++ anch_ptr;
216                                                 }
217                                         }
218                                         else {                                                                  // general case
219                                                 // Anchor
220                                                 //      <expr>
221                                                 //      True
222
223                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
224                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
225
226                                                         ptr = anch_ptr;
227                                                         if (Eval (Mode.Match, ref ptr, pc + 3)) {
228                                                                 // anchor expression passed: try real expression at the correct offset
229
230                                                                 ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
231                                                                 if (TryMatch (ref ptr, pc + skip))
232                                                                         goto Pass;
233                                                         }
234
235                                                     if (anch_reverse)
236                                                                 -- anch_ptr;
237                                                         else 
238                                                                 ++ anch_ptr;
239                                                 }
240                                         }
241
242                                         goto Fail;
243                                 }
244                                 
245                                 case OpCode.False: {
246                                         goto Fail;
247                                 }
248
249                                 case OpCode.True: {
250                                         goto Pass;
251                                 }
252
253                                 case OpCode.Position: {
254                                         if (!IsPosition ((Position)program[pc + 1], ptr))
255                                                 goto Fail;
256                                         pc += 2;
257                                         break;
258                                 }
259
260                                 case OpCode.String: {
261                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
262                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
263                                         int len = program[pc + 1];
264
265                                         if (reverse) {
266                                                 ptr -= len;
267                                                 if (ptr < 0)
268                                                         goto Fail;
269                                         }
270                                         else 
271                                         if (ptr + len > text_end)
272                                                 goto Fail;
273
274                                         pc += 2;
275                                         for (int i = 0; i < len; ++ i) {
276                                                 char c = text[ptr + i];
277                                                 if (ignore)
278                                                         c = Char.ToLower (c);
279
280                                                 if (c != (char)program[pc ++])
281                                                         goto Fail;
282                                         }
283
284                                         if (!reverse)
285                                                 ptr += len;
286                                         break;
287                                 }
288
289                                 case OpCode.Reference: {
290                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
291                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
292                                         int m = GetLastDefined (program [pc + 1]);
293                                         if (m < 0)
294                                                 goto Fail;
295
296                                         int str = marks [m].Index;
297                                         int len = marks [m].Length;
298
299                                         if (reverse) {
300                                                 ptr -= len;
301                                                 if (ptr < 0)
302                                                         goto Fail;
303                                         }
304                                         else if (ptr + len > text_end)
305                                                 goto Fail;
306
307                                         pc += 2;
308                                         if (ignore) {
309                                                 for (int i = 0; i < len; ++ i) {
310                                                         if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
311                                                                 goto Fail;
312                                                 }
313                                         } else {
314                                                 for (int i = 0; i < len; ++ i) {
315                                                         if (text[ptr + i] != text[str + i])
316                                                                 goto Fail;
317                                                 }
318                                         }
319
320                                         if (!reverse)
321                                                 ptr += len;
322                                         break;
323                                 }
324
325                                 case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
326                                 case OpCode.Range: case OpCode.Set: {
327                                         if (!EvalChar (mode, ref ptr, ref pc, false))
328                                                 goto Fail;
329                                         break;
330                                 }
331
332                                 case OpCode.In: {
333                                         int target = pc + program[pc + 1];
334                                         pc += 2;
335                                         if (!EvalChar (mode, ref ptr, ref pc, true))
336                                                 goto Fail;
337
338                                         pc = target;
339                                         break;
340                                 }
341
342                                 case OpCode.Open: {
343                                         Open (program[pc + 1], ptr);
344                                         pc += 2;
345                                         break;
346                                 }
347
348                                 case OpCode.Close: {
349                                         Close (program[pc + 1], ptr);
350                                         pc += 2;
351                                         break;
352                                 }
353
354                                 case OpCode.BalanceStart: {
355
356                                         int start = ptr; //point before the balancing group
357                                         
358                                         if (!Eval (Mode.Match, ref ptr, pc + 5))
359                                                 goto Fail;
360                                         
361                                         
362                                         
363                                         if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
364                                                 goto Fail;
365                                         }
366
367                                         
368                                         pc += program[pc + 4];
369                                         break;
370                                 }
371
372                                 case OpCode.Balance: {
373                                         goto Pass;
374                                 }
375
376                                 case OpCode.IfDefined: {
377                                         int m = GetLastDefined (program [pc + 2]);
378                                         if (m < 0)
379                                                 pc += program[pc + 1];
380                                         else
381                                                 pc += 3;
382                                         break;
383                                 }
384
385                                 case OpCode.Sub: {
386                                         if (!Eval (Mode.Match, ref ptr, pc + 2))
387                                                 goto Fail;
388
389                                         pc += program[pc + 1];
390                                         break;
391                                 }
392
393                                 case OpCode.Test: {
394                                         int cp = Checkpoint ();
395                                         int test_ptr = ptr;
396                                         if (Eval (Mode.Match, ref test_ptr, pc + 3))
397                                                 pc += program[pc + 1];
398                                         else {
399                                                 Backtrack (cp);
400                                                 pc += program[pc + 2];
401                                         }
402                                         break;
403                                 }
404
405                                 case OpCode.Branch: {
406                                         OpCode branch_op;
407                                         do {
408                                                 int cp = Checkpoint ();
409                                                 if (Eval (Mode.Match, ref ptr, pc + 2))
410                                                         goto Pass;
411                                                 
412                                                 Backtrack (cp);
413                                                 
414                                                 pc += program[pc + 1];
415                                                 branch_op = (OpCode)(program[pc] & 0xff);
416                                         } while (branch_op != OpCode.False);
417
418                                         goto Fail;
419                                 }
420
421                                 case OpCode.Jump: {
422                                         pc += program[pc + 1];
423                                         break;
424                                 }
425
426                                 case OpCode.Repeat: {
427                                         this.repeat = new RepeatContext (
428                                                 this.repeat,                    // previous context
429                                                 ReadProgramCount (pc + 2),              // minimum
430                                                 ReadProgramCount (pc + 4),              // maximum
431                                                 (flags & OpFlags.Lazy) != 0,    // lazy
432                                                 pc + 6                          // subexpression
433                                         );
434
435                                         if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
436                                                 goto Pass;
437                                         else {
438                                                 this.repeat = this.repeat.Previous;
439                                                 goto Fail;
440                                         }
441                                 }
442
443                                 case OpCode.Until: {
444                                         RepeatContext current = this.repeat;
445
446                                         //
447                                         // Can we avoid recursion?
448                                         //
449                                         // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
450                                         // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
451                                         // quantifiers.
452                                         //
453                                         // If 'deep' was unmolested, that implies that there was no nested quantifiers.
454                                         // Thus, we can safely avoid recursion.
455                                         //
456                                         if (deep == current)
457                                                 goto Pass;
458
459                                         int start = current.Start;
460                                         int start_count = current.Count;
461
462                                         while (!current.IsMinimum) {
463                                                 ++ current.Count;
464                                                 current.Start = ptr;
465                                                 deep = current;
466                                                 if (!Eval (Mode.Match, ref ptr, current.Expression)) {
467                                                         current.Start = start;
468                                                         current.Count = start_count;
469                                                         goto Fail;
470                                                 }
471                                                 if (deep != current)    // recursive mode
472                                                         goto Pass;
473                                         }
474
475                                         if (ptr == current.Start) {
476                                                 // degenerate match ... match tail or fail
477                                                 this.repeat = current.Previous;
478                                                 deep = null;
479                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
480                                                         goto Pass;
481                                         
482                                                 this.repeat = current;
483                                                 goto Fail;
484                                         }
485
486                                         if (current.IsLazy) {
487                                                 for (;;) {
488                                                         // match tail first ...
489                                                         this.repeat = current.Previous;
490                                                         deep = null;
491                                                         int cp = Checkpoint ();
492                                                         if (Eval (Mode.Match, ref ptr, pc + 1))
493                                                                 goto Pass;
494
495                                                         Backtrack (cp);
496
497                                                         // ... then match more
498                                                         this.repeat = current;
499                                                         if (current.IsMaximum)
500                                                                 goto Fail;
501                                                         ++ current.Count;
502                                                         current.Start = ptr;
503                                                         deep = current;
504                                                         if (!Eval (Mode.Match, ref ptr, current.Expression)) {
505                                                                 current.Start = start;
506                                                                 current.Count = start_count;
507                                                                 goto Fail;
508                                                         }
509                                                         if (deep != current)    // recursive mode
510                                                                 goto Pass;
511                                                         // Degenerate match: ptr has not moved since the last (failed) tail match.
512                                                         // So, next and subsequent tail matches will fail.
513                                                         if (ptr == current.Start)
514                                                                 goto Fail;
515                                                 }
516                                         } else {
517                                                 int stack_size = stack.Count;
518
519                                                 // match greedily as much as possible
520                                                 while (!current.IsMaximum) {
521                                                         int cp = Checkpoint ();
522                                                         int old_ptr = ptr;
523                                                         int old_start = current.Start;
524
525                                                         ++ current.Count;
526                                                         current.Start = ptr;
527                                                         deep = current;
528                                                         if (!Eval (Mode.Match, ref ptr, current.Expression)) {
529                                                                 -- current.Count;
530                                                                 current.Start = old_start;
531                                                                 Backtrack (cp);
532                                                                 break;
533                                                         }
534                                                         if (deep != current) {
535                                                                 // recursive mode: no more backtracking, truncate the stack
536                                                                 stack.Count = stack_size;
537                                                                 goto Pass;
538                                                         }
539                                                         stack.Push (cp);
540                                                         stack.Push (old_ptr);
541
542                                                         // Degenerate match: no point going on
543                                                         if (ptr == current.Start)
544                                                                 break;
545                                                 }
546
547                                                 // then, match the tail, backtracking as necessary.
548                                                 this.repeat = current.Previous;
549                                                 for (;;) {
550                                                         deep = null;
551                                                         if (Eval (Mode.Match, ref ptr, pc + 1)) {
552                                                                 stack.Count = stack_size;
553                                                                 goto Pass;
554                                                         }
555                                                         if (stack.Count == stack_size) {
556                                                                 this.repeat = current;
557                                                                 goto Fail;
558                                                         }
559
560                                                         --current.Count;
561                                                         ptr = stack.Pop ();
562                                                         Backtrack (stack.Pop ());
563                                                 }
564                                         }
565                                 }
566
567                                 case OpCode.FastRepeat: {
568                                         this.fast = new RepeatContext (
569                                                 fast,
570                                                 ReadProgramCount (pc + 2),              // minimum
571                                                 ReadProgramCount (pc + 4),              // maximum
572                                                 (flags & OpFlags.Lazy) != 0,    // lazy
573                                                 pc + 6                          // subexpression
574                                         );
575
576                                         fast.Start = ptr;
577
578                                         int cp = Checkpoint ();
579
580                                         pc += program[pc + 1];          // tail expression
581                                         ushort tail_word = program[pc];
582
583                                         int c1 = -1;            // first character of tail operator
584                                         int c2 = -1;            // ... and the same character, in upper case if ignoring case
585                                         int coff = 0;           // 0 or -1 depending on direction
586
587                                         OpCode tail_op = (OpCode)(tail_word & 0xff);
588                                         if (tail_op == OpCode.Character || tail_op == OpCode.String) {
589                                                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
590
591                                                 if ((tail_flags & OpFlags.Negate) != 0)
592                                                         goto skip;
593
594                                                 if (tail_op == OpCode.String)
595                                                 {
596                                                         int offset = 0;
597                                                 
598                                                         if ((tail_flags & OpFlags.RightToLeft) != 0)
599                                                         {
600                                                                 offset = program[pc + 1] - 1 ;
601                                                         }
602                                                           
603                                                         c1 = program[pc + 2 + offset];                          // first char of string
604                                                 }
605                                                 else
606                                                         c1 = program[pc + 1];                           // character
607                                                 
608                                                 if ((tail_flags & OpFlags.IgnoreCase) != 0)
609                                                         c2 = Char.ToUpper ((char)c1);                   // ignore case
610                                                 else
611                                                         c2 = c1;
612
613                                                 if ((tail_flags & OpFlags.RightToLeft) != 0)
614                                                         coff = -1;                                      // reverse
615                                                 else
616                                                         coff = 0;
617                                         }
618
619                                 skip:
620                                         if (fast.IsLazy) {
621                                                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
622                                                         //Console.WriteLine ("lazy fast: failed mininum.");
623                                                         fast = fast.Previous;
624                                                         goto Fail;
625                                                 }
626                                                 
627                                                 while (true) {
628                                                         int p = ptr + coff;
629                                                         if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
630                                                                 deep = null;
631                                                                 if (Eval (Mode.Match, ref ptr, pc))
632                                                                         break;
633                                                         }
634
635                                                         if (fast.IsMaximum) {
636                                                                 //Console.WriteLine ("lazy fast: failed with maximum.");
637                                                                 fast = fast.Previous;
638                                                                 goto Fail;
639                                                         }
640
641                                                         Backtrack (cp);
642                                                         if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
643                                                                 //Console.WriteLine ("lazy fast: no more.");
644                                                                 fast = fast.Previous;
645                                                                 goto Fail;
646                                                         }
647                                                 }
648                                                 fast = fast.Previous;
649                                                 goto Pass;
650                                         }
651                                         else {
652                                                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
653                                                         fast = fast.Previous;
654                                                         goto Fail;
655                                                 }
656                                         
657                                                 int width;
658                                                 if (fast.Count > 0)
659                                                         width = (ptr - fast.Start) / fast.Count;
660                                                 else
661                                                         width = 0;
662
663                                                 while (true) {
664                                                         int p = ptr + coff;
665                                                         if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
666                                                                 deep = null;
667                                                                 if (Eval (Mode.Match, ref ptr, pc))
668                                                                         break;
669                                                         }
670
671                                                         -- fast.Count;
672                                                         if (!fast.IsMinimum) {
673                                                                 fast = fast.Previous;
674                                                                 goto Fail;
675                                                         }
676
677                                                         ptr -= width;
678                                                         Backtrack (cp);
679                                                 }
680                                                 fast = fast.Previous;
681                                                 goto Pass;
682                                         }
683                                 }
684
685                                 case OpCode.Info: {
686                                         Debug.Assert (false, "Regex", "Info block found in pattern");
687                                         goto Fail;
688                                 }
689                                 }
690                         }
691                 Pass:
692                         ref_ptr = ptr;
693
694                         switch (mode) {
695                         case Mode.Match:
696                                 return true;
697
698                         case Mode.Count: {
699                                 ++ fast.Count;
700                                 if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
701                                         return true;
702
703                                 pc = fast.Expression;
704                                 goto Begin;
705                         }
706                         }
707
708                 Fail:
709                         switch (mode) {
710                         case Mode.Match:
711                                 return false;
712
713                         case Mode.Count: {
714                                 if (!fast.IsLazy && fast.IsMinimum)
715                                         return true;
716
717                                 ref_ptr = fast.Start;
718                                 return false;
719                         }
720                         }
721
722                         return false;
723                 }
724
725                 private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
726                         bool consumed = false;
727                         char c = '\0';
728                         bool negate;
729                         bool ignore;
730                 
731                         do {
732                                 ushort word = program[pc];
733                                 OpCode op = (OpCode)(word & 0x00ff);
734                                 OpFlags flags = (OpFlags)(word & 0xff00);
735
736                                 ++ pc;
737
738                                 ignore = (flags & OpFlags.IgnoreCase) != 0;
739                                 
740                                 // consume character: the direction of an In construct is
741                                 // determined by the direction of its first op
742
743                                 if (!consumed) {
744                                         if ((flags & OpFlags.RightToLeft) != 0) {
745                                                 if (ptr <= 0)
746                                                         return false;
747
748                                                 c = text[-- ptr];
749                                         }
750                                         else {
751                                                 if (ptr >= text_end)
752                                                         return false;
753
754                                                 c = text[ptr ++];
755                                         }
756
757                                         if (ignore)
758                                                 c = Char.ToLower (c);
759
760                                         consumed = true;
761                                 }
762
763                                 // negate flag
764
765                                 negate = (flags & OpFlags.Negate) != 0;
766
767                                 // execute op
768                                 
769                                 switch (op) {
770                                 case OpCode.True:
771                                         return true;
772
773                                 case OpCode.False:
774                                         return false;
775                                 
776                                 case OpCode.Character: {
777                                         if (c == (char)program[pc ++])
778                                                 return !negate;
779                                         break;
780                                 }
781
782                                 case OpCode.Category: {
783                                         if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
784                                                 return !negate;
785                                         break;
786                                 }
787
788                                 case OpCode.NotCategory: {
789                                         if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
790                                                 return !negate;
791                                         break;
792                                 }
793                                 
794                                 case OpCode.Range: {
795                                         int lo = (char)program[pc ++];
796                                         int hi = (char)program[pc ++];
797                                         if (lo <= c && c <= hi)
798                                                 return !negate;
799                                         break;
800                                 }
801
802                                 case OpCode.Set: {
803                                         int lo = (char)program[pc ++];
804                                         int len = (char)program[pc ++];
805                                         int bits = pc;
806                                         pc += len;
807
808                                         int i = (int)c - lo;
809                                         if (i < 0 || i >= len << 4)
810                                                 break;
811                                                 
812
813                                         if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
814                                                 return !negate;
815                                         break;
816                                 }
817                                 }
818                         } while (multi);
819
820                         return negate;
821                 }
822
823                 private bool TryMatch (ref int ref_ptr, int pc) {
824                         Reset ();
825                         
826                         int ptr = ref_ptr;
827                         marks [groups [0]].Start = ptr;
828                         if (Eval (Mode.Match, ref ptr, pc)) {
829                                 marks [groups [0]].End = ptr;
830                                 ref_ptr = ptr;
831                                 return true;
832                         }
833
834                         return false;
835                 }
836                 
837                 private bool IsPosition (Position pos, int ptr) {
838                         switch (pos) {
839                         case Position.Start: case Position.StartOfString:
840                                 return ptr == 0;
841
842                         case Position.StartOfLine:
843                                 return ptr == 0 || text[ptr - 1] == '\n';
844                                 
845                         case Position.StartOfScan:
846                                 return ptr == scan_ptr;
847                         
848                         case Position.End:
849                                 return ptr == text_end ||
850                                         (ptr == text_end - 1 && text[ptr] == '\n');
851
852                         case Position.EndOfLine:
853                                 return ptr == text_end || text[ptr] == '\n';
854                                 
855                         case Position.EndOfString:
856                                 return ptr == text_end;
857                                 
858                         case Position.Boundary:
859                                 if (text_end == 0)
860                                         return false;
861
862                                 if (ptr == 0)
863                                         return IsWordChar (text[ptr]);
864                                 else if (ptr == text_end)
865                                         return IsWordChar (text[ptr - 1]);
866                                 else
867                                         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
868
869                         case Position.NonBoundary:
870                                 if (text_end == 0)
871                                         return false;
872
873                                 if (ptr == 0)
874                                         return !IsWordChar (text[ptr]);
875                                 else if (ptr == text_end)
876                                         return !IsWordChar (text[ptr - 1]);
877                                 else
878                                         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
879                         
880                         default:
881                                 return false;
882                         }
883                 }
884
885                 private bool IsWordChar (char c) {
886                         return CategoryUtils.IsCategory (Category.Word, c);
887                 }
888
889                 private string GetString (int pc) {
890                         int len = program[pc + 1];
891                         int str = pc + 2;
892
893                         char[] cs = new char[len];
894                         for (int i = 0; i < len; ++ i)
895                                 cs[i] = (char)program[str ++];
896
897                         return new string (cs);
898                 }
899
900                 // capture management
901
902                 private void Open (int gid, int ptr) {
903                         int m = groups [gid];
904                         if (m < mark_start || marks [m].IsDefined) {
905                                 m = CreateMark (m);
906                                 groups [gid] = m;
907                         }
908
909                         marks [m].Start = ptr;
910                 }
911
912                 private void Close (int gid, int ptr) {
913                         marks [groups [gid]].End = ptr;
914                 }
915
916                 private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
917                         int b = groups [balance_gid];
918
919                         if(b == -1 || marks[b].Index < 0) {
920                                 //Group not previously matched
921                                 return false;
922                         }
923                         Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
924                         if (gid > 0 && capture){ 
925                                 Open (gid, marks [b].Index + marks [b].Length);
926                                 Close (gid, ptr);
927                         }
928
929                         groups [balance_gid] = marks[b].Previous;
930
931                         return true;
932                 }
933
934                 private int Checkpoint () {
935                         mark_start = mark_end;
936                         return mark_start;
937                 }
938
939                 private void Backtrack (int cp) {
940                         Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
941                         for (int i = 0; i < groups.Length; ++ i) {
942                                 int m = groups [i];
943                                 while (cp <= m) {
944                                         marks [m].Start = -1;
945                                         m = marks [m].Previous;
946                                 }
947
948                                 groups [i] = m;
949                         }
950                         mark_start = cp;
951                 }
952
953                 private void ResetGroups () {
954                         int n = groups.Length;
955                         if (marks == null)
956                                 marks = new Mark [n * 10];
957
958                         for (int i = 0; i < n; ++ i) {
959                                 groups [i] = i;
960
961                                 marks [i].Start = -1;
962                                 marks [i].End = -1;
963                                 marks [i].Previous = -1;
964                         }
965                         
966                         mark_start = 0;
967                         mark_end = n;
968                 }
969
970                 private int GetLastDefined (int gid) {
971                         int m = groups [gid];
972                         while (m >= 0 && !marks [m].IsDefined)
973                                 m = marks [m].Previous;
974
975                         return m;
976                 }
977
978                 private int CreateMark (int previous) {
979                         if (mark_end == marks.Length) {
980                                 Mark [] dest = new Mark [marks.Length * 2];
981                                 marks.CopyTo (dest, 0);
982                                 marks = dest;
983                         }
984
985                         int m = mark_end ++;
986                         marks [m].Start = marks [m].End = -1;
987                         marks [m].Previous = previous;
988
989                         return m;
990                 }
991
992                 private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
993                 {
994                         first_mark_index = -1;
995                         n_caps = 0;
996                         for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
997                                 if (!marks [m].IsDefined)
998                                         continue;
999                                 if (first_mark_index < 0)
1000                                         first_mark_index = m;
1001                                 ++n_caps;
1002                         }
1003                 }
1004
1005                 private void PopulateGroup (Group g, int first_mark_index, int n_caps)
1006                 {
1007                         int i = 1;
1008                         for (int m = marks [first_mark_index].Previous; m >= 0; m = marks [m].Previous) {
1009                                 if (!marks [m].IsDefined)
1010                                         continue;
1011                                 Capture cap = new Capture (text, marks [m].Index, marks [m].Length);
1012                                 g.Captures.SetValue (cap, n_caps - 1 - i);
1013                                 ++i;
1014                         }
1015                 }
1016
1017                 private Match GenerateMatch (Regex regex)
1018                 {
1019                         int n_caps, first_mark_index;
1020                         Group g;
1021                         GetGroupInfo (0, out first_mark_index, out n_caps);
1022
1023                         // Avoid fully populating the Match instance if not needed
1024                         if (!needs_groups_or_captures)
1025                                 return new Match (regex, this, text, text_end, 0, marks [first_mark_index].Index, marks [first_mark_index].Length);
1026
1027                         Match retval = new Match (regex, this, text, text_end, groups.Length, 
1028                                                   marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
1029                         PopulateGroup (retval, first_mark_index, n_caps);
1030
1031                         for (int gid = 1; gid < groups.Length; ++ gid) {
1032                                 GetGroupInfo (gid, out first_mark_index, out n_caps);
1033                                 if (first_mark_index < 0) {
1034                                         g = Group.Fail;
1035                                 } else {
1036                                         g = new Group (text, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
1037                                         PopulateGroup (g, first_mark_index, n_caps);
1038                                 }
1039                                 retval.Groups.SetValue (g, gid);
1040                         }
1041                         return retval;
1042                 }
1043
1044                 // interpreter attributes
1045
1046                 private ushort[] program;               // regex program
1047                 private int program_start;              // first instruction after info block
1048                 private string text;                    // input text
1049                 private int text_end;                   // end of input text (last character + 1)
1050                 private int group_count;                // number of capturing groups
1051                 private int match_min;//, match_max;    // match width information
1052                 private QuickSearch qs;                 // fast substring matcher
1053
1054                 // match state
1055                 
1056                 private int scan_ptr;                   // start of scan
1057
1058                 private RepeatContext repeat;           // current repeat context
1059                 private RepeatContext fast;             // fast repeat context
1060
1061                 // Repeat/Until handling
1062                 private IntStack stack = new IntStack (); // utility stack
1063                 private RepeatContext deep;             // points to the most-nested repeat context
1064
1065                 private Mark[] marks = null;            // mark stack
1066                 private int mark_start;                 // start of current checkpoint
1067                 private int mark_end;                   // end of checkpoint/next free mark
1068
1069                 private int[] groups;                   // current group definitions
1070
1071                 // private classes
1072
1073                 private struct IntStack {
1074                         int [] values;
1075                         int count;
1076                         public int Pop ()
1077                         {
1078                                 return values [--count];
1079                         }
1080                         public void Push (int value)
1081                         {
1082                                 if (values == null) {
1083                                         values = new int [8];
1084                                 } else if (count == values.Length) {
1085                                         int new_size = values.Length;
1086                                         new_size += new_size >> 1;
1087                                         int [] new_values = new int [new_size];
1088                                         for (int i = 0; i < count; ++i)
1089                                                 new_values [i] = values [i];
1090                                         values = new_values;
1091                                 }
1092                                 values [count++] = value;
1093                         }
1094                         public int Top {
1095                                 get { return values [count - 1]; }
1096                         }
1097                         public int Count {
1098                                 get { return count; }
1099                                 set {
1100                                         if (value > count)
1101                                                 throw new SystemException ("can only truncate the stack");
1102                                         count = value;
1103                                 }
1104                         }
1105                 }
1106
1107                 private class RepeatContext {
1108                         public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
1109                                 this.previous = previous;
1110                                 this.min = min;
1111                                 this.max = max;
1112                                 this.lazy = lazy;
1113                                 this.expr_pc = expr_pc;
1114                                 
1115                                 this.start = -1;
1116                                 this.count = 0;
1117                         }
1118
1119                         public int Count {
1120                                 get { return count; }
1121                                 set { count = value; }
1122                         }
1123
1124                         public int Start {
1125                                 get { return start; }
1126                                 set { start = value; }
1127                         }
1128
1129                         public bool IsMinimum {
1130                                 get { return min <= count; }
1131                         }
1132
1133                         public bool IsMaximum {
1134                                 get { return max <= count; }
1135                         }
1136
1137                         public bool IsLazy {
1138                                 get { return lazy; }
1139                         }
1140
1141                         public int Expression {
1142                                 get { return expr_pc; }
1143                         }
1144
1145                         public RepeatContext Previous {
1146                                 get { return previous; }
1147                         }
1148                 
1149                         private int start;
1150                         private int min, max;
1151                         private bool lazy;
1152                         private int expr_pc;
1153                         private RepeatContext previous;
1154
1155                         private int count;
1156                 }
1157
1158                 private enum Mode {
1159                         Search,
1160                         Match,
1161                         Count
1162                 }
1163         }
1164 }