merging the Mainsoft branch to the trunk
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        interpreter.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32 using System.Diagnostics;
33 using System.Globalization;
34
35 namespace System.Text.RegularExpressions {
36
37         class Interpreter : IMachine {
38                 public Interpreter (ushort[] program) {
39                         this.program = program;
40                         this.qs = null;
41
42                         // process info block
43
44                         Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
45
46                         this.group_count = program[1] + 1;
47                         this.match_min = program[2];
48                         //this.match_max = program[3];
49
50                         // setup
51
52                         this.program_start = 4;
53                         this.groups = new int [group_count];
54                 }
55
56                 // IMachine implementation
57
58                 public Match Scan (Regex regex, string text, int start, int end) {
59                         this.text = text;
60                         this.text_end = end;
61                         this.scan_ptr = start;
62
63                         if (Eval (Mode.Match, ref scan_ptr, program_start))
64                                 return GenerateMatch (regex);
65
66                         return Match.Empty;
67                 }
68
69                 // private methods
70
71                 private void Reset () {
72                         ResetGroups ();
73                         fast = repeat = null;
74                 }
75
76                 private bool Eval (Mode mode, ref int ref_ptr, int pc) {
77                         int ptr = ref_ptr;
78                 Begin:
79                         for (;;) {
80                                 ushort word = program[pc];
81                                 OpCode op = (OpCode)(word & 0x00ff);
82                                 OpFlags flags = (OpFlags)(word & 0xff00);
83
84                                 switch (op) {
85                                 case OpCode.Anchor: {
86                                         int skip = program[pc + 1];
87                                         
88                                         int anch_offset = program[pc + 2];
89                                         bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; 
90                                         int anch_ptr = anch_reverse ?  ptr - anch_offset  : ptr + anch_offset;
91                                         int anch_end = text_end - match_min + anch_offset;      // maximum anchor position  
92                                         
93                                         
94                                         int anch_begin =  0;
95
96
97                                         // the general case for an anchoring expression is at the bottom, however we
98                                         // do some checks for the common cases before to save processing time. the current
99                                         // optimizer only outputs three types of anchoring expressions: fixed position,
100                                         // fixed substring, and no anchor.
101
102                                         OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);                                    
103                                         if (anch_op == OpCode.Position && skip == 6) {                          // position anchor
104                                                 // Anchor
105                                                 //      Position
106                                                 //      True
107
108                                                 switch ((Position)program[pc + 4]) {
109                                                 case Position.StartOfString:                                                    
110                                                         if (anch_reverse || anch_offset == 0) {
111                                                                 ptr = anch_offset;
112                                                                 if (TryMatch (ref ptr, pc + skip))
113                                                                         goto Pass;
114                                                         }
115                                                         break;
116                                                 
117                                                 case Position.StartOfLine:
118                                                                                                         
119                                                          if (anch_ptr == 0) {
120                                                                 ptr = 0;
121                                                                 if (TryMatch (ref ptr, pc + skip))
122                                                                         goto Pass;
123                                                                 
124                                                                 ++ anch_ptr;
125                                                         }
126
127                                                         while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {  
128                                                                 if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
129                                                                         if (anch_reverse)
130                                                                                 ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
131                                                                         else
132                                                                                 ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
133                                                                         if (TryMatch (ref ptr, pc + skip))
134                                                                                 goto Pass;
135                                                                 }
136                                                         
137                                                                 if (anch_reverse)
138                                                                         -- anch_ptr;
139                                                                 else
140                                                                         ++ anch_ptr;
141                                                         }
142                                                         break;
143                                                 
144                                                 case Position.StartOfScan:
145                                                         if (anch_ptr == scan_ptr) {                                                     
146                                                                 ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
147                                                                 if (TryMatch (ref ptr, pc + skip))
148                                                                         goto Pass;
149                                                         }
150                                                         break;
151
152                                                 default:
153                                                         // FIXME
154                                                         break;
155                                                 }
156                                         }
157                                         else if (qs != null ||
158                                                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor
159                                                 // Anchor
160                                                 //      String
161                                                 //      True
162                                  
163                                                 bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
164                                                 string substring = GetString (pc + 3);
165
166                                                 if (qs == null) {
167                                                         bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
168
169                                                         qs = new QuickSearch (substring, ignore, reverse);
170                                                 }
171                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
172                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
173
174                                                         if (reverse)    
175                                                         {
176                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_begin);
177                                                                 if (anch_ptr != -1)
178                                                                         anch_ptr += substring.Length ;
179                                                                 
180                                                         }
181                                                         else
182                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_end);
183                                                         if (anch_ptr < 0)
184                                                                 break;
185
186                                                         ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
187                                                         if (TryMatch (ref ptr, pc + skip))
188                                                                 goto Pass;
189
190                                                         if (reverse)
191                                                                 anch_ptr -= 2;
192                                                         else 
193                                                                 ++ anch_ptr;
194                                                 }
195                                         }
196                                         else if (anch_op == OpCode.True) {                                      // no anchor
197                                                 // Anchor
198                                                 //      True
199
200                                         
201                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
202                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
203
204                                                         ptr = anch_ptr;
205                                                         if (TryMatch (ref ptr, pc + skip))
206                                                                 goto Pass;
207                                                         if (anch_reverse)
208                                                                 -- anch_ptr;
209                                                         else 
210                                                                 ++ anch_ptr;
211                                                 }
212                                         }
213                                         else {                                                                  // general case
214                                                 // Anchor
215                                                 //      <expr>
216                                                 //      True
217
218                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
219                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
220
221                                                         ptr = anch_ptr;
222                                                         if (Eval (Mode.Match, ref ptr, pc + 3)) {
223                                                                 // anchor expression passed: try real expression at the correct offset
224
225                                                                 ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
226                                                                 if (TryMatch (ref ptr, pc + skip))
227                                                                         goto Pass;
228                                                         }
229
230                                                     if (anch_reverse)
231                                                                 -- anch_ptr;
232                                                         else 
233                                                                 ++ anch_ptr;
234                                                 }
235                                         }
236
237                                         goto Fail;
238                                 }
239                                 
240                                 case OpCode.False: {
241                                         goto Fail;
242                                 }
243
244                                 case OpCode.True: {
245                                         goto Pass;
246                                 }
247
248                                 case OpCode.Position: {
249                                         if (!IsPosition ((Position)program[pc + 1], ptr))
250                                                 goto Fail;
251                                         pc += 2;
252                                         break;
253                                 }
254
255                                 case OpCode.String: {
256                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
257                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
258                                         int len = program[pc + 1];
259
260                                         if (reverse) {
261                                                 ptr -= len;
262                                                 if (ptr < 0)
263                                                         goto Fail;
264                                         }
265                                         else 
266                                         if (ptr + len > text_end)
267                                                 goto Fail;
268
269                                         pc += 2;
270                                         for (int i = 0; i < len; ++ i) {
271                                                 char c = text[ptr + i];
272                                                 if (ignore)
273                                                         c = Char.ToLower (c);
274
275                                                 if (c != (char)program[pc ++])
276                                                         goto Fail;
277                                         }
278
279                                         if (!reverse)
280                                                 ptr += len;
281                                         break;
282                                 }
283
284                                 case OpCode.Reference: {
285                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
286                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
287                                         int m = GetLastDefined (program [pc + 1]);
288                                         if (m < 0)
289                                                 goto Fail;
290
291                                         int str = marks [m].Index;
292                                         int len = marks [m].Length;
293
294                                         if (reverse) {
295                                                 ptr -= len;
296                                                 if (ptr < 0)
297                                                         goto Fail;
298                                         }
299                                         else if (ptr + len > text_end)
300                                                 goto Fail;
301
302                                         pc += 2;
303                                         for (int i = 0; i < len; ++ i) {
304                                                 if (ignore) {
305                                                         if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
306                                                                 goto Fail;
307                                                 }
308                                                 else {
309                                                         if (text[ptr + i] != text[str + i])
310                                                                 goto Fail;
311                                                 }
312                                         }
313
314                                         if (!reverse)
315                                                 ptr += len;
316                                         break;
317                                 }
318
319                                 case OpCode.Character: case OpCode.Category:
320                                 case OpCode.Range: case OpCode.Set: {
321                                         if (!EvalChar (mode, ref ptr, ref pc, false))
322                                                 goto Fail;
323                                         break;
324                                 }
325
326                                 case OpCode.In: {
327                                         int target = pc + program[pc + 1];
328                                         pc += 2;
329                                         if (!EvalChar (mode, ref ptr, ref pc, true))
330                                                 goto Fail;
331
332                                         pc = target;
333                                         break;
334                                 }
335
336                                 case OpCode.Open: {
337                                         Open (program[pc + 1], ptr);
338                                         pc += 2;
339                                         break;
340                                 }
341
342                                 case OpCode.Close: {
343                                         Close (program[pc + 1], ptr);
344                                         pc += 2;
345                                         break;
346                                 }
347
348                                 case OpCode.BalanceStart: {
349
350                                         int start = ptr; //point before the balancing group
351                                         
352                                         if (!Eval (Mode.Match, ref ptr, pc + 5))
353                                                 goto Fail;
354                                         
355                                         
356                                         
357                                         if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
358                                                 goto Fail;
359                                         }
360
361                                         
362                                         pc += program[pc + 4];
363                                         break;
364                                 }
365
366                                 case OpCode.Balance: {
367                                         goto Pass;
368                                 }
369
370                                 case OpCode.IfDefined: {
371                                         int m = GetLastDefined (program [pc + 2]);
372                                         if (m < 0)
373                                                 pc += program[pc + 1];
374                                         else
375                                                 pc += 3;
376                                         break;
377                                 }
378
379                                 case OpCode.Sub: {
380                                         if (!Eval (Mode.Match, ref ptr, pc + 2))
381                                                 goto Fail;
382
383                                         pc += program[pc + 1];
384                                         break;
385                                 }
386
387                                 case OpCode.Test: {
388                                         int cp = Checkpoint ();
389                                         int test_ptr = ptr;
390                                         if (Eval (Mode.Match, ref test_ptr, pc + 3))
391                                                 pc += program[pc + 1];
392                                         else {
393                                                 Backtrack (cp);
394                                                 pc += program[pc + 2];
395                                         }
396                                         break;
397                                 }
398
399                                 case OpCode.Branch: {
400                                         OpCode branch_op;
401                                         do {
402                                                 int cp = Checkpoint ();
403                                                 if (Eval (Mode.Match, ref ptr, pc + 2))
404                                                         goto Pass;
405                                                 
406                                                 Backtrack (cp);
407                                                 
408                                                 pc += program[pc + 1];
409                                                 branch_op = (OpCode)(program[pc] & 0xff);
410                                         } while (branch_op != OpCode.False);
411
412                                         goto Fail;
413                                 }
414
415                                 case OpCode.Jump: {
416                                         pc += program[pc + 1];
417                                         break;
418                                 }
419
420                                 case OpCode.Repeat: {
421                                         this.repeat = new RepeatContext (
422                                                 this.repeat,                    // previous context
423                                                 program[pc + 2],                // minimum
424                                                 program[pc + 3],                // maximum
425                                                 (flags & OpFlags.Lazy) != 0,    // lazy
426                                                 pc + 4                          // subexpression
427                                         );
428
429                                         if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
430                                                 goto Pass;
431                                         else {
432                                                 this.repeat = this.repeat.Previous;
433                                                 goto Fail;
434                                         }
435                                 }
436
437                                 case OpCode.Until: {
438                                         RepeatContext current = this.repeat;
439                                         int start = current.Start;
440
441                                         if (!current.IsMinimum) {
442                                                 ++ current.Count;
443                                                 current.Start = ptr;
444                                                 if (Eval (Mode.Match, ref ptr, repeat.Expression))
445                                                         goto Pass;
446
447                                                 current.Start = start;
448                                                 -- current.Count;
449                                                 goto Fail;
450                                         }
451
452                                         if (ptr == current.Start) {
453                                                 // degenerate match ... match tail or fail
454
455                                                 this.repeat = current.Previous;
456                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
457                                                         goto Pass;
458                                         
459                                                 this.repeat = current;
460                                                 goto Fail;
461                                         }
462
463                                         if (current.IsLazy) {
464                                                 // match tail first ...
465
466                                                 this.repeat = current.Previous;
467                                                 int cp = Checkpoint ();
468                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
469                                                         goto Pass;
470
471                                                 Backtrack (cp);
472
473                                                 // ... then match more
474
475                                                 this.repeat = current;
476                                                 if (!current.IsMaximum) {
477                                                         ++ current.Count;
478                                                         current.Start = ptr;
479                                                         if (Eval (Mode.Match, ref ptr, current.Expression))
480                                                                 goto Pass;
481
482                                                         current.Start = start;
483                                                         -- current.Count;
484                                                         goto Fail;
485                                                 }
486
487                                                 return false;
488                                         }
489                                         else {
490                                                 // match more first ...
491
492                                                 if (!current.IsMaximum) {
493                                                         int cp = Checkpoint ();
494                                                         ++ current.Count;
495                                                         current.Start = ptr;
496                                                         if (Eval (Mode.Match, ref ptr, current.Expression))
497                                                                 goto Pass;
498
499                                                         current.Start = start;
500                                                         -- current.Count;
501                                                         Backtrack (cp);
502                                                 }
503
504                                                 // ... then match tail
505
506                                                 this.repeat = current.Previous;
507                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
508                                                         goto Pass;
509
510                                                 this.repeat = current;
511                                                 goto Fail;
512                                         }
513                                 }
514
515                                 case OpCode.FastRepeat: {
516                                         this.fast = new RepeatContext (
517                                                 fast,
518                                                 program[pc + 2],                // minimum
519                                                 program[pc + 3],                // maximum
520                                                 (flags & OpFlags.Lazy) != 0,    // lazy
521                                                 pc + 4                          // subexpression
522                                         );
523                                         fast.Start = ptr;
524
525                                         int cp = Checkpoint ();
526
527                                         pc += program[pc + 1];          // tail expression
528                                         ushort tail_word = program[pc];
529
530                                         int c1, c2;                     // first character of tail operator
531                                         int coff;                       // 0 or -1 depending on direction
532
533                                         OpCode tail_op = (OpCode)(tail_word & 0xff);
534                                         if (tail_op == OpCode.Character || tail_op == OpCode.String) {
535                                                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
536
537                                                 if (tail_op == OpCode.String)
538                                                 {
539                                                         int offset = 0;
540                                                 
541                                                         if ((tail_flags & OpFlags.RightToLeft) != 0)
542                                                         {
543                                                                 offset = program[pc + 1] - 1 ;
544                                                         }
545                                                           
546                                                         c1 = program[pc + 2 + offset];                          // first char of string
547                                                 }
548                                                 else
549                                                         c1 = program[pc + 1];                           // character
550                                                 
551                                                 if ((tail_flags & OpFlags.IgnoreCase) != 0)
552                                                         c2 = Char.ToUpper ((char)c1);                   // ignore case
553                                                 else
554                                                         c2 = c1;
555
556                                                 if ((tail_flags & OpFlags.RightToLeft) != 0)
557                                                         coff = -1;                                      // reverse
558                                                 else
559                                                         coff = 0;
560                                         }
561                                         else {
562                                                 c1 = c2 = -1;
563                                                 coff = 0;
564                                         }
565
566                                         if (fast.IsLazy) {
567                                                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
568                                                         //Console.WriteLine ("lazy fast: failed mininum.");
569                                                         fast = fast.Previous;
570                                                         goto Fail;
571                                                 }
572                                                 
573                                                 while (true) {
574                                                         int p = ptr + coff;
575                                                         if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
576                                                             Eval (Mode.Match, ref ptr, pc))
577                                                                 break;
578
579                                                         if (fast.IsMaximum) {
580                                                                 //Console.WriteLine ("lazy fast: failed with maximum.");
581                                                                 fast = fast.Previous;
582                                                                 goto Fail;
583                                                         }
584
585                                                         Backtrack (cp);
586                                                         if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
587                                                                 //Console.WriteLine ("lazy fast: no more.");
588                                                                 fast = fast.Previous;
589                                                                 goto Fail;
590                                                         }
591                                                 }
592                                                 fast = fast.Previous;
593                                                 goto Pass;
594                                         }
595                                         else {
596                                                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
597                                                         fast = fast.Previous;
598                                                         goto Fail;
599                                                 }
600                                         
601                                                 int width;
602                                                 if (fast.Count > 0)
603                                                         width = (ptr - fast.Start) / fast.Count;
604                                                 else
605                                                         width = 0;
606
607                                                 while (true) {
608                                                         int p = ptr + coff;
609                                                         if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
610                                                             Eval (Mode.Match, ref ptr, pc))
611                                                                 break;
612
613                                                         -- fast.Count;
614                                                         if (!fast.IsMinimum) {
615                                                                 fast = fast.Previous;
616                                                                 goto Fail;
617                                                         }
618
619                                                         ptr -= width;
620                                                         Backtrack (cp);
621                                                 }
622                                                 fast = fast.Previous;
623                                                 goto Pass;
624                                         }
625                                 }
626
627                                 case OpCode.Info: {
628                                         Debug.Assert (false, "Regex", "Info block found in pattern");
629                                         goto Fail;
630                                 }
631                                 }
632                         }
633                 Pass:
634                         ref_ptr = ptr;
635
636                         switch (mode) {
637                         case Mode.Match:
638                                 return true;
639
640                         case Mode.Count: {
641                                 ++ fast.Count;
642                                 if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
643                                         return true;
644
645                                 pc = fast.Expression;
646                                 goto Begin;
647                         }
648                         }
649
650                 Fail:
651                         switch (mode) {
652                         case Mode.Match:
653                                 return false;
654
655                         case Mode.Count: {
656                                 if (!fast.IsLazy && fast.IsMinimum)
657                                         return true;
658
659                                 ref_ptr = fast.Start;
660                                 return false;
661                         }
662                         }
663
664                         return false;
665                 }
666
667                 private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
668                         bool consumed = false;
669                         char c = '\0';
670                         bool negate;
671                         bool ignore;
672                 
673                         do {
674                                 ushort word = program[pc];
675                                 OpCode op = (OpCode)(word & 0x00ff);
676                                 OpFlags flags = (OpFlags)(word & 0xff00);
677
678                                 ++ pc;
679
680                                 ignore = (flags & OpFlags.IgnoreCase) != 0;
681                                 
682                                 // consume character: the direction of an In construct is
683                                 // determined by the direction of its first op
684
685                                 if (!consumed) {
686                                         if ((flags & OpFlags.RightToLeft) != 0) {
687                                                 if (ptr <= 0)
688                                                         return false;
689
690                                                 c = text[-- ptr];
691                                         }
692                                         else {
693                                                 if (ptr >= text_end)
694                                                         return false;
695
696                                                 c = text[ptr ++];
697                                         }
698
699                                         if (ignore)
700                                                 c = Char.ToLower (c);
701
702                                         consumed = true;
703                                 }
704
705                                 // negate flag
706
707                                 negate = (flags & OpFlags.Negate) != 0;
708
709                                 // execute op
710                                 
711                                 switch (op) {
712                                 case OpCode.True:
713                                         return true;
714
715                                 case OpCode.False:
716                                         return false;
717                                 
718                                 case OpCode.Character: {
719                                         if (c == (char)program[pc ++])
720                                                 return !negate;
721                                         break;
722                                 }
723
724                                 case OpCode.Category: {
725                                         if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
726                                                 return !negate;
727
728                                         break;
729                                 }
730                                 
731                                 case OpCode.Range: {
732                                         int lo = (char)program[pc ++];
733                                         int hi = (char)program[pc ++];
734                                         if (lo <= c && c <= hi)
735                                                 return !negate;
736                                         break;
737                                 }
738
739                                 case OpCode.Set: {
740                                         int lo = (char)program[pc ++];
741                                         int len = (char)program[pc ++];
742                                         int bits = pc;
743                                         pc += len;
744
745                                         int i = (int)c - lo;
746                                         if (i < 0 || i >= len << 4)
747                                                 break;
748                                                 
749
750                                         if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
751                                                 return !negate;
752                                         break;
753                                 }
754                                 }
755                         } while (multi);
756
757                         return negate;
758                 }
759
760                 private bool TryMatch (ref int ref_ptr, int pc) {
761                         Reset ();
762                         
763                         int ptr = ref_ptr;
764                         marks [groups [0]].Start = ptr;
765                         if (Eval (Mode.Match, ref ptr, pc)) {
766                                 marks [groups [0]].End = ptr;
767                                 ref_ptr = ptr;
768                                 return true;
769                         }
770
771                         return false;
772                 }
773                 
774                 private bool IsPosition (Position pos, int ptr) {
775                         switch (pos) {
776                         case Position.Start: case Position.StartOfString:
777                                 return ptr == 0;
778
779                         case Position.StartOfLine:
780                                 return ptr == 0 || text[ptr - 1] == '\n';
781                                 
782                         case Position.StartOfScan:
783                                 return ptr == scan_ptr;
784                         
785                         case Position.End:
786                                 return ptr == text_end ||
787                                         (ptr == text_end - 1 && text[ptr] == '\n');
788
789                         case Position.EndOfLine:
790                                 return ptr == text_end || text[ptr] == '\n';
791                                 
792                         case Position.EndOfString:
793                                 return ptr == text_end;
794                                 
795                         case Position.Boundary:
796                                 if (text_end == 0)
797                                         return false;
798
799                                 if (ptr == 0)
800                                         return IsWordChar (text[ptr]);
801                                 else if (ptr == text_end)
802                                         return IsWordChar (text[ptr - 1]);
803                                 else
804                                         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
805
806                         case Position.NonBoundary:
807                                 if (text_end == 0)
808                                         return false;
809
810                                 if (ptr == 0)
811                                         return !IsWordChar (text[ptr]);
812                                 else if (ptr == text_end)
813                                         return !IsWordChar (text[ptr - 1]);
814                                 else
815                                         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
816                         
817                         default:
818                                 return false;
819                         }
820                 }
821
822                 private bool IsWordChar (char c) {
823                         return CategoryUtils.IsCategory (Category.Word, c);
824                 }
825
826                 private string GetString (int pc) {
827                         int len = program[pc + 1];
828                         int str = pc + 2;
829
830                         char[] cs = new char[len];
831                         for (int i = 0; i < len; ++ i)
832                                 cs[i] = (char)program[str ++];
833
834                         return new string (cs);
835                 }
836
837                 // capture management
838
839                 private void Open (int gid, int ptr) {
840                         int m = groups [gid];
841                         if (m < mark_start || marks [m].IsDefined) {
842                                 m = CreateMark (m);
843                                 groups [gid] = m;
844                         }
845
846                         marks [m].Start = ptr;
847                 }
848
849                 private void Close (int gid, int ptr) {
850                         marks [groups [gid]].End = ptr;
851                 }
852
853                 private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
854                         int b = groups [balance_gid];
855
856                         if(b == -1 || marks[b].Index < 0) {
857                                 //Group not previously matched
858                                 return false;
859                         }
860
861                         Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
862
863                         if (gid > 0 && capture){ 
864                                 Open (gid, marks [b].Index + marks [b].Length);
865                                 Close (gid, ptr);
866                         }
867
868                         groups [balance_gid] = marks[b].Previous;
869
870                         return true;
871                 }
872
873                 private int Checkpoint () {
874                         mark_start = mark_end;
875                         return mark_start;
876                 }
877
878                 private void Backtrack (int cp) {
879                         Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
880
881                         for (int i = 0; i < groups.Length; ++ i) {
882                                 int m = groups [i];
883                                 while (cp <= m)
884                                         m = marks [m].Previous;
885
886                                 groups [i] = m;
887                         }
888                 }
889
890                 private void ResetGroups () {
891                         int n = groups.Length;
892                         if (marks == null)
893                                 marks = new Mark [n * 10];
894
895                         for (int i = 0; i < n; ++ i) {
896                                 groups [i] = i;
897
898                                 marks [i].Start = -1;
899                                 marks [i].End = -1;
900                                 marks [i].Previous = -1;
901                         }
902                         
903                         mark_start = 0;
904                         mark_end = n;
905                 }
906
907                 private int GetLastDefined (int gid) {
908                         int m = groups [gid];
909                         while (m >= 0 && !marks [m].IsDefined)
910                                 m = marks [m].Previous;
911
912                         return m;
913                 }
914
915                 private int CreateMark (int previous) {
916                         if (mark_end == marks.Length) {
917                                 Mark [] dest = new Mark [marks.Length * 2];
918                                 marks.CopyTo (dest, 0);
919                                 marks = dest;
920                         }
921
922                         int m = mark_end ++;
923                         marks [m].Start = marks [m].End = -1;
924                         marks [m].Previous = previous;
925
926                         return m;
927                 }
928
929                 private Match GenerateMatch (Regex regex) {
930                         int[][] grps = new int[groups.Length][];
931                         ArrayList caps = new ArrayList ();
932
933                         for (int gid = 0; gid < groups.Length; ++ gid) {
934                                 caps.Clear ();
935                                 for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {
936                                         if (!marks[m].IsDefined)
937                                                 continue;
938                                         
939                                         caps.Add (marks[m].Index);
940                                         caps.Add (marks[m].Length);
941                                 }
942
943                                 grps[gid] = (int[])caps.ToArray (typeof (int));
944                         }
945
946                         return new Match (regex, this, text, text_end, grps);
947                 }
948
949                 // interpreter attributes
950
951                 private ushort[] program;               // regex program
952                 private int program_start;              // first instruction after info block
953                 private string text;                    // input text
954                 private int text_end;                   // end of input text (last character + 1)
955                 private int group_count;                // number of capturing groups
956                 private int match_min;//, match_max;    // match width information
957                 private QuickSearch qs;                 // fast substring matcher
958
959                 // match state
960                 
961                 private int scan_ptr;                   // start of scan
962
963                 private RepeatContext repeat;           // current repeat context
964                 private RepeatContext fast;             // fast repeat context
965
966                 private Mark[] marks = null;            // mark stack
967                 private int mark_start;                 // start of current checkpoint
968                 private int mark_end;                   // end of checkpoint/next free mark
969
970                 private int[] groups;                   // current group definitions
971
972                 // private classes
973 /*
974                 private struct Mark {
975                         public int Start, End;
976                         public int Previous;
977
978                         public bool IsDefined {
979                                 get { return Start >= 0 && End >= 0; }
980                         }
981
982                         public int Index {
983                                 get { return Start < End ? Start : End; }
984                         }
985
986                         public int Length {
987                                 get { return Start < End ? End - Start : Start - End; }
988                         }
989                 }
990 */
991                 private class RepeatContext {
992                         public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
993                                 this.previous = previous;
994                                 this.min = min;
995                                 this.max = max;
996                                 this.lazy = lazy;
997                                 this.expr_pc = expr_pc;
998                                 
999                                 this.start = -1;
1000                                 this.count = 0;
1001                         }
1002
1003                         public int Count {
1004                                 get { return count; }
1005                                 set { count = value; }
1006                         }
1007
1008                         public int Start {
1009                                 get { return start; }
1010                                 set { start = value; }
1011                         }
1012
1013                         public bool IsMinimum {
1014                                 get { return min <= count; }
1015                         }
1016
1017                         public bool IsMaximum {
1018                                 get { return max <= count; }
1019                         }
1020
1021                         public bool IsLazy {
1022                                 get { return lazy; }
1023                         }
1024
1025                         public int Expression {
1026                                 get { return expr_pc; }
1027                         }
1028
1029                         public RepeatContext Previous {
1030                                 get { return previous; }
1031                         }
1032                 
1033                         private int start;
1034                         private int min, max;
1035                         private bool lazy;
1036                         private int expr_pc;
1037                         private RepeatContext previous;
1038
1039                         private int count;
1040                 }
1041
1042                 private enum Mode {
1043                         Search,
1044                         Match,
1045                         Count
1046                 }
1047         }
1048 }