* RegexRunnerFactory.cs: removed comment, no longer throw exception
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        interpreter.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 using System;
10 using System.Collections;
11 using System.Diagnostics;
12 using System.Globalization;
13
14 namespace System.Text.RegularExpressions {
15
16         class Interpreter : IMachine {
17                 public Interpreter (ushort[] program) {
18                         this.program = program;
19                         this.qs = null;
20
21                         // process info block
22
23                         Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
24
25                         this.group_count = program[1] + 1;
26                         this.match_min = program[2];
27                         this.match_max = program[3];
28
29                         // setup
30
31                         this.program_start = 4;
32                         this.groups = new int [group_count];
33                 }
34
35                 // IMachine implementation
36
37                 public Match Scan (Regex regex, string text, int start, int end) {
38                         this.text = text;
39                         this.text_end = end;
40                         this.scan_ptr = start;
41
42                         if (Eval (Mode.Match, ref scan_ptr, program_start))
43                                 return GenerateMatch (regex);
44
45                         return Match.Empty;
46                 }
47
48                 // private methods
49
50                 private void Reset () {
51                         ResetGroups ();
52                         fast = repeat = null;
53                 }
54
55                 private bool Eval (Mode mode, ref int ref_ptr, int pc) {
56                         int ptr = ref_ptr;
57                 Begin:
58                         for (;;) {
59                                 ushort word = program[pc];
60                                 OpCode op = (OpCode)(word & 0x00ff);
61                                 OpFlags flags = (OpFlags)(word & 0xff00);
62
63                                 switch (op) {
64                                 case OpCode.Anchor: {
65                                         int skip = program[pc + 1];
66                                         
67                                         int anch_offset = program[pc + 2];
68                                         bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; 
69                                         int anch_ptr = anch_reverse ?  ptr - anch_offset  : ptr + anch_offset;
70                                         int anch_end = text_end - match_min + anch_offset;      // maximum anchor position  
71                                         
72                                         
73                                         int anch_begin =  0;
74
75
76                                         // the general case for an anchoring expression is at the bottom, however we
77                                         // do some checks for the common cases before to save processing time. the current
78                                         // optimizer only outputs three types of anchoring expressions: fixed position,
79                                         // fixed substring, and no anchor.
80
81                                         OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);                                    
82                                         if (anch_op == OpCode.Position && skip == 6) {                          // position anchor
83                                                 // Anchor
84                                                 //      Position
85                                                 //      True
86
87                                                 switch ((Position)program[pc + 4]) {
88                                                 case Position.StartOfString:                                                    
89                                                         if (anch_reverse || anch_offset == 0) {
90                                                                 ptr = anch_offset;
91                                                                 if (TryMatch (ref ptr, pc + skip))
92                                                                         goto Pass;
93                                                         }
94                                                         break;
95                                                 
96                                                 case Position.StartOfLine:
97                                                                                                         
98                                                          if (anch_ptr == 0) {
99                                                                 ptr = 0;
100                                                                 if (TryMatch (ref ptr, pc + skip))
101                                                                         goto Pass;
102                                                                 
103                                                                 ++ anch_ptr;
104                                                         }
105
106                                                         while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {  
107                                                                 if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
108                                                                         if (anch_reverse)
109                                                                                 ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
110                                                                         else
111                                                                                 ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
112                                                                         if (TryMatch (ref ptr, pc + skip))
113                                                                                 goto Pass;
114                                                                 }
115                                                         
116                                                                 if (anch_reverse)
117                                                                         -- anch_ptr;
118                                                                 else
119                                                                         ++ anch_ptr;
120                                                         }
121                                                         break;
122                                                 
123                                                 case Position.StartOfScan:
124                                                         if (anch_ptr == scan_ptr) {                                                     
125                                                                 ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
126                                                                 if (TryMatch (ref ptr, pc + skip))
127                                                                         goto Pass;
128                                                         }
129                                                         break;
130
131                                                 default:
132                                                         // FIXME
133                                                         break;
134                                                 }
135                                         }
136                                         else if (qs != null ||
137                                                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor
138                                                 // Anchor
139                                                 //      String
140                                                 //      True
141                                  
142                                                 bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
143                                                 string substring = GetString (pc + 3);
144
145                                                 if (qs == null) {
146                                                         bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
147
148                                                         qs = new QuickSearch (substring, ignore, reverse);
149                                                 }
150                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
151                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
152
153                                                         if (reverse)    
154                                                         {
155                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_begin);
156                                                                 if (anch_ptr != -1)
157                                                                         anch_ptr += substring.Length ;
158                                                                 
159                                                         }
160                                                         else
161                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_end);
162                                                         if (anch_ptr < 0)
163                                                                 break;
164
165                                                         ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
166                                                         if (TryMatch (ref ptr, pc + skip))
167                                                                 goto Pass;
168
169                                                         if (reverse)
170                                                                 anch_ptr -= 2;
171                                                         else 
172                                                                 ++ anch_ptr;
173                                                 }
174                                         }
175                                         else if (anch_op == OpCode.True) {                                      // no anchor
176                                                 // Anchor
177                                                 //      True
178
179                                         
180                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
181                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
182
183                                                         ptr = anch_ptr;
184                                                         if (TryMatch (ref ptr, pc + skip))
185                                                                 goto Pass;
186                                                         if (anch_reverse)
187                                                                 -- anch_ptr;
188                                                         else 
189                                                                 ++ anch_ptr;
190                                                 }
191                                         }
192                                         else {                                                                  // general case
193                                                 // Anchor
194                                                 //      <expr>
195                                                 //      True
196
197                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
198                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
199
200                                                         ptr = anch_ptr;
201                                                         if (Eval (Mode.Match, ref ptr, pc + 3)) {
202                                                                 // anchor expression passed: try real expression at the correct offset
203
204                                                                 ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
205                                                                 if (TryMatch (ref ptr, pc + skip))
206                                                                         goto Pass;
207                                                         }
208
209                                                     if (anch_reverse)
210                                                                 -- anch_ptr;
211                                                         else 
212                                                                 ++ anch_ptr;
213                                                 }
214                                         }
215
216                                         goto Fail;
217                                 }
218                                 
219                                 case OpCode.False: {
220                                         goto Fail;
221                                 }
222
223                                 case OpCode.True: {
224                                         goto Pass;
225                                 }
226
227                                 case OpCode.Position: {
228                                         if (!IsPosition ((Position)program[pc + 1], ptr))
229                                                 goto Fail;
230                                         pc += 2;
231                                         break;
232                                 }
233
234                                 case OpCode.String: {
235                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
236                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
237                                         int len = program[pc + 1];
238
239                                         if (reverse) {
240                                                 ptr -= len;
241                                                 if (ptr < 0)
242                                                         goto Fail;
243                                         }
244                                         else 
245                                         if (ptr + len > text_end)
246                                                 goto Fail;
247
248                                         pc += 2;
249                                         for (int i = 0; i < len; ++ i) {
250                                                 char c = text[ptr + i];
251                                                 if (ignore)
252                                                         c = Char.ToLower (c);
253
254                                                 if (c != (char)program[pc ++])
255                                                         goto Fail;
256                                         }
257
258                                         if (!reverse)
259                                                 ptr += len;
260                                         break;
261                                 }
262
263                                 case OpCode.Reference: {
264                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
265                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
266                                         int m = GetLastDefined (program [pc + 1]);
267                                         if (m < 0)
268                                                 goto Fail;
269
270                                         int str = marks [m].Index;
271                                         int len = marks [m].Length;
272
273                                         if (reverse) {
274                                                 ptr -= len;
275                                                 if (ptr < 0)
276                                                         goto Fail;
277                                         }
278                                         else if (ptr + len > text_end)
279                                                 goto Fail;
280
281                                         pc += 2;
282                                         for (int i = 0; i < len; ++ i) {
283                                                 if (ignore) {
284                                                         if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
285                                                                 goto Fail;
286                                                 }
287                                                 else {
288                                                         if (text[ptr + i] != text[str + i])
289                                                                 goto Fail;
290                                                 }
291                                         }
292
293                                         if (!reverse)
294                                                 ptr += len;
295                                         break;
296                                 }
297
298                                 case OpCode.Character: case OpCode.Category:
299                                 case OpCode.Range: case OpCode.Set: {
300                                         if (!EvalChar (mode, ref ptr, ref pc, false))
301                                                 goto Fail;
302                                         break;
303                                 }
304
305                                 case OpCode.In: {
306                                         int target = pc + program[pc + 1];
307                                         pc += 2;
308                                         if (!EvalChar (mode, ref ptr, ref pc, true))
309                                                 goto Fail;
310
311                                         pc = target;
312                                         break;
313                                 }
314
315                                 case OpCode.Open: {
316                                         Open (program[pc + 1], ptr);
317                                         pc += 2;
318                                         break;
319                                 }
320
321                                 case OpCode.Close: {
322                                         Close (program[pc + 1], ptr);
323                                         pc += 2;
324                                         break;
325                                 }
326
327                                 case OpCode.BalanceStart: {
328
329                                         int start = ptr; //point before the balancing group
330                                         
331                                         if (!Eval (Mode.Match, ref ptr, pc + 5))
332                                                 goto Fail;
333                                         
334                                         
335                                         
336                                         if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
337                                                 goto Fail;
338                                         }
339
340                                         
341                                         pc += program[pc + 4];
342                                         break;
343                                 }
344
345                                 case OpCode.Balance: {
346                                         goto Pass;
347                                 }
348
349                                 case OpCode.IfDefined: {
350                                         int m = GetLastDefined (program [pc + 2]);
351                                         if (m < 0)
352                                                 pc += program[pc + 1];
353                                         else
354                                                 pc += 3;
355                                         break;
356                                 }
357
358                                 case OpCode.Sub: {
359                                         if (!Eval (Mode.Match, ref ptr, pc + 2))
360                                                 goto Fail;
361
362                                         pc += program[pc + 1];
363                                         break;
364                                 }
365
366                                 case OpCode.Test: {
367                                         int cp = Checkpoint ();
368                                         int test_ptr = ptr;
369                                         if (Eval (Mode.Match, ref test_ptr, pc + 3))
370                                                 pc += program[pc + 1];
371                                         else {
372                                                 Backtrack (cp);
373                                                 pc += program[pc + 2];
374                                         }
375                                         break;
376                                 }
377
378                                 case OpCode.Branch: {
379                                         OpCode branch_op;
380                                         do {
381                                                 int cp = Checkpoint ();
382                                                 if (Eval (Mode.Match, ref ptr, pc + 2))
383                                                         goto Pass;
384                                                 
385                                                 Backtrack (cp);
386                                                 
387                                                 pc += program[pc + 1];
388                                                 branch_op = (OpCode)(program[pc] & 0xff);
389                                         } while (branch_op != OpCode.False);
390
391                                         goto Fail;
392                                 }
393
394                                 case OpCode.Jump: {
395                                         pc += program[pc + 1];
396                                         break;
397                                 }
398
399                                 case OpCode.Repeat: {
400                                         this.repeat = new RepeatContext (
401                                                 this.repeat,                    // previous context
402                                                 program[pc + 2],                // minimum
403                                                 program[pc + 3],                // maximum
404                                                 (flags & OpFlags.Lazy) != 0,    // lazy
405                                                 pc + 4                          // subexpression
406                                         );
407
408                                         if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
409                                                 goto Pass;
410                                         else {
411                                                 this.repeat = this.repeat.Previous;
412                                                 goto Fail;
413                                         }
414                                 }
415
416                                 case OpCode.Until: {
417                                         RepeatContext current = this.repeat;
418                                         int start = current.Start;
419
420                                         if (!current.IsMinimum) {
421                                                 ++ current.Count;
422                                                 current.Start = ptr;
423                                                 if (Eval (Mode.Match, ref ptr, repeat.Expression))
424                                                         goto Pass;
425
426                                                 current.Start = start;
427                                                 -- current.Count;
428                                                 goto Fail;
429                                         }
430
431                                         if (ptr == current.Start) {
432                                                 // degenerate match ... match tail or fail
433
434                                                 this.repeat = current.Previous;
435                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
436                                                         goto Pass;
437                                         
438                                                 this.repeat = current;
439                                                 goto Fail;
440                                         }
441
442                                         if (current.IsLazy) {
443                                                 // match tail first ...
444
445                                                 this.repeat = current.Previous;
446                                                 int cp = Checkpoint ();
447                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
448                                                         goto Pass;
449
450                                                 Backtrack (cp);
451
452                                                 // ... then match more
453
454                                                 this.repeat = current;
455                                                 if (!current.IsMaximum) {
456                                                         ++ current.Count;
457                                                         current.Start = ptr;
458                                                         if (Eval (Mode.Match, ref ptr, current.Expression))
459                                                                 goto Pass;
460
461                                                         current.Start = start;
462                                                         -- current.Count;
463                                                         goto Fail;
464                                                 }
465
466                                                 return false;
467                                         }
468                                         else {
469                                                 // match more first ...
470
471                                                 if (!current.IsMaximum) {
472                                                         int cp = Checkpoint ();
473                                                         ++ current.Count;
474                                                         current.Start = ptr;
475                                                         if (Eval (Mode.Match, ref ptr, current.Expression))
476                                                                 goto Pass;
477
478                                                         current.Start = start;
479                                                         -- current.Count;
480                                                         Backtrack (cp);
481                                                 }
482
483                                                 // ... then match tail
484
485                                                 this.repeat = current.Previous;
486                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
487                                                         goto Pass;
488
489                                                 this.repeat = current;
490                                                 goto Fail;
491                                         }
492                                 }
493
494                                 case OpCode.FastRepeat: {
495                                         this.fast = new RepeatContext (
496                                                 fast,
497                                                 program[pc + 2],                // minimum
498                                                 program[pc + 3],                // maximum
499                                                 (flags & OpFlags.Lazy) != 0,    // lazy
500                                                 pc + 4                          // subexpression
501                                         );
502                                         fast.Start = ptr;
503
504                                         int cp = Checkpoint ();
505
506                                         pc += program[pc + 1];          // tail expression
507                                         ushort tail_word = program[pc];
508
509                                         int c1, c2;                     // first character of tail operator
510                                         int coff;                       // 0 or -1 depending on direction
511
512                                         OpCode tail_op = (OpCode)(tail_word & 0xff);
513                                         if (tail_op == OpCode.Character || tail_op == OpCode.String) {
514                                                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
515
516                                                 if (tail_op == OpCode.String)
517                                                 {
518                                                         int offset = 0;
519                                                 
520                                                         if ((tail_flags & OpFlags.RightToLeft) != 0)
521                                                         {
522                                                                 offset = program[pc + 1] - 1 ;
523                                                         }
524                                                           
525                                                         c1 = program[pc + 2 + offset];                          // first char of string
526                                                 }
527                                                 else
528                                                         c1 = program[pc + 1];                           // character
529                                                 
530                                                 if ((tail_flags & OpFlags.IgnoreCase) != 0)
531                                                         c2 = Char.ToUpper ((char)c1);                   // ignore case
532                                                 else
533                                                         c2 = c1;
534
535                                                 if ((tail_flags & OpFlags.RightToLeft) != 0)
536                                                         coff = -1;                                      // reverse
537                                                 else
538                                                         coff = 0;
539                                         }
540                                         else {
541                                                 c1 = c2 = -1;
542                                                 coff = 0;
543                                         }
544
545                                         if (fast.IsLazy) {
546                                                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
547                                                         //Console.WriteLine ("lazy fast: failed mininum.");
548                                                         fast = fast.Previous;
549                                                         goto Fail;
550                                                 }
551                                                 
552                                                 while (true) {
553                                                         int p = ptr + coff;
554                                                         if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
555                                                             Eval (Mode.Match, ref ptr, pc))
556                                                                 break;
557
558                                                         if (fast.IsMaximum) {
559                                                                 //Console.WriteLine ("lazy fast: failed with maximum.");
560                                                                 fast = fast.Previous;
561                                                                 goto Fail;
562                                                         }
563
564                                                         Backtrack (cp);
565                                                         if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
566                                                                 //Console.WriteLine ("lazy fast: no more.");
567                                                                 fast = fast.Previous;
568                                                                 goto Fail;
569                                                         }
570                                                 }
571                                                 fast = fast.Previous;
572                                                 goto Pass;
573                                         }
574                                         else {
575                                                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
576                                                         fast = fast.Previous;
577                                                         goto Fail;
578                                                 }
579                                         
580                                                 int width;
581                                                 if (fast.Count > 0)
582                                                         width = (ptr - fast.Start) / fast.Count;
583                                                 else
584                                                         width = 0;
585
586                                                 while (true) {
587                                                         int p = ptr + coff;
588                                                         if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&
589                                                             Eval (Mode.Match, ref ptr, pc))
590                                                                 break;
591
592                                                         -- fast.Count;
593                                                         if (!fast.IsMinimum) {
594                                                                 fast = fast.Previous;
595                                                                 goto Fail;
596                                                         }
597
598                                                         ptr -= width;
599                                                         Backtrack (cp);
600                                                 }
601                                                 fast = fast.Previous;
602                                                 goto Pass;
603                                         }
604                                 }
605
606                                 case OpCode.Info: {
607                                         Debug.Assert (false, "Regex", "Info block found in pattern");
608                                         goto Fail;
609                                 }
610                                 }
611                         }
612                 Pass:
613                         ref_ptr = ptr;
614
615                         switch (mode) {
616                         case Mode.Match:
617                                 return true;
618
619                         case Mode.Count: {
620                                 ++ fast.Count;
621                                 if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
622                                         return true;
623
624                                 pc = fast.Expression;
625                                 goto Begin;
626                         }
627                         }
628
629                 Fail:
630                         switch (mode) {
631                         case Mode.Match:
632                                 return false;
633
634                         case Mode.Count: {
635                                 if (!fast.IsLazy && fast.IsMinimum)
636                                         return true;
637
638                                 ref_ptr = fast.Start;
639                                 return false;
640                         }
641                         }
642
643                         return false;
644                 }
645
646                 private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
647                         bool consumed = false;
648                         char c = '\0';
649                         bool negate;
650                         bool ignore;
651                 
652                         do {
653                                 ushort word = program[pc];
654                                 OpCode op = (OpCode)(word & 0x00ff);
655                                 OpFlags flags = (OpFlags)(word & 0xff00);
656
657                                 ++ pc;
658
659                                 ignore = (flags & OpFlags.IgnoreCase) != 0;
660                                 
661                                 // consume character: the direction of an In construct is
662                                 // determined by the direction of its first op
663
664                                 if (!consumed) {
665                                         if ((flags & OpFlags.RightToLeft) != 0) {
666                                                 if (ptr <= 0)
667                                                         return false;
668
669                                                 c = text[-- ptr];
670                                         }
671                                         else {
672                                                 if (ptr >= text_end)
673                                                         return false;
674
675                                                 c = text[ptr ++];
676                                         }
677
678                                         if (ignore)
679                                                 c = Char.ToLower (c);
680
681                                         consumed = true;
682                                 }
683
684                                 // negate flag
685
686                                 negate = (flags & OpFlags.Negate) != 0;
687
688                                 // execute op
689                                 
690                                 switch (op) {
691                                 case OpCode.True:
692                                         return true;
693
694                                 case OpCode.False:
695                                         return false;
696                                 
697                                 case OpCode.Character: {
698                                         if (c == (char)program[pc ++])
699                                                 return !negate;
700                                         break;
701                                 }
702
703                                 case OpCode.Category: {
704                                         if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
705                                                 return !negate;
706
707                                         break;
708                                 }
709                                 
710                                 case OpCode.Range: {
711                                         int lo = (char)program[pc ++];
712                                         int hi = (char)program[pc ++];
713                                         if (lo <= c && c <= hi)
714                                                 return !negate;
715                                         break;
716                                 }
717
718                                 case OpCode.Set: {
719                                         int lo = (char)program[pc ++];
720                                         int len = (char)program[pc ++];
721                                         int bits = pc;
722                                         pc += len;
723
724                                         int i = (int)c - lo;
725                                         if (i < 0 || i >= len << 4)
726                                                 break;
727                                                 
728
729                                         if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
730                                                 return !negate;
731                                         break;
732                                 }
733                                 }
734                         } while (multi);
735
736                         return negate;
737                 }
738
739                 private bool TryMatch (ref int ref_ptr, int pc) {
740                         Reset ();
741                         
742                         int ptr = ref_ptr;
743                         marks [groups [0]].Start = ptr;
744                         if (Eval (Mode.Match, ref ptr, pc)) {
745                                 marks [groups [0]].End = ptr;
746                                 ref_ptr = ptr;
747                                 return true;
748                         }
749
750                         return false;
751                 }
752                 
753                 private bool IsPosition (Position pos, int ptr) {
754                         switch (pos) {
755                         case Position.Start: case Position.StartOfString:
756                                 return ptr == 0;
757
758                         case Position.StartOfLine:
759                                 return ptr == 0 || text[ptr - 1] == '\n';
760                                 
761                         case Position.StartOfScan:
762                                 return ptr == scan_ptr;
763                         
764                         case Position.End:
765                                 return ptr == text_end ||
766                                         (ptr == text_end - 1 && text[ptr] == '\n');
767
768                         case Position.EndOfLine:
769                                 return ptr == text_end || text[ptr] == '\n';
770                                 
771                         case Position.EndOfString:
772                                 return ptr == text_end;
773                                 
774                         case Position.Boundary:
775                                 if (text_end == 0)
776                                         return false;
777
778                                 if (ptr == 0)
779                                         return IsWordChar (text[ptr]);
780                                 else if (ptr == text_end)
781                                         return IsWordChar (text[ptr - 1]);
782                                 else
783                                         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
784
785                         case Position.NonBoundary:
786                                 if (text_end == 0)
787                                         return false;
788
789                                 if (ptr == 0)
790                                         return !IsWordChar (text[ptr]);
791                                 else if (ptr == text_end)
792                                         return !IsWordChar (text[ptr - 1]);
793                                 else
794                                         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
795                         
796                         default:
797                                 return false;
798                         }
799                 }
800
801                 private bool IsWordChar (char c) {
802                         return CategoryUtils.IsCategory (Category.Word, c);
803                 }
804
805                 private string GetString (int pc) {
806                         int len = program[pc + 1];
807                         int str = pc + 2;
808
809                         char[] cs = new char[len];
810                         for (int i = 0; i < len; ++ i)
811                                 cs[i] = (char)program[str ++];
812
813                         return new string (cs);
814                 }
815
816                 // capture management
817
818                 private void Open (int gid, int ptr) {
819                         int m = groups [gid];
820                         if (m < mark_start || marks [m].IsDefined) {
821                                 m = CreateMark (m);
822                                 groups [gid] = m;
823                         }
824
825                         marks [m].Start = ptr;
826                 }
827
828                 private void Close (int gid, int ptr) {
829                         marks [groups [gid]].End = ptr;
830                 }
831
832                 private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
833                         int b = groups [balance_gid];
834
835                         if(b == -1 || marks[b].Index < 0) {
836                                 //Group not previously matched
837                                 return false;
838                         }
839
840                         Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
841
842                         if (gid > 0 && capture){ 
843                                 Open (gid, marks [b].Index + marks [b].Length);
844                                 Close (gid, ptr);
845                         }
846
847                         groups [balance_gid] = marks[b].Previous;
848
849                         return true;
850                 }
851
852                 private int Checkpoint () {
853                         mark_start = mark_end;
854                         return mark_start;
855                 }
856
857                 private void Backtrack (int cp) {
858                         Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
859
860                         for (int i = 0; i < groups.Length; ++ i) {
861                                 int m = groups [i];
862                                 while (cp <= m)
863                                         m = marks [m].Previous;
864
865                                 groups [i] = m;
866                         }
867                 }
868
869                 private void ResetGroups () {
870                         int n = groups.Length;
871                         if (marks == null)
872                                 marks = new Mark [n * 10];
873
874                         for (int i = 0; i < n; ++ i) {
875                                 groups [i] = i;
876
877                                 marks [i].Start = -1;
878                                 marks [i].End = -1;
879                                 marks [i].Previous = -1;
880                         }
881                         
882                         mark_start = 0;
883                         mark_end = n;
884                 }
885
886                 private int GetLastDefined (int gid) {
887                         int m = groups [gid];
888                         while (m >= 0 && !marks [m].IsDefined)
889                                 m = marks [m].Previous;
890
891                         return m;
892                 }
893
894                 private int CreateMark (int previous) {
895                         if (mark_end == marks.Length) {
896                                 Mark [] dest = new Mark [marks.Length * 2];
897                                 marks.CopyTo (dest, 0);
898                                 marks = dest;
899                         }
900
901                         int m = mark_end ++;
902                         marks [m].Start = marks [m].End = -1;
903                         marks [m].Previous = previous;
904
905                         return m;
906                 }
907
908                 private Match GenerateMatch (Regex regex) {
909                         int[][] grps = new int[groups.Length][];
910                         ArrayList caps = new ArrayList ();
911
912                         for (int gid = 0; gid < groups.Length; ++ gid) {
913                                 caps.Clear ();
914                                 for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {
915                                         if (!marks[m].IsDefined)
916                                                 continue;
917                                         
918                                         caps.Add (marks[m].Index);
919                                         caps.Add (marks[m].Length);
920                                 }
921
922                                 grps[gid] = (int[])caps.ToArray (typeof (int));
923                         }
924
925                         return new Match (regex, this, text, text_end, grps);
926                 }
927
928                 // interpreter attributes
929
930                 private ushort[] program;               // regex program
931                 private int program_start;              // first instruction after info block
932                 private string text;                    // input text
933                 private int text_end;                   // end of input text (last character + 1)
934                 private int group_count;                // number of capturing groups
935                 private int match_min, match_max;       // match width information
936                 private QuickSearch qs;                 // fast substring matcher
937
938                 // match state
939                 
940                 private int scan_ptr;                   // start of scan
941
942                 private RepeatContext repeat;           // current repeat context
943                 private RepeatContext fast;             // fast repeat context
944
945                 private Mark[] marks = null;            // mark stack
946                 private int mark_start;                 // start of current checkpoint
947                 private int mark_end;                   // end of checkpoint/next free mark
948
949                 private int[] groups;                   // current group definitions
950
951                 // private classes
952 /*
953                 private struct Mark {
954                         public int Start, End;
955                         public int Previous;
956
957                         public bool IsDefined {
958                                 get { return Start >= 0 && End >= 0; }
959                         }
960
961                         public int Index {
962                                 get { return Start < End ? Start : End; }
963                         }
964
965                         public int Length {
966                                 get { return Start < End ? End - Start : Start - End; }
967                         }
968                 }
969 */
970                 private class RepeatContext {
971                         public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
972                                 this.previous = previous;
973                                 this.min = min;
974                                 this.max = max;
975                                 this.lazy = lazy;
976                                 this.expr_pc = expr_pc;
977                                 
978                                 this.start = -1;
979                                 this.count = 0;
980                         }
981
982                         public int Count {
983                                 get { return count; }
984                                 set { count = value; }
985                         }
986
987                         public int Start {
988                                 get { return start; }
989                                 set { start = value; }
990                         }
991
992                         public bool IsMinimum {
993                                 get { return min <= count; }
994                         }
995
996                         public bool IsMaximum {
997                                 get { return max <= count; }
998                         }
999
1000                         public bool IsLazy {
1001                                 get { return lazy; }
1002                         }
1003
1004                         public int Expression {
1005                                 get { return expr_pc; }
1006                         }
1007
1008                         public RepeatContext Previous {
1009                                 get { return previous; }
1010                         }
1011                 
1012                         private int start;
1013                         private int min, max;
1014                         private bool lazy;
1015                         private int expr_pc;
1016                         private RepeatContext previous;
1017
1018                         private int count;
1019                 }
1020
1021                 private enum Mode {
1022                         Search,
1023                         Match,
1024                         Count
1025                 }
1026         }
1027 }