* roottypes.cs: Rename from tree.cs.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        interpreter.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32 using System.Diagnostics;
33 using System.Globalization;
34
35 namespace System.Text.RegularExpressions {
36
37         class Interpreter : IMachine {
38                 private int ReadProgramCount (int ptr)
39                 {
40                         int ret = program [ptr + 1];
41                         ret <<= 16;
42                         ret += program [ptr];
43                         return ret;
44                 }
45
46                 public Interpreter (ushort[] program) {
47                         this.program = program;
48                         this.qs = null;
49
50                         // process info block
51
52                         Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
53
54                         this.group_count = ReadProgramCount (1) + 1;
55                         this.match_min = ReadProgramCount (3);
56                         //this.match_max = ReadProgramCount (5);
57
58                         // setup
59
60                         this.program_start = 7;
61                         this.groups = new int [group_count];
62                 }
63
64                 // IMachine implementation
65
66                 public Match Scan (Regex regex, string text, int start, int end) {
67                         this.text = text;
68                         this.text_end = end;
69                         this.scan_ptr = start;
70
71                         if (Eval (Mode.Match, ref scan_ptr, program_start))
72                                 return GenerateMatch (regex);
73
74                         return Match.Empty;
75                 }
76
77                 // private methods
78
79                 private void Reset () {
80                         ResetGroups ();
81                         fast = repeat = null;
82                 }
83
84                 private bool Eval (Mode mode, ref int ref_ptr, int pc) {
85                         int ptr = ref_ptr;
86                 Begin:
87                         for (;;) {
88                                 ushort word = program[pc];
89                                 OpCode op = (OpCode)(word & 0x00ff);
90                                 OpFlags flags = (OpFlags)(word & 0xff00);
91
92                                 switch (op) {
93                                 case OpCode.Anchor: {
94                                         int skip = program[pc + 1];
95                                         
96                                         int anch_offset = program[pc + 2];
97                                         bool anch_reverse = (flags & OpFlags.RightToLeft) != 0; 
98                                         int anch_ptr = anch_reverse ?  ptr - anch_offset  : ptr + anch_offset;
99                                         int anch_end = text_end - match_min + anch_offset;      // maximum anchor position  
100                                         
101                                         
102                                         int anch_begin =  0;
103
104
105                                         // the general case for an anchoring expression is at the bottom, however we
106                                         // do some checks for the common cases before to save processing time. the current
107                                         // optimizer only outputs three types of anchoring expressions: fixed position,
108                                         // fixed substring, and no anchor.
109
110                                         OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);                                    
111                                         if (anch_op == OpCode.Position && skip == 6) {                          // position anchor
112                                                 // Anchor
113                                                 //      Position
114                                                 //      True
115
116                                                 switch ((Position)program[pc + 4]) {
117                                                 case Position.StartOfString:
118                                                         if (anch_reverse || anch_offset == 0) {
119                                                                 if (anch_reverse)
120                                                                         ptr = anch_offset;
121                                                                 if (TryMatch (ref ptr, pc + skip))
122                                                                         goto Pass;
123                                                         }
124                                                         break;
125                                                 
126                                                 case Position.StartOfLine:
127                                                          if (anch_ptr == 0) {
128                                                                 ptr = 0;
129                                                                 if (TryMatch (ref ptr, pc + skip))
130                                                                         goto Pass;
131                                                                 
132                                                                 ++ anch_ptr;
133                                                         }
134
135                                                         while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {  
136                                                                 if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
137                                                                         if (anch_reverse)
138                                                                                 ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
139                                                                         else
140                                                                                 ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
141                                                                         if (TryMatch (ref ptr, pc + skip))
142                                                                                 goto Pass;
143                                                                 }
144                                                         
145                                                                 if (anch_reverse)
146                                                                         -- anch_ptr;
147                                                                 else
148                                                                         ++ anch_ptr;
149                                                         }
150                                                         break;
151                                                 
152                                                 case Position.StartOfScan:
153                                                         if (anch_ptr == scan_ptr) {                                                     
154                                                                 ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
155                                                                 if (TryMatch (ref ptr, pc + skip))
156                                                                         goto Pass;
157                                                         }
158                                                         break;
159
160                                                 default:
161                                                         // FIXME
162                                                         break;
163                                                 }
164                                         }
165                                         else if (qs != null ||
166                                                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor
167                                                 // Anchor
168                                                 //      String
169                                                 //      True
170                                  
171                                                 bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;
172                                                 string substring = GetString (pc + 3);
173
174                                                 if (qs == null) {
175                                                         bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
176
177                                                         qs = new QuickSearch (substring, ignore, reverse);
178                                                 }
179                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
180                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
181
182                                                         if (reverse)    
183                                                         {
184                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_begin);
185                                                                 if (anch_ptr != -1)
186                                                                         anch_ptr += substring.Length ;
187                                                                 
188                                                         }
189                                                         else
190                                                                 anch_ptr = qs.Search (text, anch_ptr, anch_end);
191                                                         if (anch_ptr < 0)
192                                                                 break;
193
194                                                         ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
195                                                         if (TryMatch (ref ptr, pc + skip))
196                                                                 goto Pass;
197
198                                                         if (reverse)
199                                                                 anch_ptr -= 2;
200                                                         else 
201                                                                 ++ anch_ptr;
202                                                 }
203                                         }
204                                         else if (anch_op == OpCode.True) {                                      // no anchor
205                                                 // Anchor
206                                                 //      True
207
208                                         
209                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
210                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
211
212                                                         ptr = anch_ptr;
213                                                         if (TryMatch (ref ptr, pc + skip))
214                                                                 goto Pass;
215                                                         if (anch_reverse)
216                                                                 -- anch_ptr;
217                                                         else 
218                                                                 ++ anch_ptr;
219                                                 }
220                                         }
221                                         else {                                                                  // general case
222                                                 // Anchor
223                                                 //      <expr>
224                                                 //      True
225
226                                                 while ((anch_reverse && anch_ptr >= anch_begin) 
227                                                        || (!anch_reverse && anch_ptr <= anch_end)) {
228
229                                                         ptr = anch_ptr;
230                                                         if (Eval (Mode.Match, ref ptr, pc + 3)) {
231                                                                 // anchor expression passed: try real expression at the correct offset
232
233                                                                 ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
234                                                                 if (TryMatch (ref ptr, pc + skip))
235                                                                         goto Pass;
236                                                         }
237
238                                                     if (anch_reverse)
239                                                                 -- anch_ptr;
240                                                         else 
241                                                                 ++ anch_ptr;
242                                                 }
243                                         }
244
245                                         goto Fail;
246                                 }
247                                 
248                                 case OpCode.False: {
249                                         goto Fail;
250                                 }
251
252                                 case OpCode.True: {
253                                         goto Pass;
254                                 }
255
256                                 case OpCode.Position: {
257                                         if (!IsPosition ((Position)program[pc + 1], ptr))
258                                                 goto Fail;
259                                         pc += 2;
260                                         break;
261                                 }
262
263                                 case OpCode.String: {
264                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
265                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
266                                         int len = program[pc + 1];
267
268                                         if (reverse) {
269                                                 ptr -= len;
270                                                 if (ptr < 0)
271                                                         goto Fail;
272                                         }
273                                         else 
274                                         if (ptr + len > text_end)
275                                                 goto Fail;
276
277                                         pc += 2;
278                                         for (int i = 0; i < len; ++ i) {
279                                                 char c = text[ptr + i];
280                                                 if (ignore)
281                                                         c = Char.ToLower (c);
282
283                                                 if (c != (char)program[pc ++])
284                                                         goto Fail;
285                                         }
286
287                                         if (!reverse)
288                                                 ptr += len;
289                                         break;
290                                 }
291
292                                 case OpCode.Reference: {
293                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;
294                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;
295                                         int m = GetLastDefined (program [pc + 1]);
296                                         if (m < 0)
297                                                 goto Fail;
298
299                                         int str = marks [m].Index;
300                                         int len = marks [m].Length;
301
302                                         if (reverse) {
303                                                 ptr -= len;
304                                                 if (ptr < 0)
305                                                         goto Fail;
306                                         }
307                                         else if (ptr + len > text_end)
308                                                 goto Fail;
309
310                                         pc += 2;
311                                         for (int i = 0; i < len; ++ i) {
312                                                 if (ignore) {
313                                                         if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
314                                                                 goto Fail;
315                                                 }
316                                                 else {
317                                                         if (text[ptr + i] != text[str + i])
318                                                                 goto Fail;
319                                                 }
320                                         }
321
322                                         if (!reverse)
323                                                 ptr += len;
324                                         break;
325                                 }
326
327                                 case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
328                                 case OpCode.Range: case OpCode.Set: {
329                                         if (!EvalChar (mode, ref ptr, ref pc, false))
330                                                 goto Fail;
331                                         break;
332                                 }
333
334                                 case OpCode.In: {
335                                         int target = pc + program[pc + 1];
336                                         pc += 2;
337                                         if (!EvalChar (mode, ref ptr, ref pc, true))
338                                                 goto Fail;
339
340                                         pc = target;
341                                         break;
342                                 }
343
344                                 case OpCode.Open: {
345                                         Open (program[pc + 1], ptr);
346                                         pc += 2;
347                                         break;
348                                 }
349
350                                 case OpCode.Close: {
351                                         Close (program[pc + 1], ptr);
352                                         pc += 2;
353                                         break;
354                                 }
355
356                                 case OpCode.BalanceStart: {
357
358                                         int start = ptr; //point before the balancing group
359                                         
360                                         if (!Eval (Mode.Match, ref ptr, pc + 5))
361                                                 goto Fail;
362                                         
363                                         
364                                         
365                                         if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
366                                                 goto Fail;
367                                         }
368
369                                         
370                                         pc += program[pc + 4];
371                                         break;
372                                 }
373
374                                 case OpCode.Balance: {
375                                         goto Pass;
376                                 }
377
378                                 case OpCode.IfDefined: {
379                                         int m = GetLastDefined (program [pc + 2]);
380                                         if (m < 0)
381                                                 pc += program[pc + 1];
382                                         else
383                                                 pc += 3;
384                                         break;
385                                 }
386
387                                 case OpCode.Sub: {
388                                         if (!Eval (Mode.Match, ref ptr, pc + 2))
389                                                 goto Fail;
390
391                                         pc += program[pc + 1];
392                                         break;
393                                 }
394
395                                 case OpCode.Test: {
396                                         int cp = Checkpoint ();
397                                         int test_ptr = ptr;
398                                         if (Eval (Mode.Match, ref test_ptr, pc + 3))
399                                                 pc += program[pc + 1];
400                                         else {
401                                                 Backtrack (cp);
402                                                 pc += program[pc + 2];
403                                         }
404                                         break;
405                                 }
406
407                                 case OpCode.Branch: {
408                                         OpCode branch_op;
409                                         do {
410                                                 int cp = Checkpoint ();
411                                                 if (Eval (Mode.Match, ref ptr, pc + 2))
412                                                         goto Pass;
413                                                 
414                                                 Backtrack (cp);
415                                                 
416                                                 pc += program[pc + 1];
417                                                 branch_op = (OpCode)(program[pc] & 0xff);
418                                         } while (branch_op != OpCode.False);
419
420                                         goto Fail;
421                                 }
422
423                                 case OpCode.Jump: {
424                                         pc += program[pc + 1];
425                                         break;
426                                 }
427
428                                 case OpCode.Repeat: {
429                                         this.repeat = new RepeatContext (
430                                                 this.repeat,                    // previous context
431                                                 ReadProgramCount (pc + 2),              // minimum
432                                                 ReadProgramCount (pc + 4),              // maximum
433                                                 (flags & OpFlags.Lazy) != 0,    // lazy
434                                                 pc + 6                          // subexpression
435                                         );
436
437                                         if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
438                                                 goto Pass;
439                                         else {
440                                                 this.repeat = this.repeat.Previous;
441                                                 goto Fail;
442                                         }
443                                 }
444
445                                 case OpCode.Until: {
446                                         RepeatContext current = this.repeat;
447
448                                         //
449                                         // Can we avoid recursion?
450                                         //
451                                         // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
452                                         // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
453                                         // quantifiers.
454                                         //
455                                         // If 'deep' was unmolested, that implies that there was no nested quantifiers.
456                                         // Thus, we can safely avoid recursion.
457                                         //
458                                         if (deep == current)
459                                                 goto Pass;
460
461                                         int start = current.Start;
462                                         int start_count = current.Count;
463
464                                         while (!current.IsMinimum) {
465                                                 ++ current.Count;
466                                                 current.Start = ptr;
467                                                 deep = current;
468                                                 if (!Eval (Mode.Match, ref ptr, current.Expression)) {
469                                                         current.Start = start;
470                                                         current.Count = start_count;
471                                                         goto Fail;
472                                                 }
473                                                 if (deep != current)    // recursive mode
474                                                         goto Pass;
475                                         }
476
477                                         if (ptr == current.Start) {
478                                                 // degenerate match ... match tail or fail
479                                                 this.repeat = current.Previous;
480                                                 deep = null;
481                                                 if (Eval (Mode.Match, ref ptr, pc + 1))
482                                                         goto Pass;
483                                         
484                                                 this.repeat = current;
485                                                 goto Fail;
486                                         }
487
488                                         if (current.IsLazy) {
489                                                 for (;;) {
490                                                         // match tail first ...
491                                                         this.repeat = current.Previous;
492                                                         deep = null;
493                                                         int cp = Checkpoint ();
494                                                         if (Eval (Mode.Match, ref ptr, pc + 1))
495                                                                 goto Pass;
496
497                                                         Backtrack (cp);
498
499                                                         // ... then match more
500                                                         this.repeat = current;
501                                                         if (current.IsMaximum)
502                                                                 goto Fail;
503                                                         ++ current.Count;
504                                                         current.Start = ptr;
505                                                         deep = current;
506                                                         if (!Eval (Mode.Match, ref ptr, current.Expression)) {
507                                                                 current.Start = start;
508                                                                 current.Count = start_count;
509                                                                 goto Fail;
510                                                         }
511                                                         if (deep != current)    // recursive mode
512                                                                 goto Pass;
513                                                         // Degenerate match: ptr has not moved since the last (failed) tail match.
514                                                         // So, next and subsequent tail matches will fail.
515                                                         if (ptr == current.Start)
516                                                                 goto Fail;
517                                                 }
518                                         } else {
519                                                 int stack_size = stack.Count;
520
521                                                 // match greedily as much as possible
522                                                 while (!current.IsMaximum) {
523                                                         int cp = Checkpoint ();
524                                                         int old_ptr = ptr;
525                                                         int old_start = current.Start;
526
527                                                         ++ current.Count;
528                                                         current.Start = ptr;
529                                                         deep = current;
530                                                         if (!Eval (Mode.Match, ref ptr, current.Expression)) {
531                                                                 -- current.Count;
532                                                                 current.Start = old_start;
533                                                                 Backtrack (cp);
534                                                                 break;
535                                                         }
536                                                         if (deep != current) {
537                                                                 // recursive mode: no more backtracking, truncate the stack
538                                                                 stack.Count = stack_size;
539                                                                 goto Pass;
540                                                         }
541                                                         stack.Push (cp);
542                                                         stack.Push (old_ptr);
543
544                                                         // Degenerate match: no point going on
545                                                         if (ptr == current.Start)
546                                                                 break;
547                                                 }
548
549                                                 // then, match the tail, backtracking as necessary.
550                                                 this.repeat = current.Previous;
551                                                 for (;;) {
552                                                         deep = null;
553                                                         if (Eval (Mode.Match, ref ptr, pc + 1)) {
554                                                                 stack.Count = stack_size;
555                                                                 goto Pass;
556                                                         }
557                                                         if (stack.Count == stack_size) {
558                                                                 this.repeat = current;
559                                                                 goto Fail;
560                                                         }
561
562                                                         --current.Count;
563                                                         ptr = stack.Pop ();
564                                                         Backtrack (stack.Pop ());
565                                                 }
566                                         }
567                                 }
568
569                                 case OpCode.FastRepeat: {
570                                         this.fast = new RepeatContext (
571                                                 fast,
572                                                 ReadProgramCount (pc + 2),              // minimum
573                                                 ReadProgramCount (pc + 4),              // maximum
574                                                 (flags & OpFlags.Lazy) != 0,    // lazy
575                                                 pc + 6                          // subexpression
576                                         );
577
578                                         fast.Start = ptr;
579
580                                         int cp = Checkpoint ();
581
582                                         pc += program[pc + 1];          // tail expression
583                                         ushort tail_word = program[pc];
584
585                                         int c1 = -1;            // first character of tail operator
586                                         int c2 = -1;            // ... and the same character, in upper case if ignoring case
587                                         int coff = 0;           // 0 or -1 depending on direction
588
589                                         OpCode tail_op = (OpCode)(tail_word & 0xff);
590                                         if (tail_op == OpCode.Character || tail_op == OpCode.String) {
591                                                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);
592
593                                                 if ((tail_flags & OpFlags.Negate) != 0)
594                                                         goto skip;
595
596                                                 if (tail_op == OpCode.String)
597                                                 {
598                                                         int offset = 0;
599                                                 
600                                                         if ((tail_flags & OpFlags.RightToLeft) != 0)
601                                                         {
602                                                                 offset = program[pc + 1] - 1 ;
603                                                         }
604                                                           
605                                                         c1 = program[pc + 2 + offset];                          // first char of string
606                                                 }
607                                                 else
608                                                         c1 = program[pc + 1];                           // character
609                                                 
610                                                 if ((tail_flags & OpFlags.IgnoreCase) != 0)
611                                                         c2 = Char.ToUpper ((char)c1);                   // ignore case
612                                                 else
613                                                         c2 = c1;
614
615                                                 if ((tail_flags & OpFlags.RightToLeft) != 0)
616                                                         coff = -1;                                      // reverse
617                                                 else
618                                                         coff = 0;
619                                         }
620
621                                 skip:
622                                         if (fast.IsLazy) {
623                                                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
624                                                         //Console.WriteLine ("lazy fast: failed mininum.");
625                                                         fast = fast.Previous;
626                                                         goto Fail;
627                                                 }
628                                                 
629                                                 while (true) {
630                                                         int p = ptr + coff;
631                                                         if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
632                                                                 deep = null;
633                                                                 if (Eval (Mode.Match, ref ptr, pc))
634                                                                         break;
635                                                         }
636
637                                                         if (fast.IsMaximum) {
638                                                                 //Console.WriteLine ("lazy fast: failed with maximum.");
639                                                                 fast = fast.Previous;
640                                                                 goto Fail;
641                                                         }
642
643                                                         Backtrack (cp);
644                                                         if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
645                                                                 //Console.WriteLine ("lazy fast: no more.");
646                                                                 fast = fast.Previous;
647                                                                 goto Fail;
648                                                         }
649                                                 }
650                                                 fast = fast.Previous;
651                                                 goto Pass;
652                                         }
653                                         else {
654                                                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
655                                                         fast = fast.Previous;
656                                                         goto Fail;
657                                                 }
658                                         
659                                                 int width;
660                                                 if (fast.Count > 0)
661                                                         width = (ptr - fast.Start) / fast.Count;
662                                                 else
663                                                         width = 0;
664
665                                                 while (true) {
666                                                         int p = ptr + coff;
667                                                         if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
668                                                                 deep = null;
669                                                                 if (Eval (Mode.Match, ref ptr, pc))
670                                                                         break;
671                                                         }
672
673                                                         -- fast.Count;
674                                                         if (!fast.IsMinimum) {
675                                                                 fast = fast.Previous;
676                                                                 goto Fail;
677                                                         }
678
679                                                         ptr -= width;
680                                                         Backtrack (cp);
681                                                 }
682                                                 fast = fast.Previous;
683                                                 goto Pass;
684                                         }
685                                 }
686
687                                 case OpCode.Info: {
688                                         Debug.Assert (false, "Regex", "Info block found in pattern");
689                                         goto Fail;
690                                 }
691                                 }
692                         }
693                 Pass:
694                         ref_ptr = ptr;
695
696                         switch (mode) {
697                         case Mode.Match:
698                                 return true;
699
700                         case Mode.Count: {
701                                 ++ fast.Count;
702                                 if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
703                                         return true;
704
705                                 pc = fast.Expression;
706                                 goto Begin;
707                         }
708                         }
709
710                 Fail:
711                         switch (mode) {
712                         case Mode.Match:
713                                 return false;
714
715                         case Mode.Count: {
716                                 if (!fast.IsLazy && fast.IsMinimum)
717                                         return true;
718
719                                 ref_ptr = fast.Start;
720                                 return false;
721                         }
722                         }
723
724                         return false;
725                 }
726
727                 private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
728                         bool consumed = false;
729                         char c = '\0';
730                         bool negate;
731                         bool ignore;
732                 
733                         do {
734                                 ushort word = program[pc];
735                                 OpCode op = (OpCode)(word & 0x00ff);
736                                 OpFlags flags = (OpFlags)(word & 0xff00);
737
738                                 ++ pc;
739
740                                 ignore = (flags & OpFlags.IgnoreCase) != 0;
741                                 
742                                 // consume character: the direction of an In construct is
743                                 // determined by the direction of its first op
744
745                                 if (!consumed) {
746                                         if ((flags & OpFlags.RightToLeft) != 0) {
747                                                 if (ptr <= 0)
748                                                         return false;
749
750                                                 c = text[-- ptr];
751                                         }
752                                         else {
753                                                 if (ptr >= text_end)
754                                                         return false;
755
756                                                 c = text[ptr ++];
757                                         }
758
759                                         if (ignore)
760                                                 c = Char.ToLower (c);
761
762                                         consumed = true;
763                                 }
764
765                                 // negate flag
766
767                                 negate = (flags & OpFlags.Negate) != 0;
768
769                                 // execute op
770                                 
771                                 switch (op) {
772                                 case OpCode.True:
773                                         return true;
774
775                                 case OpCode.False:
776                                         return false;
777                                 
778                                 case OpCode.Character: {
779                                         if (c == (char)program[pc ++])
780                                                 return !negate;
781                                         break;
782                                 }
783
784                                 case OpCode.Category: {
785                                         if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
786                                                 return !negate;
787                                         break;
788                                 }
789
790                                 case OpCode.NotCategory: {
791                                         if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
792                                                 return !negate;
793                                         break;
794                                 }
795                                 
796                                 case OpCode.Range: {
797                                         int lo = (char)program[pc ++];
798                                         int hi = (char)program[pc ++];
799                                         if (lo <= c && c <= hi)
800                                                 return !negate;
801                                         break;
802                                 }
803
804                                 case OpCode.Set: {
805                                         int lo = (char)program[pc ++];
806                                         int len = (char)program[pc ++];
807                                         int bits = pc;
808                                         pc += len;
809
810                                         int i = (int)c - lo;
811                                         if (i < 0 || i >= len << 4)
812                                                 break;
813                                                 
814
815                                         if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
816                                                 return !negate;
817                                         break;
818                                 }
819                                 }
820                         } while (multi);
821
822                         return negate;
823                 }
824
825                 private bool TryMatch (ref int ref_ptr, int pc) {
826                         Reset ();
827                         
828                         int ptr = ref_ptr;
829                         marks [groups [0]].Start = ptr;
830                         if (Eval (Mode.Match, ref ptr, pc)) {
831                                 marks [groups [0]].End = ptr;
832                                 ref_ptr = ptr;
833                                 return true;
834                         }
835
836                         return false;
837                 }
838                 
839                 private bool IsPosition (Position pos, int ptr) {
840                         switch (pos) {
841                         case Position.Start: case Position.StartOfString:
842                                 return ptr == 0;
843
844                         case Position.StartOfLine:
845                                 return ptr == 0 || text[ptr - 1] == '\n';
846                                 
847                         case Position.StartOfScan:
848                                 return ptr == scan_ptr;
849                         
850                         case Position.End:
851                                 return ptr == text_end ||
852                                         (ptr == text_end - 1 && text[ptr] == '\n');
853
854                         case Position.EndOfLine:
855                                 return ptr == text_end || text[ptr] == '\n';
856                                 
857                         case Position.EndOfString:
858                                 return ptr == text_end;
859                                 
860                         case Position.Boundary:
861                                 if (text_end == 0)
862                                         return false;
863
864                                 if (ptr == 0)
865                                         return IsWordChar (text[ptr]);
866                                 else if (ptr == text_end)
867                                         return IsWordChar (text[ptr - 1]);
868                                 else
869                                         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);
870
871                         case Position.NonBoundary:
872                                 if (text_end == 0)
873                                         return false;
874
875                                 if (ptr == 0)
876                                         return !IsWordChar (text[ptr]);
877                                 else if (ptr == text_end)
878                                         return !IsWordChar (text[ptr - 1]);
879                                 else
880                                         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
881                         
882                         default:
883                                 return false;
884                         }
885                 }
886
887                 private bool IsWordChar (char c) {
888                         return CategoryUtils.IsCategory (Category.Word, c);
889                 }
890
891                 private string GetString (int pc) {
892                         int len = program[pc + 1];
893                         int str = pc + 2;
894
895                         char[] cs = new char[len];
896                         for (int i = 0; i < len; ++ i)
897                                 cs[i] = (char)program[str ++];
898
899                         return new string (cs);
900                 }
901
902                 // capture management
903
904                 private void Open (int gid, int ptr) {
905                         int m = groups [gid];
906                         if (m < mark_start || marks [m].IsDefined) {
907                                 m = CreateMark (m);
908                                 groups [gid] = m;
909                         }
910
911                         marks [m].Start = ptr;
912                 }
913
914                 private void Close (int gid, int ptr) {
915                         marks [groups [gid]].End = ptr;
916                 }
917
918                 private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
919                         int b = groups [balance_gid];
920
921                         if(b == -1 || marks[b].Index < 0) {
922                                 //Group not previously matched
923                                 return false;
924                         }
925
926                         Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
927
928                         if (gid > 0 && capture){ 
929                                 Open (gid, marks [b].Index + marks [b].Length);
930                                 Close (gid, ptr);
931                         }
932
933                         groups [balance_gid] = marks[b].Previous;
934
935                         return true;
936                 }
937
938                 private int Checkpoint () {
939                         mark_start = mark_end;
940                         return mark_start;
941                 }
942
943                 private void Backtrack (int cp) {
944                         Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
945
946                         for (int i = 0; i < groups.Length; ++ i) {
947                                 int m = groups [i];
948                                 while (cp <= m)
949                                         m = marks [m].Previous;
950
951                                 groups [i] = m;
952                         }
953                 }
954
955                 private void ResetGroups () {
956                         int n = groups.Length;
957                         if (marks == null)
958                                 marks = new Mark [n * 10];
959
960                         for (int i = 0; i < n; ++ i) {
961                                 groups [i] = i;
962
963                                 marks [i].Start = -1;
964                                 marks [i].End = -1;
965                                 marks [i].Previous = -1;
966                         }
967                         
968                         mark_start = 0;
969                         mark_end = n;
970                 }
971
972                 private int GetLastDefined (int gid) {
973                         int m = groups [gid];
974                         while (m >= 0 && !marks [m].IsDefined)
975                                 m = marks [m].Previous;
976
977                         return m;
978                 }
979
980                 private int CreateMark (int previous) {
981                         if (mark_end == marks.Length) {
982                                 Mark [] dest = new Mark [marks.Length * 2];
983                                 marks.CopyTo (dest, 0);
984                                 marks = dest;
985                         }
986
987                         int m = mark_end ++;
988                         marks [m].Start = marks [m].End = -1;
989                         marks [m].Previous = previous;
990
991                         return m;
992                 }
993
994                 private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
995                 {
996                         first_mark_index = -1;
997                         n_caps = 0;
998                         for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
999                                 if (!marks [m].IsDefined)
1000                                         continue;
1001                                 if (first_mark_index < 0)
1002                                         first_mark_index = m;
1003                                 ++n_caps;
1004                         }
1005                 }
1006
1007                 private void PopulateGroup (Group g, int first_mark_index, int n_caps)
1008                 {
1009                         int i = 1;
1010                         for (int m = marks [first_mark_index].Previous; m >= 0; m = marks [m].Previous) {
1011                                 if (!marks [m].IsDefined)
1012                                         continue;
1013                                 Capture cap = new Capture (text, marks [m].Index, marks [m].Length);
1014                                 g.Captures.SetValue (cap, n_caps - 1 - i);
1015                                 ++i;
1016                         }
1017                 }
1018
1019                 private Match GenerateMatch (Regex regex)
1020                 {
1021                         int n_caps, first_mark_index;
1022                         Group g;
1023                         GetGroupInfo (0, out first_mark_index, out n_caps);
1024                         Match retval = new Match (regex, this, text, text_end, groups.Length, 
1025                                                   marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
1026                         PopulateGroup (retval, first_mark_index, n_caps);
1027
1028                         for (int gid = 1; gid < groups.Length; ++ gid) {
1029                                 GetGroupInfo (gid, out first_mark_index, out n_caps);
1030                                 if (first_mark_index < 0) {
1031                                         g = Group.Fail;
1032                                 } else {
1033                                         g = new Group (text, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
1034                                         PopulateGroup (g, first_mark_index, n_caps);
1035                                 }
1036                                 retval.Groups.SetValue (g, gid);
1037                         }
1038                         return retval;
1039                 }
1040
1041                 // interpreter attributes
1042
1043                 private ushort[] program;               // regex program
1044                 private int program_start;              // first instruction after info block
1045                 private string text;                    // input text
1046                 private int text_end;                   // end of input text (last character + 1)
1047                 private int group_count;                // number of capturing groups
1048                 private int match_min;//, match_max;    // match width information
1049                 private QuickSearch qs;                 // fast substring matcher
1050
1051                 // match state
1052                 
1053                 private int scan_ptr;                   // start of scan
1054
1055                 private RepeatContext repeat;           // current repeat context
1056                 private RepeatContext fast;             // fast repeat context
1057
1058                 // Repeat/Until handling
1059                 private IntStack stack = new IntStack (); // utility stack
1060                 private RepeatContext deep;             // points to the most-nested repeat context
1061
1062                 private Mark[] marks = null;            // mark stack
1063                 private int mark_start;                 // start of current checkpoint
1064                 private int mark_end;                   // end of checkpoint/next free mark
1065
1066                 private int[] groups;                   // current group definitions
1067
1068                 // private classes
1069
1070                 private struct IntStack {
1071                         int [] values;
1072                         int count;
1073                         public int Pop ()
1074                         {
1075                                 return values [--count];
1076                         }
1077                         public void Push (int value)
1078                         {
1079                                 if (values == null) {
1080                                         values = new int [8];
1081                                 } else if (count == values.Length) {
1082                                         int new_size = values.Length;
1083                                         new_size += new_size >> 1;
1084                                         int [] new_values = new int [new_size];
1085                                         for (int i = 0; i < count; ++i)
1086                                                 new_values [i] = values [i];
1087                                         values = new_values;
1088                                 }
1089                                 values [count++] = value;
1090                         }
1091                         public int Top {
1092                                 get { return values [count - 1]; }
1093                         }
1094                         public int Count {
1095                                 get { return count; }
1096                                 set {
1097                                         if (value > count)
1098                                                 throw new SystemException ("can only truncate the stack");
1099                                         count = value;
1100                                 }
1101                         }
1102                 }
1103
1104                 private class RepeatContext {
1105                         public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
1106                                 this.previous = previous;
1107                                 this.min = min;
1108                                 this.max = max;
1109                                 this.lazy = lazy;
1110                                 this.expr_pc = expr_pc;
1111                                 
1112                                 this.start = -1;
1113                                 this.count = 0;
1114                         }
1115
1116                         public int Count {
1117                                 get { return count; }
1118                                 set { count = value; }
1119                         }
1120
1121                         public int Start {
1122                                 get { return start; }
1123                                 set { start = value; }
1124                         }
1125
1126                         public bool IsMinimum {
1127                                 get { return min <= count; }
1128                         }
1129
1130                         public bool IsMaximum {
1131                                 get { return max <= count; }
1132                         }
1133
1134                         public bool IsLazy {
1135                                 get { return lazy; }
1136                         }
1137
1138                         public int Expression {
1139                                 get { return expr_pc; }
1140                         }
1141
1142                         public RepeatContext Previous {
1143                                 get { return previous; }
1144                         }
1145                 
1146                         private int start;
1147                         private int min, max;
1148                         private bool lazy;
1149                         private int expr_pc;
1150                         private RepeatContext previous;
1151
1152                         private int count;
1153                 }
1154
1155                 private enum Mode {
1156                         Search,
1157                         Match,
1158                         Count
1159                 }
1160         }
1161 }