2003-11-27 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / interpreter.cs
1 //\r
2 // assembly:    System\r
3 // namespace:   System.Text.RegularExpressions\r
4 // file:        interpreter.cs\r
5 //\r
6 // author:      Dan Lewis (dlewis@gmx.co.uk)\r
7 //              (c) 2002\r
8 \r
9 using System;\r
10 using System.Collections;\r
11 using System.Diagnostics;\r
12 using System.Globalization;\r
13 \r
14 namespace System.Text.RegularExpressions {\r
15 \r
16         class Interpreter : IMachine {\r
17                 public Interpreter (ushort[] program) {\r
18                         this.program = program;\r
19                         this.qs = null;\r
20 \r
21                         // process info block\r
22 \r
23                         Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");\r
24 \r
25                         this.group_count = program[1] + 1;\r
26                         this.match_min = program[2];\r
27                         this.match_max = program[3];\r
28 \r
29                         // setup\r
30 \r
31                         this.program_start = 4;\r
32                         this.groups = new int [group_count];\r
33                 }\r
34 \r
35                 // IMachine implementation\r
36 \r
37                 public Match Scan (Regex regex, string text, int start, int end) {\r
38                         this.text = text;\r
39                         this.text_end = end;\r
40                         this.scan_ptr = start;\r
41 \r
42                         if (Eval (Mode.Match, ref scan_ptr, program_start))\r
43                                 return GenerateMatch (regex);\r
44 \r
45                         return Match.Empty;\r
46                 }\r
47 \r
48                 // private methods\r
49 \r
50                 private void Reset () {\r
51                         ResetGroups ();\r
52                         fast = repeat = null;\r
53                 }\r
54 \r
55                 private bool Eval (Mode mode, ref int ref_ptr, int pc) {\r
56                         int ptr = ref_ptr;\r
57                 Begin:\r
58                         for (;;) {\r
59                                 ushort word = program[pc];\r
60                                 OpCode op = (OpCode)(word & 0x00ff);\r
61                                 OpFlags flags = (OpFlags)(word & 0xff00);\r
62 \r
63                                 switch (op) {\r
64                                 case OpCode.Anchor: {\r
65                                         int skip = program[pc + 1];\r
66 \r
67                                         int anch_offset = program[pc + 2];\r
68                                         int anch_ptr = ptr + anch_offset;\r
69                                         int anch_end = text_end - match_min + anch_offset;      // maximum anchor position\r
70 \r
71                                         // the general case for an anchoring expression is at the bottom, however we\r
72                                         // do some checks for the common cases before to save processing time. the current\r
73                                         // optimizer only outputs three types of anchoring expressions: fixed position,\r
74                                         // fixed substring, and no anchor.\r
75 \r
76                                         OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);\r
77                                         if (anch_op == OpCode.Position && skip == 6) {                          // position anchor\r
78                                                 // Anchor\r
79                                                 //      Position\r
80                                                 //      True\r
81 \r
82                                                 switch ((Position)program[pc + 4]) {\r
83                                                 case Position.StartOfString:\r
84                                                         if (anch_ptr == 0) {\r
85                                                                 ptr = 0;\r
86                                                                 if (TryMatch (ref ptr, pc + skip))\r
87                                                                         goto Pass;\r
88                                                         }\r
89                                                         break;\r
90                                                 \r
91                                                 case Position.StartOfLine:\r
92                                                         if (anch_ptr == 0) {\r
93                                                                 ptr = 0;\r
94                                                                 if (TryMatch (ref ptr, pc + skip))\r
95                                                                         goto Pass;\r
96 \r
97                                                                 ++ anch_ptr;\r
98                                                         }\r
99 \r
100                                                         while (anch_ptr <= anch_end) {\r
101                                                                 if (text[anch_ptr - 1] == '\n') {\r
102                                                                         ptr = anch_ptr - anch_offset;\r
103                                                                         if (TryMatch (ref ptr, pc + skip))\r
104                                                                                 goto Pass;\r
105                                                                 }\r
106 \r
107                                                                 ++ anch_ptr;\r
108                                                         }\r
109                                                         break;\r
110                                                 \r
111                                                 case Position.StartOfScan:\r
112                                                         if (anch_ptr == scan_ptr) {\r
113                                                                 ptr = scan_ptr - anch_offset;\r
114                                                                 if (TryMatch (ref ptr, pc + skip))\r
115                                                                         goto Pass;\r
116                                                         }\r
117                                                         break;\r
118 \r
119                                                 default:\r
120                                                         // FIXME\r
121                                                         break;\r
122                                                 }\r
123                                         }\r
124                                         else if (qs != null ||\r
125                                                 (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {    // substring anchor\r
126                                                 // Anchor\r
127                                                 //      String\r
128                                                 //      True\r
129 \r
130                                                 if (qs == null) {\r
131                                                         bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;\r
132                                                         string substring = GetString (pc + 3);\r
133 \r
134                                                         qs = new QuickSearch (substring, ignore);\r
135                                                 }\r
136 \r
137                                                 while (anch_ptr <= anch_end) {\r
138                                                         anch_ptr = qs.Search (text, anch_ptr, anch_end);\r
139                                                         if (anch_ptr < 0)\r
140                                                                 break;\r
141 \r
142                                                         ptr = anch_ptr - anch_offset;\r
143                                                         if (TryMatch (ref ptr, pc + skip))\r
144                                                                 goto Pass;\r
145 \r
146                                                         ++ anch_ptr;\r
147                                                 }\r
148                                         }\r
149                                         else if (anch_op == OpCode.True) {                                      // no anchor\r
150                                                 // Anchor\r
151                                                 //      True\r
152 \r
153                                                 while (anch_ptr <= anch_end) {\r
154                                                         ptr = anch_ptr;\r
155                                                         if (TryMatch (ref ptr, pc + skip))\r
156                                                                 goto Pass;\r
157 \r
158                                                         ++ anch_ptr;\r
159                                                 }\r
160                                         }\r
161                                         else {                                                                  // general case\r
162                                                 // Anchor\r
163                                                 //      <expr>\r
164                                                 //      True\r
165 \r
166                                                 while (anch_ptr <= anch_end) {\r
167                                                         ptr = anch_ptr;\r
168                                                         if (Eval (Mode.Match, ref ptr, pc + 3)) {\r
169                                                                 // anchor expression passed: try real expression at the correct offset\r
170 \r
171                                                                 ptr = anch_ptr - anch_offset;\r
172                                                                 if (TryMatch (ref ptr, pc + skip))\r
173                                                                         goto Pass;\r
174                                                         }\r
175 \r
176                                                         ++ anch_ptr;\r
177                                                 }\r
178                                         }\r
179 \r
180                                         goto Fail;\r
181                                 }\r
182                                 \r
183                                 case OpCode.False: {\r
184                                         goto Fail;\r
185                                 }\r
186 \r
187                                 case OpCode.True: {\r
188                                         goto Pass;\r
189                                 }\r
190 \r
191                                 case OpCode.Position: {\r
192                                         if (!IsPosition ((Position)program[pc + 1], ptr))\r
193                                                 goto Fail;\r
194                                         pc += 2;\r
195                                         break;\r
196                                 }\r
197 \r
198                                 case OpCode.String: {\r
199                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
200                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
201                                         int len = program[pc + 1];\r
202 \r
203                                         if (reverse) {\r
204                                                 ptr -= len;\r
205                                                 if (ptr < 0)\r
206                                                         goto Fail;\r
207                                         }\r
208                                         else if (ptr + len > text_end)\r
209                                                 goto Fail;\r
210 \r
211                                         pc += 2;\r
212                                         for (int i = 0; i < len; ++ i) {\r
213                                                 char c = text[ptr + i];\r
214                                                 if (ignore)\r
215                                                         c = Char.ToLower (c);\r
216 \r
217                                                 if (c != (char)program[pc ++])\r
218                                                         goto Fail;\r
219                                         }\r
220 \r
221                                         if (!reverse)\r
222                                                 ptr += len;\r
223                                         break;\r
224                                 }\r
225 \r
226                                 case OpCode.Reference: {\r
227                                         bool reverse = (flags & OpFlags.RightToLeft) != 0;\r
228                                         bool ignore = (flags & OpFlags.IgnoreCase) != 0;\r
229                                         int m = GetLastDefined (program [pc + 1]);\r
230                                         if (m < 0)\r
231                                                 goto Fail;\r
232 \r
233                                         int str = marks [m].Index;\r
234                                         int len = marks [m].Length;\r
235 \r
236                                         if (reverse) {\r
237                                                 ptr -= len;\r
238                                                 if (ptr < 0)\r
239                                                         goto Fail;\r
240                                         }\r
241                                         else if (ptr + len > text_end)\r
242                                                 goto Fail;\r
243 \r
244                                         pc += 2;\r
245                                         for (int i = 0; i < len; ++ i) {\r
246                                                 if (ignore) {\r
247                                                         if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))\r
248                                                                 goto Fail;\r
249                                                 }\r
250                                                 else {\r
251                                                         if (text[ptr + i] != text[str + i])\r
252                                                                 goto Fail;\r
253                                                 }\r
254                                         }\r
255 \r
256                                         if (!reverse)\r
257                                                 ptr += len;\r
258                                         break;\r
259                                 }\r
260 \r
261                                 case OpCode.Character: case OpCode.Category:\r
262                                 case OpCode.Range: case OpCode.Set: {\r
263                                         if (!EvalChar (mode, ref ptr, ref pc, false))\r
264                                                 goto Fail;\r
265                                         break;\r
266                                 }\r
267 \r
268                                 case OpCode.In: {\r
269                                         int target = pc + program[pc + 1];\r
270                                         pc += 2;\r
271                                         if (!EvalChar (mode, ref ptr, ref pc, true))\r
272                                                 goto Fail;\r
273 \r
274                                         pc = target;\r
275                                         break;\r
276                                 }\r
277 \r
278                                 case OpCode.Open: {\r
279                                         Open (program[pc + 1], ptr);\r
280                                         pc += 2;\r
281                                         break;\r
282                                 }\r
283 \r
284                                 case OpCode.Close: {\r
285                                         Close (program[pc + 1], ptr);\r
286                                         pc += 2;\r
287                                         break;\r
288                                 }\r
289 \r
290                                 case OpCode.Balance: {\r
291                                         Balance (program[pc + 1], program[pc + 2], ptr);\r
292                                         break;\r
293                                 }\r
294 \r
295                                 case OpCode.IfDefined: {\r
296                                         int m = GetLastDefined (program [pc + 2]);\r
297                                         if (m < 0)\r
298                                                 pc += program[pc + 1];\r
299                                         else\r
300                                                 pc += 3;\r
301                                         break;\r
302                                 }\r
303 \r
304                                 case OpCode.Sub: {\r
305                                         if (!Eval (Mode.Match, ref ptr, pc + 2))\r
306                                                 goto Fail;\r
307 \r
308                                         pc += program[pc + 1];\r
309                                         break;\r
310                                 }\r
311 \r
312                                 case OpCode.Test: {\r
313                                         int cp = Checkpoint ();\r
314                                         int test_ptr = ptr;\r
315                                         if (Eval (Mode.Match, ref test_ptr, pc + 3))\r
316                                                 pc += program[pc + 1];\r
317                                         else {\r
318                                                 Backtrack (cp);\r
319                                                 pc += program[pc + 2];\r
320                                         }\r
321                                         break;\r
322                                 }\r
323 \r
324                                 case OpCode.Branch: {\r
325                                         OpCode branch_op;\r
326                                         do {\r
327                                                 int cp = Checkpoint ();\r
328                                                 if (Eval (Mode.Match, ref ptr, pc + 2))\r
329                                                         goto Pass;\r
330                                                 \r
331                                                 Backtrack (cp);\r
332                                                 \r
333                                                 pc += program[pc + 1];\r
334                                                 branch_op = (OpCode)(program[pc] & 0xff);\r
335                                         } while (branch_op != OpCode.False);\r
336 \r
337                                         goto Fail;\r
338                                 }\r
339 \r
340                                 case OpCode.Jump: {\r
341                                         pc += program[pc + 1];\r
342                                         break;\r
343                                 }\r
344 \r
345                                 case OpCode.Repeat: {\r
346                                         this.repeat = new RepeatContext (\r
347                                                 this.repeat,                    // previous context\r
348                                                 program[pc + 2],                // minimum\r
349                                                 program[pc + 3],                // maximum\r
350                                                 (flags & OpFlags.Lazy) != 0,    // lazy\r
351                                                 pc + 4                          // subexpression\r
352                                         );\r
353 \r
354                                         if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))\r
355                                                 goto Pass;\r
356                                         else {\r
357                                                 this.repeat = this.repeat.Previous;\r
358                                                 goto Fail;\r
359                                         }\r
360                                 }\r
361 \r
362                                 case OpCode.Until: {\r
363                                         RepeatContext current = this.repeat;\r
364                                         int start = current.Start;\r
365 \r
366                                         if (!current.IsMinimum) {\r
367                                                 ++ current.Count;\r
368                                                 current.Start = ptr;\r
369                                                 if (Eval (Mode.Match, ref ptr, repeat.Expression))\r
370                                                         goto Pass;\r
371 \r
372                                                 current.Start = start;\r
373                                                 -- current.Count;\r
374                                                 goto Fail;\r
375                                         }\r
376 \r
377                                         if (ptr == current.Start) {\r
378                                                 // degenerate match ... match tail or fail\r
379 \r
380                                                 this.repeat = current.Previous;\r
381                                                 if (Eval (Mode.Match, ref ptr, pc + 1))\r
382                                                         goto Pass;\r
383                                         \r
384                                                 this.repeat = current;\r
385                                                 goto Fail;\r
386                                         }\r
387 \r
388                                         if (current.IsLazy) {\r
389                                                 // match tail first ...\r
390 \r
391                                                 this.repeat = current.Previous;\r
392                                                 int cp = Checkpoint ();\r
393                                                 if (Eval (Mode.Match, ref ptr, pc + 1))\r
394                                                         goto Pass;\r
395 \r
396                                                 Backtrack (cp);\r
397 \r
398                                                 // ... then match more\r
399 \r
400                                                 this.repeat = current;\r
401                                                 if (!current.IsMaximum) {\r
402                                                         ++ current.Count;\r
403                                                         current.Start = ptr;\r
404                                                         if (Eval (Mode.Match, ref ptr, current.Expression))\r
405                                                                 goto Pass;\r
406 \r
407                                                         current.Start = start;\r
408                                                         -- current.Count;\r
409                                                         goto Fail;\r
410                                                 }\r
411 \r
412                                                 return false;\r
413                                         }\r
414                                         else {\r
415                                                 // match more first ...\r
416 \r
417                                                 if (!current.IsMaximum) {\r
418                                                         int cp = Checkpoint ();\r
419                                                         ++ current.Count;\r
420                                                         current.Start = ptr;\r
421                                                         if (Eval (Mode.Match, ref ptr, current.Expression))\r
422                                                                 goto Pass;\r
423 \r
424                                                         current.Start = start;\r
425                                                         -- current.Count;\r
426                                                         Backtrack (cp);\r
427                                                 }\r
428 \r
429                                                 // ... then match tail\r
430 \r
431                                                 this.repeat = current.Previous;\r
432                                                 if (Eval (Mode.Match, ref ptr, pc + 1))\r
433                                                         goto Pass;\r
434 \r
435                                                 this.repeat = current;\r
436                                                 goto Fail;\r
437                                         }\r
438                                 }\r
439 \r
440                                 case OpCode.FastRepeat: {\r
441                                         this.fast = new RepeatContext (\r
442                                                 fast,\r
443                                                 program[pc + 2],                // minimum\r
444                                                 program[pc + 3],                // maximum\r
445                                                 (flags & OpFlags.Lazy) != 0,    // lazy\r
446                                                 pc + 4                          // subexpression\r
447                                         );\r
448                                         fast.Start = ptr;\r
449 \r
450                                         int cp = Checkpoint ();\r
451 \r
452                                         pc += program[pc + 1];          // tail expression\r
453                                         ushort tail_word = program[pc];\r
454 \r
455                                         int c1, c2;                     // first character of tail operator\r
456                                         int coff;                       // 0 or -1 depending on direction\r
457 \r
458                                         OpCode tail_op = (OpCode)(tail_word & 0xff);\r
459                                         if (tail_op == OpCode.Character || tail_op == OpCode.String) {\r
460                                                 OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);\r
461 \r
462                                                 if (tail_op == OpCode.String)\r
463                                                         c1 = program[pc + 2];                           // first char of string\r
464                                                 else\r
465                                                         c1 = program[pc + 1];                           // character\r
466                                                 \r
467                                                 if ((tail_flags & OpFlags.IgnoreCase) != 0)\r
468                                                         c2 = Char.ToUpper ((char)c1);                   // ignore case\r
469                                                 else\r
470                                                         c2 = c1;\r
471 \r
472                                                 if ((tail_flags & OpFlags.RightToLeft) != 0)\r
473                                                         coff = -1;                                      // reverse\r
474                                                 else\r
475                                                         coff = 0;\r
476                                         }\r
477                                         else {\r
478                                                 c1 = c2 = -1;\r
479                                                 coff = 0;\r
480                                         }\r
481 \r
482                                         if (fast.IsLazy) {\r
483                                                 if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {\r
484                                                         //Console.WriteLine ("lazy fast: failed mininum.");\r
485                                                         fast = fast.Previous;\r
486                                                         goto Fail;\r
487                                                 }\r
488                                                 \r
489                                                 while (true) {\r
490                                                         int p = ptr + coff;\r
491                                                         if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
492                                                             Eval (Mode.Match, ref ptr, pc))\r
493                                                                 break;\r
494 \r
495                                                         if (fast.IsMaximum) {\r
496                                                                 //Console.WriteLine ("lazy fast: failed with maximum.");\r
497                                                                 fast = fast.Previous;\r
498                                                                 goto Fail;\r
499                                                         }\r
500 \r
501                                                         Backtrack (cp);\r
502                                                         if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
503                                                                 //Console.WriteLine ("lazy fast: no more.");\r
504                                                                 fast = fast.Previous;\r
505                                                                 goto Fail;\r
506                                                         }\r
507                                                 }\r
508                                                 fast = fast.Previous;\r
509                                                 goto Pass;\r
510                                         }\r
511                                         else {\r
512                                                 if (!Eval (Mode.Count, ref ptr, fast.Expression)) {\r
513                                                         fast = fast.Previous;\r
514                                                         goto Fail;\r
515                                                 }\r
516                                         \r
517                                                 int width;\r
518                                                 if (fast.Count > 0)\r
519                                                         width = (ptr - fast.Start) / fast.Count;\r
520                                                 else\r
521                                                         width = 0;\r
522 \r
523                                                 while (true) {\r
524                                                         int p = ptr + coff;\r
525                                                         if ((c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) &&\r
526                                                             Eval (Mode.Match, ref ptr, pc))\r
527                                                                 break;\r
528 \r
529                                                         -- fast.Count;\r
530                                                         if (!fast.IsMinimum) {\r
531                                                                 fast = fast.Previous;\r
532                                                                 goto Fail;\r
533                                                         }\r
534 \r
535                                                         ptr -= width;\r
536                                                         Backtrack (cp);\r
537                                                 }\r
538                                                 fast = fast.Previous;\r
539                                                 goto Pass;\r
540                                         }\r
541                                 }\r
542 \r
543                                 case OpCode.Info: {\r
544                                         Debug.Assert (false, "Regex", "Info block found in pattern");\r
545                                         goto Fail;\r
546                                 }\r
547                                 }\r
548                         }\r
549                 Pass:\r
550                         ref_ptr = ptr;\r
551 \r
552                         switch (mode) {\r
553                         case Mode.Match:\r
554                                 return true;\r
555 \r
556                         case Mode.Count: {\r
557                                 ++ fast.Count;\r
558                                 if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))\r
559                                         return true;\r
560 \r
561                                 pc = fast.Expression;\r
562                                 goto Begin;\r
563                         }\r
564                         }\r
565 \r
566                 Fail:\r
567                         switch (mode) {\r
568                         case Mode.Match:\r
569                                 return false;\r
570 \r
571                         case Mode.Count: {\r
572                                 if (!fast.IsLazy && fast.IsMinimum)\r
573                                         return true;\r
574 \r
575                                 ref_ptr = fast.Start;\r
576                                 return false;\r
577                         }\r
578                         }\r
579 \r
580                         return false;\r
581                 }\r
582 \r
583                 private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {\r
584                         bool consumed = false;\r
585                         char c = '\0';\r
586                         bool negate;\r
587                         bool ignore;\r
588                         do {\r
589                                 ushort word = program[pc];\r
590                                 OpCode op = (OpCode)(word & 0x00ff);\r
591                                 OpFlags flags = (OpFlags)(word & 0xff00);\r
592 \r
593                                 ++ pc;\r
594 \r
595                                 ignore = (flags & OpFlags.IgnoreCase) != 0;\r
596                                 \r
597                                 // consume character: the direction of an In construct is\r
598                                 // determined by the direction of its first op\r
599 \r
600                                 if (!consumed) {\r
601                                         if ((flags & OpFlags.RightToLeft) != 0) {\r
602                                                 if (ptr <= 0)\r
603                                                         return false;\r
604 \r
605                                                 c = text[-- ptr];\r
606                                         }\r
607                                         else {\r
608                                                 if (ptr >= text_end)\r
609                                                         return false;\r
610 \r
611                                                 c = text[ptr ++];\r
612                                         }\r
613 \r
614                                         if (ignore)\r
615                                                 c = Char.ToLower (c);\r
616 \r
617                                         consumed = true;\r
618                                 }\r
619 \r
620                                 // negate flag\r
621 \r
622                                 negate = (flags & OpFlags.Negate) != 0;\r
623 \r
624                                 // execute op\r
625                                 \r
626                                 switch (op) {\r
627                                 case OpCode.True:\r
628                                         return true;\r
629 \r
630                                 case OpCode.False:\r
631                                         return false;\r
632                                 \r
633                                 case OpCode.Character: {\r
634                                         if (c == (char)program[pc ++])\r
635                                                 return !negate;\r
636                                         break;\r
637                                 }\r
638 \r
639                                 case OpCode.Category: {\r
640                                         if (CategoryUtils.IsCategory ((Category)program[pc ++], c))\r
641                                                 return !negate;\r
642 \r
643                                         break;\r
644                                 }\r
645                                 \r
646                                 case OpCode.Range: {\r
647                                         int lo = (char)program[pc ++];\r
648                                         int hi = (char)program[pc ++];\r
649                                         if (lo <= c && c <= hi)\r
650                                                 return !negate;\r
651                                         break;\r
652                                 }\r
653 \r
654                                 case OpCode.Set: {\r
655                                         int lo = (char)program[pc ++];\r
656                                         int len = (char)program[pc ++];\r
657                                         int bits = pc;\r
658                                         pc += len;\r
659 \r
660                                         int i = (int)c - lo;\r
661                                         if (i < 0 || i >= len << 4)\r
662                                                 break;\r
663 \r
664                                         if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)\r
665                                                 return !negate;\r
666                                         break;\r
667                                 }\r
668                                 }\r
669                         } while (multi);\r
670 \r
671                         return negate;\r
672                 }\r
673 \r
674                 private bool TryMatch (ref int ref_ptr, int pc) {\r
675                         Reset ();\r
676                         \r
677                         int ptr = ref_ptr;\r
678                         marks [groups [0]].Start = ptr;\r
679                         if (Eval (Mode.Match, ref ptr, pc)) {\r
680                                 marks [groups [0]].End = ptr;\r
681                                 ref_ptr = ptr;\r
682                                 return true;\r
683                         }\r
684 \r
685                         return false;\r
686                 }\r
687                 \r
688                 private bool IsPosition (Position pos, int ptr) {\r
689                         switch (pos) {\r
690                         case Position.Start: case Position.StartOfString:\r
691                                 return ptr == 0;\r
692 \r
693                         case Position.StartOfLine:\r
694                                 return ptr == 0 || text[ptr - 1] == '\n';\r
695                                 \r
696                         case Position.StartOfScan:\r
697                                 return ptr == scan_ptr;\r
698                         \r
699                         case Position.End:\r
700                                 return ptr == text_end ||\r
701                                         (ptr == text_end - 1 && text[ptr] == '\n');\r
702 \r
703                         case Position.EndOfLine:\r
704                                 return ptr == text_end || text[ptr] == '\n';\r
705                                 \r
706                         case Position.EndOfString:\r
707                                 return ptr == text_end;\r
708                                 \r
709                         case Position.Boundary:\r
710                                 if (text_end == 0)\r
711                                         return false;\r
712 \r
713                                 if (ptr == 0)\r
714                                         return IsWordChar (text[ptr]);\r
715                                 else if (ptr == text_end)\r
716                                         return IsWordChar (text[ptr - 1]);\r
717                                 else\r
718                                         return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);\r
719 \r
720                         case Position.NonBoundary:\r
721                                 if (text_end == 0)\r
722                                         return false;\r
723 \r
724                                 if (ptr == 0)\r
725                                         return !IsWordChar (text[ptr]);\r
726                                 else if (ptr == text_end)\r
727                                         return !IsWordChar (text[ptr - 1]);\r
728                                 else\r
729                                         return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);\r
730                         \r
731                         default:\r
732                                 return false;\r
733                         }\r
734                 }\r
735 \r
736                 private bool IsWordChar (char c) {\r
737                         return CategoryUtils.IsCategory (Category.Word, c);\r
738                 }\r
739 \r
740                 private string GetString (int pc) {\r
741                         int len = program[pc + 1];\r
742                         int str = pc + 2;\r
743 \r
744                         char[] cs = new char[len];\r
745                         for (int i = 0; i < len; ++ i)\r
746                                 cs[i] = (char)program[str ++];\r
747 \r
748                         return new string (cs);\r
749                 }\r
750 \r
751                 // capture management\r
752 \r
753                 private void Open (int gid, int ptr) {\r
754                         int m = groups [gid];\r
755                         if (m < mark_start || marks [m].IsDefined) {\r
756                                 m = CreateMark (m);\r
757                                 groups [gid] = m;\r
758                         }\r
759 \r
760                         marks [m].Start = ptr;\r
761                 }\r
762 \r
763                 private void Close (int gid, int ptr) {\r
764                         marks [groups [gid]].End = ptr;\r
765                 }\r
766 \r
767                 private void Balance (int gid, int balance_gid, int ptr) {\r
768                         int b = groups [balance_gid];\r
769                         Debug.Assert (marks [b].IsDefined, "Regex", "Balancing group not closed");\r
770 \r
771                         if (gid > 0) {\r
772                                 Open (gid, marks [b].Index + marks [b].Length);\r
773                                 Close (gid, ptr);\r
774                         }\r
775 \r
776                         groups [balance_gid] = marks [b].Previous;\r
777                 }\r
778 \r
779                 private int Checkpoint () {\r
780                         mark_start = mark_end;\r
781                         return mark_start;\r
782                 }\r
783 \r
784                 private void Backtrack (int cp) {\r
785                         Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");\r
786 \r
787                         for (int i = 0; i < groups.Length; ++ i) {\r
788                                 int m = groups [i];\r
789                                 while (cp <= m)\r
790                                         m = marks [m].Previous;\r
791 \r
792                                 groups [i] = m;\r
793                         }\r
794                 }\r
795 \r
796                 private void ResetGroups () {\r
797                         int n = groups.Length;\r
798                         if (marks == null)\r
799                                 marks = new Mark [n * 10];\r
800 \r
801                         for (int i = 0; i < n; ++ i) {\r
802                                 groups [i] = i;\r
803 \r
804                                 marks [i].Start = -1;\r
805                                 marks [i].End = -1;\r
806                                 marks [i].Previous = -1;\r
807                         }\r
808                         \r
809                         mark_start = 0;\r
810                         mark_end = n;\r
811                 }\r
812 \r
813                 private int GetLastDefined (int gid) {\r
814                         int m = groups [gid];\r
815                         while (m >= 0 && !marks [m].IsDefined)\r
816                                 m = marks [m].Previous;\r
817 \r
818                         return m;\r
819                 }\r
820 \r
821                 private int CreateMark (int previous) {\r
822                         if (mark_end == marks.Length) {\r
823                                 Mark [] dest = new Mark [marks.Length * 2];\r
824                                 marks.CopyTo (dest, 0);\r
825                                 marks = dest;\r
826                         }\r
827 \r
828                         int m = mark_end ++;\r
829                         marks [m].Start = marks [m].End = -1;\r
830                         marks [m].Previous = previous;\r
831 \r
832                         return m;\r
833                 }\r
834 \r
835                 private Match GenerateMatch (Regex regex) {\r
836                         int[][] grps = new int[groups.Length][];\r
837                         ArrayList caps = new ArrayList ();\r
838 \r
839                         for (int gid = 0; gid < groups.Length; ++ gid) {\r
840                                 caps.Clear ();\r
841                                 for (int m = groups[gid]; m >= 0; m = marks[m].Previous) {\r
842                                         if (!marks[m].IsDefined)\r
843                                                 continue;\r
844                                         \r
845                                         caps.Add (marks[m].Index);\r
846                                         caps.Add (marks[m].Length);\r
847                                 }\r
848 \r
849                                 grps[gid] = (int[])caps.ToArray (typeof (int));\r
850                         }\r
851 \r
852                         return new Match (regex, this, text, text_end, grps);\r
853                 }\r
854 \r
855                 // interpreter attributes\r
856 \r
857                 private ushort[] program;               // regex program\r
858                 private int program_start;              // first instruction after info block\r
859                 private string text;                    // input text\r
860                 private int text_end;                   // end of input text (last character + 1)\r
861                 private int group_count;                // number of capturing groups\r
862                 private int match_min, match_max;       // match width information\r
863                 private QuickSearch qs;                 // fast substring matcher\r
864 \r
865                 // match state\r
866                 \r
867                 private int scan_ptr;                   // start of scan\r
868 \r
869                 private RepeatContext repeat;           // current repeat context\r
870                 private RepeatContext fast;             // fast repeat context\r
871 \r
872                 private Mark[] marks = null;            // mark stack\r
873                 private int mark_start;                 // start of current checkpoint\r
874                 private int mark_end;                   // end of checkpoint/next free mark\r
875 \r
876                 private int[] groups;                   // current group definitions\r
877 \r
878                 // private classes\r
879 \r
880                 private struct Mark {\r
881                         public int Start, End;\r
882                         public int Previous;\r
883 \r
884                         public bool IsDefined {\r
885                                 get { return Start >= 0 && End >= 0; }\r
886                         }\r
887 \r
888                         public int Index {\r
889                                 get { return Start < End ? Start : End; }\r
890                         }\r
891 \r
892                         public int Length {\r
893                                 get { return Start < End ? End - Start : Start - End; }\r
894                         }\r
895                 }\r
896 \r
897                 private class RepeatContext {\r
898                         public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {\r
899                                 this.previous = previous;\r
900                                 this.min = min;\r
901                                 this.max = max;\r
902                                 this.lazy = lazy;\r
903                                 this.expr_pc = expr_pc;\r
904                                 \r
905                                 this.start = -1;\r
906                                 this.count = 0;\r
907                         }\r
908 \r
909                         public int Count {\r
910                                 get { return count; }\r
911                                 set { count = value; }\r
912                         }\r
913 \r
914                         public int Start {\r
915                                 get { return start; }\r
916                                 set { start = value; }\r
917                         }\r
918 \r
919                         public bool IsMinimum {\r
920                                 get { return min <= count; }\r
921                         }\r
922 \r
923                         public bool IsMaximum {\r
924                                 get { return max <= count; }\r
925                         }\r
926 \r
927                         public bool IsLazy {\r
928                                 get { return lazy; }\r
929                         }\r
930 \r
931                         public int Expression {\r
932                                 get { return expr_pc; }\r
933                         }\r
934 \r
935                         public RepeatContext Previous {\r
936                                 get { return previous; }\r
937                         }\r
938                 \r
939                         private int start;\r
940                         private int min, max;\r
941                         private bool lazy;\r
942                         private int expr_pc;\r
943                         private RepeatContext previous;\r
944 \r
945                         private int count;\r
946                 }\r
947 \r
948                 private enum Mode {\r
949                         Search,\r
950                         Match,\r
951                         Count\r
952                 }\r
953         }\r
954 }\r