* RegexTest.cs: Add some simple tests for debugging/zen
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        syntax.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 // 
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 // 
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32
33 namespace System.Text.RegularExpressions.Syntax {
34         // collection classes
35         
36         class ExpressionCollection : CollectionBase {
37                 public void Add (Expression e) {
38                         List.Add (e);
39                 }
40
41                 public Expression this[int i] {
42                         get { return (Expression)List[i]; }
43                         set { List[i] = value; }
44                 }
45
46                 protected override void OnValidate (object o) {
47                         // allow null elements
48                 }
49         }
50
51         // abstract classes
52         
53         abstract class Expression {
54                 public abstract void Compile (ICompiler cmp, bool reverse);
55                 public abstract void GetWidth (out int min, out int max);
56
57                 public int GetFixedWidth () {
58                         int min, max;
59                         GetWidth (out min, out max);
60
61                         if (min == max)
62                                 return min;
63
64                         return -1;
65                 }
66
67                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {
68                         return new AnchorInfo (this, GetFixedWidth ());
69                 }
70
71                 public virtual bool IsComplex () {
72                         return true;
73                 }
74         }
75
76         // composite expressions
77         
78         abstract class CompositeExpression : Expression {
79                 public CompositeExpression () {
80                         expressions = new ExpressionCollection ();
81                 }
82
83                 protected ExpressionCollection Expressions {
84                         get { return expressions; }
85                 }
86
87                 protected void GetWidth (out int min, out int max, int count) {
88                         min = Int32.MaxValue;
89                         max = 0;
90                         bool empty = true;
91
92                         for (int i = 0; i < count; ++ i) {
93                                 Expression e = Expressions[i];
94                                 if (e == null)
95                                         continue;
96                         
97                                 empty = false;
98                                 int a, b;
99                                 e.GetWidth (out a, out b);
100                                 if (a < min) min = a;
101                                 if (b > max) max = b;
102                         }
103
104                         if (empty)
105                                 min = max = 0;
106                 }
107
108                 private ExpressionCollection expressions;
109         }
110
111         // groups
112         
113         class Group : CompositeExpression {
114                 public Group () {
115                 }
116
117                 public Expression Expression {
118                         get { return Expressions[0]; }
119                         set { Expressions[0] = value; }
120                 }
121
122                 public void AppendExpression (Expression e) {
123                         Expressions.Add (e);
124                 }
125
126                 public override void Compile (ICompiler cmp, bool reverse) {
127                         int count = Expressions.Count;
128                         for (int i = 0; i < count; ++ i) {
129                                 Expression e;
130                                 if (reverse)
131                                         e = Expressions[count - i - 1];
132                                 else
133                                         e = Expressions[i];
134
135                                 e.Compile (cmp, reverse);
136                         }
137                 }
138
139                 public override void GetWidth (out int min, out int max) {
140                         min = 0;
141                         max = 0;
142
143                         foreach (Expression e in Expressions) {
144                                 int a, b;
145                                 e.GetWidth (out a, out b);
146                                 min += a;
147                                 if (max == Int32.MaxValue || b == Int32.MaxValue)
148                                         max = Int32.MaxValue;
149                                 else
150                                         max += b;
151                         }
152                 }
153
154                 public override AnchorInfo GetAnchorInfo (bool reverse) {
155                         int ptr;
156                         int width = GetFixedWidth ();
157
158                         ArrayList infos = new ArrayList ();
159                         IntervalCollection segments = new IntervalCollection ();
160
161                         // accumulate segments
162
163                         ptr = 0;
164                         //foreach (Expression e in Expressions) {
165                         int count = Expressions.Count;
166                         for (int i = 0; i < count; ++ i) {
167                                 Expression e;
168                                 if (reverse)
169                                         e = Expressions [count - i - 1];
170                                 else
171                                         e = Expressions [i];            
172                                 
173                                 AnchorInfo info = e.GetAnchorInfo (reverse);
174                                 infos.Add (info);
175
176                                 if (info.IsPosition)
177                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
178
179                                 if (info.IsSubstring)
180                                         segments.Add (info.GetInterval (ptr));
181
182                                 if (info.IsUnknownWidth)
183                                         break;
184
185                                 ptr += info.Width;
186                         }
187
188                         // normalize and find the longest segment
189
190                         segments.Normalize ();
191
192                         Interval longest = Interval.Empty;
193                         foreach (Interval segment in segments) {
194                                 if (segment.Size > longest.Size)
195                                         longest = segment;
196                         }
197
198                         // now chain the substrings that made this segment together
199
200                         if (!longest.IsEmpty) {
201                                 string str = "";
202                                 ArrayList strs = new ArrayList();
203                                 bool ignore = false;
204
205                                 ptr = 0;
206                                 
207                                 //foreach (AnchorInfo info in infos) {
208                                 for (int i = 0; i < infos.Count; ++ i) {
209                                         AnchorInfo info;
210
211                                         info = (AnchorInfo)infos[i];            
212                                         
213                                         if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
214                                                 //str += info.Substring;        // TODO mark subexpressions
215                                                 strs.Add(info.Substring);
216                                                 ignore |= info.IgnoreCase;
217                                         }
218
219                                         
220                                                 if (info.IsUnknownWidth)                                                        
221                                                         break;
222                                         
223                                                 ptr += info.Width;
224                                 }       
225                                         
226                                 for (int i = 0; i< strs.Count; ++i)
227                                 {
228                                         if (reverse)
229                                                 str += strs [strs.Count - i - 1];
230                                         else
231                                                 str += strs [i];
232                                                         
233                                         
234                                 }
235                         
236
237                                 return new AnchorInfo (this, longest.low, width, str, ignore);
238                         }
239
240                         return new AnchorInfo (this, width);
241                 }
242
243                 public override bool IsComplex () {
244                         bool comp = false;
245                         foreach (Expression e in Expressions) {
246                                 comp |= e.IsComplex ();
247                         }
248
249                         return comp | GetFixedWidth () <= 0;
250                 }
251         }
252
253         class RegularExpression : Group {
254                 public RegularExpression () {
255                         group_count = 0;
256                 }
257
258                 public int GroupCount {
259                         get { return group_count; }
260                         set { group_count = value; }
261                 }
262
263                 public override void Compile (ICompiler cmp, bool reverse) {
264                         // info block
265
266                         int min, max;
267                         GetWidth (out min, out max);
268                         cmp.EmitInfo (group_count, min, max);
269
270                         // anchoring expression
271
272                         AnchorInfo info = GetAnchorInfo (reverse);
273                         //if (reverse)
274                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
275
276                         LinkRef pattern = cmp.NewLink ();
277                         cmp.EmitAnchor (reverse, info.Offset, pattern);
278
279                         if (info.IsPosition)
280                                 cmp.EmitPosition (info.Position);
281                         else if (info.IsSubstring)
282                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
283                         
284                         cmp.EmitTrue ();
285                         
286                         // pattern
287
288                         cmp.ResolveLink (pattern);
289                         base.Compile (cmp, reverse);
290                         cmp.EmitTrue ();
291                 }
292
293                 private int group_count;
294         }
295
296         class CapturingGroup : Group {
297                 public CapturingGroup () {
298                         this.gid = 0;
299                         this.name = null;
300                 }
301
302                 public int Number {
303                         get { return gid; }
304                         set { gid = value; }
305                 }
306
307                 public string Name {
308                         get { return name; }
309                         set { name = value; }
310                 }
311
312                 public bool IsNamed {
313                         get { return name != null; }
314                 }
315
316                 public override void Compile (ICompiler cmp, bool reverse) {
317                         cmp.EmitOpen (gid);
318                         base.Compile (cmp, reverse);
319                         cmp.EmitClose (gid);
320                 }
321
322                 public override bool IsComplex () {
323                         return true;
324                 }
325
326                 private int gid;
327                 private string name;
328         }
329
330         class BalancingGroup : CapturingGroup {
331                 public BalancingGroup () {
332                         this.balance = null;
333                 }
334
335                 public CapturingGroup Balance {
336                         get { return balance; }
337                         set { balance = value; }
338                 }
339
340                 public override void Compile (ICompiler cmp, bool reverse) {
341                         // can't invoke Group.Compile from here :(
342                         // so I'll just repeat the code
343                         
344                         LinkRef tail = cmp.NewLink ();
345
346                         cmp.EmitBalanceStart (this.Number, balance.Number, this.IsNamed,  tail);
347
348                         int count = Expressions.Count;
349                         for (int i = 0; i < count; ++ i) {
350                                 Expression e;
351                                 if (reverse)
352                                         e = Expressions[count - i - 1];
353                                 else
354                                         e = Expressions[i];
355
356                                 e.Compile (cmp, reverse);
357                         }
358
359                         cmp.EmitBalance ();
360                         cmp.ResolveLink(tail);
361                 }
362
363                 private CapturingGroup balance;
364         }
365
366         class NonBacktrackingGroup : Group {
367                 public NonBacktrackingGroup () {
368                 }
369
370                 public override void Compile (ICompiler cmp, bool reverse) {
371                         LinkRef tail = cmp.NewLink ();
372
373                         cmp.EmitSub (tail);
374                         base.Compile (cmp, reverse);
375                         cmp.EmitTrue ();
376                         cmp.ResolveLink (tail);
377                 }
378
379                 public override bool IsComplex () {
380                         return true;
381                 }
382         }
383
384         // repetition
385
386         class Repetition : CompositeExpression {
387                 public Repetition (int min, int max, bool lazy) {
388                         Expressions.Add (null);
389                         
390                         this.min = min;
391                         this.max = max;
392                         this.lazy = lazy;
393                 }
394
395                 public Expression Expression {
396                         get { return Expressions[0]; }
397                         set { Expressions[0] = value; }
398                 }
399
400                 public int Minimum {
401                         get { return min; }
402                         set { min = value; }
403                 }
404
405                 public int Maximum {
406                         get { return max; }
407                         set { max = value; }
408                 }
409
410                 public bool Lazy {
411                         get { return lazy; }
412                         set { lazy = value; }
413                 }
414
415                 public override void Compile (ICompiler cmp, bool reverse) {
416                         if (Expression.IsComplex ()) {
417                                 LinkRef until = cmp.NewLink ();
418                                 
419                                 cmp.EmitRepeat (min, max, lazy, until);
420                                 Expression.Compile (cmp, reverse);
421                                 cmp.EmitUntil (until);
422                         }
423                         else {
424                                 LinkRef tail = cmp.NewLink ();
425
426                                 cmp.EmitFastRepeat (min, max, lazy, tail);
427                                 Expression.Compile (cmp, reverse);
428                                 cmp.EmitTrue ();
429                                 cmp.ResolveLink (tail);
430                         }
431                 }
432
433                 public override void GetWidth (out int min, out int max) {
434                         Expression.GetWidth (out min, out max);
435                         min = min * this.min;
436                         if (max == Int32.MaxValue || this.max == 0xffff)
437                                 max = Int32.MaxValue;
438                         else
439                                 max = max * this.max;
440                 }
441
442                 public override AnchorInfo GetAnchorInfo (bool reverse) {
443                         int width = GetFixedWidth ();
444                         if (Minimum == 0)
445                                 return new AnchorInfo (this, width);
446                         
447                         AnchorInfo info = Expression.GetAnchorInfo (reverse);
448                         if (info.IsPosition)
449                                 return new AnchorInfo (this, info.Offset, width, info.Position);
450                         
451                         if (info.IsSubstring) {
452                                 if (info.IsComplete) {
453                                         string str = "";
454                                         for (int i = 0; i < Minimum; ++ i)
455                                                 str += info.Substring;
456
457                                         return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
458                                 }
459
460                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
461                         }
462
463                         return new AnchorInfo (this, width);
464                 }
465
466                 private int min, max;
467                 private bool lazy;
468         }
469
470         // assertions
471
472         abstract class Assertion : CompositeExpression {
473                 public Assertion () {
474                         Expressions.Add (null);         // true expression
475                         Expressions.Add (null);         // false expression
476                 }
477
478                 public Expression TrueExpression {
479                         get { return Expressions[0]; }
480                         set { Expressions[0] = value; }
481                 }
482
483                 public Expression FalseExpression {
484                         get { return Expressions[1]; }
485                         set { Expressions[1] = value; }
486                 }
487
488                 public override void GetWidth (out int min, out int max) {
489                         GetWidth (out min, out max, 2);
490
491                         if (TrueExpression == null || FalseExpression == null)
492                                 min = 0;
493                 }
494         }
495
496         class CaptureAssertion : Assertion {
497                 public CaptureAssertion () {
498                 }
499
500                 public CapturingGroup CapturingGroup {
501                         get { return group; }
502                         set { group = value; }
503                 }
504
505                 public override void Compile (ICompiler cmp, bool reverse) {
506                         int gid = group.Number;
507                         LinkRef tail = cmp.NewLink ();
508
509                         if (FalseExpression == null) {
510                                 //    IfDefined :1
511                                 //      <yes_exp>
512                                 // 1: <tail>
513                         
514                                 cmp.EmitIfDefined (gid, tail);
515                                 TrueExpression.Compile (cmp, reverse);
516                         }
517                         else {
518                                 //    IfDefined :1
519                                 //      <yes_expr>
520                                 //      Jump :2
521                                 // 1:   <no_expr>
522                                 // 2: <tail>
523                         
524                                 LinkRef false_expr = cmp.NewLink ();
525                                 cmp.EmitIfDefined (gid, false_expr);
526                                 TrueExpression.Compile (cmp, reverse);
527                                 cmp.EmitJump (tail);
528                                 cmp.ResolveLink (false_expr);
529                                 FalseExpression.Compile (cmp, reverse);
530                         }
531
532                         cmp.ResolveLink (tail);
533                 }
534
535                 public override bool IsComplex () {
536                         bool comp = false;
537                         if (TrueExpression != null)
538                                 comp |= TrueExpression.IsComplex ();
539                         if (FalseExpression != null)
540                                 comp |= FalseExpression.IsComplex ();
541
542                         return comp | GetFixedWidth () <= 0;
543                 }
544
545                 private CapturingGroup group;
546         }
547
548         class ExpressionAssertion : Assertion {
549                 public ExpressionAssertion () {
550                         Expressions.Add (null);         // test expression
551                 }
552
553                 public bool Reverse {
554                         get { return reverse; }
555                         set { reverse = value; }
556                 }
557
558                 public bool Negate {
559                         get { return negate; }
560                         set { negate = value; }
561                 }
562
563                 public Expression TestExpression {
564                         get { return Expressions[2]; }
565                         set { Expressions[2] = value; }
566                 }
567
568                 public override void Compile (ICompiler cmp, bool reverse) {
569                         LinkRef true_expr = cmp.NewLink ();
570                         LinkRef false_expr = cmp.NewLink ();
571
572                         // test op: positive / negative
573
574                         if (!negate)
575                                 cmp.EmitTest (true_expr, false_expr);
576                         else
577                                 cmp.EmitTest (false_expr, true_expr);
578                         
579                         // test expression: lookahead / lookbehind
580
581                         TestExpression.Compile (cmp, this.reverse);
582                         cmp.EmitTrue ();
583
584                         // target expressions
585
586                         if (TrueExpression == null) {                   // (?= ...)
587                                 //    Test :1, :2
588                                 //      <test_expr>
589                                 // :2   False
590                                 // :1   <tail>
591                         
592                                 cmp.ResolveLink (false_expr);
593                                 cmp.EmitFalse ();
594                                 cmp.ResolveLink (true_expr);
595                         }
596                         else {
597                                 cmp.ResolveLink (true_expr);
598                                 TrueExpression.Compile (cmp, reverse);
599                                 
600                                 if (FalseExpression == null) {          // (?(...) ...)
601                                         //    Test :1, :2
602                                         //      <test_expr>
603                                         // :1   <yes_expr>
604                                         // :2   <tail>
605
606                                         cmp.ResolveLink (false_expr);
607                                 }
608                                 else {                                  // (?(...) ... | ...)
609                                         //    Test :1, :2
610                                         //      <test_expr>
611                                         // :1   <yes_expr>
612                                         //      Jump :3
613                                         // :2   <no_expr>
614                                         // :3   <tail>
615                                 
616                                         LinkRef tail = cmp.NewLink ();
617                                 
618                                         cmp.EmitJump (tail);
619                                         cmp.ResolveLink (false_expr);
620                                         FalseExpression.Compile (cmp, reverse);
621                                         cmp.ResolveLink (tail);
622                                 }
623                         }
624                 }
625
626                 private bool reverse, negate;
627         }
628
629         // alternation
630
631         class Alternation : CompositeExpression {
632                 public Alternation () {
633                 }
634
635                 public ExpressionCollection Alternatives {
636                         get { return Expressions; }
637                 }
638
639                 public void AddAlternative (Expression e) {
640                         Alternatives.Add (e);
641                 }
642
643                 public override void Compile (ICompiler cmp, bool reverse) {
644                         //                      LinkRef next = cmp.NewLink ();
645                         LinkRef tail = cmp.NewLink ();
646                 
647                         foreach (Expression e in Alternatives) {
648                                 LinkRef next = cmp.NewLink ();
649                                 cmp.EmitBranch (next);
650                                 e.Compile (cmp, reverse);
651                                 cmp.EmitJump (tail);
652                                 cmp.ResolveLink (next);
653                                 cmp.EmitBranchEnd();
654                         }
655
656                         cmp.EmitFalse ();
657                         cmp.ResolveLink (tail);
658                         cmp.EmitAlternationEnd();
659                 }
660
661                 public override void GetWidth (out int min, out int max) {
662                         GetWidth (out min, out max, Alternatives.Count);
663                 }
664
665                 public override bool IsComplex () {
666                         bool comp = false;
667                         foreach (Expression e in Alternatives) {
668                                 comp |= e.IsComplex ();
669                         }
670
671                         return comp | GetFixedWidth () <= 0;
672                 }
673         }
674
675         // terminal expressions
676
677         class Literal : Expression {
678                 public Literal (string str, bool ignore) {
679                         this.str = str;
680                         this.ignore = ignore;
681                 }
682
683                 public string String {
684                         get { return str; }
685                         set { str = value; }
686                 }
687
688                 public bool IgnoreCase {
689                         get { return ignore; }
690                         set { ignore = value; }
691                 }
692
693                 public override void Compile (ICompiler cmp, bool reverse) {
694                         if (str.Length == 0)
695                                 return;
696
697                         if (str.Length == 1)
698                                 cmp.EmitCharacter (str[0], false, ignore, reverse);
699                         else
700                                 cmp.EmitString (str, ignore, reverse);
701                 }
702
703                 public override void GetWidth (out int min, out int max) {
704                         min = max = str.Length;
705                 }
706
707                 public override AnchorInfo GetAnchorInfo (bool reverse) {
708                         return new AnchorInfo (this, 0, str.Length, str, ignore);
709                 }
710
711                 public override bool IsComplex () {
712                         return false;
713                 }
714
715                 private string str;
716                 private bool ignore;
717         }
718
719         class PositionAssertion : Expression {
720                 public PositionAssertion (Position pos) {
721                         this.pos = pos;
722                 }
723
724                 public Position Position {
725                         get { return pos; }
726                         set { pos = value; }
727                 }
728
729                 public override void Compile (ICompiler cmp, bool reverse) {
730                         cmp.EmitPosition (pos);
731                 }
732
733                 public override void GetWidth (out int min, out int max) {
734                         min = max = 0;
735                 }
736
737                 public override bool IsComplex () {
738                         return false;
739                 }
740
741                 public override AnchorInfo GetAnchorInfo (bool revers) {
742                         switch (pos) {
743                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
744                                 return new AnchorInfo (this, 0, 0, pos);
745
746                         default:
747                                 return new AnchorInfo (this, 0);
748                         }
749                 }
750
751                 private Position pos;
752         }
753
754         class Reference : Expression {
755                 public Reference (bool ignore) {
756                         this.ignore = ignore;
757                 }
758
759                 public CapturingGroup CapturingGroup {
760                         get { return group; }
761                         set { group = value; }
762                 }
763
764                 public bool IgnoreCase {
765                         get { return ignore; }
766                         set { ignore = value; }
767                 }
768
769                 public override void Compile (ICompiler cmp, bool reverse) {
770                         cmp.EmitReference (group.Number, ignore, reverse);
771                 }
772
773                 public override void GetWidth (out int min, out int max) {
774                         //group.GetWidth (out min, out max);
775                         // TODO set width to referenced group for non-cyclical references
776                         min = 0;
777                         max = Int32.MaxValue;
778                 }
779
780                 public override bool IsComplex () {
781                         return true;    // FIXME incorporate cyclic check
782                 }
783
784                 private CapturingGroup group;
785                 private bool ignore;
786         }
787
788         class CharacterClass : Expression {
789                 public CharacterClass (bool negate, bool ignore) {
790                         this.negate = negate;
791                         this.ignore = ignore;
792
793                         intervals = new IntervalCollection ();
794
795                         // initialize pos/neg category arrays
796
797                         int cat_size = (int) Category.LastValue;
798                         pos_cats = new BitArray (cat_size);
799                         neg_cats = new BitArray (cat_size);
800                 }
801
802                 public CharacterClass (Category cat, bool negate) : this (false, false) {
803                         this.AddCategory (cat, negate);
804                 }
805
806                 public bool Negate {
807                         get { return negate; }
808                         set { negate = value; }
809                 }
810
811                 public bool IgnoreCase {
812                         get { return ignore; }
813                         set { ignore = value; }
814                 }
815
816                 public void AddCategory (Category cat, bool negate) {
817                         int n = (int)cat;
818                         
819                         if (negate) {
820                                 neg_cats[n] = true;
821                         } else {
822                                 pos_cats[n] = true;
823                         }
824                 }
825
826                 public void AddCharacter (char c) {
827                         // TODO: this is certainly not the most efficient way of doing things 
828                         // TODO: but at least it produces correct results. 
829                         AddRange (c, c);
830                 }
831
832                 public void AddRange (char lo, char hi) {
833                         Interval new_interval = new Interval (lo, hi);
834  
835                         // ignore case is on. we must make sure our interval does not
836                         // use upper case. if it does, we must normalize the upper case
837                         // characters into lower case. 
838                         if (ignore) {
839                                 if (upper_case_characters.Intersects (new_interval)) {
840                                         Interval partial_new_interval;
841  
842                                         if (new_interval.low < upper_case_characters.low) {
843                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case, 
844                                                                                      new_interval.high +  distance_between_upper_and_lower_case);
845                                                 new_interval.high = upper_case_characters.low - 1;
846                                         }
847                                         else {
848                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case, 
849                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);
850                                                 new_interval.low = upper_case_characters.high + 1;
851                                         }
852                                         intervals.Add (partial_new_interval);
853                                 }
854                                 else if (upper_case_characters.Contains (new_interval)) {
855                                         new_interval.high += distance_between_upper_and_lower_case;
856                                         new_interval.low += distance_between_upper_and_lower_case;
857                                 }
858                         }
859                         intervals.Add (new_interval);
860                 }
861
862                 public override void Compile (ICompiler cmp, bool reverse) {
863                         // create the meta-collection
864                         IntervalCollection meta =
865                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
866
867                         // count ops
868                         
869                         int count = meta.Count;
870                         for (int i = 0; i < pos_cats.Length; ++ i) {
871                                 if (pos_cats[i]) ++ count;
872                                 if (neg_cats[i]) ++ count;
873                         }
874
875                         if (count == 0)
876                                 return;
877
878                         // emit in op for |meta| > 1
879
880                         LinkRef tail = cmp.NewLink ();
881                         if (count > 1)
882                                 cmp.EmitIn (tail);
883                                 
884
885                         // emit categories
886
887                         for (int i = 0; i < pos_cats.Length; ++ i) {
888                                 if (pos_cats[i]) {
889                                         if (neg_cats [i])
890                                                 cmp.EmitCategory (Category.Any, negate, reverse);
891                                         else
892                                                 cmp.EmitCategory ((Category)i, negate, reverse);
893                                 } else if (neg_cats[i]) {
894                                         cmp.EmitCategory ((Category)i, !negate, reverse);
895                                 }
896                         }
897
898                         // emit character/range/sets from meta-collection
899
900                         foreach (Interval a in meta) {
901                                 if (a.IsDiscontiguous) {                        // Set
902                                         BitArray bits = new BitArray (a.Size);
903                                         foreach (Interval b in intervals) {
904                                                 if (a.Contains (b)) {
905                                                         for (int i = b.low; i <= b.high; ++ i)
906                                                                 bits[i - a.low] = true;
907                                                 }
908                                         }
909
910                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
911                                 }
912                                 else if (a.IsSingleton)                         // Character
913                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
914                                 else                                            // Range
915                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
916                         }
917                         
918                         // finish up
919
920                         if (count > 1) {
921                                 if (negate)
922                                         cmp.EmitTrue ();
923                                 else
924                                         cmp.EmitFalse ();
925
926                                 cmp.ResolveLink (tail);
927                         }
928                 }
929
930                 public override void GetWidth (out int min, out int max) {
931                         min = max = 1;
932                 }
933
934                 public override bool IsComplex () {
935                         return false;
936                 }
937
938                 // private
939
940                 private static double GetIntervalCost (Interval i) {
941                         // use op length as cost metric (=> optimize for space)
942                 
943                         if (i.IsDiscontiguous)
944                                 return 3 + ((i.Size + 0xf) >> 4);               // Set
945                         else if (i.IsSingleton)
946                                 return 2;                                       // Character
947                         else
948                                 return 3;                                       // Range
949                 }
950
951                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);
952                 private const int distance_between_upper_and_lower_case = 32;
953                 private bool negate, ignore;
954                 private BitArray pos_cats, neg_cats;
955                 private IntervalCollection intervals;
956         }
957
958         class AnchorInfo {
959                 private Expression expr;
960
961                 private Position pos;
962                 private int offset;
963
964                 private string str;
965                 private int width;
966                 private bool ignore;
967
968                 public AnchorInfo (Expression expr, int width) {
969                         this.expr = expr;
970                         this.offset = 0;
971                         this.width = width;
972
973                         this.str = null;
974                         this.ignore = false;
975                         this.pos = Position.Any;
976                 }
977                 
978                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
979                         this.expr = expr;
980                         this.offset = offset;
981                         this.width = width;
982
983                         this.str = ignore ? str.ToLower () : str;
984
985                         this.ignore = ignore;
986                         this.pos = Position.Any;
987                 }
988
989                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {
990                         this.expr = expr;
991                         this.offset = offset;
992                         this.width = width;
993
994                         this.pos = pos;
995
996                         this.str = null;
997                         this.ignore = false;
998                 }
999
1000                 public Expression Expression {
1001                         get { return expr; }
1002                 }
1003
1004                 public int Offset {
1005                         get { return offset; }
1006                 }
1007
1008                 public int Width {
1009                         get { return width; }
1010                 }
1011
1012                 public int Length {
1013                         get { return (str != null) ? str.Length : 0; }
1014                 }
1015
1016                 public bool IsUnknownWidth {
1017                         get { return width < 0; }
1018                 }
1019
1020                 public bool IsComplete {
1021                         get { return Length == Width; }
1022                 }
1023
1024                 public string Substring {
1025                         get { return str; }
1026                 }
1027
1028                 public bool IgnoreCase {
1029                         get { return ignore; }
1030                 }
1031
1032                 public Position Position {
1033                         get { return pos; }
1034                 }
1035
1036                 public bool IsSubstring {
1037                         get { return str != null; }
1038                 }
1039
1040                 public bool IsPosition {
1041                         get { return pos != Position.Any; }
1042                 }
1043
1044                 public Interval GetInterval () {
1045                         return GetInterval (0);
1046                 }
1047
1048                 public Interval GetInterval (int start) {
1049                         if (!IsSubstring)
1050                                 return Interval.Empty;
1051
1052                         return new Interval (start + Offset, start + Offset + Length - 1);
1053                 }
1054         }
1055 }
1056