Distinguish between the potentially ambiguous \nnn and the non-ambiguous \k<...>...
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        syntax.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32
33 namespace System.Text.RegularExpressions.Syntax {
34         // collection classes
35
36         class ExpressionCollection : CollectionBase {
37                 public void Add (Expression e) {
38                         List.Add (e);
39                 }
40
41                 public Expression this[int i] {
42                         get { return (Expression)List[i]; }
43                         set { List[i] = value; }
44                 }
45
46                 protected override void OnValidate (object o) {
47                         // allow null elements
48                 }
49         }
50
51         // abstract classes
52
53         abstract class Expression {
54                 public abstract void Compile (ICompiler cmp, bool reverse);
55                 public abstract void GetWidth (out int min, out int max);
56
57                 public int GetFixedWidth () {
58                         int min, max;
59                         GetWidth (out min, out max);
60
61                         if (min == max)
62                                 return min;
63
64                         return -1;
65                 }
66
67                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {
68                         return new AnchorInfo (this, GetFixedWidth ());
69                 }
70
71                 public abstract bool IsComplex ();
72         }
73
74         // composite expressions
75
76         abstract class CompositeExpression : Expression {
77                 public CompositeExpression () {
78                         expressions = new ExpressionCollection ();
79                 }
80
81                 protected ExpressionCollection Expressions {
82                         get { return expressions; }
83                 }
84
85                 protected void GetWidth (out int min, out int max, int count) {
86                         min = Int32.MaxValue;
87                         max = 0;
88                         bool empty = true;
89
90                         for (int i = 0; i < count; ++ i) {
91                                 Expression e = Expressions[i];
92                                 if (e == null)
93                                         continue;
94
95                                 empty = false;
96                                 int a, b;
97                                 e.GetWidth (out a, out b);
98                                 if (a < min) min = a;
99                                 if (b > max) max = b;
100                         }
101
102                         if (empty)
103                                 min = max = 0;
104                 }
105
106
107                 public override bool IsComplex ()
108                 {
109                         foreach (Expression e in Expressions) {
110                                 if (e.IsComplex ())
111                                         return true;
112                         }
113                         return GetFixedWidth () <= 0;
114                 }
115
116                 private ExpressionCollection expressions;
117         }
118
119         // groups
120
121         class Group : CompositeExpression {
122                 public Group () {
123                 }
124
125                 public Expression Expression {
126                         get { return Expressions[0]; }
127                         set { Expressions[0] = value; }
128                 }
129
130                 public void AppendExpression (Expression e) {
131                         Expressions.Add (e);
132                 }
133
134                 public override void Compile (ICompiler cmp, bool reverse) {
135                         int count = Expressions.Count;
136                         for (int i = 0; i < count; ++ i) {
137                                 Expression e;
138                                 if (reverse)
139                                         e = Expressions[count - i - 1];
140                                 else
141                                         e = Expressions[i];
142
143                                 e.Compile (cmp, reverse);
144                         }
145                 }
146
147                 public override void GetWidth (out int min, out int max) {
148                         min = 0;
149                         max = 0;
150
151                         foreach (Expression e in Expressions) {
152                                 int a, b;
153                                 e.GetWidth (out a, out b);
154                                 min += a;
155                                 if (max == Int32.MaxValue || b == Int32.MaxValue)
156                                         max = Int32.MaxValue;
157                                 else
158                                         max += b;
159                         }
160                 }
161
162                 public override AnchorInfo GetAnchorInfo (bool reverse)
163                 {
164                         int ptr;
165                         int width = GetFixedWidth ();
166
167                         ArrayList infos = new ArrayList ();
168                         IntervalCollection segments = new IntervalCollection ();
169
170                         // accumulate segments
171                         ptr = 0;
172                         int count = Expressions.Count;
173                         for (int i = 0; i < count; ++ i) {
174                                 Expression e;
175                                 if (reverse)
176                                         e = Expressions [count - i - 1];
177                                 else
178                                         e = Expressions [i];
179
180                                 AnchorInfo info = e.GetAnchorInfo (reverse);
181                                 infos.Add (info);
182
183                                 if (info.IsPosition)
184                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
185
186                                 if (info.IsSubstring)
187                                         segments.Add (info.GetInterval (ptr));
188
189                                 if (info.IsUnknownWidth)
190                                         break;
191
192                                 ptr += info.Width;
193                         }
194
195                         // normalize and find the longest segment
196                         segments.Normalize ();
197
198                         Interval longest = Interval.Empty;
199                         foreach (Interval segment in segments) {
200                                 if (segment.Size > longest.Size)
201                                         longest = segment;
202                         }
203
204                         if (longest.IsEmpty)
205                                 return new AnchorInfo (this, width);
206
207                         // now chain the substrings that made this segment together
208                         bool ignore = false;
209                         int n_strings = 0;
210
211                         ptr = 0;
212                         for (int i = 0; i < infos.Count; ++i) {
213                                 AnchorInfo info = (AnchorInfo) infos [i];
214
215                                 if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
216                                         ignore |= info.IgnoreCase;
217                                         infos [n_strings ++] = info;
218                                 }
219
220                                 if (info.IsUnknownWidth)
221                                         break;
222
223                                 ptr += info.Width;
224                         }
225
226                         StringBuilder sb = new StringBuilder ();
227                         for (int i = 0; i < n_strings; ++i) {
228                                 AnchorInfo info;
229                                 if (reverse)
230                                         info = (AnchorInfo) infos [n_strings - i - 1];
231                                 else
232                                         info = (AnchorInfo) infos [i];
233                                 sb.Append (info.Substring);
234                         }
235
236                         if (sb.Length == longest.Size)
237                                 return new AnchorInfo (this, longest.low, width, sb.ToString (), ignore);
238                         // were the string segments overlapping?
239                         if (sb.Length > longest.Size) {
240                                 Console.Error.WriteLine ("overlapping?");
241                                 return new AnchorInfo (this, width);
242                         }
243                         throw new SystemException ("Shouldn't happen");
244                 }
245         }
246
247         class RegularExpression : Group {
248                 public RegularExpression () {
249                         group_count = 0;
250                 }
251
252                 public int GroupCount {
253                         get { return group_count; }
254                         set { group_count = value; }
255                 }
256
257                 public override void Compile (ICompiler cmp, bool reverse) {
258                         // info block
259
260                         int min, max;
261                         GetWidth (out min, out max);
262                         cmp.EmitInfo (group_count, min, max);
263
264                         // anchoring expression
265
266                         AnchorInfo info = GetAnchorInfo (reverse);
267                         //if (reverse)
268                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
269
270                         LinkRef pattern = cmp.NewLink ();
271                         cmp.EmitAnchor (reverse, info.Offset, pattern);
272
273                         if (info.IsPosition)
274                                 cmp.EmitPosition (info.Position);
275                         else if (info.IsSubstring)
276                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
277
278                         cmp.EmitTrue ();
279
280                         // pattern
281
282                         cmp.ResolveLink (pattern);
283                         base.Compile (cmp, reverse);
284                         cmp.EmitTrue ();
285                 }
286
287                 private int group_count;
288         }
289
290         class CapturingGroup : Group {
291                 public CapturingGroup () {
292                         this.gid = 0;
293                         this.name = null;
294                 }
295
296                 public int Number {
297                         get { return gid; }
298                         set { gid = value; }
299                 }
300
301                 public string Name {
302                         get { return name; }
303                         set { name = value; }
304                 }
305
306                 public bool IsNamed {
307                         get { return name != null; }
308                 }
309
310                 public override void Compile (ICompiler cmp, bool reverse) {
311                         cmp.EmitOpen (gid);
312                         base.Compile (cmp, reverse);
313                         cmp.EmitClose (gid);
314                 }
315
316                 public override bool IsComplex () {
317                         return true;
318                 }
319
320                 private int gid;
321                 private string name;
322         }
323
324         class BalancingGroup : CapturingGroup {
325                 public BalancingGroup () {
326                         this.balance = null;
327                 }
328
329                 public CapturingGroup Balance {
330                         get { return balance; }
331                         set { balance = value; }
332                 }
333
334                 public override void Compile (ICompiler cmp, bool reverse) {
335                         // can't invoke Group.Compile from here :(
336                         // so I'll just repeat the code
337
338                         LinkRef tail = cmp.NewLink ();
339
340                         cmp.EmitBalanceStart (this.Number, balance.Number, this.IsNamed,  tail);
341
342                         int count = Expressions.Count;
343                         for (int i = 0; i < count; ++ i) {
344                                 Expression e;
345                                 if (reverse)
346                                         e = Expressions[count - i - 1];
347                                 else
348                                         e = Expressions[i];
349
350                                 e.Compile (cmp, reverse);
351                         }
352
353                         cmp.EmitBalance ();
354                         cmp.ResolveLink(tail);
355                 }
356
357                 private CapturingGroup balance;
358         }
359
360         class NonBacktrackingGroup : Group {
361                 public NonBacktrackingGroup () {
362                 }
363
364                 public override void Compile (ICompiler cmp, bool reverse) {
365                         LinkRef tail = cmp.NewLink ();
366
367                         cmp.EmitSub (tail);
368                         base.Compile (cmp, reverse);
369                         cmp.EmitTrue ();
370                         cmp.ResolveLink (tail);
371                 }
372
373                 public override bool IsComplex () {
374                         return true;
375                 }
376         }
377
378         // repetition
379
380         class Repetition : CompositeExpression {
381                 public Repetition (int min, int max, bool lazy) {
382                         Expressions.Add (null);
383
384                         this.min = min;
385                         this.max = max;
386                         this.lazy = lazy;
387                 }
388
389                 public Expression Expression {
390                         get { return Expressions[0]; }
391                         set { Expressions[0] = value; }
392                 }
393
394                 public int Minimum {
395                         get { return min; }
396                         set { min = value; }
397                 }
398
399                 public int Maximum {
400                         get { return max; }
401                         set { max = value; }
402                 }
403
404                 public bool Lazy {
405                         get { return lazy; }
406                         set { lazy = value; }
407                 }
408
409                 public override void Compile (ICompiler cmp, bool reverse) {
410                         if (Expression.IsComplex ()) {
411                                 LinkRef until = cmp.NewLink ();
412
413                                 cmp.EmitRepeat (min, max, lazy, until);
414                                 Expression.Compile (cmp, reverse);
415                                 cmp.EmitUntil (until);
416                         }
417                         else {
418                                 LinkRef tail = cmp.NewLink ();
419
420                                 cmp.EmitFastRepeat (min, max, lazy, tail);
421                                 Expression.Compile (cmp, reverse);
422                                 cmp.EmitTrue ();
423                                 cmp.ResolveLink (tail);
424                         }
425                 }
426
427                 public override void GetWidth (out int min, out int max) {
428                         Expression.GetWidth (out min, out max);
429                         min = min * this.min;
430                         if (max == Int32.MaxValue || this.max == 0xffff)
431                                 max = Int32.MaxValue;
432                         else
433                                 max = max * this.max;
434                 }
435
436                 public override AnchorInfo GetAnchorInfo (bool reverse) {
437                         int width = GetFixedWidth ();
438                         if (Minimum == 0)
439                                 return new AnchorInfo (this, width);
440
441                         AnchorInfo info = Expression.GetAnchorInfo (reverse);
442                         if (info.IsPosition)
443                                 return new AnchorInfo (this, info.Offset, width, info.Position);
444
445                         if (info.IsSubstring) {
446                                 if (info.IsComplete) {
447                                         // Minimum > 0
448                                         string str = info.Substring;
449                                         StringBuilder sb = new StringBuilder (str);
450                                         for (int i = 1; i < Minimum; ++ i)
451                                                 sb.Append (str);
452
453                                         return new AnchorInfo (this, 0, width, sb.ToString (), info.IgnoreCase);
454                                 }
455
456                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
457                         }
458
459                         return new AnchorInfo (this, width);
460                 }
461
462                 private int min, max;
463                 private bool lazy;
464         }
465
466         // assertions
467
468         abstract class Assertion : CompositeExpression {
469                 public Assertion () {
470                         Expressions.Add (null);         // true expression
471                         Expressions.Add (null);         // false expression
472                 }
473
474                 public Expression TrueExpression {
475                         get { return Expressions[0]; }
476                         set { Expressions[0] = value; }
477                 }
478
479                 public Expression FalseExpression {
480                         get { return Expressions[1]; }
481                         set { Expressions[1] = value; }
482                 }
483
484                 public override void GetWidth (out int min, out int max) {
485                         GetWidth (out min, out max, 2);
486
487                         if (TrueExpression == null || FalseExpression == null)
488                                 min = 0;
489                 }
490         }
491
492         class CaptureAssertion : Assertion {
493                 public CaptureAssertion (Literal l) {
494                         literal = l;
495                 }
496
497                 public CapturingGroup CapturingGroup {
498                         get { return group; }
499                         set { group = value; }
500                 }
501
502                 public override void Compile (ICompiler cmp, bool reverse) {
503                         if (group == null) {
504                                 Alternate.Compile (cmp, reverse);
505                                 return;
506                         }
507
508                         int gid = group.Number;
509                         LinkRef tail = cmp.NewLink ();
510
511                         if (FalseExpression == null) {
512                                 //    IfDefined :1
513                                 //      <yes_exp>
514                                 // 1: <tail>
515
516                                 cmp.EmitIfDefined (gid, tail);
517                                 TrueExpression.Compile (cmp, reverse);
518                         }
519                         else {
520                                 //    IfDefined :1
521                                 //      <yes_expr>
522                                 //      Jump :2
523                                 // 1:   <no_expr>
524                                 // 2: <tail>
525
526                                 LinkRef false_expr = cmp.NewLink ();
527                                 cmp.EmitIfDefined (gid, false_expr);
528                                 TrueExpression.Compile (cmp, reverse);
529                                 cmp.EmitJump (tail);
530                                 cmp.ResolveLink (false_expr);
531                                 FalseExpression.Compile (cmp, reverse);
532                         }
533
534                         cmp.ResolveLink (tail);
535                 }
536
537                 public override bool IsComplex () {
538                         if (group == null)
539                                 return Alternate.IsComplex ();
540                         if (TrueExpression != null && TrueExpression.IsComplex ())
541                                 return true;
542                         if (FalseExpression != null && FalseExpression.IsComplex ())
543                                 return true;
544                         return GetFixedWidth () <= 0;
545                 }
546
547                 ExpressionAssertion Alternate {
548                         get {
549                                 if (alternate == null) {
550                                         alternate = new ExpressionAssertion ();
551                                         alternate.TrueExpression = TrueExpression;
552                                         alternate.FalseExpression = FalseExpression;
553                                         alternate.TestExpression = literal;
554                                 }
555                                 return alternate;
556                         }
557                 }
558
559                 private ExpressionAssertion alternate;
560                 private CapturingGroup group;
561                 private Literal literal;
562         }
563
564         class ExpressionAssertion : Assertion {
565                 public ExpressionAssertion () {
566                         Expressions.Add (null);         // test expression
567                 }
568
569                 public bool Reverse {
570                         get { return reverse; }
571                         set { reverse = value; }
572                 }
573
574                 public bool Negate {
575                         get { return negate; }
576                         set { negate = value; }
577                 }
578
579                 public Expression TestExpression {
580                         get { return Expressions[2]; }
581                         set { Expressions[2] = value; }
582                 }
583
584                 public override void Compile (ICompiler cmp, bool reverse) {
585                         LinkRef true_expr = cmp.NewLink ();
586                         LinkRef false_expr = cmp.NewLink ();
587
588                         // test op: positive / negative
589
590                         if (!negate)
591                                 cmp.EmitTest (true_expr, false_expr);
592                         else
593                                 cmp.EmitTest (false_expr, true_expr);
594
595                         // test expression: lookahead / lookbehind
596
597                         TestExpression.Compile (cmp, this.reverse);
598                         cmp.EmitTrue ();
599
600                         // target expressions
601
602                         if (TrueExpression == null) {                   // (?= ...)
603                                 //    Test :1, :2
604                                 //      <test_expr>
605                                 // :2   False
606                                 // :1   <tail>
607
608                                 cmp.ResolveLink (false_expr);
609                                 cmp.EmitFalse ();
610                                 cmp.ResolveLink (true_expr);
611                         }
612                         else {
613                                 cmp.ResolveLink (true_expr);
614                                 TrueExpression.Compile (cmp, reverse);
615
616                                 if (FalseExpression == null) {          // (?(...) ...)
617                                         //    Test :1, :2
618                                         //      <test_expr>
619                                         // :1   <yes_expr>
620                                         // :2   <tail>
621
622                                         cmp.ResolveLink (false_expr);
623                                 }
624                                 else {                                  // (?(...) ... | ...)
625                                         //    Test :1, :2
626                                         //      <test_expr>
627                                         // :1   <yes_expr>
628                                         //      Jump :3
629                                         // :2   <no_expr>
630                                         // :3   <tail>
631
632                                         LinkRef tail = cmp.NewLink ();
633
634                                         cmp.EmitJump (tail);
635                                         cmp.ResolveLink (false_expr);
636                                         FalseExpression.Compile (cmp, reverse);
637                                         cmp.ResolveLink (tail);
638                                 }
639                         }
640                 }
641
642                 public override bool IsComplex ()
643                 {
644                         return true;
645                 }
646
647                 private bool reverse, negate;
648         }
649
650         // alternation
651
652         class Alternation : CompositeExpression {
653                 public Alternation () {
654                 }
655
656                 public ExpressionCollection Alternatives {
657                         get { return Expressions; }
658                 }
659
660                 public void AddAlternative (Expression e) {
661                         Alternatives.Add (e);
662                 }
663
664                 public override void Compile (ICompiler cmp, bool reverse) {
665                         //                      LinkRef next = cmp.NewLink ();
666                         LinkRef tail = cmp.NewLink ();
667
668                         foreach (Expression e in Alternatives) {
669                                 LinkRef next = cmp.NewLink ();
670                                 cmp.EmitBranch (next);
671                                 e.Compile (cmp, reverse);
672                                 cmp.EmitJump (tail);
673                                 cmp.ResolveLink (next);
674                                 cmp.EmitBranchEnd();
675                         }
676
677                         cmp.EmitFalse ();
678                         cmp.ResolveLink (tail);
679                         cmp.EmitAlternationEnd();
680                 }
681
682                 public override void GetWidth (out int min, out int max) {
683                         GetWidth (out min, out max, Alternatives.Count);
684                 }
685         }
686
687         // terminal expressions
688
689         class Literal : Expression {
690                 public Literal (string str, bool ignore) {
691                         this.str = str;
692                         this.ignore = ignore;
693                 }
694
695                 public string String {
696                         get { return str; }
697                         set { str = value; }
698                 }
699
700                 public bool IgnoreCase {
701                         get { return ignore; }
702                         set { ignore = value; }
703                 }
704
705                 public override void Compile (ICompiler cmp, bool reverse) {
706                         if (str.Length == 0)
707                                 return;
708
709                         if (str.Length == 1)
710                                 cmp.EmitCharacter (str[0], false, ignore, reverse);
711                         else
712                                 cmp.EmitString (str, ignore, reverse);
713                 }
714
715                 public override void GetWidth (out int min, out int max) {
716                         min = max = str.Length;
717                 }
718
719                 public override AnchorInfo GetAnchorInfo (bool reverse) {
720                         return new AnchorInfo (this, 0, str.Length, str, ignore);
721                 }
722
723                 public override bool IsComplex () {
724                         return false;
725                 }
726
727                 private string str;
728                 private bool ignore;
729         }
730
731         class PositionAssertion : Expression {
732                 public PositionAssertion (Position pos) {
733                         this.pos = pos;
734                 }
735
736                 public Position Position {
737                         get { return pos; }
738                         set { pos = value; }
739                 }
740
741                 public override void Compile (ICompiler cmp, bool reverse) {
742                         cmp.EmitPosition (pos);
743                 }
744
745                 public override void GetWidth (out int min, out int max) {
746                         min = max = 0;
747                 }
748
749                 public override bool IsComplex () {
750                         return false;
751                 }
752
753                 public override AnchorInfo GetAnchorInfo (bool revers) {
754                         switch (pos) {
755                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
756                                 return new AnchorInfo (this, 0, 0, pos);
757
758                         default:
759                                 return new AnchorInfo (this, 0);
760                         }
761                 }
762
763                 private Position pos;
764         }
765
766         class Reference : Expression {
767                 public Reference (bool ignore) {
768                         this.ignore = ignore;
769                 }
770
771                 public CapturingGroup CapturingGroup {
772                         get { return group; }
773                         set { group = value; }
774                 }
775
776                 public bool IgnoreCase {
777                         get { return ignore; }
778                         set { ignore = value; }
779                 }
780
781                 public override void Compile (ICompiler cmp, bool reverse) {
782                         cmp.EmitReference (group.Number, ignore, reverse);
783                 }
784
785                 public override void GetWidth (out int min, out int max) {
786                         //group.GetWidth (out min, out max);
787                         // TODO set width to referenced group for non-cyclical references
788                         min = 0;
789                         max = Int32.MaxValue;
790                 }
791
792                 public override bool IsComplex () {
793                         return true;    // FIXME incorporate cyclic check
794                 }
795
796                 private CapturingGroup group;
797                 private bool ignore;
798         }
799
800         class BackslashNumber : Reference {
801                 string literal;
802                 bool ecma;
803
804                 public BackslashNumber (bool ignore, bool ecma)
805                         : base (ignore)
806                 {
807                         this.ecma = ecma;
808                 }
809         }
810
811         class CharacterClass : Expression {
812                 public CharacterClass (bool negate, bool ignore) {
813                         this.negate = negate;
814                         this.ignore = ignore;
815
816                         intervals = new IntervalCollection ();
817
818                         // initialize pos/neg category arrays
819
820                         int cat_size = (int) Category.LastValue;
821                         pos_cats = new BitArray (cat_size);
822                         neg_cats = new BitArray (cat_size);
823                 }
824
825                 public CharacterClass (Category cat, bool negate) : this (false, false) {
826                         this.AddCategory (cat, negate);
827                 }
828
829                 public bool Negate {
830                         get { return negate; }
831                         set { negate = value; }
832                 }
833
834                 public bool IgnoreCase {
835                         get { return ignore; }
836                         set { ignore = value; }
837                 }
838
839                 public void AddCategory (Category cat, bool negate) {
840                         int n = (int)cat;
841
842                         if (negate) {
843                                 neg_cats[n] = true;
844                         } else {
845                                 pos_cats[n] = true;
846                         }
847                 }
848
849                 public void AddCharacter (char c) {
850                         // TODO: this is certainly not the most efficient way of doing things
851                         // TODO: but at least it produces correct results.
852                         AddRange (c, c);
853                 }
854
855                 public void AddRange (char lo, char hi) {
856                         Interval new_interval = new Interval (lo, hi);
857
858                         // ignore case is on. we must make sure our interval does not
859                         // use upper case. if it does, we must normalize the upper case
860                         // characters into lower case.
861                         if (ignore) {
862                                 if (upper_case_characters.Intersects (new_interval)) {
863                                         Interval partial_new_interval;
864
865                                         if (new_interval.low < upper_case_characters.low) {
866                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
867                                                                                      new_interval.high +  distance_between_upper_and_lower_case);
868                                                 new_interval.high = upper_case_characters.low - 1;
869                                         }
870                                         else {
871                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
872                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);
873                                                 new_interval.low = upper_case_characters.high + 1;
874                                         }
875                                         intervals.Add (partial_new_interval);
876                                 }
877                                 else if (upper_case_characters.Contains (new_interval)) {
878                                         new_interval.high += distance_between_upper_and_lower_case;
879                                         new_interval.low += distance_between_upper_and_lower_case;
880                                 }
881                         }
882                         intervals.Add (new_interval);
883                 }
884
885                 public override void Compile (ICompiler cmp, bool reverse) {
886                         // create the meta-collection
887                         IntervalCollection meta =
888                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
889
890                         // count ops
891                         int count = meta.Count;
892                         for (int i = 0; i < pos_cats.Length; ++ i) {
893                                 if (pos_cats[i] || neg_cats [i])
894                                         ++ count;
895                         }
896
897                         if (count == 0)
898                                 return;
899
900                         // emit in op for |meta| > 1
901                         LinkRef tail = cmp.NewLink ();
902                         if (count > 1)
903                                 cmp.EmitIn (tail);
904
905                         // emit character/range/sets from meta-collection
906                         // we emit these first so that any 'ignore' flags will be noticed by the evaluator
907                         foreach (Interval a in meta) {
908                                 if (a.IsDiscontiguous) {                        // Set
909                                         BitArray bits = new BitArray (a.Size);
910                                         foreach (Interval b in intervals) {
911                                                 if (a.Contains (b)) {
912                                                         for (int i = b.low; i <= b.high; ++ i)
913                                                                 bits[i - a.low] = true;
914                                                 }
915                                         }
916
917                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
918                                 }
919                                 else if (a.IsSingleton)                         // Character
920                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
921                                 else                                            // Range
922                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
923                         }
924
925                         // emit categories
926                         for (int i = 0; i < pos_cats.Length; ++ i) {
927                                 if (pos_cats[i]) {
928                                         if (neg_cats [i])
929                                                 cmp.EmitCategory (Category.AnySingleline, negate, reverse);
930                                         else
931                                                 cmp.EmitCategory ((Category)i, negate, reverse);
932                                 } else if (neg_cats[i]) {
933                                         cmp.EmitNotCategory ((Category)i, negate, reverse);
934                                 }
935                         }
936
937                         // finish up
938                         if (count > 1) {
939                                 if (negate)
940                                         cmp.EmitTrue ();
941                                 else
942                                         cmp.EmitFalse ();
943
944                                 cmp.ResolveLink (tail);
945                         }
946                 }
947
948                 public override void GetWidth (out int min, out int max) {
949                         min = max = 1;
950                 }
951
952                 public override bool IsComplex () {
953                         return false;
954                 }
955
956                 // private
957
958                 private static double GetIntervalCost (Interval i) {
959                         // use op length as cost metric (=> optimize for space)
960
961                         if (i.IsDiscontiguous)
962                                 return 3 + ((i.Size + 0xf) >> 4);               // Set
963                         else if (i.IsSingleton)
964                                 return 2;                                       // Character
965                         else
966                                 return 3;                                       // Range
967                 }
968
969                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);
970                 private const int distance_between_upper_and_lower_case = 32;
971                 private bool negate, ignore;
972                 private BitArray pos_cats, neg_cats;
973                 private IntervalCollection intervals;
974         }
975
976         class AnchorInfo {
977                 private Expression expr;
978
979                 private Position pos;
980                 private int offset;
981
982                 private string str;
983                 private int width;
984                 private bool ignore;
985
986                 public AnchorInfo (Expression expr, int width) {
987                         this.expr = expr;
988                         this.offset = 0;
989                         this.width = width;
990
991                         this.str = null;
992                         this.ignore = false;
993                         this.pos = Position.Any;
994                 }
995
996                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
997                         this.expr = expr;
998                         this.offset = offset;
999                         this.width = width;
1000
1001                         this.str = ignore ? str.ToLower () : str;
1002
1003                         this.ignore = ignore;
1004                         this.pos = Position.Any;
1005                 }
1006
1007                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {
1008                         this.expr = expr;
1009                         this.offset = offset;
1010                         this.width = width;
1011
1012                         this.pos = pos;
1013
1014                         this.str = null;
1015                         this.ignore = false;
1016                 }
1017
1018                 public Expression Expression {
1019                         get { return expr; }
1020                 }
1021
1022                 public int Offset {
1023                         get { return offset; }
1024                 }
1025
1026                 public int Width {
1027                         get { return width; }
1028                 }
1029
1030                 public int Length {
1031                         get { return (str != null) ? str.Length : 0; }
1032                 }
1033
1034                 public bool IsUnknownWidth {
1035                         get { return width < 0; }
1036                 }
1037
1038                 public bool IsComplete {
1039                         get { return Length == Width; }
1040                 }
1041
1042                 public string Substring {
1043                         get { return str; }
1044                 }
1045
1046                 public bool IgnoreCase {
1047                         get { return ignore; }
1048                 }
1049
1050                 public Position Position {
1051                         get { return pos; }
1052                 }
1053
1054                 public bool IsSubstring {
1055                         get { return str != null; }
1056                 }
1057
1058                 public bool IsPosition {
1059                         get { return pos != Position.Any; }
1060                 }
1061
1062                 public Interval GetInterval () {
1063                         return GetInterval (0);
1064                 }
1065
1066                 public Interval GetInterval (int start) {
1067                         if (!IsSubstring)
1068                                 return Interval.Empty;
1069
1070                         return new Interval (start + Offset, start + Offset + Length - 1);
1071                 }
1072         }
1073 }