2004-05-27 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        syntax.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 using System;
10 using System.Collections;
11
12 namespace System.Text.RegularExpressions.Syntax {
13         // collection classes
14         
15         class ExpressionCollection : CollectionBase {
16                 public void Add (Expression e) {
17                         List.Add (e);
18                 }
19
20                 public Expression this[int i] {
21                         get { return (Expression)List[i]; }
22                         set { List[i] = value; }
23                 }
24
25                 protected override void OnValidate (object o) {
26                         // allow null elements
27                 }
28         }
29
30         // abstract classes
31         
32         abstract class Expression {
33                 public abstract void Compile (ICompiler cmp, bool reverse);
34                 public abstract void GetWidth (out int min, out int max);
35
36                 public int GetFixedWidth () {
37                         int min, max;
38                         GetWidth (out min, out max);
39
40                         if (min == max)
41                                 return min;
42
43                         return -1;
44                 }
45
46                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {
47                         return new AnchorInfo (this, GetFixedWidth ());
48                 }
49
50                 public virtual bool IsComplex () {
51                         return true;
52                 }
53         }
54
55         // composite expressions
56         
57         abstract class CompositeExpression : Expression {
58                 public CompositeExpression () {
59                         expressions = new ExpressionCollection ();
60                 }
61
62                 protected ExpressionCollection Expressions {
63                         get { return expressions; }
64                 }
65
66                 protected void GetWidth (out int min, out int max, int count) {
67                         min = Int32.MaxValue;
68                         max = 0;
69                         bool empty = true;
70
71                         for (int i = 0; i < count; ++ i) {
72                                 Expression e = Expressions[i];
73                                 if (e == null)
74                                         continue;
75                         
76                                 empty = false;
77                                 int a, b;
78                                 e.GetWidth (out a, out b);
79                                 if (a < min) min = a;
80                                 if (b > max) max = b;
81                         }
82
83                         if (empty)
84                                 min = max = 0;
85                 }
86
87                 private ExpressionCollection expressions;
88         }
89
90         // groups
91         
92         class Group : CompositeExpression {
93                 public Group () {
94                 }
95
96                 public Expression Expression {
97                         get { return Expressions[0]; }
98                         set { Expressions[0] = value; }
99                 }
100
101                 public void AppendExpression (Expression e) {
102                         Expressions.Add (e);
103                 }
104
105                 public override void Compile (ICompiler cmp, bool reverse) {
106                         int count = Expressions.Count;
107                         for (int i = 0; i < count; ++ i) {
108                                 Expression e;
109                                 if (reverse)
110                                         e = Expressions[count - i - 1];
111                                 else
112                                         e = Expressions[i];
113
114                                 e.Compile (cmp, reverse);
115                         }
116                 }
117
118                 public override void GetWidth (out int min, out int max) {
119                         min = 0;
120                         max = 0;
121
122                         foreach (Expression e in Expressions) {
123                                 int a, b;
124                                 e.GetWidth (out a, out b);
125                                 min += a;
126                                 if (max == Int32.MaxValue || b == Int32.MaxValue)
127                                         max = Int32.MaxValue;
128                                 else
129                                         max += b;
130                         }
131                 }
132
133                 public override AnchorInfo GetAnchorInfo (bool reverse) {
134                         int ptr;
135                         int width = GetFixedWidth ();
136
137                         ArrayList infos = new ArrayList ();
138                         IntervalCollection segments = new IntervalCollection ();
139
140                         // accumulate segments
141
142                         ptr = 0;
143                         //foreach (Expression e in Expressions) {
144                         int count = Expressions.Count;
145                         for (int i = 0; i < count; ++ i) {
146                                 Expression e;
147                                 if (reverse)
148                                         e = Expressions [count - i - 1];
149                                 else
150                                         e = Expressions [i];            
151                                 
152                                 AnchorInfo info = e.GetAnchorInfo (reverse);
153                                 infos.Add (info);
154
155                                 if (info.IsPosition)
156                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
157
158                                 if (info.IsSubstring)
159                                         segments.Add (info.GetInterval (ptr));
160
161                                 if (info.IsUnknownWidth)
162                                         break;
163
164                                 ptr += info.Width;
165                         }
166
167                         // normalize and find the longest segment
168
169                         segments.Normalize ();
170
171                         Interval longest = Interval.Empty;
172                         foreach (Interval segment in segments) {
173                                 if (segment.Size > longest.Size)
174                                         longest = segment;
175                         }
176
177                         // now chain the substrings that made this segment together
178
179                         if (!longest.IsEmpty) {
180                                 string str = "";
181                                 ArrayList strs = new ArrayList();
182                                 bool ignore = false;
183
184                                 ptr = 0;
185                                 
186                                 //foreach (AnchorInfo info in infos) {
187                                 for (int i = 0; i < infos.Count; ++ i) {
188                                         AnchorInfo info;
189
190                                         info = (AnchorInfo)infos[i];            
191                                         
192                                         if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
193                                                 //str += info.Substring;        // TODO mark subexpressions
194                                                 strs.Add(info.Substring);
195                                                 ignore |= info.IgnoreCase;
196                                         }
197
198                                         
199                                                 if (info.IsUnknownWidth)                                                        
200                                                         break;
201                                         
202                                                 ptr += info.Width;
203                                 }       
204                                         
205                                 for (int i = 0; i< strs.Count; ++i)
206                                 {
207                                         if (reverse)
208                                                 str += strs [strs.Count - i - 1];
209                                         else
210                                                 str += strs [i];
211                                                         
212                                         
213                                 }
214                         
215
216                                 return new AnchorInfo (this, longest.low, width, str, ignore);
217                         }
218
219                         return new AnchorInfo (this, width);
220                 }
221
222                 public override bool IsComplex () {
223                         bool comp = false;
224                         foreach (Expression e in Expressions) {
225                                 comp |= e.IsComplex ();
226                         }
227
228                         return comp | GetFixedWidth () <= 0;
229                 }
230         }
231
232         class RegularExpression : Group {
233                 public RegularExpression () {
234                         group_count = 0;
235                 }
236
237                 public int GroupCount {
238                         get { return group_count; }
239                         set { group_count = value; }
240                 }
241
242                 public override void Compile (ICompiler cmp, bool reverse) {
243                         // info block
244
245                         int min, max;
246                         GetWidth (out min, out max);
247                         cmp.EmitInfo (group_count, min, max);
248
249                         // anchoring expression
250
251                         AnchorInfo info = GetAnchorInfo (reverse);
252                         //if (reverse)
253                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
254
255                         LinkRef pattern = cmp.NewLink ();
256                         cmp.EmitAnchor (reverse, info.Offset, pattern);
257
258                         if (info.IsPosition)
259                                 cmp.EmitPosition (info.Position);
260                         else if (info.IsSubstring)
261                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
262                         
263                         cmp.EmitTrue ();
264                         
265                         // pattern
266
267                         cmp.ResolveLink (pattern);
268                         base.Compile (cmp, reverse);
269                         cmp.EmitTrue ();
270                 }
271
272                 private int group_count;
273         }
274
275         class CapturingGroup : Group {
276                 public CapturingGroup () {
277                         this.gid = 0;
278                         this.name = null;
279                 }
280
281                 public int Number {
282                         get { return gid; }
283                         set { gid = value; }
284                 }
285
286                 public string Name {
287                         get { return name; }
288                         set { name = value; }
289                 }
290
291                 public bool IsNamed {
292                         get { return name != null; }
293                 }
294
295                 public override void Compile (ICompiler cmp, bool reverse) {
296                         cmp.EmitOpen (gid);
297                         base.Compile (cmp, reverse);
298                         cmp.EmitClose (gid);
299                 }
300
301                 public override bool IsComplex () {
302                         return true;
303                 }
304
305                 private int gid;
306                 private string name;
307         }
308
309         class BalancingGroup : CapturingGroup {
310                 public BalancingGroup () {
311                         this.balance = null;
312                 }
313
314                 public CapturingGroup Balance {
315                         get { return balance; }
316                         set { balance = value; }
317                 }
318
319                 public override void Compile (ICompiler cmp, bool reverse) {
320                         // can't invoke Group.Compile from here :(
321                         // so I'll just repeat the code
322                         
323                         LinkRef tail = cmp.NewLink ();
324
325                         cmp.EmitBalanceStart (this.Number, balance.Number, this.IsNamed,  tail);
326
327                         int count = Expressions.Count;
328                         for (int i = 0; i < count; ++ i) {
329                                 Expression e;
330                                 if (reverse)
331                                         e = Expressions[count - i - 1];
332                                 else
333                                         e = Expressions[i];
334
335                                 e.Compile (cmp, reverse);
336                         }
337
338                         cmp.EmitBalance ();
339                         cmp.ResolveLink(tail);
340                 }
341
342                 private CapturingGroup balance;
343         }
344
345         class NonBacktrackingGroup : Group {
346                 public NonBacktrackingGroup () {
347                 }
348
349                 public override void Compile (ICompiler cmp, bool reverse) {
350                         LinkRef tail = cmp.NewLink ();
351
352                         cmp.EmitSub (tail);
353                         base.Compile (cmp, reverse);
354                         cmp.EmitTrue ();
355                         cmp.ResolveLink (tail);
356                 }
357
358                 public override bool IsComplex () {
359                         return true;
360                 }
361         }
362
363         // repetition
364
365         class Repetition : CompositeExpression {
366                 public Repetition (int min, int max, bool lazy) {
367                         Expressions.Add (null);
368                         
369                         this.min = min;
370                         this.max = max;
371                         this.lazy = lazy;
372                 }
373
374                 public Expression Expression {
375                         get { return Expressions[0]; }
376                         set { Expressions[0] = value; }
377                 }
378
379                 public int Minimum {
380                         get { return min; }
381                         set { min = value; }
382                 }
383
384                 public int Maximum {
385                         get { return max; }
386                         set { max = value; }
387                 }
388
389                 public bool Lazy {
390                         get { return lazy; }
391                         set { lazy = value; }
392                 }
393
394                 public override void Compile (ICompiler cmp, bool reverse) {
395                         if (Expression.IsComplex ()) {
396                                 LinkRef until = cmp.NewLink ();
397                                 
398                                 cmp.EmitRepeat (min, max, lazy, until);
399                                 Expression.Compile (cmp, reverse);
400                                 cmp.EmitUntil (until);
401                         }
402                         else {
403                                 LinkRef tail = cmp.NewLink ();
404
405                                 cmp.EmitFastRepeat (min, max, lazy, tail);
406                                 Expression.Compile (cmp, reverse);
407                                 cmp.EmitTrue ();
408                                 cmp.ResolveLink (tail);
409                         }
410                 }
411
412                 public override void GetWidth (out int min, out int max) {
413                         Expression.GetWidth (out min, out max);
414                         min = min * this.min;
415                         if (max == Int32.MaxValue || this.max == 0xffff)
416                                 max = Int32.MaxValue;
417                         else
418                                 max = max * this.max;
419                 }
420
421                 public override AnchorInfo GetAnchorInfo (bool reverse) {
422                         int width = GetFixedWidth ();
423                         if (Minimum == 0)
424                                 return new AnchorInfo (this, width);
425                         
426                         AnchorInfo info = Expression.GetAnchorInfo (reverse);
427                         if (info.IsPosition)
428                                 return new AnchorInfo (this, info.Offset, width, info.Position);
429                         
430                         if (info.IsSubstring) {
431                                 if (info.IsComplete) {
432                                         string str = "";
433                                         for (int i = 0; i < Minimum; ++ i)
434                                                 str += info.Substring;
435
436                                         return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
437                                 }
438
439                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
440                         }
441
442                         return new AnchorInfo (this, width);
443                 }
444
445                 private int min, max;
446                 private bool lazy;
447         }
448
449         // assertions
450
451         abstract class Assertion : CompositeExpression {
452                 public Assertion () {
453                         Expressions.Add (null);         // true expression
454                         Expressions.Add (null);         // false expression
455                 }
456
457                 public Expression TrueExpression {
458                         get { return Expressions[0]; }
459                         set { Expressions[0] = value; }
460                 }
461
462                 public Expression FalseExpression {
463                         get { return Expressions[1]; }
464                         set { Expressions[1] = value; }
465                 }
466
467                 public override void GetWidth (out int min, out int max) {
468                         GetWidth (out min, out max, 2);
469
470                         if (TrueExpression == null || FalseExpression == null)
471                                 min = 0;
472                 }
473         }
474
475         class CaptureAssertion : Assertion {
476                 public CaptureAssertion () {
477                 }
478
479                 public CapturingGroup CapturingGroup {
480                         get { return group; }
481                         set { group = value; }
482                 }
483
484                 public override void Compile (ICompiler cmp, bool reverse) {
485                         int gid = group.Number;
486                         LinkRef tail = cmp.NewLink ();
487
488                         if (FalseExpression == null) {
489                                 //    IfDefined :1
490                                 //      <yes_exp>
491                                 // 1: <tail>
492                         
493                                 cmp.EmitIfDefined (gid, tail);
494                                 TrueExpression.Compile (cmp, reverse);
495                         }
496                         else {
497                                 //    IfDefined :1
498                                 //      <yes_expr>
499                                 //      Jump :2
500                                 // 1:   <no_expr>
501                                 // 2: <tail>
502                         
503                                 LinkRef false_expr = cmp.NewLink ();
504                                 cmp.EmitIfDefined (gid, false_expr);
505                                 TrueExpression.Compile (cmp, reverse);
506                                 cmp.EmitJump (tail);
507                                 cmp.ResolveLink (false_expr);
508                                 FalseExpression.Compile (cmp, reverse);
509                         }
510
511                         cmp.ResolveLink (tail);
512                 }
513
514                 public override bool IsComplex () {
515                         bool comp = false;
516                         if (TrueExpression != null)
517                                 comp |= TrueExpression.IsComplex ();
518                         if (FalseExpression != null)
519                                 comp |= FalseExpression.IsComplex ();
520
521                         return comp | GetFixedWidth () <= 0;
522                 }
523
524                 private CapturingGroup group;
525         }
526
527         class ExpressionAssertion : Assertion {
528                 public ExpressionAssertion () {
529                         Expressions.Add (null);         // test expression
530                 }
531
532                 public bool Reverse {
533                         get { return reverse; }
534                         set { reverse = value; }
535                 }
536
537                 public bool Negate {
538                         get { return negate; }
539                         set { negate = value; }
540                 }
541
542                 public Expression TestExpression {
543                         get { return Expressions[2]; }
544                         set { Expressions[2] = value; }
545                 }
546
547                 public override void Compile (ICompiler cmp, bool reverse) {
548                         LinkRef true_expr = cmp.NewLink ();
549                         LinkRef false_expr = cmp.NewLink ();
550
551                         // test op: positive / negative
552
553                         if (!negate)
554                                 cmp.EmitTest (true_expr, false_expr);
555                         else
556                                 cmp.EmitTest (false_expr, true_expr);
557                         
558                         // test expression: lookahead / lookbehind
559
560                         TestExpression.Compile (cmp, this.reverse);
561                         cmp.EmitTrue ();
562
563                         // target expressions
564
565                         if (TrueExpression == null) {                   // (?= ...)
566                                 //    Test :1, :2
567                                 //      <test_expr>
568                                 // :2   False
569                                 // :1   <tail>
570                         
571                                 cmp.ResolveLink (false_expr);
572                                 cmp.EmitFalse ();
573                                 cmp.ResolveLink (true_expr);
574                         }
575                         else {
576                                 cmp.ResolveLink (true_expr);
577                                 TrueExpression.Compile (cmp, reverse);
578                                 
579                                 if (FalseExpression == null) {          // (?(...) ...)
580                                         //    Test :1, :2
581                                         //      <test_expr>
582                                         // :1   <yes_expr>
583                                         // :2   <tail>
584
585                                         cmp.ResolveLink (false_expr);
586                                 }
587                                 else {                                  // (?(...) ... | ...)
588                                         //    Test :1, :2
589                                         //      <test_expr>
590                                         // :1   <yes_expr>
591                                         //      Jump :3
592                                         // :2   <no_expr>
593                                         // :3   <tail>
594                                 
595                                         LinkRef tail = cmp.NewLink ();
596                                 
597                                         cmp.EmitJump (tail);
598                                         cmp.ResolveLink (false_expr);
599                                         FalseExpression.Compile (cmp, reverse);
600                                         cmp.ResolveLink (tail);
601                                 }
602                         }
603                 }
604
605                 private bool reverse, negate;
606         }
607
608         // alternation
609
610         class Alternation : CompositeExpression {
611                 public Alternation () {
612                 }
613
614                 public ExpressionCollection Alternatives {
615                         get { return Expressions; }
616                 }
617
618                 public void AddAlternative (Expression e) {
619                         Alternatives.Add (e);
620                 }
621
622                 public override void Compile (ICompiler cmp, bool reverse) {
623                         //                      LinkRef next = cmp.NewLink ();
624                         LinkRef tail = cmp.NewLink ();
625                 
626                         foreach (Expression e in Alternatives) {
627                                 LinkRef next = cmp.NewLink ();
628                                 cmp.EmitBranch (next);
629                                 e.Compile (cmp, reverse);
630                                 cmp.EmitJump (tail);
631                                 cmp.ResolveLink (next);
632                                 cmp.EmitBranchEnd();
633                         }
634
635                         cmp.EmitFalse ();
636                         cmp.ResolveLink (tail);
637                         cmp.EmitAlternationEnd();
638                 }
639
640                 public override void GetWidth (out int min, out int max) {
641                         GetWidth (out min, out max, Alternatives.Count);
642                 }
643
644                 public override bool IsComplex () {
645                         bool comp = false;
646                         foreach (Expression e in Alternatives) {
647                                 comp |= e.IsComplex ();
648                         }
649
650                         return comp | GetFixedWidth () <= 0;
651                 }
652         }
653
654         // terminal expressions
655
656         class Literal : Expression {
657                 public Literal (string str, bool ignore) {
658                         this.str = str;
659                         this.ignore = ignore;
660                 }
661
662                 public string String {
663                         get { return str; }
664                         set { str = value; }
665                 }
666
667                 public bool IgnoreCase {
668                         get { return ignore; }
669                         set { ignore = value; }
670                 }
671
672                 public override void Compile (ICompiler cmp, bool reverse) {
673                         if (str.Length == 0)
674                                 return;
675
676                         if (str.Length == 1)
677                                 cmp.EmitCharacter (str[0], false, ignore, reverse);
678                         else
679                                 cmp.EmitString (str, ignore, reverse);
680                 }
681
682                 public override void GetWidth (out int min, out int max) {
683                         min = max = str.Length;
684                 }
685
686                 public override AnchorInfo GetAnchorInfo (bool reverse) {
687                         return new AnchorInfo (this, 0, str.Length, str, ignore);
688                 }
689
690                 public override bool IsComplex () {
691                         return false;
692                 }
693
694                 private string str;
695                 private bool ignore;
696         }
697
698         class PositionAssertion : Expression {
699                 public PositionAssertion (Position pos) {
700                         this.pos = pos;
701                 }
702
703                 public Position Position {
704                         get { return pos; }
705                         set { pos = value; }
706                 }
707
708                 public override void Compile (ICompiler cmp, bool reverse) {
709                         cmp.EmitPosition (pos);
710                 }
711
712                 public override void GetWidth (out int min, out int max) {
713                         min = max = 0;
714                 }
715
716                 public override bool IsComplex () {
717                         return false;
718                 }
719
720                 public override AnchorInfo GetAnchorInfo (bool revers) {
721                         switch (pos) {
722                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
723                                 return new AnchorInfo (this, 0, 0, pos);
724
725                         default:
726                                 return new AnchorInfo (this, 0);
727                         }
728                 }
729
730                 private Position pos;
731         }
732
733         class Reference : Expression {
734                 public Reference (bool ignore) {
735                         this.ignore = ignore;
736                 }
737
738                 public CapturingGroup CapturingGroup {
739                         get { return group; }
740                         set { group = value; }
741                 }
742
743                 public bool IgnoreCase {
744                         get { return ignore; }
745                         set { ignore = value; }
746                 }
747
748                 public override void Compile (ICompiler cmp, bool reverse) {
749                         cmp.EmitReference (group.Number, ignore, reverse);
750                 }
751
752                 public override void GetWidth (out int min, out int max) {
753                         //group.GetWidth (out min, out max);
754                         // TODO set width to referenced group for non-cyclical references
755                         min = 0;
756                         max = Int32.MaxValue;
757                 }
758
759                 public override bool IsComplex () {
760                         return true;    // FIXME incorporate cyclic check
761                 }
762
763                 private CapturingGroup group;
764                 private bool ignore;
765         }
766
767         class CharacterClass : Expression {
768                 public CharacterClass (bool negate, bool ignore) {
769                         this.negate = negate;
770                         this.ignore = ignore;
771
772                         intervals = new IntervalCollection ();
773
774                         // initialize pos/neg category arrays
775
776                         int cat_size = (int) Category.LastValue;
777                         pos_cats = new BitArray (cat_size);
778                         neg_cats = new BitArray (cat_size);
779                 }
780
781                 public CharacterClass (Category cat, bool negate) : this (false, false) {
782                         this.AddCategory (cat, negate);
783                 }
784
785                 public bool Negate {
786                         get { return negate; }
787                         set { negate = value; }
788                 }
789
790                 public bool IgnoreCase {
791                         get { return ignore; }
792                         set { ignore = value; }
793                 }
794
795                 public void AddCategory (Category cat, bool negate) {
796                         int n = (int)cat;
797                         
798                         if (negate) {
799                                 neg_cats[n] = true;
800                         } else {
801                                 pos_cats[n] = true;
802                         }
803                 }
804
805                 public void AddCharacter (char c) {
806                         // TODO: this is certainly not the most efficient way of doing things 
807                         // TODO: but at least it produces correct results. 
808                         AddRange (c, c);
809                 }
810
811                 public void AddRange (char lo, char hi) {
812                         Interval new_interval = new Interval (lo, hi);
813  
814                         // ignore case is on. we must make sure our interval does not
815                         // use upper case. if it does, we must normalize the upper case
816                         // characters into lower case. 
817                         if (ignore) {
818                                 if (upper_case_characters.Intersects (new_interval)) {
819                                         Interval partial_new_interval;
820  
821                                         if (new_interval.low < upper_case_characters.low) {
822                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case, 
823                                                                                      new_interval.high +  distance_between_upper_and_lower_case);
824                                                 new_interval.high = upper_case_characters.low - 1;
825                                         }
826                                         else {
827                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case, 
828                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);
829                                                 new_interval.low = upper_case_characters.high + 1;
830                                         }
831                                         intervals.Add (partial_new_interval);
832                                 }
833                                 else if (upper_case_characters.Contains (new_interval)) {
834                                         new_interval.high += distance_between_upper_and_lower_case;
835                                         new_interval.low += distance_between_upper_and_lower_case;
836                                 }
837                         }
838                         intervals.Add (new_interval);
839                 }
840
841                 public override void Compile (ICompiler cmp, bool reverse) {
842                         // create the meta-collection
843                         IntervalCollection meta =
844                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
845
846                         // count ops
847                         
848                         int count = meta.Count;
849                         for (int i = 0; i < pos_cats.Length; ++ i) {
850                                 if (pos_cats[i]) ++ count;
851                                 if (neg_cats[i]) ++ count;
852                         }
853
854                         if (count == 0)
855                                 return;
856
857                         // emit in op for |meta| > 1
858
859                         LinkRef tail = cmp.NewLink ();
860                         if (count > 1)
861                                 cmp.EmitIn (tail);
862                                 
863
864                         // emit categories
865
866                         for (int i = 0; i < pos_cats.Length; ++ i) {
867                                 if (pos_cats[i])
868                                         cmp.EmitCategory ((Category)i, negate, reverse);
869                                 else if (neg_cats[i])
870                                         cmp.EmitCategory ((Category)i, !negate, reverse);
871                         }
872
873                         // emit character/range/sets from meta-collection
874
875                         foreach (Interval a in meta) {
876                                 if (a.IsDiscontiguous) {                        // Set
877                                         BitArray bits = new BitArray (a.Size);
878                                         foreach (Interval b in intervals) {
879                                                 if (a.Contains (b)) {
880                                                         for (int i = b.low; i <= b.high; ++ i)
881                                                                 bits[i - a.low] = true;
882                                                 }
883                                         }
884
885                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
886                                 }
887                                 else if (a.IsSingleton)                         // Character
888                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
889                                 else                                            // Range
890                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
891                         }
892                         
893                         // finish up
894
895                         if (count > 1) {
896                                 if (negate)
897                                         cmp.EmitTrue ();
898                                 else
899                                         cmp.EmitFalse ();
900
901                                 cmp.ResolveLink (tail);
902                         }
903                 }
904
905                 public override void GetWidth (out int min, out int max) {
906                         min = max = 1;
907                 }
908
909                 public override bool IsComplex () {
910                         return false;
911                 }
912
913                 // private
914
915                 private static double GetIntervalCost (Interval i) {
916                         // use op length as cost metric (=> optimize for space)
917                 
918                         if (i.IsDiscontiguous)
919                                 return 3 + ((i.Size + 0xf) >> 4);               // Set
920                         else if (i.IsSingleton)
921                                 return 2;                                       // Character
922                         else
923                                 return 3;                                       // Range
924                 }
925
926                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);
927                 private const int distance_between_upper_and_lower_case = 32;
928                 private bool negate, ignore;
929                 private BitArray pos_cats, neg_cats;
930                 private IntervalCollection intervals;
931         }
932
933         class AnchorInfo {
934                 private Expression expr;
935
936                 private Position pos;
937                 private int offset;
938
939                 private string str;
940                 private int width;
941                 private bool ignore;
942
943                 public AnchorInfo (Expression expr, int width) {
944                         this.expr = expr;
945                         this.offset = 0;
946                         this.width = width;
947
948                         this.str = null;
949                         this.ignore = false;
950                         this.pos = Position.Any;
951                 }
952                 
953                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
954                         this.expr = expr;
955                         this.offset = offset;
956                         this.width = width;
957
958                         this.str = ignore ? str.ToLower () : str;
959
960                         this.ignore = ignore;
961                         this.pos = Position.Any;
962                 }
963
964                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {
965                         this.expr = expr;
966                         this.offset = offset;
967                         this.width = width;
968
969                         this.pos = pos;
970
971                         this.str = null;
972                         this.ignore = false;
973                 }
974
975                 public Expression Expression {
976                         get { return expr; }
977                 }
978
979                 public int Offset {
980                         get { return offset; }
981                 }
982
983                 public int Width {
984                         get { return width; }
985                 }
986
987                 public int Length {
988                         get { return (str != null) ? str.Length : 0; }
989                 }
990
991                 public bool IsUnknownWidth {
992                         get { return width < 0; }
993                 }
994
995                 public bool IsComplete {
996                         get { return Length == Width; }
997                 }
998
999                 public string Substring {
1000                         get { return str; }
1001                 }
1002
1003                 public bool IgnoreCase {
1004                         get { return ignore; }
1005                 }
1006
1007                 public Position Position {
1008                         get { return pos; }
1009                 }
1010
1011                 public bool IsSubstring {
1012                         get { return str != null; }
1013                 }
1014
1015                 public bool IsPosition {
1016                         get { return pos != Position.Any; }
1017                 }
1018
1019                 public Interval GetInterval () {
1020                         return GetInterval (0);
1021                 }
1022
1023                 public Interval GetInterval (int start) {
1024                         if (!IsSubstring)
1025                                 return Interval.Empty;
1026
1027                         return new Interval (start + Offset, start + Offset + Length - 1);
1028                 }
1029         }
1030 }
1031