2004-04-19 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        syntax.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 using System;
10 using System.Collections;
11
12 namespace System.Text.RegularExpressions.Syntax {
13         // collection classes
14         
15         class ExpressionCollection : CollectionBase {
16                 public void Add (Expression e) {
17                         List.Add (e);
18                 }
19
20                 public Expression this[int i] {
21                         get { return (Expression)List[i]; }
22                         set { List[i] = value; }
23                 }
24
25                 protected override void OnValidate (object o) {
26                         // allow null elements
27                 }
28         }
29
30         // abstract classes
31         
32         abstract class Expression {
33                 public abstract void Compile (ICompiler cmp, bool reverse);
34                 public abstract void GetWidth (out int min, out int max);
35
36                 public int GetFixedWidth () {
37                         int min, max;
38                         GetWidth (out min, out max);
39
40                         if (min == max)
41                                 return min;
42
43                         return -1;
44                 }
45
46                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {
47                         return new AnchorInfo (this, GetFixedWidth ());
48                 }
49
50                 public virtual bool IsComplex () {
51                         return true;
52                 }
53         }
54
55         // composite expressions
56         
57         abstract class CompositeExpression : Expression {
58                 public CompositeExpression () {
59                         expressions = new ExpressionCollection ();
60                 }
61
62                 protected ExpressionCollection Expressions {
63                         get { return expressions; }
64                 }
65
66                 protected void GetWidth (out int min, out int max, int count) {
67                         min = Int32.MaxValue;
68                         max = 0;
69                         bool empty = true;
70
71                         for (int i = 0; i < count; ++ i) {
72                                 Expression e = Expressions[i];
73                                 if (e == null)
74                                         continue;
75                         
76                                 empty = false;
77                                 int a, b;
78                                 e.GetWidth (out a, out b);
79                                 if (a < min) min = a;
80                                 if (b > max) max = b;
81                         }
82
83                         if (empty)
84                                 min = max = 0;
85                 }
86
87                 private ExpressionCollection expressions;
88         }
89
90         // groups
91         
92         class Group : CompositeExpression {
93                 public Group () {
94                 }
95
96                 public Expression Expression {
97                         get { return Expressions[0]; }
98                         set { Expressions[0] = value; }
99                 }
100
101                 public void AppendExpression (Expression e) {
102                         Expressions.Add (e);
103                 }
104
105                 public override void Compile (ICompiler cmp, bool reverse) {
106                         int count = Expressions.Count;
107                         for (int i = 0; i < count; ++ i) {
108                                 Expression e;
109                                 if (reverse)
110                                         e = Expressions[count - i - 1];
111                                 else
112                                         e = Expressions[i];
113
114                                 e.Compile (cmp, reverse);
115                         }
116                 }
117
118                 public override void GetWidth (out int min, out int max) {
119                         min = 0;
120                         max = 0;
121
122                         foreach (Expression e in Expressions) {
123                                 int a, b;
124                                 e.GetWidth (out a, out b);
125                                 min += a;
126                                 if (max == Int32.MaxValue || b == Int32.MaxValue)
127                                         max = Int32.MaxValue;
128                                 else
129                                         max += b;
130                         }
131                 }
132
133                 public override AnchorInfo GetAnchorInfo (bool reverse) {
134                         int ptr;
135                         int width = GetFixedWidth ();
136
137                         ArrayList infos = new ArrayList ();
138                         IntervalCollection segments = new IntervalCollection ();
139
140                         // accumulate segments
141
142                         ptr = 0;
143                         //foreach (Expression e in Expressions) {
144                         int count = Expressions.Count;
145                         for (int i = 0; i < count; ++ i) {
146                                 Expression e;
147                                 if (reverse)
148                                         e = Expressions [count - i - 1];
149                                 else
150                                         e = Expressions [i];            
151                                 
152                                 AnchorInfo info = e.GetAnchorInfo (reverse);
153                                 infos.Add (info);
154
155                                 if (info.IsPosition)
156                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
157
158                                 if (info.IsSubstring)
159                                         segments.Add (info.GetInterval (ptr));
160
161                                 if (info.IsUnknownWidth)
162                                         break;
163
164                                 ptr += info.Width;
165                         }
166
167                         // normalize and find the longest segment
168
169                         segments.Normalize ();
170
171                         Interval longest = Interval.Empty;
172                         foreach (Interval segment in segments) {
173                                 if (segment.Size > longest.Size)
174                                         longest = segment;
175                         }
176
177                         // now chain the substrings that made this segment together
178
179                         if (!longest.IsEmpty) {
180                                 string str = "";
181                                 ArrayList strs = new ArrayList();
182                                 bool ignore = false;
183
184                                 ptr = 0;
185                                 
186                                 //foreach (AnchorInfo info in infos) {
187                                 for (int i = 0; i < infos.Count; ++ i) {
188                                         AnchorInfo info;
189
190                                         info = (AnchorInfo)infos[i];            
191                                         
192                                         if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
193                                                 //str += info.Substring;        // TODO mark subexpressions
194                                                 strs.Add(info.Substring);
195                                                 ignore |= info.IgnoreCase;
196                                         }
197
198                                         
199                                                 if (info.IsUnknownWidth)                                                        
200                                                         break;
201                                         
202                                                 ptr += info.Width;
203                                 }       
204                                         
205                                 for (int i = 0; i< strs.Count; ++i)
206                                 {
207                                         if (reverse)
208                                                 str += strs [strs.Count - i - 1];
209                                         else
210                                                 str += strs [i];
211                                                         
212                                         
213                                 }
214                         
215
216                                 return new AnchorInfo (this, longest.low, width, str, ignore);
217                         }
218
219                         return new AnchorInfo (this, width);
220                 }
221
222                 public override bool IsComplex () {
223                         bool comp = false;
224                         foreach (Expression e in Expressions) {
225                                 comp |= e.IsComplex ();
226                         }
227
228                         return comp | GetFixedWidth () <= 0;
229                 }
230         }
231
232         class RegularExpression : Group {
233                 public RegularExpression () {
234                         group_count = 0;
235                 }
236
237                 public int GroupCount {
238                         get { return group_count; }
239                         set { group_count = value; }
240                 }
241
242                 public override void Compile (ICompiler cmp, bool reverse) {
243                         // info block
244
245                         int min, max;
246                         GetWidth (out min, out max);
247                         cmp.EmitInfo (group_count, min, max);
248
249                         // anchoring expression
250
251                         AnchorInfo info = GetAnchorInfo (reverse);
252                         //if (reverse)
253                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
254
255                         LinkRef pattern = cmp.NewLink ();
256                         cmp.EmitAnchor (reverse, info.Offset, pattern);
257
258                         if (info.IsPosition)
259                                 cmp.EmitPosition (info.Position);
260                         else if (info.IsSubstring)
261                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
262                         
263                         cmp.EmitTrue ();
264                         
265                         // pattern
266
267                         cmp.ResolveLink (pattern);
268                         base.Compile (cmp, reverse);
269                         cmp.EmitTrue ();
270                 }
271
272                 private int group_count;
273         }
274
275         class CapturingGroup : Group {
276                 public CapturingGroup () {
277                         this.gid = 0;
278                         this.name = null;
279                 }
280
281                 public int Number {
282                         get { return gid; }
283                         set { gid = value; }
284                 }
285
286                 public string Name {
287                         get { return name; }
288                         set { name = value; }
289                 }
290
291                 public bool IsNamed {
292                         get { return name != null; }
293                 }
294
295                 public override void Compile (ICompiler cmp, bool reverse) {
296                         cmp.EmitOpen (gid);
297                         base.Compile (cmp, reverse);
298                         cmp.EmitClose (gid);
299                 }
300
301                 public override bool IsComplex () {
302                         return true;
303                 }
304
305                 private int gid;
306                 private string name;
307         }
308
309         class BalancingGroup : CapturingGroup {
310                 public BalancingGroup () {
311                         this.balance = null;
312                 }
313
314                 public CapturingGroup Balance {
315                         get { return balance; }
316                         set { balance = value; }
317                 }
318
319                 public override void Compile (ICompiler cmp, bool reverse) {
320                         // can't invoke Group.Compile from here :(
321                         // so I'll just repeat the code
322                 
323                         int count = Expressions.Count;
324                         for (int i = 0; i < count; ++ i) {
325                                 Expression e;
326                                 if (reverse)
327                                         e = Expressions[count - i - 1];
328                                 else
329                                         e = Expressions[i];
330
331                                 e.Compile (cmp, reverse);
332                         }
333
334                         cmp.EmitBalance (this.Number, balance.Number);
335                 }
336
337                 private CapturingGroup balance;
338         }
339
340         class NonBacktrackingGroup : Group {
341                 public NonBacktrackingGroup () {
342                 }
343
344                 public override void Compile (ICompiler cmp, bool reverse) {
345                         LinkRef tail = cmp.NewLink ();
346
347                         cmp.EmitSub (tail);
348                         base.Compile (cmp, reverse);
349                         cmp.EmitTrue ();
350                         cmp.ResolveLink (tail);
351                 }
352
353                 public override bool IsComplex () {
354                         return true;
355                 }
356         }
357
358         // repetition
359
360         class Repetition : CompositeExpression {
361                 public Repetition (int min, int max, bool lazy) {
362                         Expressions.Add (null);
363                         
364                         this.min = min;
365                         this.max = max;
366                         this.lazy = lazy;
367                 }
368
369                 public Expression Expression {
370                         get { return Expressions[0]; }
371                         set { Expressions[0] = value; }
372                 }
373
374                 public int Minimum {
375                         get { return min; }
376                         set { min = value; }
377                 }
378
379                 public int Maximum {
380                         get { return max; }
381                         set { max = value; }
382                 }
383
384                 public bool Lazy {
385                         get { return lazy; }
386                         set { lazy = value; }
387                 }
388
389                 public override void Compile (ICompiler cmp, bool reverse) {
390                         if (Expression.IsComplex ()) {
391                                 LinkRef until = cmp.NewLink ();
392                                 
393                                 cmp.EmitRepeat (min, max, lazy, until);
394                                 Expression.Compile (cmp, reverse);
395                                 cmp.EmitUntil (until);
396                         }
397                         else {
398                                 LinkRef tail = cmp.NewLink ();
399
400                                 cmp.EmitFastRepeat (min, max, lazy, tail);
401                                 Expression.Compile (cmp, reverse);
402                                 cmp.EmitTrue ();
403                                 cmp.ResolveLink (tail);
404                         }
405                 }
406
407                 public override void GetWidth (out int min, out int max) {
408                         Expression.GetWidth (out min, out max);
409                         min = min * this.min;
410                         if (max == Int32.MaxValue || this.max == 0xffff)
411                                 max = Int32.MaxValue;
412                         else
413                                 max = max * this.max;
414                 }
415
416                 public override AnchorInfo GetAnchorInfo (bool reverse) {
417                         int width = GetFixedWidth ();
418                         if (Minimum == 0)
419                                 return new AnchorInfo (this, width);
420                         
421                         AnchorInfo info = Expression.GetAnchorInfo (reverse);
422                         if (info.IsPosition)
423                                 return new AnchorInfo (this, info.Offset, width, info.Position);
424                         
425                         if (info.IsSubstring) {
426                                 if (info.IsComplete) {
427                                         string str = "";
428                                         for (int i = 0; i < Minimum; ++ i)
429                                                 str += info.Substring;
430
431                                         return new AnchorInfo (this, 0, width, str, info.IgnoreCase);
432                                 }
433
434                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
435                         }
436
437                         return new AnchorInfo (this, width);
438                 }
439
440                 private int min, max;
441                 private bool lazy;
442         }
443
444         // assertions
445
446         abstract class Assertion : CompositeExpression {
447                 public Assertion () {
448                         Expressions.Add (null);         // true expression
449                         Expressions.Add (null);         // false expression
450                 }
451
452                 public Expression TrueExpression {
453                         get { return Expressions[0]; }
454                         set { Expressions[0] = value; }
455                 }
456
457                 public Expression FalseExpression {
458                         get { return Expressions[1]; }
459                         set { Expressions[1] = value; }
460                 }
461
462                 public override void GetWidth (out int min, out int max) {
463                         GetWidth (out min, out max, 2);
464
465                         if (TrueExpression == null || FalseExpression == null)
466                                 min = 0;
467                 }
468         }
469
470         class CaptureAssertion : Assertion {
471                 public CaptureAssertion () {
472                 }
473
474                 public CapturingGroup CapturingGroup {
475                         get { return group; }
476                         set { group = value; }
477                 }
478
479                 public override void Compile (ICompiler cmp, bool reverse) {
480                         int gid = group.Number;
481                         LinkRef tail = cmp.NewLink ();
482
483                         if (FalseExpression == null) {
484                                 //    IfDefined :1
485                                 //      <yes_exp>
486                                 // 1: <tail>
487                         
488                                 cmp.EmitIfDefined (gid, tail);
489                                 TrueExpression.Compile (cmp, reverse);
490                         }
491                         else {
492                                 //    IfDefined :1
493                                 //      <yes_expr>
494                                 //      Jump :2
495                                 // 1:   <no_expr>
496                                 // 2: <tail>
497                         
498                                 LinkRef false_expr = cmp.NewLink ();
499                                 cmp.EmitIfDefined (gid, false_expr);
500                                 TrueExpression.Compile (cmp, reverse);
501                                 cmp.EmitJump (tail);
502                                 cmp.ResolveLink (false_expr);
503                                 FalseExpression.Compile (cmp, reverse);
504                         }
505
506                         cmp.ResolveLink (tail);
507                 }
508
509                 public override bool IsComplex () {
510                         bool comp = false;
511                         if (TrueExpression != null)
512                                 comp |= TrueExpression.IsComplex ();
513                         if (FalseExpression != null)
514                                 comp |= FalseExpression.IsComplex ();
515
516                         return comp | GetFixedWidth () <= 0;
517                 }
518
519                 private CapturingGroup group;
520         }
521
522         class ExpressionAssertion : Assertion {
523                 public ExpressionAssertion () {
524                         Expressions.Add (null);         // test expression
525                 }
526
527                 public bool Reverse {
528                         get { return reverse; }
529                         set { reverse = value; }
530                 }
531
532                 public bool Negate {
533                         get { return negate; }
534                         set { negate = value; }
535                 }
536
537                 public Expression TestExpression {
538                         get { return Expressions[2]; }
539                         set { Expressions[2] = value; }
540                 }
541
542                 public override void Compile (ICompiler cmp, bool reverse) {
543                         LinkRef true_expr = cmp.NewLink ();
544                         LinkRef false_expr = cmp.NewLink ();
545
546                         // test op: positive / negative
547
548                         if (!negate)
549                                 cmp.EmitTest (true_expr, false_expr);
550                         else
551                                 cmp.EmitTest (false_expr, true_expr);
552                         
553                         // test expression: lookahead / lookbehind
554
555                         TestExpression.Compile (cmp, this.reverse);
556                         cmp.EmitTrue ();
557
558                         // target expressions
559
560                         if (TrueExpression == null) {                   // (?= ...)
561                                 //    Test :1, :2
562                                 //      <test_expr>
563                                 // :2   False
564                                 // :1   <tail>
565                         
566                                 cmp.ResolveLink (false_expr);
567                                 cmp.EmitFalse ();
568                                 cmp.ResolveLink (true_expr);
569                         }
570                         else {
571                                 cmp.ResolveLink (true_expr);
572                                 TrueExpression.Compile (cmp, reverse);
573                                 
574                                 if (FalseExpression == null) {          // (?(...) ...)
575                                         //    Test :1, :2
576                                         //      <test_expr>
577                                         // :1   <yes_expr>
578                                         // :2   <tail>
579
580                                         cmp.ResolveLink (false_expr);
581                                 }
582                                 else {                                  // (?(...) ... | ...)
583                                         //    Test :1, :2
584                                         //      <test_expr>
585                                         // :1   <yes_expr>
586                                         //      Jump :3
587                                         // :2   <no_expr>
588                                         // :3   <tail>
589                                 
590                                         LinkRef tail = cmp.NewLink ();
591                                 
592                                         cmp.EmitJump (tail);
593                                         cmp.ResolveLink (false_expr);
594                                         FalseExpression.Compile (cmp, reverse);
595                                         cmp.ResolveLink (tail);
596                                 }
597                         }
598                 }
599
600                 private bool reverse, negate;
601         }
602
603         // alternation
604
605         class Alternation : CompositeExpression {
606                 public Alternation () {
607                 }
608
609                 public ExpressionCollection Alternatives {
610                         get { return Expressions; }
611                 }
612
613                 public void AddAlternative (Expression e) {
614                         Alternatives.Add (e);
615                 }
616
617                 public override void Compile (ICompiler cmp, bool reverse) {
618                         //                      LinkRef next = cmp.NewLink ();
619                         LinkRef tail = cmp.NewLink ();
620                 
621                         foreach (Expression e in Alternatives) {
622                                 LinkRef next = cmp.NewLink ();
623                                 cmp.EmitBranch (next);
624                                 e.Compile (cmp, reverse);
625                                 cmp.EmitJump (tail);
626                                 cmp.ResolveLink (next);
627                                 cmp.EmitBranchEnd();
628                         }
629
630                         cmp.EmitFalse ();
631                         cmp.ResolveLink (tail);
632                         cmp.EmitAlternationEnd();
633                 }
634
635                 public override void GetWidth (out int min, out int max) {
636                         GetWidth (out min, out max, Alternatives.Count);
637                 }
638
639                 public override bool IsComplex () {
640                         bool comp = false;
641                         foreach (Expression e in Alternatives) {
642                                 comp |= e.IsComplex ();
643                         }
644
645                         return comp | GetFixedWidth () <= 0;
646                 }
647         }
648
649         // terminal expressions
650
651         class Literal : Expression {
652                 public Literal (string str, bool ignore) {
653                         this.str = str;
654                         this.ignore = ignore;
655                 }
656
657                 public string String {
658                         get { return str; }
659                         set { str = value; }
660                 }
661
662                 public bool IgnoreCase {
663                         get { return ignore; }
664                         set { ignore = value; }
665                 }
666
667                 public override void Compile (ICompiler cmp, bool reverse) {
668                         if (str.Length == 0)
669                                 return;
670
671                         if (str.Length == 1)
672                                 cmp.EmitCharacter (str[0], false, ignore, reverse);
673                         else
674                                 cmp.EmitString (str, ignore, reverse);
675                 }
676
677                 public override void GetWidth (out int min, out int max) {
678                         min = max = str.Length;
679                 }
680
681                 public override AnchorInfo GetAnchorInfo (bool reverse) {
682                         return new AnchorInfo (this, 0, str.Length, str, ignore);
683                 }
684
685                 public override bool IsComplex () {
686                         return false;
687                 }
688
689                 private string str;
690                 private bool ignore;
691         }
692
693         class PositionAssertion : Expression {
694                 public PositionAssertion (Position pos) {
695                         this.pos = pos;
696                 }
697
698                 public Position Position {
699                         get { return pos; }
700                         set { pos = value; }
701                 }
702
703                 public override void Compile (ICompiler cmp, bool reverse) {
704                         cmp.EmitPosition (pos);
705                 }
706
707                 public override void GetWidth (out int min, out int max) {
708                         min = max = 0;
709                 }
710
711                 public override bool IsComplex () {
712                         return false;
713                 }
714
715                 public override AnchorInfo GetAnchorInfo (bool revers) {
716                         switch (pos) {
717                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
718                                 return new AnchorInfo (this, 0, 0, pos);
719
720                         default:
721                                 return new AnchorInfo (this, 0);
722                         }
723                 }
724
725                 private Position pos;
726         }
727
728         class Reference : Expression {
729                 public Reference (bool ignore) {
730                         this.ignore = ignore;
731                 }
732
733                 public CapturingGroup CapturingGroup {
734                         get { return group; }
735                         set { group = value; }
736                 }
737
738                 public bool IgnoreCase {
739                         get { return ignore; }
740                         set { ignore = value; }
741                 }
742
743                 public override void Compile (ICompiler cmp, bool reverse) {
744                         cmp.EmitReference (group.Number, ignore, reverse);
745                 }
746
747                 public override void GetWidth (out int min, out int max) {
748                         //group.GetWidth (out min, out max);
749                         // TODO set width to referenced group for non-cyclical references
750                         min = 0;
751                         max = Int32.MaxValue;
752                 }
753
754                 public override bool IsComplex () {
755                         return true;    // FIXME incorporate cyclic check
756                 }
757
758                 private CapturingGroup group;
759                 private bool ignore;
760         }
761
762         class CharacterClass : Expression {
763                 public CharacterClass (bool negate, bool ignore) {
764                         this.negate = negate;
765                         this.ignore = ignore;
766
767                         intervals = new IntervalCollection ();
768
769                         // initialize pos/neg category arrays
770
771                         Array cat_values = Enum.GetValues (typeof (Category));
772                         int cat_size = (int)(Category)cat_values.GetValue (cat_values.Length - 1) + 1;
773                         pos_cats = new bool[cat_size];
774                         neg_cats = new bool[cat_size];
775                         for (int i = 0; i < cat_size; ++ i) {
776                                 pos_cats[i] = false;
777                                 neg_cats[i] = false;
778                         }
779                 }
780
781                 public CharacterClass (Category cat, bool negate) : this (false, false) {
782                         this.AddCategory (cat, negate);
783                 }
784
785                 public bool Negate {
786                         get { return negate; }
787                         set { negate = value; }
788                 }
789
790                 public bool IgnoreCase {
791                         get { return ignore; }
792                         set { ignore = value; }
793                 }
794
795                 public void AddCategory (Category cat, bool negate) {
796                         int n = (int)cat;
797                         
798                         if (negate) {
799                                 if (pos_cats[n])
800                                         pos_cats[n] = false;
801
802                                 neg_cats[n] = true;
803                         }
804                         else {
805                                 if (neg_cats[n])
806                                         neg_cats[n] = false;
807
808                                 pos_cats[n] = true;
809                         }
810                 }
811
812                 public void AddCharacter (char c) {
813                         // TODO: this is certainly not the most efficient way of doing things 
814                         // TODO: but at least it produces correct results. 
815                         AddRange (c, c);
816                 }
817
818                 public void AddRange (char lo, char hi) {
819                         Interval new_interval = new Interval (lo, hi);
820  
821                         // ignore case is on. we must make sure our interval does not
822                         // use upper case. if it does, we must normalize the upper case
823                         // characters into lower case. 
824                         if (ignore) {
825                                 if (upper_case_characters.Intersects (new_interval)) {
826                                         Interval partial_new_interval;
827  
828                                         if (new_interval.low < upper_case_characters.low) {
829                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case, 
830                                                                                      new_interval.high +  distance_between_upper_and_lower_case);
831                                                 new_interval.high = upper_case_characters.low - 1;
832                                         }
833                                         else {
834                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case, 
835                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);
836                                                 new_interval.low = upper_case_characters.high + 1;
837                                         }
838                                         intervals.Add (partial_new_interval);
839                                 }
840                                 else if (upper_case_characters.Contains (new_interval)) {
841                                         new_interval.high += distance_between_upper_and_lower_case;
842                                         new_interval.low += distance_between_upper_and_lower_case;
843                                 }
844                         }
845                         intervals.Add (new_interval);
846                 }
847
848                 public override void Compile (ICompiler cmp, bool reverse) {
849                         // create the meta-collection
850                         IntervalCollection meta =
851                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
852
853                         // count ops
854                         
855                         int count = meta.Count;
856                         for (int i = 0; i < pos_cats.Length; ++ i) {
857                                 if (pos_cats[i]) ++ count;
858                                 if (neg_cats[i]) ++ count;
859                         }
860
861                         if (count == 0)
862                                 return;
863
864                         // emit in op for |meta| > 1
865
866                         LinkRef tail = cmp.NewLink ();
867                         if (count > 1)
868                                 cmp.EmitIn (tail);
869                                 
870
871                         // emit categories
872
873                         for (int i = 0; i < pos_cats.Length; ++ i) {
874                                 if (pos_cats[i])
875                                         cmp.EmitCategory ((Category)i, negate, reverse);
876                                 else if (neg_cats[i])
877                                         cmp.EmitCategory ((Category)i, !negate, reverse);
878                         }
879
880                         // emit character/range/sets from meta-collection
881
882                         foreach (Interval a in meta) {
883                                 if (a.IsDiscontiguous) {                        // Set
884                                         BitArray bits = new BitArray (a.Size);
885                                         foreach (Interval b in intervals) {
886                                                 if (a.Contains (b)) {
887                                                         for (int i = b.low; i <= b.high; ++ i)
888                                                                 bits[i - a.low] = true;
889                                                 }
890                                         }
891
892                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
893                                 }
894                                 else if (a.IsSingleton)                         // Character
895                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
896                                 else                                            // Range
897                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
898                         }
899                         
900                         // finish up
901
902                         if (count > 1) {
903                                 if (negate)
904                                         cmp.EmitTrue ();
905                                 else
906                                         cmp.EmitFalse ();
907
908                                 cmp.ResolveLink (tail);
909                         }
910                 }
911
912                 public override void GetWidth (out int min, out int max) {
913                         min = max = 1;
914                 }
915
916                 public override bool IsComplex () {
917                         return false;
918                 }
919
920                 // private
921
922                 private static double GetIntervalCost (Interval i) {
923                         // use op length as cost metric (=> optimize for space)
924                 
925                         if (i.IsDiscontiguous)
926                                 return 3 + ((i.Size + 0xf) >> 4);               // Set
927                         else if (i.IsSingleton)
928                                 return 2;                                       // Character
929                         else
930                                 return 3;                                       // Range
931                 }
932
933                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);
934                 private const int distance_between_upper_and_lower_case = 32;
935                 private bool negate, ignore;
936                 private bool[] pos_cats, neg_cats;
937                 private IntervalCollection intervals;
938         }
939
940         class AnchorInfo {
941                 private Expression expr;
942
943                 private Position pos;
944                 private int offset;
945
946                 private string str;
947                 private int width;
948                 private bool ignore;
949
950                 public AnchorInfo (Expression expr, int width) {
951                         this.expr = expr;
952                         this.offset = 0;
953                         this.width = width;
954
955                         this.str = null;
956                         this.ignore = false;
957                         this.pos = Position.Any;
958                 }
959                 
960                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
961                         this.expr = expr;
962                         this.offset = offset;
963                         this.width = width;
964
965                         this.str = ignore ? str.ToLower () : str;
966
967                         this.ignore = ignore;
968                         this.pos = Position.Any;
969                 }
970
971                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {
972                         this.expr = expr;
973                         this.offset = offset;
974                         this.width = width;
975
976                         this.pos = pos;
977
978                         this.str = null;
979                         this.ignore = false;
980                 }
981
982                 public Expression Expression {
983                         get { return expr; }
984                 }
985
986                 public int Offset {
987                         get { return offset; }
988                 }
989
990                 public int Width {
991                         get { return width; }
992                 }
993
994                 public int Length {
995                         get { return (str != null) ? str.Length : 0; }
996                 }
997
998                 public bool IsUnknownWidth {
999                         get { return width < 0; }
1000                 }
1001
1002                 public bool IsComplete {
1003                         get { return Length == Width; }
1004                 }
1005
1006                 public string Substring {
1007                         get { return str; }
1008                 }
1009
1010                 public bool IgnoreCase {
1011                         get { return ignore; }
1012                 }
1013
1014                 public Position Position {
1015                         get { return pos; }
1016                 }
1017
1018                 public bool IsSubstring {
1019                         get { return str != null; }
1020                 }
1021
1022                 public bool IsPosition {
1023                         get { return pos != Position.Any; }
1024                 }
1025
1026                 public Interval GetInterval () {
1027                         return GetInterval (0);
1028                 }
1029
1030                 public Interval GetInterval (int start) {
1031                         if (!IsSubstring)
1032                                 return Interval.Empty;
1033
1034                         return new Interval (start + Offset, start + Offset + Length - 1);
1035                 }
1036         }
1037 }
1038