2006-09-11 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        syntax.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32
33 namespace System.Text.RegularExpressions.Syntax {
34         // collection classes
35
36         class ExpressionCollection : CollectionBase {
37                 public void Add (Expression e) {
38                         List.Add (e);
39                 }
40
41                 public Expression this[int i] {
42                         get { return (Expression)List[i]; }
43                         set { List[i] = value; }
44                 }
45
46                 protected override void OnValidate (object o) {
47                         // allow null elements
48                 }
49         }
50
51         // abstract classes
52
53         abstract class Expression {
54                 public abstract void Compile (ICompiler cmp, bool reverse);
55                 public abstract void GetWidth (out int min, out int max);
56
57                 public int GetFixedWidth () {
58                         int min, max;
59                         GetWidth (out min, out max);
60
61                         if (min == max)
62                                 return min;
63
64                         return -1;
65                 }
66
67                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {
68                         return new AnchorInfo (this, GetFixedWidth ());
69                 }
70
71                 public abstract bool IsComplex ();
72         }
73
74         // composite expressions
75
76         abstract class CompositeExpression : Expression {
77                 public CompositeExpression () {
78                         expressions = new ExpressionCollection ();
79                 }
80
81                 protected ExpressionCollection Expressions {
82                         get { return expressions; }
83                 }
84
85                 protected void GetWidth (out int min, out int max, int count) {
86                         min = Int32.MaxValue;
87                         max = 0;
88                         bool empty = true;
89
90                         for (int i = 0; i < count; ++ i) {
91                                 Expression e = Expressions[i];
92                                 if (e == null)
93                                         continue;
94
95                                 empty = false;
96                                 int a, b;
97                                 e.GetWidth (out a, out b);
98                                 if (a < min) min = a;
99                                 if (b > max) max = b;
100                         }
101
102                         if (empty)
103                                 min = max = 0;
104                 }
105
106
107                 public override bool IsComplex ()
108                 {
109                         foreach (Expression e in Expressions) {
110                                 if (e.IsComplex ())
111                                         return true;
112                         }
113                         return GetFixedWidth () <= 0;
114                 }
115
116                 private ExpressionCollection expressions;
117         }
118
119         // groups
120
121         class Group : CompositeExpression {
122                 public Group () {
123                 }
124
125                 public Expression Expression {
126                         get { return Expressions[0]; }
127                         set { Expressions[0] = value; }
128                 }
129
130                 public void AppendExpression (Expression e) {
131                         Expressions.Add (e);
132                 }
133
134                 public override void Compile (ICompiler cmp, bool reverse) {
135                         int count = Expressions.Count;
136                         for (int i = 0; i < count; ++ i) {
137                                 Expression e;
138                                 if (reverse)
139                                         e = Expressions[count - i - 1];
140                                 else
141                                         e = Expressions[i];
142
143                                 e.Compile (cmp, reverse);
144                         }
145                 }
146
147                 public override void GetWidth (out int min, out int max) {
148                         min = 0;
149                         max = 0;
150
151                         foreach (Expression e in Expressions) {
152                                 int a, b;
153                                 e.GetWidth (out a, out b);
154                                 min += a;
155                                 if (max == Int32.MaxValue || b == Int32.MaxValue)
156                                         max = Int32.MaxValue;
157                                 else
158                                         max += b;
159                         }
160                 }
161
162                 public override AnchorInfo GetAnchorInfo (bool reverse)
163                 {
164                         int ptr;
165                         int width = GetFixedWidth ();
166
167                         ArrayList infos = new ArrayList ();
168                         IntervalCollection segments = new IntervalCollection ();
169
170                         // accumulate segments
171                         ptr = 0;
172                         int count = Expressions.Count;
173                         for (int i = 0; i < count; ++ i) {
174                                 Expression e;
175                                 if (reverse)
176                                         e = Expressions [count - i - 1];
177                                 else
178                                         e = Expressions [i];
179
180                                 AnchorInfo info = e.GetAnchorInfo (reverse);
181                                 infos.Add (info);
182
183                                 if (info.IsPosition)
184                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
185
186                                 if (info.IsSubstring)
187                                         segments.Add (info.GetInterval (ptr));
188
189                                 if (info.IsUnknownWidth)
190                                         break;
191
192                                 ptr += info.Width;
193                         }
194
195                         // normalize and find the longest segment
196                         segments.Normalize ();
197
198                         Interval longest = Interval.Empty;
199                         foreach (Interval segment in segments) {
200                                 if (segment.Size > longest.Size)
201                                         longest = segment;
202                         }
203
204                         if (longest.IsEmpty)
205                                 return new AnchorInfo (this, width);
206
207                         // now chain the substrings that made this segment together
208                         bool ignore = false;
209                         int n_strings = 0;
210
211                         ptr = 0;
212                         for (int i = 0; i < infos.Count; ++i) {
213                                 AnchorInfo info = (AnchorInfo) infos [i];
214
215                                 if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
216                                         ignore |= info.IgnoreCase;
217                                         infos [n_strings ++] = info;
218                                 }
219
220                                 if (info.IsUnknownWidth)
221                                         break;
222
223                                 ptr += info.Width;
224                         }
225
226                         StringBuilder sb = new StringBuilder ();
227                         for (int i = 0; i < n_strings; ++i) {
228                                 AnchorInfo info;
229                                 if (reverse)
230                                         info = (AnchorInfo) infos [n_strings - i - 1];
231                                 else
232                                         info = (AnchorInfo) infos [i];
233                                 sb.Append (info.Substring);
234                         }
235
236                         if (sb.Length == longest.Size)
237                                 return new AnchorInfo (this, longest.low, width, sb.ToString (), ignore);
238                         // were the string segments overlapping?
239                         if (sb.Length > longest.Size) {
240                                 Console.Error.WriteLine ("overlapping?");
241                                 return new AnchorInfo (this, width);
242                         }
243                         throw new SystemException ("Shouldn't happen");
244                 }
245         }
246
247         class RegularExpression : Group {
248                 public RegularExpression () {
249                         group_count = 0;
250                 }
251
252                 public int GroupCount {
253                         get { return group_count; }
254                         set { group_count = value; }
255                 }
256
257                 public override void Compile (ICompiler cmp, bool reverse) {
258                         // info block
259
260                         int min, max;
261                         GetWidth (out min, out max);
262                         cmp.EmitInfo (group_count, min, max);
263
264                         // anchoring expression
265
266                         AnchorInfo info = GetAnchorInfo (reverse);
267                         //if (reverse)
268                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
269
270                         LinkRef pattern = cmp.NewLink ();
271                         cmp.EmitAnchor (reverse, info.Offset, pattern);
272
273                         if (info.IsPosition)
274                                 cmp.EmitPosition (info.Position);
275                         else if (info.IsSubstring)
276                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
277
278                         cmp.EmitTrue ();
279
280                         // pattern
281
282                         cmp.ResolveLink (pattern);
283                         base.Compile (cmp, reverse);
284                         cmp.EmitTrue ();
285                 }
286
287                 private int group_count;
288         }
289
290         class CapturingGroup : Group {
291                 public CapturingGroup () {
292                         this.gid = 0;
293                         this.name = null;
294                 }
295
296                 public int Number {
297                         get { return gid; }
298                         set { gid = value; }
299                 }
300
301                 public string Name {
302                         get { return name; }
303                         set { name = value; }
304                 }
305
306                 public bool IsNamed {
307                         get { return name != null; }
308                 }
309
310                 public override void Compile (ICompiler cmp, bool reverse) {
311                         cmp.EmitOpen (gid);
312                         base.Compile (cmp, reverse);
313                         cmp.EmitClose (gid);
314                 }
315
316                 public override bool IsComplex () {
317                         return true;
318                 }
319
320                 private int gid;
321                 private string name;
322         }
323
324         class BalancingGroup : CapturingGroup {
325                 public BalancingGroup () {
326                         this.balance = null;
327                 }
328
329                 public CapturingGroup Balance {
330                         get { return balance; }
331                         set { balance = value; }
332                 }
333
334                 public override void Compile (ICompiler cmp, bool reverse) {
335                         // can't invoke Group.Compile from here :(
336                         // so I'll just repeat the code
337
338                         LinkRef tail = cmp.NewLink ();
339
340                         cmp.EmitBalanceStart (this.Number, balance.Number, this.IsNamed,  tail);
341
342                         int count = Expressions.Count;
343                         for (int i = 0; i < count; ++ i) {
344                                 Expression e;
345                                 if (reverse)
346                                         e = Expressions[count - i - 1];
347                                 else
348                                         e = Expressions[i];
349
350                                 e.Compile (cmp, reverse);
351                         }
352
353                         cmp.EmitBalance ();
354                         cmp.ResolveLink(tail);
355                 }
356
357                 private CapturingGroup balance;
358         }
359
360         class NonBacktrackingGroup : Group {
361                 public NonBacktrackingGroup () {
362                 }
363
364                 public override void Compile (ICompiler cmp, bool reverse) {
365                         LinkRef tail = cmp.NewLink ();
366
367                         cmp.EmitSub (tail);
368                         base.Compile (cmp, reverse);
369                         cmp.EmitTrue ();
370                         cmp.ResolveLink (tail);
371                 }
372
373                 public override bool IsComplex () {
374                         return true;
375                 }
376         }
377
378         // repetition
379
380         class Repetition : CompositeExpression {
381                 public Repetition (int min, int max, bool lazy) {
382                         Expressions.Add (null);
383
384                         this.min = min;
385                         this.max = max;
386                         this.lazy = lazy;
387                 }
388
389                 public Expression Expression {
390                         get { return Expressions[0]; }
391                         set { Expressions[0] = value; }
392                 }
393
394                 public int Minimum {
395                         get { return min; }
396                         set { min = value; }
397                 }
398
399                 public int Maximum {
400                         get { return max; }
401                         set { max = value; }
402                 }
403
404                 public bool Lazy {
405                         get { return lazy; }
406                         set { lazy = value; }
407                 }
408
409                 public override void Compile (ICompiler cmp, bool reverse) {
410                         if (Expression.IsComplex ()) {
411                                 LinkRef until = cmp.NewLink ();
412
413                                 cmp.EmitRepeat (min, max, lazy, until);
414                                 Expression.Compile (cmp, reverse);
415                                 cmp.EmitUntil (until);
416                         }
417                         else {
418                                 LinkRef tail = cmp.NewLink ();
419
420                                 cmp.EmitFastRepeat (min, max, lazy, tail);
421                                 Expression.Compile (cmp, reverse);
422                                 cmp.EmitTrue ();
423                                 cmp.ResolveLink (tail);
424                         }
425                 }
426
427                 public override void GetWidth (out int min, out int max) {
428                         Expression.GetWidth (out min, out max);
429                         min = min * this.min;
430                         if (max == Int32.MaxValue || this.max == 0xffff)
431                                 max = Int32.MaxValue;
432                         else
433                                 max = max * this.max;
434                 }
435
436                 public override AnchorInfo GetAnchorInfo (bool reverse) {
437                         int width = GetFixedWidth ();
438                         if (Minimum == 0)
439                                 return new AnchorInfo (this, width);
440
441                         AnchorInfo info = Expression.GetAnchorInfo (reverse);
442                         if (info.IsPosition)
443                                 return new AnchorInfo (this, info.Offset, width, info.Position);
444
445                         if (info.IsSubstring) {
446                                 if (info.IsComplete) {
447                                         // Minimum > 0
448                                         string str = info.Substring;
449                                         StringBuilder sb = new StringBuilder (str);
450                                         for (int i = 1; i < Minimum; ++ i)
451                                                 sb.Append (str);
452
453                                         return new AnchorInfo (this, 0, width, sb.ToString (), info.IgnoreCase);
454                                 }
455
456                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
457                         }
458
459                         return new AnchorInfo (this, width);
460                 }
461
462                 private int min, max;
463                 private bool lazy;
464         }
465
466         // assertions
467
468         abstract class Assertion : CompositeExpression {
469                 public Assertion () {
470                         Expressions.Add (null);         // true expression
471                         Expressions.Add (null);         // false expression
472                 }
473
474                 public Expression TrueExpression {
475                         get { return Expressions[0]; }
476                         set { Expressions[0] = value; }
477                 }
478
479                 public Expression FalseExpression {
480                         get { return Expressions[1]; }
481                         set { Expressions[1] = value; }
482                 }
483
484                 public override void GetWidth (out int min, out int max) {
485                         GetWidth (out min, out max, 2);
486
487                         if (TrueExpression == null || FalseExpression == null)
488                                 min = 0;
489                 }
490         }
491
492         class CaptureAssertion : Assertion {
493                 public CaptureAssertion () {
494                 }
495
496                 public CapturingGroup CapturingGroup {
497                         get { return group; }
498                         set { group = value; }
499                 }
500
501                 public override void Compile (ICompiler cmp, bool reverse) {
502                         int gid = group.Number;
503                         LinkRef tail = cmp.NewLink ();
504
505                         if (FalseExpression == null) {
506                                 //    IfDefined :1
507                                 //      <yes_exp>
508                                 // 1: <tail>
509
510                                 cmp.EmitIfDefined (gid, tail);
511                                 TrueExpression.Compile (cmp, reverse);
512                         }
513                         else {
514                                 //    IfDefined :1
515                                 //      <yes_expr>
516                                 //      Jump :2
517                                 // 1:   <no_expr>
518                                 // 2: <tail>
519
520                                 LinkRef false_expr = cmp.NewLink ();
521                                 cmp.EmitIfDefined (gid, false_expr);
522                                 TrueExpression.Compile (cmp, reverse);
523                                 cmp.EmitJump (tail);
524                                 cmp.ResolveLink (false_expr);
525                                 FalseExpression.Compile (cmp, reverse);
526                         }
527
528                         cmp.ResolveLink (tail);
529                 }
530
531                 public override bool IsComplex () {
532                         if (TrueExpression != null && TrueExpression.IsComplex ())
533                                 return true;
534                         if (FalseExpression != null && FalseExpression.IsComplex ())
535                                 return true;
536                         return GetFixedWidth () <= 0;
537                 }
538
539                 private CapturingGroup group;
540         }
541
542         class ExpressionAssertion : Assertion {
543                 public ExpressionAssertion () {
544                         Expressions.Add (null);         // test expression
545                 }
546
547                 public bool Reverse {
548                         get { return reverse; }
549                         set { reverse = value; }
550                 }
551
552                 public bool Negate {
553                         get { return negate; }
554                         set { negate = value; }
555                 }
556
557                 public Expression TestExpression {
558                         get { return Expressions[2]; }
559                         set { Expressions[2] = value; }
560                 }
561
562                 public override void Compile (ICompiler cmp, bool reverse) {
563                         LinkRef true_expr = cmp.NewLink ();
564                         LinkRef false_expr = cmp.NewLink ();
565
566                         // test op: positive / negative
567
568                         if (!negate)
569                                 cmp.EmitTest (true_expr, false_expr);
570                         else
571                                 cmp.EmitTest (false_expr, true_expr);
572
573                         // test expression: lookahead / lookbehind
574
575                         TestExpression.Compile (cmp, this.reverse);
576                         cmp.EmitTrue ();
577
578                         // target expressions
579
580                         if (TrueExpression == null) {                   // (?= ...)
581                                 //    Test :1, :2
582                                 //      <test_expr>
583                                 // :2   False
584                                 // :1   <tail>
585
586                                 cmp.ResolveLink (false_expr);
587                                 cmp.EmitFalse ();
588                                 cmp.ResolveLink (true_expr);
589                         }
590                         else {
591                                 cmp.ResolveLink (true_expr);
592                                 TrueExpression.Compile (cmp, reverse);
593
594                                 if (FalseExpression == null) {          // (?(...) ...)
595                                         //    Test :1, :2
596                                         //      <test_expr>
597                                         // :1   <yes_expr>
598                                         // :2   <tail>
599
600                                         cmp.ResolveLink (false_expr);
601                                 }
602                                 else {                                  // (?(...) ... | ...)
603                                         //    Test :1, :2
604                                         //      <test_expr>
605                                         // :1   <yes_expr>
606                                         //      Jump :3
607                                         // :2   <no_expr>
608                                         // :3   <tail>
609
610                                         LinkRef tail = cmp.NewLink ();
611
612                                         cmp.EmitJump (tail);
613                                         cmp.ResolveLink (false_expr);
614                                         FalseExpression.Compile (cmp, reverse);
615                                         cmp.ResolveLink (tail);
616                                 }
617                         }
618                 }
619
620                 public override bool IsComplex ()
621                 {
622                         return true;
623                 }
624
625                 private bool reverse, negate;
626         }
627
628         // alternation
629
630         class Alternation : CompositeExpression {
631                 public Alternation () {
632                 }
633
634                 public ExpressionCollection Alternatives {
635                         get { return Expressions; }
636                 }
637
638                 public void AddAlternative (Expression e) {
639                         Alternatives.Add (e);
640                 }
641
642                 public override void Compile (ICompiler cmp, bool reverse) {
643                         //                      LinkRef next = cmp.NewLink ();
644                         LinkRef tail = cmp.NewLink ();
645
646                         foreach (Expression e in Alternatives) {
647                                 LinkRef next = cmp.NewLink ();
648                                 cmp.EmitBranch (next);
649                                 e.Compile (cmp, reverse);
650                                 cmp.EmitJump (tail);
651                                 cmp.ResolveLink (next);
652                                 cmp.EmitBranchEnd();
653                         }
654
655                         cmp.EmitFalse ();
656                         cmp.ResolveLink (tail);
657                         cmp.EmitAlternationEnd();
658                 }
659
660                 public override void GetWidth (out int min, out int max) {
661                         GetWidth (out min, out max, Alternatives.Count);
662                 }
663         }
664
665         // terminal expressions
666
667         class Literal : Expression {
668                 public Literal (string str, bool ignore) {
669                         this.str = str;
670                         this.ignore = ignore;
671                 }
672
673                 public string String {
674                         get { return str; }
675                         set { str = value; }
676                 }
677
678                 public bool IgnoreCase {
679                         get { return ignore; }
680                         set { ignore = value; }
681                 }
682
683                 public override void Compile (ICompiler cmp, bool reverse) {
684                         if (str.Length == 0)
685                                 return;
686
687                         if (str.Length == 1)
688                                 cmp.EmitCharacter (str[0], false, ignore, reverse);
689                         else
690                                 cmp.EmitString (str, ignore, reverse);
691                 }
692
693                 public override void GetWidth (out int min, out int max) {
694                         min = max = str.Length;
695                 }
696
697                 public override AnchorInfo GetAnchorInfo (bool reverse) {
698                         return new AnchorInfo (this, 0, str.Length, str, ignore);
699                 }
700
701                 public override bool IsComplex () {
702                         return false;
703                 }
704
705                 private string str;
706                 private bool ignore;
707         }
708
709         class PositionAssertion : Expression {
710                 public PositionAssertion (Position pos) {
711                         this.pos = pos;
712                 }
713
714                 public Position Position {
715                         get { return pos; }
716                         set { pos = value; }
717                 }
718
719                 public override void Compile (ICompiler cmp, bool reverse) {
720                         cmp.EmitPosition (pos);
721                 }
722
723                 public override void GetWidth (out int min, out int max) {
724                         min = max = 0;
725                 }
726
727                 public override bool IsComplex () {
728                         return false;
729                 }
730
731                 public override AnchorInfo GetAnchorInfo (bool revers) {
732                         switch (pos) {
733                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
734                                 return new AnchorInfo (this, 0, 0, pos);
735
736                         default:
737                                 return new AnchorInfo (this, 0);
738                         }
739                 }
740
741                 private Position pos;
742         }
743
744         class Reference : Expression {
745                 public Reference (bool ignore) {
746                         this.ignore = ignore;
747                 }
748
749                 public CapturingGroup CapturingGroup {
750                         get { return group; }
751                         set { group = value; }
752                 }
753
754                 public bool IgnoreCase {
755                         get { return ignore; }
756                         set { ignore = value; }
757                 }
758
759                 public override void Compile (ICompiler cmp, bool reverse) {
760                         cmp.EmitReference (group.Number, ignore, reverse);
761                 }
762
763                 public override void GetWidth (out int min, out int max) {
764                         //group.GetWidth (out min, out max);
765                         // TODO set width to referenced group for non-cyclical references
766                         min = 0;
767                         max = Int32.MaxValue;
768                 }
769
770                 public override bool IsComplex () {
771                         return true;    // FIXME incorporate cyclic check
772                 }
773
774                 private CapturingGroup group;
775                 private bool ignore;
776         }
777
778         class CharacterClass : Expression {
779                 public CharacterClass (bool negate, bool ignore) {
780                         this.negate = negate;
781                         this.ignore = ignore;
782
783                         intervals = new IntervalCollection ();
784
785                         // initialize pos/neg category arrays
786
787                         int cat_size = (int) Category.LastValue;
788                         pos_cats = new BitArray (cat_size);
789                         neg_cats = new BitArray (cat_size);
790                 }
791
792                 public CharacterClass (Category cat, bool negate) : this (false, false) {
793                         this.AddCategory (cat, negate);
794                 }
795
796                 public bool Negate {
797                         get { return negate; }
798                         set { negate = value; }
799                 }
800
801                 public bool IgnoreCase {
802                         get { return ignore; }
803                         set { ignore = value; }
804                 }
805
806                 public void AddCategory (Category cat, bool negate) {
807                         int n = (int)cat;
808
809                         if (negate) {
810                                 neg_cats[n] = true;
811                         } else {
812                                 pos_cats[n] = true;
813                         }
814                 }
815
816                 public void AddCharacter (char c) {
817                         // TODO: this is certainly not the most efficient way of doing things
818                         // TODO: but at least it produces correct results.
819                         AddRange (c, c);
820                 }
821
822                 public void AddRange (char lo, char hi) {
823                         Interval new_interval = new Interval (lo, hi);
824
825                         // ignore case is on. we must make sure our interval does not
826                         // use upper case. if it does, we must normalize the upper case
827                         // characters into lower case.
828                         if (ignore) {
829                                 if (upper_case_characters.Intersects (new_interval)) {
830                                         Interval partial_new_interval;
831
832                                         if (new_interval.low < upper_case_characters.low) {
833                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
834                                                                                      new_interval.high +  distance_between_upper_and_lower_case);
835                                                 new_interval.high = upper_case_characters.low - 1;
836                                         }
837                                         else {
838                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
839                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);
840                                                 new_interval.low = upper_case_characters.high + 1;
841                                         }
842                                         intervals.Add (partial_new_interval);
843                                 }
844                                 else if (upper_case_characters.Contains (new_interval)) {
845                                         new_interval.high += distance_between_upper_and_lower_case;
846                                         new_interval.low += distance_between_upper_and_lower_case;
847                                 }
848                         }
849                         intervals.Add (new_interval);
850                 }
851
852                 public override void Compile (ICompiler cmp, bool reverse) {
853                         // create the meta-collection
854                         IntervalCollection meta =
855                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
856
857                         // count ops
858                         int count = meta.Count;
859                         for (int i = 0; i < pos_cats.Length; ++ i) {
860                                 if (pos_cats[i] || neg_cats [i])
861                                         ++ count;
862                         }
863
864                         if (count == 0)
865                                 return;
866
867                         // emit in op for |meta| > 1
868                         LinkRef tail = cmp.NewLink ();
869                         if (count > 1)
870                                 cmp.EmitIn (tail);
871
872                         // emit character/range/sets from meta-collection
873                         // we emit these first so that any 'ignore' flags will be noticed by the evaluator
874                         foreach (Interval a in meta) {
875                                 if (a.IsDiscontiguous) {                        // Set
876                                         BitArray bits = new BitArray (a.Size);
877                                         foreach (Interval b in intervals) {
878                                                 if (a.Contains (b)) {
879                                                         for (int i = b.low; i <= b.high; ++ i)
880                                                                 bits[i - a.low] = true;
881                                                 }
882                                         }
883
884                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
885                                 }
886                                 else if (a.IsSingleton)                         // Character
887                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
888                                 else                                            // Range
889                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
890                         }
891
892                         // emit categories
893                         for (int i = 0; i < pos_cats.Length; ++ i) {
894                                 if (pos_cats[i]) {
895                                         if (neg_cats [i])
896                                                 cmp.EmitCategory (Category.AnySingleline, negate, reverse);
897                                         else
898                                                 cmp.EmitCategory ((Category)i, negate, reverse);
899                                 } else if (neg_cats[i]) {
900                                         cmp.EmitNotCategory ((Category)i, negate, reverse);
901                                 }
902                         }
903
904                         // finish up
905                         if (count > 1) {
906                                 if (negate)
907                                         cmp.EmitTrue ();
908                                 else
909                                         cmp.EmitFalse ();
910
911                                 cmp.ResolveLink (tail);
912                         }
913                 }
914
915                 public override void GetWidth (out int min, out int max) {
916                         min = max = 1;
917                 }
918
919                 public override bool IsComplex () {
920                         return false;
921                 }
922
923                 // private
924
925                 private static double GetIntervalCost (Interval i) {
926                         // use op length as cost metric (=> optimize for space)
927
928                         if (i.IsDiscontiguous)
929                                 return 3 + ((i.Size + 0xf) >> 4);               // Set
930                         else if (i.IsSingleton)
931                                 return 2;                                       // Character
932                         else
933                                 return 3;                                       // Range
934                 }
935
936                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);
937                 private const int distance_between_upper_and_lower_case = 32;
938                 private bool negate, ignore;
939                 private BitArray pos_cats, neg_cats;
940                 private IntervalCollection intervals;
941         }
942
943         class AnchorInfo {
944                 private Expression expr;
945
946                 private Position pos;
947                 private int offset;
948
949                 private string str;
950                 private int width;
951                 private bool ignore;
952
953                 public AnchorInfo (Expression expr, int width) {
954                         this.expr = expr;
955                         this.offset = 0;
956                         this.width = width;
957
958                         this.str = null;
959                         this.ignore = false;
960                         this.pos = Position.Any;
961                 }
962
963                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
964                         this.expr = expr;
965                         this.offset = offset;
966                         this.width = width;
967
968                         this.str = ignore ? str.ToLower () : str;
969
970                         this.ignore = ignore;
971                         this.pos = Position.Any;
972                 }
973
974                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {
975                         this.expr = expr;
976                         this.offset = offset;
977                         this.width = width;
978
979                         this.pos = pos;
980
981                         this.str = null;
982                         this.ignore = false;
983                 }
984
985                 public Expression Expression {
986                         get { return expr; }
987                 }
988
989                 public int Offset {
990                         get { return offset; }
991                 }
992
993                 public int Width {
994                         get { return width; }
995                 }
996
997                 public int Length {
998                         get { return (str != null) ? str.Length : 0; }
999                 }
1000
1001                 public bool IsUnknownWidth {
1002                         get { return width < 0; }
1003                 }
1004
1005                 public bool IsComplete {
1006                         get { return Length == Width; }
1007                 }
1008
1009                 public string Substring {
1010                         get { return str; }
1011                 }
1012
1013                 public bool IgnoreCase {
1014                         get { return ignore; }
1015                 }
1016
1017                 public Position Position {
1018                         get { return pos; }
1019                 }
1020
1021                 public bool IsSubstring {
1022                         get { return str != null; }
1023                 }
1024
1025                 public bool IsPosition {
1026                         get { return pos != Position.Any; }
1027                 }
1028
1029                 public Interval GetInterval () {
1030                         return GetInterval (0);
1031                 }
1032
1033                 public Interval GetInterval (int start) {
1034                         if (!IsSubstring)
1035                                 return Interval.Empty;
1036
1037                         return new Interval (start + Offset, start + Offset + Length - 1);
1038                 }
1039         }
1040 }