Added Symbolicate tool.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //
2 // assembly:    System
3 // namespace:   System.Text.RegularExpressions
4 // file:        syntax.cs
5 //
6 // author:      Dan Lewis (dlewis@gmx.co.uk)
7 //              (c) 2002
8
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 //
29
30 using System;
31 using System.Collections;
32
33 namespace System.Text.RegularExpressions.Syntax {
34         // collection classes
35
36         class ExpressionCollection : CollectionBase {
37                 public void Add (Expression e) {
38                         List.Add (e);
39                 }
40
41                 public Expression this[int i] {
42                         get { return (Expression)List[i]; }
43                         set { List[i] = value; }
44                 }
45
46                 protected override void OnValidate (object o) {
47                         // allow null elements
48                 }
49         }
50
51         // abstract classes
52
53         abstract class Expression {
54                 public abstract void Compile (ICompiler cmp, bool reverse);
55                 public abstract void GetWidth (out int min, out int max);
56
57                 public int GetFixedWidth () {
58                         int min, max;
59                         GetWidth (out min, out max);
60
61                         if (min == max)
62                                 return min;
63
64                         return -1;
65                 }
66
67                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {
68                         return new AnchorInfo (this, GetFixedWidth ());
69                 }
70
71                 public abstract bool IsComplex ();
72         }
73
74         // composite expressions
75
76         abstract class CompositeExpression : Expression {
77                 public CompositeExpression () {
78                         expressions = new ExpressionCollection ();
79                 }
80
81                 protected ExpressionCollection Expressions {
82                         get { return expressions; }
83                 }
84
85                 protected void GetWidth (out int min, out int max, int count) {
86                         min = Int32.MaxValue;
87                         max = 0;
88                         bool empty = true;
89
90                         for (int i = 0; i < count; ++ i) {
91                                 Expression e = Expressions[i];
92                                 if (e == null)
93                                         continue;
94
95                                 empty = false;
96                                 int a, b;
97                                 e.GetWidth (out a, out b);
98                                 if (a < min) min = a;
99                                 if (b > max) max = b;
100                         }
101
102                         if (empty)
103                                 min = max = 0;
104                 }
105
106
107                 public override bool IsComplex ()
108                 {
109                         foreach (Expression e in Expressions) {
110                                 if (e.IsComplex ())
111                                         return true;
112                         }
113                         return GetFixedWidth () <= 0;
114                 }
115
116                 private ExpressionCollection expressions;
117         }
118
119         // groups
120
121         class Group : CompositeExpression {
122                 public Group () {
123                 }
124
125                 public Expression Expression {
126                         get { return Expressions[0]; }
127                         set { Expressions[0] = value; }
128                 }
129
130                 public void AppendExpression (Expression e) {
131                         Expressions.Add (e);
132                 }
133
134                 public override void Compile (ICompiler cmp, bool reverse) {
135                         int count = Expressions.Count;
136                         for (int i = 0; i < count; ++ i) {
137                                 Expression e;
138                                 if (reverse)
139                                         e = Expressions[count - i - 1];
140                                 else
141                                         e = Expressions[i];
142
143                                 e.Compile (cmp, reverse);
144                         }
145                 }
146
147                 public override void GetWidth (out int min, out int max) {
148                         min = 0;
149                         max = 0;
150
151                         foreach (Expression e in Expressions) {
152                                 int a, b;
153                                 e.GetWidth (out a, out b);
154                                 min += a;
155                                 if (max == Int32.MaxValue || b == Int32.MaxValue)
156                                         max = Int32.MaxValue;
157                                 else
158                                         max += b;
159                         }
160                 }
161
162                 public override AnchorInfo GetAnchorInfo (bool reverse)
163                 {
164                         int ptr;
165                         int width = GetFixedWidth ();
166
167                         ArrayList infos = new ArrayList ();
168                         IntervalCollection segments = new IntervalCollection ();
169
170                         // accumulate segments
171                         ptr = 0;
172                         int count = Expressions.Count;
173                         for (int i = 0; i < count; ++ i) {
174                                 Expression e;
175                                 if (reverse)
176                                         e = Expressions [count - i - 1];
177                                 else
178                                         e = Expressions [i];
179
180                                 AnchorInfo info = e.GetAnchorInfo (reverse);
181                                 infos.Add (info);
182
183                                 if (info.IsPosition)
184                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);
185
186                                 if (info.IsSubstring)
187                                         segments.Add (info.GetInterval (ptr));
188
189                                 if (info.IsUnknownWidth)
190                                         break;
191
192                                 ptr += info.Width;
193                         }
194
195                         // normalize and find the longest segment
196                         segments.Normalize ();
197
198                         Interval longest = Interval.Empty;
199                         foreach (Interval segment in segments) {
200                                 if (segment.Size > longest.Size)
201                                         longest = segment;
202                         }
203
204                         if (longest.IsEmpty)
205                                 return new AnchorInfo (this, width);
206
207                         // now chain the substrings that made this segment together
208                         bool ignore = false;
209                         int n_strings = 0;
210
211                         ptr = 0;
212                         for (int i = 0; i < infos.Count; ++i) {
213                                 AnchorInfo info = (AnchorInfo) infos [i];
214
215                                 if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {
216                                         ignore |= info.IgnoreCase;
217                                         infos [n_strings ++] = info;
218                                 }
219
220                                 if (info.IsUnknownWidth)
221                                         break;
222
223                                 ptr += info.Width;
224                         }
225
226                         StringBuilder sb = new StringBuilder ();
227                         for (int i = 0; i < n_strings; ++i) {
228                                 AnchorInfo info;
229                                 if (reverse)
230                                         info = (AnchorInfo) infos [n_strings - i - 1];
231                                 else
232                                         info = (AnchorInfo) infos [i];
233                                 sb.Append (info.Substring);
234                         }
235
236                         if (sb.Length == longest.Size)
237                                 return new AnchorInfo (this, longest.low, width, sb.ToString (), ignore);
238                         // were the string segments overlapping?
239                         if (sb.Length > longest.Size) {
240                                 Console.Error.WriteLine ("overlapping?");
241                                 return new AnchorInfo (this, width);
242                         }
243                         throw new SystemException ("Shouldn't happen");
244                 }
245         }
246
247         class RegularExpression : Group {
248                 public RegularExpression () {
249                         group_count = 0;
250                 }
251
252                 public int GroupCount {
253                         get { return group_count; }
254                         set { group_count = value; }
255                 }
256
257                 public override void Compile (ICompiler cmp, bool reverse) {
258                         // info block
259
260                         int min, max;
261                         GetWidth (out min, out max);
262                         cmp.EmitInfo (group_count, min, max);
263
264                         // anchoring expression
265
266                         AnchorInfo info = GetAnchorInfo (reverse);
267                         //if (reverse)
268                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME
269
270                         LinkRef pattern = cmp.NewLink ();
271                         cmp.EmitAnchor (reverse, info.Offset, pattern);
272
273                         if (info.IsPosition)
274                                 cmp.EmitPosition (info.Position);
275                         else if (info.IsSubstring)
276                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);
277
278                         cmp.EmitTrue ();
279
280                         // pattern
281
282                         cmp.ResolveLink (pattern);
283                         base.Compile (cmp, reverse);
284                         cmp.EmitTrue ();
285                 }
286
287                 private int group_count;
288         }
289
290         class CapturingGroup : Group, IComparable {
291                 public CapturingGroup () {
292                         this.gid = 0;
293                         this.name = null;
294                 }
295
296                 public int Index {
297                         get { return gid; }
298                         set { gid = value; }
299                 }
300
301                 public string Name {
302                         get { return name; }
303                         set { name = value; }
304                 }
305
306                 public bool IsNamed {
307                         get { return name != null; }
308                 }
309
310                 public override void Compile (ICompiler cmp, bool reverse) {
311                         cmp.EmitOpen (gid);
312                         base.Compile (cmp, reverse);
313                         cmp.EmitClose (gid);
314                 }
315
316                 public override bool IsComplex () {
317                         return true;
318                 }
319
320                 public int CompareTo (object other)
321                 {
322                         return gid - ((CapturingGroup) other).gid;
323                 }
324
325                 private int gid;
326                 private string name;
327         }
328
329         class BalancingGroup : CapturingGroup {
330                 public BalancingGroup () {
331                         this.balance = null;
332                 }
333
334                 public CapturingGroup Balance {
335                         get { return balance; }
336                         set { balance = value; }
337                 }
338
339                 public override void Compile (ICompiler cmp, bool reverse) {
340                         // can't invoke Group.Compile from here :(
341                         // so I'll just repeat the code
342
343                         LinkRef tail = cmp.NewLink ();
344
345                         cmp.EmitBalanceStart (this.Index, balance.Index, this.IsNamed,  tail);
346
347                         int count = Expressions.Count;
348                         for (int i = 0; i < count; ++ i) {
349                                 Expression e;
350                                 if (reverse)
351                                         e = Expressions[count - i - 1];
352                                 else
353                                         e = Expressions[i];
354
355                                 e.Compile (cmp, reverse);
356                         }
357
358                         cmp.EmitBalance ();
359                         cmp.ResolveLink(tail);
360                 }
361
362                 private CapturingGroup balance;
363         }
364
365         class NonBacktrackingGroup : Group {
366                 public NonBacktrackingGroup () {
367                 }
368
369                 public override void Compile (ICompiler cmp, bool reverse) {
370                         LinkRef tail = cmp.NewLink ();
371
372                         cmp.EmitSub (tail);
373                         base.Compile (cmp, reverse);
374                         cmp.EmitTrue ();
375                         cmp.ResolveLink (tail);
376                 }
377
378                 public override bool IsComplex () {
379                         return true;
380                 }
381         }
382
383         // repetition
384
385         class Repetition : CompositeExpression {
386                 public Repetition (int min, int max, bool lazy) {
387                         Expressions.Add (null);
388
389                         this.min = min;
390                         this.max = max;
391                         this.lazy = lazy;
392                 }
393
394                 public Expression Expression {
395                         get { return Expressions[0]; }
396                         set { Expressions[0] = value; }
397                 }
398
399                 public int Minimum {
400                         get { return min; }
401                         set { min = value; }
402                 }
403
404                 public int Maximum {
405                         get { return max; }
406                         set { max = value; }
407                 }
408
409                 public bool Lazy {
410                         get { return lazy; }
411                         set { lazy = value; }
412                 }
413
414                 public override void Compile (ICompiler cmp, bool reverse) {
415                         if (Expression.IsComplex ()) {
416                                 LinkRef until = cmp.NewLink ();
417
418                                 cmp.EmitRepeat (min, max, lazy, until);
419                                 Expression.Compile (cmp, reverse);
420                                 cmp.EmitUntil (until);
421                         }
422                         else {
423                                 LinkRef tail = cmp.NewLink ();
424
425                                 cmp.EmitFastRepeat (min, max, lazy, tail);
426                                 Expression.Compile (cmp, reverse);
427                                 cmp.EmitTrue ();
428                                 cmp.ResolveLink (tail);
429                         }
430                 }
431
432                 public override void GetWidth (out int min, out int max) {
433                         Expression.GetWidth (out min, out max);
434                         min = min * this.min;
435                         if (max == Int32.MaxValue || this.max == 0xffff)
436                                 max = Int32.MaxValue;
437                         else
438                                 max = max * this.max;
439                 }
440
441                 public override AnchorInfo GetAnchorInfo (bool reverse) {
442                         int width = GetFixedWidth ();
443                         if (Minimum == 0)
444                                 return new AnchorInfo (this, width);
445
446                         AnchorInfo info = Expression.GetAnchorInfo (reverse);
447                         if (info.IsPosition)
448                                 return new AnchorInfo (this, info.Offset, width, info.Position);
449
450                         if (info.IsSubstring) {
451                                 if (info.IsComplete) {
452                                         // Minimum > 0
453                                         string str = info.Substring;
454                                         StringBuilder sb = new StringBuilder (str);
455                                         for (int i = 1; i < Minimum; ++ i)
456                                                 sb.Append (str);
457
458                                         return new AnchorInfo (this, 0, width, sb.ToString (), info.IgnoreCase);
459                                 }
460
461                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);
462                         }
463
464                         return new AnchorInfo (this, width);
465                 }
466
467                 private int min, max;
468                 private bool lazy;
469         }
470
471         // assertions
472
473         abstract class Assertion : CompositeExpression {
474                 public Assertion () {
475                         Expressions.Add (null);         // true expression
476                         Expressions.Add (null);         // false expression
477                 }
478
479                 public Expression TrueExpression {
480                         get { return Expressions[0]; }
481                         set { Expressions[0] = value; }
482                 }
483
484                 public Expression FalseExpression {
485                         get { return Expressions[1]; }
486                         set { Expressions[1] = value; }
487                 }
488
489                 public override void GetWidth (out int min, out int max) {
490                         GetWidth (out min, out max, 2);
491
492                         if (TrueExpression == null || FalseExpression == null)
493                                 min = 0;
494                 }
495         }
496
497         class CaptureAssertion : Assertion {
498                 public CaptureAssertion (Literal l) {
499                         literal = l;
500                 }
501
502                 public CapturingGroup CapturingGroup {
503                         get { return group; }
504                         set { group = value; }
505                 }
506
507                 public override void Compile (ICompiler cmp, bool reverse) {
508                         if (group == null) {
509                                 Alternate.Compile (cmp, reverse);
510                                 return;
511                         }
512
513                         int gid = group.Index;
514                         LinkRef tail = cmp.NewLink ();
515
516                         if (FalseExpression == null) {
517                                 //    IfDefined :1
518                                 //      <yes_exp>
519                                 // 1: <tail>
520
521                                 cmp.EmitIfDefined (gid, tail);
522                                 TrueExpression.Compile (cmp, reverse);
523                         }
524                         else {
525                                 //    IfDefined :1
526                                 //      <yes_expr>
527                                 //      Jump :2
528                                 // 1:   <no_expr>
529                                 // 2: <tail>
530
531                                 LinkRef false_expr = cmp.NewLink ();
532                                 cmp.EmitIfDefined (gid, false_expr);
533                                 TrueExpression.Compile (cmp, reverse);
534                                 cmp.EmitJump (tail);
535                                 cmp.ResolveLink (false_expr);
536                                 FalseExpression.Compile (cmp, reverse);
537                         }
538
539                         cmp.ResolveLink (tail);
540                 }
541
542                 public override bool IsComplex () {
543                         if (group == null)
544                                 return Alternate.IsComplex ();
545                         if (TrueExpression != null && TrueExpression.IsComplex ())
546                                 return true;
547                         if (FalseExpression != null && FalseExpression.IsComplex ())
548                                 return true;
549                         return GetFixedWidth () <= 0;
550                 }
551
552                 ExpressionAssertion Alternate {
553                         get {
554                                 if (alternate == null) {
555                                         alternate = new ExpressionAssertion ();
556                                         alternate.TrueExpression = TrueExpression;
557                                         alternate.FalseExpression = FalseExpression;
558                                         alternate.TestExpression = literal;
559                                 }
560                                 return alternate;
561                         }
562                 }
563
564                 private ExpressionAssertion alternate;
565                 private CapturingGroup group;
566                 private Literal literal;
567         }
568
569         class ExpressionAssertion : Assertion {
570                 public ExpressionAssertion () {
571                         Expressions.Add (null);         // test expression
572                 }
573
574                 public bool Reverse {
575                         get { return reverse; }
576                         set { reverse = value; }
577                 }
578
579                 public bool Negate {
580                         get { return negate; }
581                         set { negate = value; }
582                 }
583
584                 public Expression TestExpression {
585                         get { return Expressions[2]; }
586                         set { Expressions[2] = value; }
587                 }
588
589                 public override void Compile (ICompiler cmp, bool reverse) {
590                         LinkRef true_expr = cmp.NewLink ();
591                         LinkRef false_expr = cmp.NewLink ();
592
593                         // test op: positive / negative
594
595                         if (!negate)
596                                 cmp.EmitTest (true_expr, false_expr);
597                         else
598                                 cmp.EmitTest (false_expr, true_expr);
599
600                         // test expression: lookahead / lookbehind
601
602                         TestExpression.Compile (cmp, this.reverse);
603                         cmp.EmitTrue ();
604
605                         // target expressions
606
607                         if (TrueExpression == null) {                   // (?= ...)
608                                 //    Test :1, :2
609                                 //      <test_expr>
610                                 // :2   False
611                                 // :1   <tail>
612
613                                 cmp.ResolveLink (false_expr);
614                                 cmp.EmitFalse ();
615                                 cmp.ResolveLink (true_expr);
616                         }
617                         else {
618                                 cmp.ResolveLink (true_expr);
619                                 TrueExpression.Compile (cmp, reverse);
620
621                                 if (FalseExpression == null) {          // (?(...) ...)
622                                         //    Test :1, :2
623                                         //      <test_expr>
624                                         // :1   <yes_expr>
625                                         // :2   <tail>
626
627                                         cmp.ResolveLink (false_expr);
628                                 }
629                                 else {                                  // (?(...) ... | ...)
630                                         //    Test :1, :2
631                                         //      <test_expr>
632                                         // :1   <yes_expr>
633                                         //      Jump :3
634                                         // :2   <no_expr>
635                                         // :3   <tail>
636
637                                         LinkRef tail = cmp.NewLink ();
638
639                                         cmp.EmitJump (tail);
640                                         cmp.ResolveLink (false_expr);
641                                         FalseExpression.Compile (cmp, reverse);
642                                         cmp.ResolveLink (tail);
643                                 }
644                         }
645                 }
646
647                 public override bool IsComplex ()
648                 {
649                         return true;
650                 }
651
652                 private bool reverse, negate;
653         }
654
655         // alternation
656
657         class Alternation : CompositeExpression {
658                 public Alternation () {
659                 }
660
661                 public ExpressionCollection Alternatives {
662                         get { return Expressions; }
663                 }
664
665                 public void AddAlternative (Expression e) {
666                         Alternatives.Add (e);
667                 }
668
669                 public override void Compile (ICompiler cmp, bool reverse) {
670                         //                      LinkRef next = cmp.NewLink ();
671                         LinkRef tail = cmp.NewLink ();
672
673                         foreach (Expression e in Alternatives) {
674                                 LinkRef next = cmp.NewLink ();
675                                 cmp.EmitBranch (next);
676                                 e.Compile (cmp, reverse);
677                                 cmp.EmitJump (tail);
678                                 cmp.ResolveLink (next);
679                                 cmp.EmitBranchEnd();
680                         }
681
682                         cmp.EmitFalse ();
683                         cmp.ResolveLink (tail);
684                         cmp.EmitAlternationEnd();
685                 }
686
687                 public override void GetWidth (out int min, out int max) {
688                         GetWidth (out min, out max, Alternatives.Count);
689                 }
690         }
691
692         // terminal expressions
693
694         class Literal : Expression {
695                 public Literal (string str, bool ignore) {
696                         this.str = str;
697                         this.ignore = ignore;
698                 }
699
700                 public string String {
701                         get { return str; }
702                         set { str = value; }
703                 }
704
705                 public bool IgnoreCase {
706                         get { return ignore; }
707                         set { ignore = value; }
708                 }
709
710                 public static void CompileLiteral (string str, ICompiler cmp, bool ignore, bool reverse)
711                 {
712                         if (str.Length == 0)
713                                 return;
714
715                         if (str.Length == 1)
716                                 cmp.EmitCharacter (str[0], false, ignore, reverse);
717                         else
718                                 cmp.EmitString (str, ignore, reverse);
719                 }
720
721                 public override void Compile (ICompiler cmp, bool reverse)
722                 {
723                         CompileLiteral (str, cmp, ignore, reverse);
724                 }
725
726                 public override void GetWidth (out int min, out int max) {
727                         min = max = str.Length;
728                 }
729
730                 public override AnchorInfo GetAnchorInfo (bool reverse) {
731                         return new AnchorInfo (this, 0, str.Length, str, ignore);
732                 }
733
734                 public override bool IsComplex () {
735                         return false;
736                 }
737
738                 private string str;
739                 private bool ignore;
740         }
741
742         class PositionAssertion : Expression {
743                 public PositionAssertion (Position pos) {
744                         this.pos = pos;
745                 }
746
747                 public Position Position {
748                         get { return pos; }
749                         set { pos = value; }
750                 }
751
752                 public override void Compile (ICompiler cmp, bool reverse) {
753                         cmp.EmitPosition (pos);
754                 }
755
756                 public override void GetWidth (out int min, out int max) {
757                         min = max = 0;
758                 }
759
760                 public override bool IsComplex () {
761                         return false;
762                 }
763
764                 public override AnchorInfo GetAnchorInfo (bool revers) {
765                         switch (pos) {
766                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:
767                                 return new AnchorInfo (this, 0, 0, pos);
768
769                         default:
770                                 return new AnchorInfo (this, 0);
771                         }
772                 }
773
774                 private Position pos;
775         }
776
777         class Reference : Expression {
778                 public Reference (bool ignore) {
779                         this.ignore = ignore;
780                 }
781
782                 public CapturingGroup CapturingGroup {
783                         get { return group; }
784                         set { group = value; }
785                 }
786
787                 public bool IgnoreCase {
788                         get { return ignore; }
789                         set { ignore = value; }
790                 }
791
792                 public override void Compile (ICompiler cmp, bool reverse) {
793                         cmp.EmitReference (group.Index, ignore, reverse);
794                 }
795
796                 public override void GetWidth (out int min, out int max) {
797                         //group.GetWidth (out min, out max);
798                         // TODO set width to referenced group for non-cyclical references
799                         min = 0;
800                         max = Int32.MaxValue;
801                 }
802
803                 public override bool IsComplex () {
804                         return true;    // FIXME incorporate cyclic check
805                 }
806
807                 private CapturingGroup group;
808                 private bool ignore;
809         }
810
811         class BackslashNumber : Reference {
812                 string literal;
813                 bool ecma;
814
815                 public BackslashNumber (bool ignore, bool ecma)
816                         : base (ignore)
817                 {
818                         this.ecma = ecma;
819                 }
820
821                 // Precondition: groups [num_str] == null
822                 public bool ResolveReference (string num_str, Hashtable groups)
823                 {
824                         if (ecma) {
825                                 int last_i = 0;
826                                 for (int i = 1; i < num_str.Length; ++i) {
827                                         if (groups [num_str.Substring (0, i)] != null)
828                                                 last_i = i;
829                                 }
830                                 if (last_i != 0) {
831                                         CapturingGroup = (CapturingGroup) groups [num_str.Substring (0, last_i)];
832                                         literal = num_str.Substring (last_i);
833                                         return true;
834                                 }
835                         } else {
836                                 if (num_str.Length == 1)
837                                         return false;
838                         }
839
840                         int ptr = 0;
841                         int as_octal = Parser.ParseOctal (num_str, ref ptr);
842                         // Since ParseOctal reads at most 3 digits, as_octal <= octal 0777
843                         if (as_octal == -1)
844                                 return false;
845                         if (as_octal > 0xff && ecma) {
846                                 as_octal /= 8;
847                                 --ptr;
848                         }
849                         as_octal &= 0xff;
850                         literal = ((char) as_octal) + num_str.Substring (ptr);
851                         return true;
852                 }
853
854                 public override void Compile (ICompiler cmp, bool reverse)
855                 {
856                         if (CapturingGroup != null)
857                                 base.Compile (cmp, reverse);
858                         if (literal != null)
859                                 Literal.CompileLiteral (literal, cmp, IgnoreCase, reverse);
860                 }
861         }
862
863         class CharacterClass : Expression {
864                 public CharacterClass (bool negate, bool ignore) {
865                         this.negate = negate;
866                         this.ignore = ignore;
867
868                         intervals = new IntervalCollection ();
869
870                         // initialize pos/neg category arrays
871
872                         int cat_size = (int) Category.LastValue;
873                         pos_cats = new BitArray (cat_size);
874                         neg_cats = new BitArray (cat_size);
875                 }
876
877                 public CharacterClass (Category cat, bool negate) : this (false, false) {
878                         this.AddCategory (cat, negate);
879                 }
880
881                 public bool Negate {
882                         get { return negate; }
883                         set { negate = value; }
884                 }
885
886                 public bool IgnoreCase {
887                         get { return ignore; }
888                         set { ignore = value; }
889                 }
890
891                 public void AddCategory (Category cat, bool negate) {
892                         int n = (int)cat;
893
894                         if (negate) {
895                                 neg_cats[n] = true;
896                         } else {
897                                 pos_cats[n] = true;
898                         }
899                 }
900
901                 public void AddCharacter (char c) {
902                         // TODO: this is certainly not the most efficient way of doing things
903                         // TODO: but at least it produces correct results.
904                         AddRange (c, c);
905                 }
906
907                 public void AddRange (char lo, char hi) {
908                         Interval new_interval = new Interval (lo, hi);
909
910                         // ignore case is on. we must make sure our interval does not
911                         // use upper case. if it does, we must normalize the upper case
912                         // characters into lower case.
913                         if (ignore) {
914                                 if (upper_case_characters.Intersects (new_interval)) {
915                                         Interval partial_new_interval;
916
917                                         if (new_interval.low < upper_case_characters.low) {
918                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case,
919                                                                                      new_interval.high +  distance_between_upper_and_lower_case);
920                                                 new_interval.high = upper_case_characters.low - 1;
921                                         }
922                                         else {
923                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case,
924                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);
925                                                 new_interval.low = upper_case_characters.high + 1;
926                                         }
927                                         intervals.Add (partial_new_interval);
928                                 }
929                                 else if (upper_case_characters.Contains (new_interval)) {
930                                         new_interval.high += distance_between_upper_and_lower_case;
931                                         new_interval.low += distance_between_upper_and_lower_case;
932                                 }
933                         }
934                         intervals.Add (new_interval);
935                 }
936
937                 public override void Compile (ICompiler cmp, bool reverse) {
938                         // create the meta-collection
939                         IntervalCollection meta =
940                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));
941
942                         // count ops
943                         int count = meta.Count;
944                         for (int i = 0; i < pos_cats.Length; ++ i) {
945                                 if (pos_cats[i] || neg_cats [i])
946                                         ++ count;
947                         }
948
949                         if (count == 0)
950                                 return;
951
952                         // emit in op for |meta| > 1
953                         LinkRef tail = cmp.NewLink ();
954                         if (count > 1)
955                                 cmp.EmitIn (tail);
956
957                         // emit character/range/sets from meta-collection
958                         // we emit these first so that any 'ignore' flags will be noticed by the evaluator
959                         foreach (Interval a in meta) {
960                                 if (a.IsDiscontiguous) {                        // Set
961                                         BitArray bits = new BitArray (a.Size);
962                                         foreach (Interval b in intervals) {
963                                                 if (a.Contains (b)) {
964                                                         for (int i = b.low; i <= b.high; ++ i)
965                                                                 bits[i - a.low] = true;
966                                                 }
967                                         }
968
969                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);
970                                 }
971                                 else if (a.IsSingleton)                         // Character
972                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);
973                                 else                                            // Range
974                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);
975                         }
976
977                         // emit categories
978                         for (int i = 0; i < pos_cats.Length; ++ i) {
979                                 if (pos_cats[i]) {
980                                         if (neg_cats [i])
981                                                 cmp.EmitCategory (Category.AnySingleline, negate, reverse);
982                                         else
983                                                 cmp.EmitCategory ((Category)i, negate, reverse);
984                                 } else if (neg_cats[i]) {
985                                         cmp.EmitNotCategory ((Category)i, negate, reverse);
986                                 }
987                         }
988
989                         // finish up
990                         if (count > 1) {
991                                 if (negate)
992                                         cmp.EmitTrue ();
993                                 else
994                                         cmp.EmitFalse ();
995
996                                 cmp.ResolveLink (tail);
997                         }
998                 }
999
1000                 public override void GetWidth (out int min, out int max) {
1001                         min = max = 1;
1002                 }
1003
1004                 public override bool IsComplex () {
1005                         return false;
1006                 }
1007
1008                 // private
1009
1010                 private static double GetIntervalCost (Interval i) {
1011                         // use op length as cost metric (=> optimize for space)
1012
1013                         if (i.IsDiscontiguous)
1014                                 return 3 + ((i.Size + 0xf) >> 4);               // Set
1015                         else if (i.IsSingleton)
1016                                 return 2;                                       // Character
1017                         else
1018                                 return 3;                                       // Range
1019                 }
1020
1021                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);
1022                 private const int distance_between_upper_and_lower_case = 32;
1023                 private bool negate, ignore;
1024                 private BitArray pos_cats, neg_cats;
1025                 private IntervalCollection intervals;
1026         }
1027
1028         class AnchorInfo {
1029                 private Expression expr;
1030
1031                 private Position pos;
1032                 private int offset;
1033
1034                 private string str;
1035                 private int width;
1036                 private bool ignore;
1037
1038                 public AnchorInfo (Expression expr, int width) {
1039                         this.expr = expr;
1040                         this.offset = 0;
1041                         this.width = width;
1042
1043                         this.str = null;
1044                         this.ignore = false;
1045                         this.pos = Position.Any;
1046                 }
1047
1048                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {
1049                         this.expr = expr;
1050                         this.offset = offset;
1051                         this.width = width;
1052
1053                         this.str = ignore ? str.ToLower () : str;
1054
1055                         this.ignore = ignore;
1056                         this.pos = Position.Any;
1057                 }
1058
1059                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {
1060                         this.expr = expr;
1061                         this.offset = offset;
1062                         this.width = width;
1063
1064                         this.pos = pos;
1065
1066                         this.str = null;
1067                         this.ignore = false;
1068                 }
1069
1070                 public Expression Expression {
1071                         get { return expr; }
1072                 }
1073
1074                 public int Offset {
1075                         get { return offset; }
1076                 }
1077
1078                 public int Width {
1079                         get { return width; }
1080                 }
1081
1082                 public int Length {
1083                         get { return (str != null) ? str.Length : 0; }
1084                 }
1085
1086                 public bool IsUnknownWidth {
1087                         get { return width < 0; }
1088                 }
1089
1090                 public bool IsComplete {
1091                         get { return Length == Width; }
1092                 }
1093
1094                 public string Substring {
1095                         get { return str; }
1096                 }
1097
1098                 public bool IgnoreCase {
1099                         get { return ignore; }
1100                 }
1101
1102                 public Position Position {
1103                         get { return pos; }
1104                 }
1105
1106                 public bool IsSubstring {
1107                         get { return str != null; }
1108                 }
1109
1110                 public bool IsPosition {
1111                         get { return pos != Position.Any; }
1112                 }
1113
1114                 public Interval GetInterval () {
1115                         return GetInterval (0);
1116                 }
1117
1118                 public Interval GetInterval (int start) {
1119                         if (!IsSubstring)
1120                                 return Interval.Empty;
1121
1122                         return new Interval (start + Offset, start + Offset + Length - 1);
1123                 }
1124         }
1125 }