2004-03-17 Francois Beauchemin <beauche@softhome.net>
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //\r
2 // assembly:    System\r
3 // namespace:   System.Text.RegularExpressions\r
4 // file:        syntax.cs\r
5 //\r
6 // author:      Dan Lewis (dlewis@gmx.co.uk)\r
7 //              (c) 2002\r
8 \r
9 using System;\r
10 using System.Collections;\r
11 \r
12 namespace System.Text.RegularExpressions.Syntax {\r
13         // collection classes\r
14         \r
15         class ExpressionCollection : CollectionBase {\r
16                 public void Add (Expression e) {\r
17                         List.Add (e);\r
18                 }\r
19 \r
20                 public Expression this[int i] {\r
21                         get { return (Expression)List[i]; }\r
22                         set { List[i] = value; }\r
23                 }\r
24 \r
25                 protected override void OnValidate (object o) {\r
26                         // allow null elements\r
27                 }\r
28         }\r
29 \r
30         // abstract classes\r
31         \r
32         abstract class Expression {\r
33                 public abstract void Compile (ICompiler cmp, bool reverse);\r
34                 public abstract void GetWidth (out int min, out int max);\r
35 \r
36                 public int GetFixedWidth () {\r
37                         int min, max;\r
38                         GetWidth (out min, out max);\r
39 \r
40                         if (min == max)\r
41                                 return min;\r
42 \r
43                         return -1;\r
44                 }\r
45 \r
46                 public virtual AnchorInfo GetAnchorInfo (bool reverse) {\r
47                         return new AnchorInfo (this, GetFixedWidth ());\r
48                 }\r
49 \r
50                 public virtual bool IsComplex () {\r
51                         return true;\r
52                 }\r
53         }\r
54 \r
55         // composite expressions\r
56         \r
57         abstract class CompositeExpression : Expression {\r
58                 public CompositeExpression () {\r
59                         expressions = new ExpressionCollection ();\r
60                 }\r
61 \r
62                 protected ExpressionCollection Expressions {\r
63                         get { return expressions; }\r
64                 }\r
65 \r
66                 protected void GetWidth (out int min, out int max, int count) {\r
67                         min = Int32.MaxValue;\r
68                         max = 0;\r
69                         bool empty = true;\r
70 \r
71                         for (int i = 0; i < count; ++ i) {\r
72                                 Expression e = Expressions[i];\r
73                                 if (e == null)\r
74                                         continue;\r
75                         \r
76                                 empty = false;\r
77                                 int a, b;\r
78                                 e.GetWidth (out a, out b);\r
79                                 if (a < min) min = a;\r
80                                 if (b > max) max = b;\r
81                         }\r
82 \r
83                         if (empty)\r
84                                 min = max = 0;\r
85                 }\r
86 \r
87                 private ExpressionCollection expressions;\r
88         }\r
89 \r
90         // groups\r
91         \r
92         class Group : CompositeExpression {\r
93                 public Group () {\r
94                 }\r
95 \r
96                 public Expression Expression {\r
97                         get { return Expressions[0]; }\r
98                         set { Expressions[0] = value; }\r
99                 }\r
100 \r
101                 public void AppendExpression (Expression e) {\r
102                         Expressions.Add (e);\r
103                 }\r
104 \r
105                 public override void Compile (ICompiler cmp, bool reverse) {\r
106                         int count = Expressions.Count;\r
107                         for (int i = 0; i < count; ++ i) {\r
108                                 Expression e;\r
109                                 if (reverse)\r
110                                         e = Expressions[count - i - 1];\r
111                                 else\r
112                                         e = Expressions[i];\r
113 \r
114                                 e.Compile (cmp, reverse);\r
115                         }\r
116                 }\r
117 \r
118                 public override void GetWidth (out int min, out int max) {\r
119                         min = 0;\r
120                         max = 0;\r
121 \r
122                         foreach (Expression e in Expressions) {\r
123                                 int a, b;\r
124                                 e.GetWidth (out a, out b);\r
125                                 min += a;\r
126                                 if (max == Int32.MaxValue || b == Int32.MaxValue)\r
127                                         max = Int32.MaxValue;\r
128                                 else\r
129                                         max += b;\r
130                         }\r
131                 }\r
132 \r
133                 public override AnchorInfo GetAnchorInfo (bool reverse) {\r
134                         int ptr;\r
135                         int width = GetFixedWidth ();\r
136 \r
137                         ArrayList infos = new ArrayList ();\r
138                         IntervalCollection segments = new IntervalCollection ();\r
139 \r
140                         // accumulate segments\r
141 \r
142                         ptr = 0;\r
143                         //foreach (Expression e in Expressions) {\r
144                         int count = Expressions.Count;\r
145                         for (int i = 0; i < count; ++ i) {\r
146                                 Expression e;\r
147                                 if (reverse)\r
148                                         e = Expressions [count - i - 1];\r
149                                 else\r
150                                         e = Expressions [i];            \r
151                                 \r
152                                 AnchorInfo info = e.GetAnchorInfo (reverse);\r
153                                 infos.Add (info);\r
154 \r
155                                 if (info.IsPosition)\r
156                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);\r
157 \r
158                                 if (info.IsSubstring)\r
159                                         segments.Add (info.GetInterval (ptr));\r
160 \r
161                                 if (info.IsUnknownWidth)\r
162                                         break;\r
163 \r
164                                 ptr += info.Width;\r
165                         }\r
166 \r
167                         // normalize and find the longest segment\r
168 \r
169                         segments.Normalize ();\r
170 \r
171                         Interval longest = Interval.Empty;\r
172                         foreach (Interval segment in segments) {\r
173                                 if (segment.Size > longest.Size)\r
174                                         longest = segment;\r
175                         }\r
176 \r
177                         // now chain the substrings that made this segment together\r
178 \r
179                         if (!longest.IsEmpty) {\r
180                                 string str = "";\r
181                                 ArrayList strs = new ArrayList();\r
182                                 bool ignore = false;\r
183 \r
184                                 ptr = 0;\r
185                                 \r
186                                 //foreach (AnchorInfo info in infos) {\r
187                                 for (int i = 0; i < infos.Count; ++ i) {\r
188                                         AnchorInfo info;\r
189 \r
190                                         info = (AnchorInfo)infos[i];            \r
191                                         \r
192                                         if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {\r
193                                                 //str += info.Substring;        // TODO mark subexpressions\r
194                                                 strs.Add(info.Substring);\r
195                                                 ignore |= info.IgnoreCase;\r
196                                         }\r
197 \r
198                                         \r
199                                                 if (info.IsUnknownWidth)                                                        \r
200                                                         break;\r
201                                         \r
202                                                 ptr += info.Width;\r
203                                 }       \r
204                                         \r
205                                 for (int i = 0; i< strs.Count; ++i)\r
206                                 {\r
207                                         if (reverse)\r
208                                                 str += strs [strs.Count - i - 1];\r
209                                         else\r
210                                                 str += strs [i];\r
211                                                         \r
212                                         \r
213                                 }\r
214                         \r
215 \r
216                                 return new AnchorInfo (this, longest.low, width, str, ignore);\r
217                         }\r
218 \r
219                         return new AnchorInfo (this, width);\r
220                 }\r
221 \r
222                 public override bool IsComplex () {\r
223                         bool comp = false;\r
224                         foreach (Expression e in Expressions) {\r
225                                 comp |= e.IsComplex ();\r
226                         }\r
227 \r
228                         return comp | GetFixedWidth () <= 0;\r
229                 }\r
230         }\r
231 \r
232         class RegularExpression : Group {\r
233                 public RegularExpression () {\r
234                         group_count = 0;\r
235                 }\r
236 \r
237                 public int GroupCount {\r
238                         get { return group_count; }\r
239                         set { group_count = value; }\r
240                 }\r
241 \r
242                 public override void Compile (ICompiler cmp, bool reverse) {\r
243                         // info block\r
244 \r
245                         int min, max;\r
246                         GetWidth (out min, out max);\r
247                         cmp.EmitInfo (group_count, min, max);\r
248 \r
249                         // anchoring expression\r
250 \r
251                         AnchorInfo info = GetAnchorInfo (reverse);\r
252                         //if (reverse)\r
253                         //      info = new AnchorInfo (this, GetFixedWidth ()); // FIXME\r
254 \r
255                         LinkRef pattern = cmp.NewLink ();\r
256                         cmp.EmitAnchor (reverse, info.Offset, pattern);\r
257 \r
258                         if (info.IsPosition)\r
259                                 cmp.EmitPosition (info.Position);\r
260                         else if (info.IsSubstring)\r
261                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);\r
262                         \r
263                         cmp.EmitTrue ();\r
264                         \r
265                         // pattern\r
266 \r
267                         cmp.ResolveLink (pattern);\r
268                         base.Compile (cmp, reverse);\r
269                         cmp.EmitTrue ();\r
270                 }\r
271 \r
272                 private int group_count;\r
273         }\r
274 \r
275         class CapturingGroup : Group {\r
276                 public CapturingGroup () {\r
277                         this.gid = 0;\r
278                         this.name = null;\r
279                 }\r
280 \r
281                 public int Number {\r
282                         get { return gid; }\r
283                         set { gid = value; }\r
284                 }\r
285 \r
286                 public string Name {\r
287                         get { return name; }\r
288                         set { name = value; }\r
289                 }\r
290 \r
291                 public bool IsNamed {\r
292                         get { return name != null; }\r
293                 }\r
294 \r
295                 public override void Compile (ICompiler cmp, bool reverse) {\r
296                         cmp.EmitOpen (gid);\r
297                         base.Compile (cmp, reverse);\r
298                         cmp.EmitClose (gid);\r
299                 }\r
300 \r
301                 public override bool IsComplex () {\r
302                         return true;\r
303                 }\r
304 \r
305                 private int gid;\r
306                 private string name;\r
307         }\r
308 \r
309         class BalancingGroup : CapturingGroup {\r
310                 public BalancingGroup () {\r
311                         this.balance = null;\r
312                 }\r
313 \r
314                 public CapturingGroup Balance {\r
315                         get { return balance; }\r
316                         set { balance = value; }\r
317                 }\r
318 \r
319                 public override void Compile (ICompiler cmp, bool reverse) {\r
320                         // can't invoke Group.Compile from here :(\r
321                         // so I'll just repeat the code\r
322                 \r
323                         int count = Expressions.Count;\r
324                         for (int i = 0; i < count; ++ i) {\r
325                                 Expression e;\r
326                                 if (reverse)\r
327                                         e = Expressions[count - i - 1];\r
328                                 else\r
329                                         e = Expressions[i];\r
330 \r
331                                 e.Compile (cmp, reverse);\r
332                         }\r
333 \r
334                         cmp.EmitBalance (this.Number, balance.Number);\r
335                 }\r
336 \r
337                 private CapturingGroup balance;\r
338         }\r
339 \r
340         class NonBacktrackingGroup : Group {\r
341                 public NonBacktrackingGroup () {\r
342                 }\r
343 \r
344                 public override void Compile (ICompiler cmp, bool reverse) {\r
345                         LinkRef tail = cmp.NewLink ();\r
346 \r
347                         cmp.EmitSub (tail);\r
348                         base.Compile (cmp, reverse);\r
349                         cmp.EmitTrue ();\r
350                         cmp.ResolveLink (tail);\r
351                 }\r
352 \r
353                 public override bool IsComplex () {\r
354                         return true;\r
355                 }\r
356         }\r
357 \r
358         // repetition\r
359 \r
360         class Repetition : CompositeExpression {\r
361                 public Repetition (int min, int max, bool lazy) {\r
362                         Expressions.Add (null);\r
363                         \r
364                         this.min = min;\r
365                         this.max = max;\r
366                         this.lazy = lazy;\r
367                 }\r
368 \r
369                 public Expression Expression {\r
370                         get { return Expressions[0]; }\r
371                         set { Expressions[0] = value; }\r
372                 }\r
373 \r
374                 public int Minimum {\r
375                         get { return min; }\r
376                         set { min = value; }\r
377                 }\r
378 \r
379                 public int Maximum {\r
380                         get { return max; }\r
381                         set { max = value; }\r
382                 }\r
383 \r
384                 public bool Lazy {\r
385                         get { return lazy; }\r
386                         set { lazy = value; }\r
387                 }\r
388 \r
389                 public override void Compile (ICompiler cmp, bool reverse) {\r
390                         if (Expression.IsComplex ()) {\r
391                                 LinkRef until = cmp.NewLink ();\r
392                                 \r
393                                 cmp.EmitRepeat (min, max, lazy, until);\r
394                                 Expression.Compile (cmp, reverse);\r
395                                 cmp.EmitUntil (until);\r
396                         }\r
397                         else {\r
398                                 LinkRef tail = cmp.NewLink ();\r
399 \r
400                                 cmp.EmitFastRepeat (min, max, lazy, tail);\r
401                                 Expression.Compile (cmp, reverse);\r
402                                 cmp.EmitTrue ();\r
403                                 cmp.ResolveLink (tail);\r
404                         }\r
405                 }\r
406 \r
407                 public override void GetWidth (out int min, out int max) {\r
408                         Expression.GetWidth (out min, out max);\r
409                         min = min * this.min;\r
410                         if (max == Int32.MaxValue || this.max == 0xffff)\r
411                                 max = Int32.MaxValue;\r
412                         else\r
413                                 max = max * this.max;\r
414                 }\r
415 \r
416                 public override AnchorInfo GetAnchorInfo (bool reverse) {\r
417                         int width = GetFixedWidth ();\r
418                         if (Minimum == 0)\r
419                                 return new AnchorInfo (this, width);\r
420                         \r
421                         AnchorInfo info = Expression.GetAnchorInfo (reverse);\r
422                         if (info.IsPosition)\r
423                                 return new AnchorInfo (this, info.Offset, width, info.Position);\r
424                         \r
425                         if (info.IsSubstring) {\r
426                                 if (info.IsComplete) {\r
427                                         string str = "";\r
428                                         for (int i = 0; i < Minimum; ++ i)\r
429                                                 str += info.Substring;\r
430 \r
431                                         return new AnchorInfo (this, 0, width, str, info.IgnoreCase);\r
432                                 }\r
433 \r
434                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);\r
435                         }\r
436 \r
437                         return new AnchorInfo (this, width);\r
438                 }\r
439 \r
440                 private int min, max;\r
441                 private bool lazy;\r
442         }\r
443 \r
444         // assertions\r
445 \r
446         abstract class Assertion : CompositeExpression {\r
447                 public Assertion () {\r
448                         Expressions.Add (null);         // true expression\r
449                         Expressions.Add (null);         // false expression\r
450                 }\r
451 \r
452                 public Expression TrueExpression {\r
453                         get { return Expressions[0]; }\r
454                         set { Expressions[0] = value; }\r
455                 }\r
456 \r
457                 public Expression FalseExpression {\r
458                         get { return Expressions[1]; }\r
459                         set { Expressions[1] = value; }\r
460                 }\r
461 \r
462                 public override void GetWidth (out int min, out int max) {\r
463                         GetWidth (out min, out max, 2);\r
464 \r
465                         if (TrueExpression == null || FalseExpression == null)\r
466                                 min = 0;\r
467                 }\r
468         }\r
469 \r
470         class CaptureAssertion : Assertion {\r
471                 public CaptureAssertion () {\r
472                 }\r
473 \r
474                 public CapturingGroup CapturingGroup {\r
475                         get { return group; }\r
476                         set { group = value; }\r
477                 }\r
478 \r
479                 public override void Compile (ICompiler cmp, bool reverse) {\r
480                         int gid = group.Number;\r
481                         LinkRef tail = cmp.NewLink ();\r
482 \r
483                         if (FalseExpression == null) {\r
484                                 //    IfDefined :1\r
485                                 //      <yes_exp>\r
486                                 // 1: <tail>\r
487                         \r
488                                 cmp.EmitIfDefined (gid, tail);\r
489                                 TrueExpression.Compile (cmp, reverse);\r
490                         }\r
491                         else {\r
492                                 //    IfDefined :1\r
493                                 //      <yes_expr>\r
494                                 //      Jump :2\r
495                                 // 1:   <no_expr>\r
496                                 // 2: <tail>\r
497                         \r
498                                 LinkRef false_expr = cmp.NewLink ();\r
499                                 cmp.EmitIfDefined (gid, false_expr);\r
500                                 TrueExpression.Compile (cmp, reverse);\r
501                                 cmp.EmitJump (tail);\r
502                                 cmp.ResolveLink (false_expr);\r
503                                 FalseExpression.Compile (cmp, reverse);\r
504                         }\r
505 \r
506                         cmp.ResolveLink (tail);\r
507                 }\r
508 \r
509                 public override bool IsComplex () {\r
510                         bool comp = false;\r
511                         if (TrueExpression != null)\r
512                                 comp |= TrueExpression.IsComplex ();\r
513                         if (FalseExpression != null)\r
514                                 comp |= FalseExpression.IsComplex ();\r
515 \r
516                         return comp | GetFixedWidth () <= 0;\r
517                 }\r
518 \r
519                 private CapturingGroup group;\r
520         }\r
521 \r
522         class ExpressionAssertion : Assertion {\r
523                 public ExpressionAssertion () {\r
524                         Expressions.Add (null);         // test expression\r
525                 }\r
526 \r
527                 public bool Reverse {\r
528                         get { return reverse; }\r
529                         set { reverse = value; }\r
530                 }\r
531 \r
532                 public bool Negate {\r
533                         get { return negate; }\r
534                         set { negate = value; }\r
535                 }\r
536 \r
537                 public Expression TestExpression {\r
538                         get { return Expressions[2]; }\r
539                         set { Expressions[2] = value; }\r
540                 }\r
541 \r
542                 public override void Compile (ICompiler cmp, bool reverse) {\r
543                         LinkRef true_expr = cmp.NewLink ();\r
544                         LinkRef false_expr = cmp.NewLink ();\r
545 \r
546                         // test op: positive / negative\r
547 \r
548                         if (!negate)\r
549                                 cmp.EmitTest (true_expr, false_expr);\r
550                         else\r
551                                 cmp.EmitTest (false_expr, true_expr);\r
552                         \r
553                         // test expression: lookahead / lookbehind\r
554 \r
555                         TestExpression.Compile (cmp, this.reverse);\r
556                         cmp.EmitTrue ();\r
557 \r
558                         // target expressions\r
559 \r
560                         if (TrueExpression == null) {                   // (?= ...)\r
561                                 //    Test :1, :2\r
562                                 //      <test_expr>\r
563                                 // :2   False\r
564                                 // :1   <tail>\r
565                         \r
566                                 cmp.ResolveLink (false_expr);\r
567                                 cmp.EmitFalse ();\r
568                                 cmp.ResolveLink (true_expr);\r
569                         }\r
570                         else {\r
571                                 cmp.ResolveLink (true_expr);\r
572                                 TrueExpression.Compile (cmp, reverse);\r
573                                 \r
574                                 if (FalseExpression == null) {          // (?(...) ...)\r
575                                         //    Test :1, :2\r
576                                         //      <test_expr>\r
577                                         // :1   <yes_expr>\r
578                                         // :2   <tail>\r
579 \r
580                                         cmp.ResolveLink (false_expr);\r
581                                 }\r
582                                 else {                                  // (?(...) ... | ...)\r
583                                         //    Test :1, :2\r
584                                         //      <test_expr>\r
585                                         // :1   <yes_expr>\r
586                                         //      Jump :3\r
587                                         // :2   <no_expr>\r
588                                         // :3   <tail>\r
589                                 \r
590                                         LinkRef tail = cmp.NewLink ();\r
591                                 \r
592                                         cmp.EmitJump (tail);\r
593                                         cmp.ResolveLink (false_expr);\r
594                                         FalseExpression.Compile (cmp, reverse);\r
595                                         cmp.ResolveLink (tail);\r
596                                 }\r
597                         }\r
598                 }\r
599 \r
600                 private bool reverse, negate;\r
601         }\r
602 \r
603         // alternation\r
604 \r
605         class Alternation : CompositeExpression {\r
606                 public Alternation () {\r
607                 }\r
608 \r
609                 public ExpressionCollection Alternatives {\r
610                         get { return Expressions; }\r
611                 }\r
612 \r
613                 public void AddAlternative (Expression e) {\r
614                         Alternatives.Add (e);\r
615                 }\r
616 \r
617                 public override void Compile (ICompiler cmp, bool reverse) {\r
618                         //                      LinkRef next = cmp.NewLink ();\r
619                         LinkRef tail = cmp.NewLink ();\r
620                 \r
621                         foreach (Expression e in Alternatives) {\r
622                                 LinkRef next = cmp.NewLink ();\r
623                                 cmp.EmitBranch (next);\r
624                                 e.Compile (cmp, reverse);\r
625                                 cmp.EmitJump (tail);\r
626                                 cmp.ResolveLink (next);\r
627                                 cmp.EmitBranchEnd();\r
628                         }\r
629 \r
630                         cmp.EmitFalse ();\r
631                         cmp.ResolveLink (tail);\r
632                         cmp.EmitAlternationEnd();\r
633                 }\r
634 \r
635                 public override void GetWidth (out int min, out int max) {\r
636                         GetWidth (out min, out max, Alternatives.Count);\r
637                 }\r
638 \r
639                 public override bool IsComplex () {\r
640                         bool comp = false;\r
641                         foreach (Expression e in Alternatives) {\r
642                                 comp |= e.IsComplex ();\r
643                         }\r
644 \r
645                         return comp | GetFixedWidth () <= 0;\r
646                 }\r
647         }\r
648 \r
649         // terminal expressions\r
650 \r
651         class Literal : Expression {\r
652                 public Literal (string str, bool ignore) {\r
653                         this.str = str;\r
654                         this.ignore = ignore;\r
655                 }\r
656 \r
657                 public string String {\r
658                         get { return str; }\r
659                         set { str = value; }\r
660                 }\r
661 \r
662                 public bool IgnoreCase {\r
663                         get { return ignore; }\r
664                         set { ignore = value; }\r
665                 }\r
666 \r
667                 public override void Compile (ICompiler cmp, bool reverse) {\r
668                         if (str.Length == 0)\r
669                                 return;\r
670 \r
671                         if (str.Length == 1)\r
672                                 cmp.EmitCharacter (str[0], false, ignore, reverse);\r
673                         else\r
674                                 cmp.EmitString (str, ignore, reverse);\r
675                 }\r
676 \r
677                 public override void GetWidth (out int min, out int max) {\r
678                         min = max = str.Length;\r
679                 }\r
680 \r
681                 public override AnchorInfo GetAnchorInfo (bool reverse) {\r
682                         return new AnchorInfo (this, 0, str.Length, str, ignore);\r
683                 }\r
684 \r
685                 public override bool IsComplex () {\r
686                         return false;\r
687                 }\r
688 \r
689                 private string str;\r
690                 private bool ignore;\r
691         }\r
692 \r
693         class PositionAssertion : Expression {\r
694                 public PositionAssertion (Position pos) {\r
695                         this.pos = pos;\r
696                 }\r
697 \r
698                 public Position Position {\r
699                         get { return pos; }\r
700                         set { pos = value; }\r
701                 }\r
702 \r
703                 public override void Compile (ICompiler cmp, bool reverse) {\r
704                         cmp.EmitPosition (pos);\r
705                 }\r
706 \r
707                 public override void GetWidth (out int min, out int max) {\r
708                         min = max = 0;\r
709                 }\r
710 \r
711                 public override bool IsComplex () {\r
712                         return false;\r
713                 }\r
714 \r
715                 public override AnchorInfo GetAnchorInfo (bool revers) {\r
716                         switch (pos) {\r
717                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:\r
718                                 return new AnchorInfo (this, 0, 0, pos);\r
719 \r
720                         default:\r
721                                 return new AnchorInfo (this, 0);\r
722                         }\r
723                 }\r
724 \r
725                 private Position pos;\r
726         }\r
727 \r
728         class Reference : Expression {\r
729                 public Reference (bool ignore) {\r
730                         this.ignore = ignore;\r
731                 }\r
732 \r
733                 public CapturingGroup CapturingGroup {\r
734                         get { return group; }\r
735                         set { group = value; }\r
736                 }\r
737 \r
738                 public bool IgnoreCase {\r
739                         get { return ignore; }\r
740                         set { ignore = value; }\r
741                 }\r
742 \r
743                 public override void Compile (ICompiler cmp, bool reverse) {\r
744                         cmp.EmitReference (group.Number, ignore, reverse);\r
745                 }\r
746 \r
747                 public override void GetWidth (out int min, out int max) {\r
748                         //group.GetWidth (out min, out max);\r
749                         // TODO set width to referenced group for non-cyclical references\r
750                         min = 0;\r
751                         max = Int32.MaxValue;\r
752                 }\r
753 \r
754                 public override bool IsComplex () {\r
755                         return true;    // FIXME incorporate cyclic check\r
756                 }\r
757 \r
758                 private CapturingGroup group;\r
759                 private bool ignore;\r
760         }\r
761 \r
762         class CharacterClass : Expression {\r
763                 public CharacterClass (bool negate, bool ignore) {\r
764                         this.negate = negate;\r
765                         this.ignore = ignore;\r
766 \r
767                         intervals = new IntervalCollection ();\r
768 \r
769                         // initialize pos/neg category arrays\r
770 \r
771                         Array cat_values = Enum.GetValues (typeof (Category));\r
772                         int cat_size = (int)(Category)cat_values.GetValue (cat_values.Length - 1) + 1;\r
773                         pos_cats = new bool[cat_size];\r
774                         neg_cats = new bool[cat_size];\r
775                         for (int i = 0; i < cat_size; ++ i) {\r
776                                 pos_cats[i] = false;\r
777                                 neg_cats[i] = false;\r
778                         }\r
779                 }\r
780 \r
781                 public CharacterClass (Category cat, bool negate) : this (false, false) {\r
782                         this.AddCategory (cat, negate);\r
783                 }\r
784 \r
785                 public bool Negate {\r
786                         get { return negate; }\r
787                         set { negate = value; }\r
788                 }\r
789 \r
790                 public bool IgnoreCase {\r
791                         get { return ignore; }\r
792                         set { ignore = value; }\r
793                 }\r
794 \r
795                 public void AddCategory (Category cat, bool negate) {\r
796                         int n = (int)cat;\r
797                         \r
798                         if (negate) {\r
799                                 if (pos_cats[n])\r
800                                         pos_cats[n] = false;\r
801 \r
802                                 neg_cats[n] = true;\r
803                         }\r
804                         else {\r
805                                 if (neg_cats[n])\r
806                                         neg_cats[n] = false;\r
807 \r
808                                 pos_cats[n] = true;\r
809                         }\r
810                 }\r
811 \r
812                 public void AddCharacter (char c) {\r
813                         // TODO: this is certainly not the most efficient way of doing things \r
814                         // TODO: but at least it produces correct results. \r
815                         AddRange (c, c);\r
816                 }\r
817 \r
818                 public void AddRange (char lo, char hi) {\r
819                         Interval new_interval = new Interval (lo, hi);\r
820  \r
821                         // ignore case is on. we must make sure our interval does not\r
822                         // use upper case. if it does, we must normalize the upper case\r
823                         // characters into lower case. \r
824                         if (ignore) {\r
825                                 if (upper_case_characters.Intersects (new_interval)) {\r
826                                         Interval partial_new_interval;\r
827  \r
828                                         if (new_interval.low < upper_case_characters.low) {\r
829                                                 partial_new_interval = new Interval (upper_case_characters.low + distance_between_upper_and_lower_case, \r
830                                                                                      new_interval.high +  distance_between_upper_and_lower_case);\r
831                                                 new_interval.high = upper_case_characters.low - 1;\r
832                                         }\r
833                                         else {\r
834                                                 partial_new_interval = new Interval (new_interval.low + distance_between_upper_and_lower_case, \r
835                                                                                      upper_case_characters.high + distance_between_upper_and_lower_case);\r
836                                                 new_interval.low = upper_case_characters.high + 1;\r
837                                         }\r
838                                         intervals.Add (partial_new_interval);\r
839                                 }\r
840                                 else if (upper_case_characters.Contains (new_interval)) {\r
841                                         new_interval.high += distance_between_upper_and_lower_case;\r
842                                         new_interval.low += distance_between_upper_and_lower_case;\r
843                                 }\r
844                         }\r
845                         intervals.Add (new_interval);\r
846                 }\r
847 \r
848                 public override void Compile (ICompiler cmp, bool reverse) {\r
849                         // create the meta-collection\r
850                         IntervalCollection meta =\r
851                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));\r
852 \r
853                         // count ops\r
854                         \r
855                         int count = meta.Count;\r
856                         for (int i = 0; i < pos_cats.Length; ++ i) {\r
857                                 if (pos_cats[i]) ++ count;\r
858                                 if (neg_cats[i]) ++ count;\r
859                         }\r
860 \r
861                         if (count == 0)\r
862                                 return;\r
863 \r
864                         // emit in op for |meta| > 1\r
865 \r
866                         LinkRef tail = cmp.NewLink ();\r
867                         if (count > 1)\r
868                                 cmp.EmitIn (tail);\r
869                                 \r
870 \r
871                         // emit categories\r
872 \r
873                         for (int i = 0; i < pos_cats.Length; ++ i) {\r
874                                 if (pos_cats[i])\r
875                                         cmp.EmitCategory ((Category)i, negate, reverse);\r
876                                 else if (neg_cats[i])\r
877                                         cmp.EmitCategory ((Category)i, !negate, reverse);\r
878                         }\r
879 \r
880                         // emit character/range/sets from meta-collection\r
881 \r
882                         foreach (Interval a in meta) {\r
883                                 if (a.IsDiscontiguous) {                        // Set\r
884                                         BitArray bits = new BitArray (a.Size);\r
885                                         foreach (Interval b in intervals) {\r
886                                                 if (a.Contains (b)) {\r
887                                                         for (int i = b.low; i <= b.high; ++ i)\r
888                                                                 bits[i - a.low] = true;\r
889                                                 }\r
890                                         }\r
891 \r
892                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);\r
893                                 }\r
894                                 else if (a.IsSingleton)                         // Character\r
895                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);\r
896                                 else                                            // Range\r
897                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);\r
898                         }\r
899                         \r
900                         // finish up\r
901 \r
902                         if (count > 1) {\r
903                                 if (negate)\r
904                                         cmp.EmitTrue ();\r
905                                 else\r
906                                         cmp.EmitFalse ();\r
907 \r
908                                 cmp.ResolveLink (tail);\r
909                         }\r
910                 }\r
911 \r
912                 public override void GetWidth (out int min, out int max) {\r
913                         min = max = 1;\r
914                 }\r
915 \r
916                 public override bool IsComplex () {\r
917                         return false;\r
918                 }\r
919 \r
920                 // private\r
921 \r
922                 private static double GetIntervalCost (Interval i) {\r
923                         // use op length as cost metric (=> optimize for space)\r
924                 \r
925                         if (i.IsDiscontiguous)\r
926                                 return 3 + ((i.Size + 0xf) >> 4);               // Set\r
927                         else if (i.IsSingleton)\r
928                                 return 2;                                       // Character\r
929                         else\r
930                                 return 3;                                       // Range\r
931                 }\r
932 \r
933                 private static Interval upper_case_characters = new Interval ((char)65, (char)90);\r
934                 private const int distance_between_upper_and_lower_case = 32;\r
935                 private bool negate, ignore;\r
936                 private bool[] pos_cats, neg_cats;\r
937                 private IntervalCollection intervals;\r
938         }\r
939 \r
940         class AnchorInfo {\r
941                 private Expression expr;\r
942 \r
943                 private Position pos;\r
944                 private int offset;\r
945 \r
946                 private string str;\r
947                 private int width;\r
948                 private bool ignore;\r
949 \r
950                 public AnchorInfo (Expression expr, int width) {\r
951                         this.expr = expr;\r
952                         this.offset = 0;\r
953                         this.width = width;\r
954 \r
955                         this.str = null;\r
956                         this.ignore = false;\r
957                         this.pos = Position.Any;\r
958                 }\r
959                 \r
960                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {\r
961                         this.expr = expr;\r
962                         this.offset = offset;\r
963                         this.width = width;\r
964 \r
965                         this.str = ignore ? str.ToLower () : str;\r
966 \r
967                         this.ignore = ignore;\r
968                         this.pos = Position.Any;\r
969                 }\r
970 \r
971                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {\r
972                         this.expr = expr;\r
973                         this.offset = offset;\r
974                         this.width = width;\r
975 \r
976                         this.pos = pos;\r
977 \r
978                         this.str = null;\r
979                         this.ignore = false;\r
980                 }\r
981 \r
982                 public Expression Expression {\r
983                         get { return expr; }\r
984                 }\r
985 \r
986                 public int Offset {\r
987                         get { return offset; }\r
988                 }\r
989 \r
990                 public int Width {\r
991                         get { return width; }\r
992                 }\r
993 \r
994                 public int Length {\r
995                         get { return (str != null) ? str.Length : 0; }\r
996                 }\r
997 \r
998                 public bool IsUnknownWidth {\r
999                         get { return width < 0; }\r
1000                 }\r
1001 \r
1002                 public bool IsComplete {\r
1003                         get { return Length == Width; }\r
1004                 }\r
1005 \r
1006                 public string Substring {\r
1007                         get { return str; }\r
1008                 }\r
1009 \r
1010                 public bool IgnoreCase {\r
1011                         get { return ignore; }\r
1012                 }\r
1013 \r
1014                 public Position Position {\r
1015                         get { return pos; }\r
1016                 }\r
1017 \r
1018                 public bool IsSubstring {\r
1019                         get { return str != null; }\r
1020                 }\r
1021 \r
1022                 public bool IsPosition {\r
1023                         get { return pos != Position.Any; }\r
1024                 }\r
1025 \r
1026                 public Interval GetInterval () {\r
1027                         return GetInterval (0);\r
1028                 }\r
1029 \r
1030                 public Interval GetInterval (int start) {\r
1031                         if (!IsSubstring)\r
1032                                 return Interval.Empty;\r
1033 \r
1034                         return new Interval (start + Offset, start + Offset + Length - 1);\r
1035                 }\r
1036         }\r
1037 }\r
1038 \r