Regex classes, initial revision.
[mono.git] / mcs / class / System / System.Text.RegularExpressions / syntax.cs
1 //\r
2 // assembly:    System\r
3 // namespace:   System.Text.RegularExpressions\r
4 // file:        syntax.cs\r
5 //\r
6 // author:      Dan Lewis (dlewis@gmx.co.uk)\r
7 //              (c) 2002\r
8 \r
9 using System;\r
10 using System.Collections;\r
11 \r
12 namespace System.Text.RegularExpressions.Syntax {\r
13         // collection classes\r
14         \r
15         class ExpressionCollection : CollectionBase {\r
16                 public void Add (Expression e) {\r
17                         List.Add (e);\r
18                 }\r
19 \r
20                 public Expression this[int i] {\r
21                         get { return (Expression)List[i]; }\r
22                         set { List[i] = value; }\r
23                 }\r
24 \r
25                 protected override void OnValidate (object o) {\r
26                         // allow null elements\r
27                 }\r
28         }\r
29 \r
30         // abstract classes\r
31         \r
32         abstract class Expression {\r
33                 public abstract void Compile (ICompiler cmp, bool reverse);\r
34                 public abstract void GetWidth (out int min, out int max);\r
35 \r
36                 public int GetFixedWidth () {\r
37                         int min, max;\r
38                         GetWidth (out min, out max);\r
39 \r
40                         if (min == max)\r
41                                 return min;\r
42 \r
43                         return -1;\r
44                 }\r
45 \r
46                 public virtual AnchorInfo GetAnchorInfo () {\r
47                         return new AnchorInfo (this, GetFixedWidth ());\r
48                 }\r
49 \r
50                 public virtual bool IsComplex () {\r
51                         return true;\r
52                 }\r
53         }\r
54 \r
55         // composite expressions\r
56         \r
57         abstract class CompositeExpression : Expression {\r
58                 public CompositeExpression () {\r
59                         expressions = new ExpressionCollection ();\r
60                 }\r
61 \r
62                 protected ExpressionCollection Expressions {\r
63                         get { return expressions; }\r
64                 }\r
65 \r
66                 protected void GetWidth (out int min, out int max, int count) {\r
67                         min = Int32.MaxValue;\r
68                         max = 0;\r
69                         bool empty = true;\r
70 \r
71                         for (int i = 0; i < count; ++ i) {\r
72                                 Expression e = Expressions[i];\r
73                                 if (e == null)\r
74                                         continue;\r
75                         \r
76                                 empty = false;\r
77                                 int a, b;\r
78                                 e.GetWidth (out a, out b);\r
79                                 if (a < min) min = a;\r
80                                 if (b > max) max = b;\r
81                         }\r
82 \r
83                         if (empty)\r
84                                 min = max = 0;\r
85                 }\r
86 \r
87                 private ExpressionCollection expressions;\r
88         }\r
89 \r
90         // groups\r
91         \r
92         class Group : CompositeExpression {\r
93                 public Group () {\r
94                 }\r
95 \r
96                 public Expression Expression {\r
97                         get { return Expressions[0]; }\r
98                         set { Expressions[0] = value; }\r
99                 }\r
100 \r
101                 public void AppendExpression (Expression e) {\r
102                         Expressions.Add (e);\r
103                 }\r
104 \r
105                 public override void Compile (ICompiler cmp, bool reverse) {\r
106                         int count = Expressions.Count;\r
107                         for (int i = 0; i < count; ++ i) {\r
108                                 Expression e;\r
109                                 if (reverse)\r
110                                         e = Expressions[count - i - 1];\r
111                                 else\r
112                                         e = Expressions[i];\r
113 \r
114                                 e.Compile (cmp, reverse);\r
115                         }\r
116                 }\r
117 \r
118                 public override void GetWidth (out int min, out int max) {\r
119                         min = 0;\r
120                         max = 0;\r
121 \r
122                         foreach (Expression e in Expressions) {\r
123                                 int a, b;\r
124                                 e.GetWidth (out a, out b);\r
125                                 min += a;\r
126                                 if (max == Int32.MaxValue || b == Int32.MaxValue)\r
127                                         max = Int32.MaxValue;\r
128                                 else\r
129                                         max += b;\r
130                         }\r
131                 }\r
132 \r
133                 public override AnchorInfo GetAnchorInfo () {\r
134                         int ptr;\r
135                         int width = GetFixedWidth ();\r
136 \r
137                         ArrayList infos = new ArrayList ();\r
138                         IntervalCollection segments = new IntervalCollection ();\r
139 \r
140                         // accumulate segments\r
141 \r
142                         ptr = 0;\r
143                         foreach (Expression e in Expressions) {\r
144                                 AnchorInfo info = e.GetAnchorInfo ();\r
145                                 infos.Add (info);\r
146 \r
147                                 if (info.IsPosition)\r
148                                         return new AnchorInfo (this, ptr + info.Offset, width, info.Position);\r
149 \r
150                                 if (info.IsSubstring)\r
151                                         segments.Add (info.GetInterval (ptr));\r
152 \r
153                                 if (info.IsUnknownWidth)\r
154                                         break;\r
155 \r
156                                 ptr += info.Width;\r
157                         }\r
158 \r
159                         // normalize and find the longest segment\r
160 \r
161                         segments.Normalize ();\r
162 \r
163                         Interval longest = Interval.Empty;\r
164                         foreach (Interval segment in segments) {\r
165                                 if (segment.Size > longest.Size)\r
166                                         longest = segment;\r
167                         }\r
168 \r
169                         // now chain the substrings that made this segment together\r
170 \r
171                         if (!longest.IsEmpty) {\r
172                                 string str = "";\r
173                                 bool ignore = false;\r
174 \r
175                                 ptr = 0;\r
176                                 foreach (AnchorInfo info in infos) {\r
177                                         if (info.IsSubstring && longest.Contains (info.GetInterval (ptr))) {\r
178                                                 str += info.Substring;  // TODO mark subexpressions\r
179                                                 ignore |= info.IgnoreCase;\r
180                                         }\r
181 \r
182                                         if (info.IsUnknownWidth)\r
183                                                 break;\r
184 \r
185                                         ptr += info.Width;\r
186                                 }\r
187 \r
188                                 return new AnchorInfo (this, longest.low, width, str, ignore);\r
189                         }\r
190 \r
191                         return new AnchorInfo (this, width);\r
192                 }\r
193 \r
194                 public override bool IsComplex () {\r
195                         bool comp = false;\r
196                         foreach (Expression e in Expressions) {\r
197                                 comp |= e.IsComplex ();\r
198                         }\r
199 \r
200                         return comp | GetFixedWidth () <= 0;\r
201                 }\r
202         }\r
203 \r
204         class RegularExpression : Group {\r
205                 public RegularExpression () {\r
206                         group_count = 0;\r
207                 }\r
208 \r
209                 public int GroupCount {\r
210                         get { return group_count; }\r
211                         set { group_count = value; }\r
212                 }\r
213 \r
214                 public override void Compile (ICompiler cmp, bool reverse) {\r
215                         // info block\r
216 \r
217                         int min, max;\r
218                         GetWidth (out min, out max);\r
219                         cmp.EmitInfo (group_count, min, max);\r
220 \r
221                         // anchoring expression\r
222 \r
223                         AnchorInfo info = GetAnchorInfo ();\r
224                         if (reverse)\r
225                                 info = new AnchorInfo (this, GetFixedWidth ()); // FIXME\r
226 \r
227                         LinkRef pattern = cmp.NewLink ();\r
228                         cmp.EmitAnchor (info.Offset, pattern);\r
229 \r
230                         if (info.IsPosition)\r
231                                 cmp.EmitPosition (info.Position);\r
232                         else if (info.IsSubstring)\r
233                                 cmp.EmitString (info.Substring, info.IgnoreCase, reverse);\r
234                         \r
235                         cmp.EmitTrue ();\r
236                         \r
237                         // pattern\r
238 \r
239                         cmp.ResolveLink (pattern);\r
240                         base.Compile (cmp, reverse);\r
241                         cmp.EmitTrue ();\r
242                 }\r
243 \r
244                 private int group_count;\r
245         }\r
246 \r
247         class CapturingGroup : Group {\r
248                 public CapturingGroup () {\r
249                         this.gid = 0;\r
250                         this.name = null;\r
251                 }\r
252 \r
253                 public int Number {\r
254                         get { return gid; }\r
255                         set { gid = value; }\r
256                 }\r
257 \r
258                 public string Name {\r
259                         get { return name; }\r
260                         set { name = value; }\r
261                 }\r
262 \r
263                 public bool IsNamed {\r
264                         get { return name != null; }\r
265                 }\r
266 \r
267                 public override void Compile (ICompiler cmp, bool reverse) {\r
268                         cmp.EmitOpen (gid);\r
269                         base.Compile (cmp, reverse);\r
270                         cmp.EmitClose (gid);\r
271                 }\r
272 \r
273                 public override bool IsComplex () {\r
274                         return true;\r
275                 }\r
276 \r
277                 private int gid;\r
278                 private string name;\r
279         }\r
280 \r
281         class BalancingGroup : CapturingGroup {\r
282                 public BalancingGroup () {\r
283                         this.balance = null;\r
284                 }\r
285 \r
286                 public CapturingGroup Balance {\r
287                         get { return balance; }\r
288                         set { balance = value; }\r
289                 }\r
290 \r
291                 public override void Compile (ICompiler cmp, bool reverse) {\r
292                         // can't invoke Group.Compile from here :(\r
293                         // so I'll just repeat the code\r
294                 \r
295                         int count = Expressions.Count;\r
296                         for (int i = 0; i < count; ++ i) {\r
297                                 Expression e;\r
298                                 if (reverse)\r
299                                         e = Expressions[count - i - 1];\r
300                                 else\r
301                                         e = Expressions[i];\r
302 \r
303                                 e.Compile (cmp, reverse);\r
304                         }\r
305 \r
306                         cmp.EmitBalance (this.Number, balance.Number);\r
307                 }\r
308 \r
309                 private CapturingGroup balance;\r
310         }\r
311 \r
312         class NonBacktrackingGroup : Group {\r
313                 public NonBacktrackingGroup () {\r
314                 }\r
315 \r
316                 public override void Compile (ICompiler cmp, bool reverse) {\r
317                         LinkRef tail = cmp.NewLink ();\r
318 \r
319                         cmp.EmitSub (tail);\r
320                         base.Compile (cmp, reverse);\r
321                         cmp.EmitTrue ();\r
322                         cmp.ResolveLink (tail);\r
323                 }\r
324 \r
325                 public override bool IsComplex () {\r
326                         return true;\r
327                 }\r
328         }\r
329 \r
330         // repetition\r
331 \r
332         class Repetition : CompositeExpression {\r
333                 public Repetition (int min, int max, bool lazy) {\r
334                         Expressions.Add (null);\r
335                         \r
336                         this.min = min;\r
337                         this.max = max;\r
338                         this.lazy = lazy;\r
339                 }\r
340 \r
341                 public Expression Expression {\r
342                         get { return Expressions[0]; }\r
343                         set { Expressions[0] = value; }\r
344                 }\r
345 \r
346                 public int Minimum {\r
347                         get { return min; }\r
348                         set { min = value; }\r
349                 }\r
350 \r
351                 public int Maximum {\r
352                         get { return max; }\r
353                         set { max = value; }\r
354                 }\r
355 \r
356                 public bool Lazy {\r
357                         get { return lazy; }\r
358                         set { lazy = value; }\r
359                 }\r
360 \r
361                 public override void Compile (ICompiler cmp, bool reverse) {\r
362                         if (Expression.IsComplex ()) {\r
363                                 LinkRef until = cmp.NewLink ();\r
364                                 \r
365                                 cmp.EmitRepeat (min, max, lazy, until);\r
366                                 Expression.Compile (cmp, reverse);\r
367                                 cmp.EmitUntil (until);\r
368                         }\r
369                         else {\r
370                                 LinkRef tail = cmp.NewLink ();\r
371 \r
372                                 cmp.EmitFastRepeat (min, max, lazy, tail);\r
373                                 Expression.Compile (cmp, reverse);\r
374                                 cmp.EmitTrue ();\r
375                                 cmp.ResolveLink (tail);\r
376                         }\r
377                 }\r
378 \r
379                 public override void GetWidth (out int min, out int max) {\r
380                         Expression.GetWidth (out min, out max);\r
381                         min = min * this.min;\r
382                         if (max == Int32.MaxValue || this.max == 0xffff)\r
383                                 max = Int32.MaxValue;\r
384                         else\r
385                                 max = max * this.max;\r
386                 }\r
387 \r
388                 public override AnchorInfo GetAnchorInfo () {\r
389                         int width = GetFixedWidth ();\r
390                         if (Minimum == 0)\r
391                                 return new AnchorInfo (this, width);\r
392                         \r
393                         AnchorInfo info = Expression.GetAnchorInfo ();\r
394                         if (info.IsPosition)\r
395                                 return new AnchorInfo (this, info.Offset, width, info.Position);\r
396                         \r
397                         if (info.IsSubstring) {\r
398                                 if (info.IsComplete) {\r
399                                         string str = "";\r
400                                         for (int i = 0; i < Minimum; ++ i)\r
401                                                 str += info.Substring;\r
402 \r
403                                         return new AnchorInfo (this, 0, width, str, info.IgnoreCase);\r
404                                 }\r
405 \r
406                                 return new AnchorInfo (this, info.Offset, width, info.Substring, info.IgnoreCase);\r
407                         }\r
408 \r
409                         return new AnchorInfo (this, width);\r
410                 }\r
411 \r
412                 private int min, max;\r
413                 private bool lazy;\r
414         }\r
415 \r
416         // assertions\r
417 \r
418         abstract class Assertion : CompositeExpression {\r
419                 public Assertion () {\r
420                         Expressions.Add (null);         // true expression\r
421                         Expressions.Add (null);         // false expression\r
422                 }\r
423 \r
424                 public Expression TrueExpression {\r
425                         get { return Expressions[0]; }\r
426                         set { Expressions[0] = value; }\r
427                 }\r
428 \r
429                 public Expression FalseExpression {\r
430                         get { return Expressions[1]; }\r
431                         set { Expressions[1] = value; }\r
432                 }\r
433 \r
434                 public override void GetWidth (out int min, out int max) {\r
435                         GetWidth (out min, out max, 2);\r
436 \r
437                         if (TrueExpression == null || FalseExpression == null)\r
438                                 min = 0;\r
439                 }\r
440         }\r
441 \r
442         class CaptureAssertion : Assertion {\r
443                 public CaptureAssertion () {\r
444                 }\r
445 \r
446                 public CapturingGroup CapturingGroup {\r
447                         get { return group; }\r
448                         set { group = value; }\r
449                 }\r
450 \r
451                 public override void Compile (ICompiler cmp, bool reverse) {\r
452                         int gid = group.Number;\r
453                         LinkRef tail = cmp.NewLink ();\r
454 \r
455                         if (FalseExpression == null) {\r
456                                 //    IfDefined :1\r
457                                 //      <yes_exp>\r
458                                 // 1: <tail>\r
459                         \r
460                                 cmp.EmitIfDefined (gid, tail);\r
461                                 TrueExpression.Compile (cmp, reverse);\r
462                         }\r
463                         else {\r
464                                 //    IfDefined :1\r
465                                 //      <yes_expr>\r
466                                 //      Jump :2\r
467                                 // 1:   <no_expr>\r
468                                 // 2: <tail>\r
469                         \r
470                                 LinkRef false_expr = cmp.NewLink ();\r
471                                 cmp.EmitIfDefined (gid, false_expr);\r
472                                 TrueExpression.Compile (cmp, reverse);\r
473                                 cmp.EmitJump (tail);\r
474                                 cmp.ResolveLink (false_expr);\r
475                                 FalseExpression.Compile (cmp, reverse);\r
476                         }\r
477 \r
478                         cmp.ResolveLink (tail);\r
479                 }\r
480 \r
481                 public override bool IsComplex () {\r
482                         bool comp = false;\r
483                         if (TrueExpression != null)\r
484                                 comp |= TrueExpression.IsComplex ();\r
485                         if (FalseExpression != null)\r
486                                 comp |= FalseExpression.IsComplex ();\r
487 \r
488                         return comp | GetFixedWidth () <= 0;\r
489                 }\r
490 \r
491                 private CapturingGroup group;\r
492         }\r
493 \r
494         class ExpressionAssertion : Assertion {\r
495                 public ExpressionAssertion () {\r
496                         Expressions.Add (null);         // test expression\r
497                 }\r
498 \r
499                 public bool Reverse {\r
500                         get { return reverse; }\r
501                         set { reverse = value; }\r
502                 }\r
503 \r
504                 public bool Negate {\r
505                         get { return negate; }\r
506                         set { negate = value; }\r
507                 }\r
508 \r
509                 public Expression TestExpression {\r
510                         get { return Expressions[2]; }\r
511                         set { Expressions[2] = value; }\r
512                 }\r
513 \r
514                 public override void Compile (ICompiler cmp, bool reverse) {\r
515                         LinkRef true_expr = cmp.NewLink ();\r
516                         LinkRef false_expr = cmp.NewLink ();\r
517 \r
518                         // test op: positive / negative\r
519 \r
520                         if (!negate)\r
521                                 cmp.EmitTest (true_expr, false_expr);\r
522                         else\r
523                                 cmp.EmitTest (false_expr, true_expr);\r
524                         \r
525                         // test expression: lookahead / lookbehind\r
526 \r
527                         TestExpression.Compile (cmp, reverse ^ this.reverse);\r
528                         cmp.EmitTrue ();\r
529 \r
530                         // target expressions\r
531 \r
532                         if (TrueExpression == null) {                   // (?= ...)\r
533                                 //    Test :1, :2\r
534                                 //      <test_expr>\r
535                                 // :2   False\r
536                                 // :1   <tail>\r
537                         \r
538                                 cmp.ResolveLink (false_expr);\r
539                                 cmp.EmitFalse ();\r
540                                 cmp.ResolveLink (true_expr);\r
541                         }\r
542                         else {\r
543                                 cmp.ResolveLink (true_expr);\r
544                                 TrueExpression.Compile (cmp, reverse);\r
545                                 \r
546                                 if (FalseExpression == null) {          // (?(...) ...)\r
547                                         //    Test :1, :2\r
548                                         //      <test_expr>\r
549                                         // :1   <yes_expr>\r
550                                         // :2   <tail>\r
551 \r
552                                         cmp.ResolveLink (false_expr);\r
553                                 }\r
554                                 else {                                  // (?(...) ... | ...)\r
555                                         //    Test :1, :2\r
556                                         //      <test_expr>\r
557                                         // :1   <yes_expr>\r
558                                         //      Jump :3\r
559                                         // :2   <no_expr>\r
560                                         // :3   <tail>\r
561                                 \r
562                                         LinkRef tail = cmp.NewLink ();\r
563                                 \r
564                                         cmp.EmitJump (tail);\r
565                                         cmp.ResolveLink (false_expr);\r
566                                         FalseExpression.Compile (cmp, reverse);\r
567                                         cmp.ResolveLink (tail);\r
568                                 }\r
569                         }\r
570                 }\r
571 \r
572                 private bool reverse, negate;\r
573         }\r
574 \r
575         // alternation\r
576 \r
577         class Alternation : CompositeExpression {\r
578                 public Alternation () {\r
579                 }\r
580 \r
581                 public ExpressionCollection Alternatives {\r
582                         get { return Expressions; }\r
583                 }\r
584 \r
585                 public void AddAlternative (Expression e) {\r
586                         Alternatives.Add (e);\r
587                 }\r
588 \r
589                 public override void Compile (ICompiler cmp, bool reverse) {\r
590                         LinkRef next = cmp.NewLink ();\r
591                         LinkRef tail = cmp.NewLink ();\r
592                 \r
593                         foreach (Expression e in Alternatives) {\r
594                                 cmp.EmitBranch (next);\r
595                                 e.Compile (cmp, reverse);\r
596                                 cmp.EmitJump (tail);\r
597                                 cmp.ResolveLink (next);\r
598                         }\r
599 \r
600                         cmp.EmitFalse ();\r
601                         cmp.ResolveLink (tail);\r
602                 }\r
603 \r
604                 public override void GetWidth (out int min, out int max) {\r
605                         GetWidth (out min, out max, Alternatives.Count);\r
606                 }\r
607 \r
608                 public override bool IsComplex () {\r
609                         bool comp = false;\r
610                         foreach (Expression e in Alternatives) {\r
611                                 comp |= e.IsComplex ();\r
612                         }\r
613 \r
614                         return comp | GetFixedWidth () <= 0;\r
615                 }\r
616         }\r
617 \r
618         // terminal expressions\r
619 \r
620         class Literal : Expression {\r
621                 public Literal (string str, bool ignore) {\r
622                         this.str = str;\r
623                         this.ignore = ignore;\r
624                 }\r
625 \r
626                 public string String {\r
627                         get { return str; }\r
628                         set { str = value; }\r
629                 }\r
630 \r
631                 public bool IgnoreCase {\r
632                         get { return ignore; }\r
633                         set { ignore = value; }\r
634                 }\r
635 \r
636                 public override void Compile (ICompiler cmp, bool reverse) {\r
637                         if (str.Length == 0)\r
638                                 return;\r
639 \r
640                         if (str.Length == 1)\r
641                                 cmp.EmitCharacter (str[0], false, ignore, reverse);\r
642                         else\r
643                                 cmp.EmitString (str, ignore, reverse);\r
644                 }\r
645 \r
646                 public override void GetWidth (out int min, out int max) {\r
647                         min = max = str.Length;\r
648                 }\r
649 \r
650                 public override AnchorInfo GetAnchorInfo () {\r
651                         return new AnchorInfo (this, 0, str.Length, str, ignore);\r
652                 }\r
653 \r
654                 public override bool IsComplex () {\r
655                         return false;\r
656                 }\r
657 \r
658                 private string str;\r
659                 private bool ignore;\r
660         }\r
661 \r
662         class PositionAssertion : Expression {\r
663                 public PositionAssertion (Position pos) {\r
664                         this.pos = pos;\r
665                 }\r
666 \r
667                 public Position Position {\r
668                         get { return pos; }\r
669                         set { pos = value; }\r
670                 }\r
671 \r
672                 public override void Compile (ICompiler cmp, bool reverse) {\r
673                         cmp.EmitPosition (pos);\r
674                 }\r
675 \r
676                 public override void GetWidth (out int min, out int max) {\r
677                         min = max = 0;\r
678                 }\r
679 \r
680                 public override bool IsComplex () {\r
681                         return false;\r
682                 }\r
683 \r
684                 public override AnchorInfo GetAnchorInfo () {\r
685                         switch (pos) {\r
686                         case Position.StartOfString: case Position.StartOfLine: case Position.StartOfScan:\r
687                                 return new AnchorInfo (this, 0, 0, pos);\r
688 \r
689                         default:\r
690                                 return new AnchorInfo (this, 0);\r
691                         }\r
692                 }\r
693 \r
694                 private Position pos;\r
695         }\r
696 \r
697         class Reference : Expression {\r
698                 public Reference (bool ignore) {\r
699                         this.ignore = ignore;\r
700                 }\r
701 \r
702                 public CapturingGroup CapturingGroup {\r
703                         get { return group; }\r
704                         set { group = value; }\r
705                 }\r
706 \r
707                 public bool IgnoreCase {\r
708                         get { return ignore; }\r
709                         set { ignore = value; }\r
710                 }\r
711 \r
712                 public override void Compile (ICompiler cmp, bool reverse) {\r
713                         cmp.EmitReference (group.Number, ignore, reverse);\r
714                 }\r
715 \r
716                 public override void GetWidth (out int min, out int max) {\r
717                         //group.GetWidth (out min, out max);\r
718                         // TODO set width to referenced group for non-cyclical references\r
719                         min = 0;\r
720                         max = Int32.MaxValue;\r
721                 }\r
722 \r
723                 public override bool IsComplex () {\r
724                         return true;    // FIXME incorporate cyclic check\r
725                 }\r
726 \r
727                 private CapturingGroup group;\r
728                 private bool ignore;\r
729         }\r
730 \r
731         class CharacterClass : Expression {\r
732                 public CharacterClass (bool negate, bool ignore) {\r
733                         this.negate = negate;\r
734                         this.ignore = ignore;\r
735 \r
736                         intervals = new IntervalCollection ();\r
737 \r
738                         // initialize pos/neg category arrays\r
739 \r
740                         Array cat_values = Enum.GetValues (typeof (Category));\r
741                         int cat_size = (int)(Category)cat_values.GetValue (cat_values.Length - 1) + 1;\r
742                         pos_cats = new bool[cat_size];\r
743                         neg_cats = new bool[cat_size];\r
744                         for (int i = 0; i < cat_size; ++ i) {\r
745                                 pos_cats[i] = false;\r
746                                 neg_cats[i] = false;\r
747                         }\r
748                 }\r
749 \r
750                 public CharacterClass (Category cat, bool negate) : this (false, false) {\r
751                         this.AddCategory (cat, negate);\r
752                 }\r
753 \r
754                 public bool Negate {\r
755                         get { return negate; }\r
756                         set { negate = value; }\r
757                 }\r
758 \r
759                 public bool IgnoreCase {\r
760                         get { return ignore; }\r
761                         set { ignore = value; }\r
762                 }\r
763 \r
764                 public void AddCategory (Category cat, bool negate) {\r
765                         int n = (int)cat;\r
766                         \r
767                         if (negate) {\r
768                                 if (pos_cats[n])\r
769                                         pos_cats[n] = false;\r
770 \r
771                                 neg_cats[n] = true;\r
772                         }\r
773                         else {\r
774                                 if (neg_cats[n])\r
775                                         neg_cats[n] = false;\r
776 \r
777                                 pos_cats[n] = true;\r
778                         }\r
779                 }\r
780 \r
781                 public void AddCharacter (char c) {\r
782                         intervals.Add (new Interval (c, c));\r
783                 }\r
784 \r
785                 public void AddRange (char lo, char hi) {\r
786                         intervals.Add (new Interval (lo, hi));\r
787                 }\r
788 \r
789                 public override void Compile (ICompiler cmp, bool reverse) {\r
790                         // create the meta-collection\r
791 \r
792                         IntervalCollection meta =\r
793                                 intervals.GetMetaCollection (new IntervalCollection.CostDelegate (GetIntervalCost));\r
794 \r
795                         // count ops\r
796                         \r
797                         int count = meta.Count;\r
798                         for (int i = 0; i < pos_cats.Length; ++ i) {\r
799                                 if (pos_cats[i]) ++ count;\r
800                                 if (neg_cats[i]) ++ count;\r
801                         }\r
802 \r
803                         if (count == 0)\r
804                                 return;\r
805 \r
806                         // emit in op for |meta| > 1\r
807 \r
808                         LinkRef tail = cmp.NewLink ();\r
809                         if (count > 1)\r
810                                 cmp.EmitIn (tail);\r
811 \r
812                         // emit categories\r
813 \r
814                         for (int i = 0; i < pos_cats.Length; ++ i) {\r
815                                 if (pos_cats[i])\r
816                                         cmp.EmitCategory ((Category)i, negate, reverse);\r
817                                 else if (neg_cats[i])\r
818                                         cmp.EmitCategory ((Category)i, !negate, reverse);\r
819                         }\r
820 \r
821                         // emit character/range/sets from meta-collection\r
822 \r
823                         foreach (Interval a in meta) {\r
824                                 if (a.IsDiscontiguous) {                        // Set\r
825                                         BitArray bits = new BitArray (a.Size);\r
826                                         foreach (Interval b in intervals) {\r
827                                                 if (a.Contains (b)) {\r
828                                                         for (int i = b.low; i <= b.high; ++ i)\r
829                                                                 bits[i - a.low] = true;\r
830                                                 }\r
831                                         }\r
832 \r
833                                         cmp.EmitSet ((char)a.low, bits, negate, ignore, reverse);\r
834                                 }\r
835                                 else if (a.IsSingleton)                         // Character\r
836                                         cmp.EmitCharacter ((char)a.low, negate, ignore, reverse);\r
837                                 else                                            // Range\r
838                                         cmp.EmitRange ((char)a.low, (char)a.high, negate, ignore, reverse);\r
839                         }\r
840                         \r
841                         // finish up\r
842 \r
843                         if (count > 1) {\r
844                                 if (negate)\r
845                                         cmp.EmitTrue ();\r
846                                 else\r
847                                         cmp.EmitFalse ();\r
848 \r
849                                 cmp.ResolveLink (tail);\r
850                         }\r
851                 }\r
852 \r
853                 public override void GetWidth (out int min, out int max) {\r
854                         min = max = 1;\r
855                 }\r
856 \r
857                 public override bool IsComplex () {\r
858                         return false;\r
859                 }\r
860 \r
861                 // private\r
862 \r
863                 private static double GetIntervalCost (Interval i) {\r
864                         // use op length as cost metric (=> optimize for space)\r
865                 \r
866                         if (i.IsDiscontiguous)\r
867                                 return 3 + ((i.Size + 0xf) >> 4);               // Set\r
868                         else if (i.IsSingleton)\r
869                                 return 2;                                       // Character\r
870                         else\r
871                                 return 3;                                       // Range\r
872                 }\r
873 \r
874                 private bool negate, ignore;\r
875                 private bool[] pos_cats, neg_cats;\r
876                 private IntervalCollection intervals;\r
877         }\r
878 \r
879         class AnchorInfo {\r
880                 private Expression expr;\r
881 \r
882                 private Position pos;\r
883                 private int offset;\r
884 \r
885                 private string str;\r
886                 private int width;\r
887                 private bool ignore;\r
888 \r
889                 public AnchorInfo (Expression expr, int width) {\r
890                         this.expr = expr;\r
891                         this.offset = 0;\r
892                         this.width = width;\r
893 \r
894                         this.str = null;\r
895                         this.ignore = false;\r
896                         this.pos = Position.Any;\r
897                 }\r
898                 \r
899                 public AnchorInfo (Expression expr, int offset, int width, string str, bool ignore) {\r
900                         this.expr = expr;\r
901                         this.offset = offset;\r
902                         this.width = width;\r
903 \r
904                         this.str = ignore ? str.ToLower () : str;\r
905 \r
906                         this.ignore = ignore;\r
907                         this.pos = Position.Any;\r
908                 }\r
909 \r
910                 public AnchorInfo (Expression expr, int offset, int width, Position pos) {\r
911                         this.expr = expr;\r
912                         this.offset = offset;\r
913                         this.width = width;\r
914 \r
915                         this.pos = pos;\r
916 \r
917                         this.str = null;\r
918                         this.ignore = false;\r
919                 }\r
920 \r
921                 public Expression Expression {\r
922                         get { return expr; }\r
923                 }\r
924 \r
925                 public int Offset {\r
926                         get { return offset; }\r
927                 }\r
928 \r
929                 public int Width {\r
930                         get { return width; }\r
931                 }\r
932 \r
933                 public int Length {\r
934                         get { return (str != null) ? str.Length : 0; }\r
935                 }\r
936 \r
937                 public bool IsUnknownWidth {\r
938                         get { return width < 0; }\r
939                 }\r
940 \r
941                 public bool IsComplete {\r
942                         get { return Length == Width; }\r
943                 }\r
944 \r
945                 public string Substring {\r
946                         get { return str; }\r
947                 }\r
948 \r
949                 public bool IgnoreCase {\r
950                         get { return ignore; }\r
951                 }\r
952 \r
953                 public Position Position {\r
954                         get { return pos; }\r
955                 }\r
956 \r
957                 public bool IsSubstring {\r
958                         get { return str != null; }\r
959                 }\r
960 \r
961                 public bool IsPosition {\r
962                         get { return pos != Position.Any; }\r
963                 }\r
964 \r
965                 public Interval GetInterval () {\r
966                         return GetInterval (0);\r
967                 }\r
968 \r
969                 public Interval GetInterval (int start) {\r
970                         if (!IsSubstring)\r
971                                 return Interval.Empty;\r
972 \r
973                         return new Interval (start + Offset, start + Offset + Length - 1);\r
974                 }\r
975         }\r
976 }\r