Merge pull request #484 from roji/transactions_pspe
[mono.git] / mcs / class / System / System.Text.RegularExpressions / RxCompiler.cs
1 using System;
2 using System.Collections;
3 using System.Globalization;
4 using System.Reflection;
5 using System.Reflection.Emit;
6
7 namespace System.Text.RegularExpressions {
8
/*
 * Behaves like a growing list of (base, offsetpos) tuples stored flat in
 * `offsets`: even slots hold the value given to PushInstructionBase, odd
 * slots hold the value given to PushOffsetPosition. `current` is the index
 * of the next free slot.
 */
class RxLinkRef: LinkRef {
        public int[] offsets;
        public int current = 0;

        public RxLinkRef ()
        {
                offsets = new int [8];
        }

        // Records the start of the branch instruction in the program stream.
        // Throws InvalidOperationException when the previous tuple is still
        // missing its offset position (current is odd).
        public void PushInstructionBase (int offset)
        {
                if ((current & 1) != 0)
                        throw new InvalidOperationException ("PushInstructionBase called while the previous entry still lacks its offset position.");
                // Growing happens only here. As long as the array keeps an even
                // length (8, then doubled), the odd slot that follows a base
                // always exists, so PushOffsetPosition needs no resize of its own.
                if (current == offsets.Length) {
                        int[] newarray = new int [offsets.Length * 2];
                        Array.Copy (offsets, newarray, offsets.Length);
                        offsets = newarray;
                }
                offsets [current++] = offset;
        }

        // Records the position in the program stream where the jump offset is
        // stored. Throws InvalidOperationException when no instruction base
        // has been pushed for this tuple yet (current is even).
        public void PushOffsetPosition (int offset)
        {
                if ((current & 1) == 0)
                        throw new InvalidOperationException ("PushOffsetPosition called without a preceding PushInstructionBase.");
                offsets [current++] = offset;
        }

}
42
43         class RxCompiler : ICompiler {
// Index of the next free byte in `program`.
protected int curpos = 0;
// Buffer receiving the emitted program bytes; starts at 32 bytes.
protected byte[] program = new byte [32];

// Nothing to set up beyond the field initializers.
public RxCompiler ()
{
}
49
// Ensures that `bytes` more bytes can be written at curpos, doubling the
// program buffer (and copying its contents) until they fit.
void MakeRoom (int bytes)
{
        int required = curpos + bytes;
        while (required > program.Length) {
                byte[] grown = new byte [program.Length * 2];
                Buffer.BlockCopy (program, 0, grown, 0, program.Length);
                program = grown;
        }
}
59
// Appends one byte to the program and advances the write position.
void Emit (byte val)
{
        MakeRoom (1);
        program [curpos++] = val;
}
66
// Appends an opcode, encoded as a single byte.
void Emit (RxOp opcode)
{
        byte encoded = (byte)opcode;
        Emit (encoded);
}
71
// Appends a 16-bit value as two bytes, low byte first.
void Emit (ushort val)
{
        MakeRoom (2);
        program [curpos++] = (byte)val;
        program [curpos++] = (byte)(val >> 8);
}
79
// Appends a 32-bit value as four bytes, low byte first.
void Emit (int val)
{
        MakeRoom (4);
        for (int shift = 0; shift < 32; shift += 8)
                program [curpos++] = (byte)(val >> shift);
}
89
// Records the current program position as the instruction base of a new
// entry in the given link reference.
void BeginLink (LinkRef lref) {
        // Direct cast: a LinkRef of the wrong type fails right here with an
        // InvalidCastException instead of a NullReferenceException one line later.
        RxLinkRef link = (RxLinkRef)lref;
        link.PushInstructionBase (curpos);
}
94
// Records the current program position as the offset position of the
// pending entry in the given link reference, then reserves two zero bytes
// at that position.
void EmitLink (LinkRef lref)
{
        // Direct cast: a LinkRef of the wrong type fails right here with an
        // InvalidCastException instead of a NullReferenceException one line later.
        RxLinkRef link = (RxLinkRef)lref;
        link.PushOffsetPosition (curpos);
        Emit ((ushort)0);
}
101
102                 // ICompiler implementation
// Rewinds the write position to the start of the program buffer.
// The buffer itself is kept and simply overwritten by later emits.
public void Reset ()
{
        this.curpos = 0;
}
107
// Copies the bytes emitted so far into an exactly-sized array and wraps
// the copy in an RxInterpreterFactory.
public IMachineFactory GetMachineFactory ()
{
        int size = curpos;
        byte[] snapshot = new byte [size];
        Buffer.BlockCopy (program, 0, snapshot, 0, size);

        return new RxInterpreterFactory (snapshot, null);
}
116
// Emits the RxOp.False opcode.
public void EmitFalse ()
{
        RxOp op = RxOp.False;
        Emit (op);
}
121
// Emits the RxOp.True opcode.
public void EmitTrue ()
{
        RxOp op = RxOp.True;
        Emit (op);
}
126
/* Overridden by CILCompiler */
// Emits the variant of `op` selected by the three flags. The variant is
// found by adding to the base opcode value: +1 negate, +2 ignore, +4 reverse.
public virtual void EmitOp (RxOp op, bool negate, bool ignore, bool reverse) {
        int variant = (negate ? 1 : 0) + (ignore ? 2 : 0) + (reverse ? 4 : 0);

        Emit ((RxOp)((int)op + variant));
}
139
// Emits the variant of `op` selected by the two flags. The variant is
// found by adding to the base opcode value: +1 ignore, +2 reverse.
public virtual void EmitOpIgnoreReverse (RxOp op, bool ignore, bool reverse) {
        int variant = (ignore ? 1 : 0) + (reverse ? 2 : 0);

        Emit ((RxOp)((int)op + variant));
}
149
// Emits the variant of `op` selected by the two flags. The variant is
// found by adding to the base opcode value: +1 negate, +2 reverse.
public virtual void EmitOpNegateReverse (RxOp op, bool negate, bool reverse) {
        int variant = (negate ? 1 : 0) + (reverse ? 2 : 0);

        Emit ((RxOp)((int)op + variant));
}
159
// Emits a single-character match. With `ignore` set the character is
// lower-cased first. Characters below 256 use the one-byte RxOp.Char
// form; anything else uses RxOp.UnicodeChar with a two-byte operand.
public void EmitCharacter (char c, bool negate, bool ignore, bool reverse)
{
        char ch = ignore ? Char.ToLower (c) : c;

        if (ch >= 256) {
                EmitOp (RxOp.UnicodeChar, negate, ignore, reverse);
                Emit ((ushort)ch);
                return;
        }

        EmitOp (RxOp.Char, negate, ignore, reverse);
        Emit ((byte)ch);
}
172
// Emits RxOp.CategoryUnicode (negate/reverse variant) followed by the
// UnicodeCategory value as a one-byte operand.
void EmitUniCat (UnicodeCategory cat, bool negate, bool reverse)
{
        byte operand = (byte)cat;
        EmitOpNegateReverse (RxOp.CategoryUnicode, negate, reverse);
        Emit (operand);
}
178
// Emits RxOp.CategoryGeneral (negate/reverse variant) followed by the
// Category value as a one-byte operand.
void EmitCatGeneral (Category cat, bool negate, bool reverse)
{
        byte operand = (byte)cat;
        EmitOpNegateReverse (RxOp.CategoryGeneral, negate, reverse);
        Emit (operand);
}
184
// Emits the instruction(s) matching one character of the given category.
//
// Three kinds of categories are handled:
//   - categories with a dedicated opcode, emitted directly;
//   - Unicode general categories, emitted through EmitUniCat (single
//     category) or EmitCatGeneral (the grouped L/M/N/Z/P/S/C categories);
//   - EcmaDigit and the Unicode block categories, which only select a
//     [lo, hi] pair here and share the single EmitRange call at the end
//     (always with ignore = false).
//
// Throws NotImplementedException for a category that is not listed.
public void EmitCategory (Category cat, bool negate, bool reverse)
{
        char lo, hi;

        switch (cat) {
        // --- categories with a dedicated opcode ---
        case Category.Any:
        case Category.EcmaAny:
                EmitOpNegateReverse (RxOp.CategoryAny, negate, reverse);
                return;
        case Category.AnySingleline:
                EmitOpNegateReverse (RxOp.CategoryAnySingleline, negate, reverse);
                return;
        case Category.Word:
                EmitOpNegateReverse (RxOp.CategoryWord, negate, reverse);
                return;
        case Category.Digit:
                EmitOpNegateReverse (RxOp.CategoryDigit, negate, reverse);
                return;
        case Category.WhiteSpace:
                EmitOpNegateReverse (RxOp.CategoryWhiteSpace, negate, reverse);
                return;
        /* FIXME: translate EcmaWord, EcmaWhiteSpace into Bitmaps? EcmaWhiteSpace will fit very well with the IL engine */
        case Category.EcmaWord:
                EmitOpNegateReverse (RxOp.CategoryEcmaWord, negate, reverse);
                return;
        case Category.EcmaWhiteSpace:
                EmitOpNegateReverse (RxOp.CategoryEcmaWhiteSpace, negate, reverse);
                return;
        case Category.UnicodeSpecials:
                EmitOpNegateReverse (RxOp.CategoryUnicodeSpecials, negate, reverse);
                return;

        // --- single Unicode general categories ---
        // letter
        case Category.UnicodeLu: EmitUniCat (UnicodeCategory.UppercaseLetter, negate, reverse); return;
        case Category.UnicodeLl: EmitUniCat (UnicodeCategory.LowercaseLetter, negate, reverse); return;
        case Category.UnicodeLt: EmitUniCat (UnicodeCategory.TitlecaseLetter, negate, reverse); return;
        case Category.UnicodeLm: EmitUniCat (UnicodeCategory.ModifierLetter, negate, reverse); return;
        case Category.UnicodeLo: EmitUniCat (UnicodeCategory.OtherLetter, negate, reverse); return;
        // mark
        case Category.UnicodeMn: EmitUniCat (UnicodeCategory.NonSpacingMark, negate, reverse); return;
        case Category.UnicodeMe: EmitUniCat (UnicodeCategory.EnclosingMark, negate, reverse); return;
        case Category.UnicodeMc: EmitUniCat (UnicodeCategory.SpacingCombiningMark, negate, reverse); return;
        // number
        case Category.UnicodeNd: EmitUniCat (UnicodeCategory.DecimalDigitNumber, negate, reverse); return;
        case Category.UnicodeNl: EmitUniCat (UnicodeCategory.LetterNumber, negate, reverse); return;
        case Category.UnicodeNo: EmitUniCat (UnicodeCategory.OtherNumber, negate, reverse); return;
        // separator
        case Category.UnicodeZs: EmitUniCat (UnicodeCategory.SpaceSeparator, negate, reverse); return;
        case Category.UnicodeZl: EmitUniCat (UnicodeCategory.LineSeparator, negate, reverse); return;
        case Category.UnicodeZp: EmitUniCat (UnicodeCategory.ParagraphSeparator, negate, reverse); return;
        // punctuation
        case Category.UnicodePd: EmitUniCat (UnicodeCategory.DashPunctuation, negate, reverse); return;
        case Category.UnicodePs: EmitUniCat (UnicodeCategory.OpenPunctuation, negate, reverse); return;
        case Category.UnicodePi: EmitUniCat (UnicodeCategory.InitialQuotePunctuation, negate, reverse); return;
        case Category.UnicodePe: EmitUniCat (UnicodeCategory.ClosePunctuation, negate, reverse); return;
        case Category.UnicodePf: EmitUniCat (UnicodeCategory.FinalQuotePunctuation, negate, reverse); return;
        case Category.UnicodePc: EmitUniCat (UnicodeCategory.ConnectorPunctuation, negate, reverse); return;
        case Category.UnicodePo: EmitUniCat (UnicodeCategory.OtherPunctuation, negate, reverse); return;
        // symbol
        case Category.UnicodeSm: EmitUniCat (UnicodeCategory.MathSymbol, negate, reverse); return;
        case Category.UnicodeSc: EmitUniCat (UnicodeCategory.CurrencySymbol, negate, reverse); return;
        case Category.UnicodeSk: EmitUniCat (UnicodeCategory.ModifierSymbol, negate, reverse); return;
        case Category.UnicodeSo: EmitUniCat (UnicodeCategory.OtherSymbol, negate, reverse); return;
        // other
        case Category.UnicodeCc: EmitUniCat (UnicodeCategory.Control, negate, reverse); return;
        case Category.UnicodeCf: EmitUniCat (UnicodeCategory.Format, negate, reverse); return;
        case Category.UnicodeCo: EmitUniCat (UnicodeCategory.PrivateUse, negate, reverse); return;
        case Category.UnicodeCs: EmitUniCat (UnicodeCategory.Surrogate, negate, reverse); return;
        case Category.UnicodeCn: EmitUniCat (UnicodeCategory.OtherNotAssigned, negate, reverse); return;

        // --- complex (grouped) Unicode categories ---
        case Category.UnicodeL:
        case Category.UnicodeM:
        case Category.UnicodeN:
        case Category.UnicodeZ:
        case Category.UnicodeP:
        case Category.UnicodeS:
        case Category.UnicodeC:
                EmitCatGeneral (cat, negate, reverse);
                return;

        // --- categories expressed as a character range ---
        case Category.EcmaDigit: lo = '0'; hi = '9'; break;
        // Unicode block ranges...
        case Category.UnicodeBasicLatin: lo = '\u0000'; hi = '\u007F'; break;
        case Category.UnicodeLatin1Supplement: lo = '\u0080'; hi = '\u00FF'; break;
        case Category.UnicodeLatinExtendedA: lo = '\u0100'; hi = '\u017F'; break;
        case Category.UnicodeLatinExtendedB: lo = '\u0180'; hi = '\u024F'; break;
        case Category.UnicodeIPAExtensions: lo = '\u0250'; hi = '\u02AF'; break;
        case Category.UnicodeSpacingModifierLetters: lo = '\u02B0'; hi = '\u02FF'; break;
        case Category.UnicodeCombiningDiacriticalMarks: lo = '\u0300'; hi = '\u036F'; break;
        case Category.UnicodeGreek: lo = '\u0370'; hi = '\u03FF'; break;
        case Category.UnicodeCyrillic: lo = '\u0400'; hi = '\u04FF'; break;
        case Category.UnicodeArmenian: lo = '\u0530'; hi = '\u058F'; break;
        case Category.UnicodeHebrew: lo = '\u0590'; hi = '\u05FF'; break;
        case Category.UnicodeArabic: lo = '\u0600'; hi = '\u06FF'; break;
        case Category.UnicodeSyriac: lo = '\u0700'; hi = '\u074F'; break;
        case Category.UnicodeThaana: lo = '\u0780'; hi = '\u07BF'; break;
        case Category.UnicodeDevanagari: lo = '\u0900'; hi = '\u097F'; break;
        case Category.UnicodeBengali: lo = '\u0980'; hi = '\u09FF'; break;
        case Category.UnicodeGurmukhi: lo = '\u0A00'; hi = '\u0A7F'; break;
        case Category.UnicodeGujarati: lo = '\u0A80'; hi = '\u0AFF'; break;
        case Category.UnicodeOriya: lo = '\u0B00'; hi = '\u0B7F'; break;
        case Category.UnicodeTamil: lo = '\u0B80'; hi = '\u0BFF'; break;
        case Category.UnicodeTelugu: lo = '\u0C00'; hi = '\u0C7F'; break;
        case Category.UnicodeKannada: lo = '\u0C80'; hi = '\u0CFF'; break;
        case Category.UnicodeMalayalam: lo = '\u0D00'; hi = '\u0D7F'; break;
        case Category.UnicodeSinhala: lo = '\u0D80'; hi = '\u0DFF'; break;
        case Category.UnicodeThai: lo = '\u0E00'; hi = '\u0E7F'; break;
        case Category.UnicodeLao: lo = '\u0E80'; hi = '\u0EFF'; break;
        case Category.UnicodeTibetan: lo = '\u0F00'; hi = '\u0FFF'; break;
        case Category.UnicodeMyanmar: lo = '\u1000'; hi = '\u109F'; break;
        case Category.UnicodeGeorgian: lo = '\u10A0'; hi = '\u10FF'; break;
        case Category.UnicodeHangulJamo: lo = '\u1100'; hi = '\u11FF'; break;
        case Category.UnicodeEthiopic: lo = '\u1200'; hi = '\u137F'; break;
        case Category.UnicodeCherokee: lo = '\u13A0'; hi = '\u13FF'; break;
        case Category.UnicodeUnifiedCanadianAboriginalSyllabics: lo = '\u1400'; hi = '\u167F'; break;
        case Category.UnicodeOgham: lo = '\u1680'; hi = '\u169F'; break;
        case Category.UnicodeRunic: lo = '\u16A0'; hi = '\u16FF'; break;
        case Category.UnicodeKhmer: lo = '\u1780'; hi = '\u17FF'; break;
        case Category.UnicodeMongolian: lo = '\u1800'; hi = '\u18AF'; break;
        case Category.UnicodeLatinExtendedAdditional: lo = '\u1E00'; hi = '\u1EFF'; break;
        case Category.UnicodeGreekExtended: lo = '\u1F00'; hi = '\u1FFF'; break;
        case Category.UnicodeGeneralPunctuation: lo = '\u2000'; hi = '\u206F'; break;
        case Category.UnicodeSuperscriptsandSubscripts: lo = '\u2070'; hi = '\u209F'; break;
        case Category.UnicodeCurrencySymbols: lo = '\u20A0'; hi = '\u20CF'; break;
        case Category.UnicodeCombiningMarksforSymbols: lo = '\u20D0'; hi = '\u20FF'; break;
        case Category.UnicodeLetterlikeSymbols: lo = '\u2100'; hi = '\u214F'; break;
        case Category.UnicodeNumberForms: lo = '\u2150'; hi = '\u218F'; break;
        case Category.UnicodeArrows: lo = '\u2190'; hi = '\u21FF'; break;
        case Category.UnicodeMathematicalOperators: lo = '\u2200'; hi = '\u22FF'; break;
        case Category.UnicodeMiscellaneousTechnical: lo = '\u2300'; hi = '\u23FF'; break;
        case Category.UnicodeControlPictures: lo = '\u2400'; hi = '\u243F'; break;
        case Category.UnicodeOpticalCharacterRecognition: lo = '\u2440'; hi = '\u245F'; break;
        case Category.UnicodeEnclosedAlphanumerics: lo = '\u2460'; hi = '\u24FF'; break;
        case Category.UnicodeBoxDrawing: lo = '\u2500'; hi = '\u257F'; break;
        case Category.UnicodeBlockElements: lo = '\u2580'; hi = '\u259F'; break;
        case Category.UnicodeGeometricShapes: lo = '\u25A0'; hi = '\u25FF'; break;
        case Category.UnicodeMiscellaneousSymbols: lo = '\u2600'; hi = '\u26FF'; break;
        case Category.UnicodeDingbats: lo = '\u2700'; hi = '\u27BF'; break;
        case Category.UnicodeBraillePatterns: lo = '\u2800'; hi = '\u28FF'; break;
        case Category.UnicodeCJKRadicalsSupplement: lo = '\u2E80'; hi = '\u2EFF'; break;
        case Category.UnicodeKangxiRadicals: lo = '\u2F00'; hi = '\u2FDF'; break;
        case Category.UnicodeIdeographicDescriptionCharacters: lo = '\u2FF0'; hi = '\u2FFF'; break;
        case Category.UnicodeCJKSymbolsandPunctuation: lo = '\u3000'; hi = '\u303F'; break;
        case Category.UnicodeHiragana: lo = '\u3040'; hi = '\u309F'; break;
        case Category.UnicodeKatakana: lo = '\u30A0'; hi = '\u30FF'; break;
        case Category.UnicodeBopomofo: lo = '\u3100'; hi = '\u312F'; break;
        case Category.UnicodeHangulCompatibilityJamo: lo = '\u3130'; hi = '\u318F'; break;
        case Category.UnicodeKanbun: lo = '\u3190'; hi = '\u319F'; break;
        case Category.UnicodeBopomofoExtended: lo = '\u31A0'; hi = '\u31BF'; break;
        case Category.UnicodeEnclosedCJKLettersandMonths: lo = '\u3200'; hi = '\u32FF'; break;
        case Category.UnicodeCJKCompatibility: lo = '\u3300'; hi = '\u33FF'; break;
        case Category.UnicodeCJKUnifiedIdeographsExtensionA: lo = '\u3400'; hi = '\u4DB5'; break;
        case Category.UnicodeCJKUnifiedIdeographs: lo = '\u4E00'; hi = '\u9FFF'; break;
        case Category.UnicodeYiSyllables: lo = '\uA000'; hi = '\uA48F'; break;
        case Category.UnicodeYiRadicals: lo = '\uA490'; hi = '\uA4CF'; break;
        case Category.UnicodeHangulSyllables: lo = '\uAC00'; hi = '\uD7A3'; break;
        case Category.UnicodeHighSurrogates: lo = '\uD800'; hi = '\uDB7F'; break;
        case Category.UnicodeHighPrivateUseSurrogates: lo = '\uDB80'; hi = '\uDBFF'; break;
        case Category.UnicodeLowSurrogates: lo = '\uDC00'; hi = '\uDFFF'; break;
        case Category.UnicodePrivateUse: lo = '\uE000'; hi = '\uF8FF'; break;
        case Category.UnicodeCJKCompatibilityIdeographs: lo = '\uF900'; hi = '\uFAFF'; break;
        case Category.UnicodeAlphabeticPresentationForms: lo = '\uFB00'; hi = '\uFB4F'; break;
        case Category.UnicodeArabicPresentationFormsA: lo = '\uFB50'; hi = '\uFDFF'; break;
        case Category.UnicodeCombiningHalfMarks: lo = '\uFE20'; hi = '\uFE2F'; break;
        case Category.UnicodeCJKCompatibilityForms: lo = '\uFE30'; hi = '\uFE4F'; break;
        case Category.UnicodeSmallFormVariants: lo = '\uFE50'; hi = '\uFE6F'; break;
        case Category.UnicodeArabicPresentationFormsB: lo = '\uFE70'; hi = '\uFEFE'; break;
        case Category.UnicodeHalfwidthandFullwidthForms: lo = '\uFF00'; hi = '\uFFEF'; break;

        default:
                throw new NotImplementedException ("Missing category: " + cat);
        }

        // Only the range-based cases fall out of the switch.
        EmitRange (lo, hi, negate, false, reverse);
}
443
// Emits the complement of a category: identical to EmitCategory with the
// `negate` flag inverted.
public void EmitNotCategory (Category cat, bool negate, bool reverse)
{
        EmitCategory (cat, !negate, reverse);
}
453
// Emits a character-range match for [lo, hi]. When both bounds are below
// 256 the RxOp.Range form with one-byte bounds is used; otherwise the
// RxOp.UnicodeRange form with two-byte bounds.
public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse)
{
        bool fitsInBytes = lo < 256 && hi < 256;

        if (!fitsInBytes) {
                EmitOp (RxOp.UnicodeRange, negate, ignore, reverse);
                Emit ((ushort)lo);
                Emit ((ushort)hi);
                return;
        }

        EmitOp (RxOp.Range, negate, ignore, reverse);
        Emit ((byte)lo);
        Emit ((byte)hi);
}
466
// Emits a bitmap match: a header (opcode, `lo`, bitmap length in bytes)
// followed by the bits of `set` packed eight per byte, lowest bit first.
// The RxOp.Bitmap form with one-byte header fields is used when both
// `lo` and the byte length are below 256; otherwise RxOp.UnicodeBitmap
// with two-byte header fields.
public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse)
{
        // number of bytes needed to hold set.Length bits, rounded up
        int byteCount = (set.Length + 0x7) >> 3;
        bool compact = lo < 256 && byteCount < 256;

        if (compact) {
                EmitOp (RxOp.Bitmap, negate, ignore, reverse);
                Emit ((byte)lo);
                Emit ((byte)byteCount);
        } else {
                EmitOp (RxOp.UnicodeBitmap, negate, ignore, reverse);
                Emit ((ushort)lo);
                Emit ((ushort)byteCount);
        }

        // emit the bitmap bytes; bits past the end of the set stay zero
        int bit = 0;
        for (int n = 0; n < byteCount; ++n) {
                int packed = 0;
                for (int i = 0; i < 8 && bit < set.Length; ++i, ++bit) {
                        if (set [bit])
                                packed |= 1 << i;
                }
                Emit ((byte)packed);
        }
}
492
// Emits a literal string match. With `ignore` set the string is
// lower-cased first.
//
// Strings shorter than 256 characters whose characters are all below 256
// use the RxOp.String form (one-byte length, one byte per character).
// Everything else uses RxOp.UnicodeString (two-byte length, two bytes per
// character).
//
// Throws NotSupportedException when the string is longer than
// ushort.MaxValue characters; nothing is emitted in that case.
public void EmitString (string str, bool ignore, bool reverse)
{
        if (ignore)
                str = str.ToLower ();

        bool islatin1 = str.Length < 256;
        for (int i = 0; islatin1 && i < str.Length; ++i) {
                if (str [i] >= 256)
                        islatin1 = false;
        }

        if (islatin1) {
                EmitOpIgnoreReverse (RxOp.String, ignore, reverse);
                Emit ((byte)str.Length);
                for (int i = 0; i < str.Length; ++i)
                        Emit ((byte)str [i]);
                return;
        }

        // Validate before emitting anything, so a rejected string does not
        // leave a dangling opcode in the program stream.
        if (str.Length > ushort.MaxValue)
                throw new NotSupportedException ();

        EmitOpIgnoreReverse (RxOp.UnicodeString, ignore, reverse);
        Emit ((ushort)str.Length);
        for (int i = 0; i < str.Length; ++i)
                Emit ((ushort)str [i]);
}
527
// Emits the single opcode that corresponds to a position assertion.
// Position.Start and Position.StartOfString both map to
// RxOp.StartOfString. Any value not listed below is rejected with
// NotSupportedException and nothing is emitted.
public void EmitPosition (Position pos)
{
        RxOp opcode;

        switch (pos) {
        case Position.Any:
                opcode = RxOp.AnyPosition;
                break;
        case Position.Start:
        case Position.StartOfString:
                opcode = RxOp.StartOfString;
                break;
        case Position.StartOfLine:
                opcode = RxOp.StartOfLine;
                break;
        case Position.StartOfScan:
                opcode = RxOp.StartOfScan;
                break;
        case Position.End:
                opcode = RxOp.End;
                break;
        case Position.EndOfString:
                opcode = RxOp.EndOfString;
                break;
        case Position.EndOfLine:
                opcode = RxOp.EndOfLine;
                break;
        case Position.Boundary:
                opcode = RxOp.WordBoundary;
                break;
        case Position.NonBoundary:
                opcode = RxOp.NoWordBoundary;
                break;
        default:
                throw new NotSupportedException ();
        }

        Emit (opcode);
}
565
// Emits RxOp.OpenGroup followed by the group id as a ushort.
// Throws NotSupportedException, without emitting anything, when gid is
// larger than ushort.MaxValue.
public void EmitOpen (int gid)
{
        if (gid <= ushort.MaxValue) {
                Emit (RxOp.OpenGroup);
                Emit ((ushort)gid);
                return;
        }

        throw new NotSupportedException ();
}
573
// Emits RxOp.CloseGroup followed by the group id as a ushort.
// Throws NotSupportedException, without emitting anything, when gid is
// larger than ushort.MaxValue.
public void EmitClose (int gid)
{
        bool fitsInOperand = gid <= ushort.MaxValue;
        if (!fitsInOperand)
                throw new NotSupportedException ();

        ushort groupId = (ushort)gid;
        Emit (RxOp.CloseGroup);
        Emit (groupId);
}
581
// Emits the start of a balancing group: RxOp.BalanceStart, then gid and
// balance as ushort operands, a one-byte capture flag (1 or 0) and
// finally the link slot for tail.
//
// Throws NotSupportedException, without emitting anything, when gid or
// balance is larger than ushort.MaxValue. The other group-emitting
// methods of this class reject such ids the same way; without the check
// the casts below would silently truncate the value.
public void EmitBalanceStart(int gid, int balance, bool capture,  LinkRef tail)
{
        if (gid > ushort.MaxValue || balance > ushort.MaxValue)
                throw new NotSupportedException ();

        BeginLink (tail);
        Emit (RxOp.BalanceStart);
        Emit ((ushort)gid);
        Emit ((ushort)balance);
        Emit ((byte)(capture ? 1 : 0));
        EmitLink (tail);
}
591
// Emits the operand-less RxOp.Balance instruction.
public void EmitBalance ()
{
        RxOp opcode = RxOp.Balance;
        Emit (opcode);
}
596
// Emits a backreference to group gid: the RxOp.Reference opcode is
// passed through EmitOpIgnoreReverse with the ignore/reverse flags and
// is followed by the group id as a ushort.
// Throws NotSupportedException, without emitting anything, when gid is
// larger than ushort.MaxValue.
public void EmitReference (int gid, bool ignore, bool reverse)
{
        if (gid <= ushort.MaxValue) {
                EmitOpIgnoreReverse (RxOp.Reference, ignore, reverse);
                ushort groupId = (ushort)gid;
                Emit (groupId);
                return;
        }

        throw new NotSupportedException ();
}
604
// Emits RxOp.IfDefined. The link slot for tail is written directly
// after the opcode and the group id follows it as a ushort.
// Throws NotSupportedException, before touching the link or the
// program, when gid is larger than ushort.MaxValue.
public void EmitIfDefined (int gid, LinkRef tail)
{
        bool fitsInOperand = gid <= ushort.MaxValue;
        if (!fitsInOperand)
                throw new NotSupportedException ();

        ushort groupId = (ushort)gid;

        BeginLink (tail);
        Emit (RxOp.IfDefined);
        EmitLink (tail);
        Emit (groupId);
}
614
// Emits RxOp.SubExpression followed by the link slot for tail.
public void EmitSub (LinkRef tail)
{
        RxOp opcode = RxOp.SubExpression;

        BeginLink (tail);
        Emit (opcode);
        EmitLink (tail);
}
621
// Emits RxOp.Test followed by two link slots: first the one for yes,
// then the one for tail. Both links are begun, in that same order,
// before the opcode is written.
public void EmitTest (LinkRef yes, LinkRef tail)
{
        RxOp opcode = RxOp.Test;

        // Begin both links ahead of the opcode.
        BeginLink (yes);
        BeginLink (tail);

        Emit (opcode);

        // Link slots follow the opcode: yes first, tail second.
        EmitLink (yes);
        EmitLink (tail);
}
630
// Emits RxOp.Branch followed by the link slot for next.
public void EmitBranch (LinkRef next)
{
        RxOp opcode = RxOp.Branch;

        BeginLink (next);
        Emit (opcode);
        EmitLink (next);
}
637
// Emits RxOp.Jump followed by the link slot for target.
public void EmitJump (LinkRef target)
{
        RxOp opcode = RxOp.Jump;

        BeginLink (target);
        Emit (opcode);
        EmitLink (target);
}
644
// Emits RxOp.TestCharGroup followed by the link slot for tail.
// Used for character groups such as [\dabcfh].
public void EmitIn (LinkRef tail)
{
        RxOp opcode = RxOp.TestCharGroup;

        BeginLink (tail);
        Emit (opcode);
        EmitLink (tail);
}
652
// Emits the head of a repeat: RxOp.RepeatLazy when lazy is set,
// RxOp.Repeat otherwise, followed by the link slot for until and then
// min and max.
public void EmitRepeat (int min, int max, bool lazy, LinkRef until)
{
        RxOp opcode = RxOp.Repeat;
        if (lazy)
                opcode = RxOp.RepeatLazy;

        BeginLink (until);
        Emit (opcode);
        EmitLink (until);

        // Operands: lower bound first, upper bound second.
        Emit (min);
        Emit (max);
}
661
// Resolves the repeat link against the current program position and
// then emits RxOp.Until, so the resolved offsets point at the Until
// instruction.
public void EmitUntil (LinkRef repeat)
{
        RxOp opcode = RxOp.Until;

        // Must happen before the opcode is written.
        ResolveLink (repeat);
        Emit (opcode);
}
667
// Emits RxOp.Info followed by count as a ushort and then min and max.
// Throws NotSupportedException when count is larger than
// ushort.MaxValue; the opcode has already been written at that point.
public void EmitInfo (int count, int min, int max)
{
        Emit (RxOp.Info);

        bool countFits = count <= ushort.MaxValue;
        if (!countFits)
                throw new NotSupportedException ();

        ushort groupCount = (ushort)count;
        Emit (groupCount);
        Emit (min);
        Emit (max);
}
677
// Emits RxOp.FastRepeatLazy when lazy is set, RxOp.FastRepeat
// otherwise, followed by the link slot for tail and then min and max.
public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail)
{
        RxOp opcode = RxOp.FastRepeat;
        if (lazy)
                opcode = RxOp.FastRepeatLazy;

        BeginLink (tail);
        Emit (opcode);
        EmitLink (tail);

        // Operands: lower bound first, upper bound second.
        Emit (min);
        Emit (max);
}
686
// Emits RxOp.AnchorReverse when reverse is set, RxOp.Anchor otherwise,
// followed by the link slot for tail and then offset as a ushort.
// Throws NotSupportedException when offset is larger than
// ushort.MaxValue; the opcode and the link slot have already been
// written at that point.
public void EmitAnchor (bool reverse, int offset, LinkRef tail)
{
        RxOp opcode = reverse ? RxOp.AnchorReverse : RxOp.Anchor;

        BeginLink (tail);
        Emit (opcode);
        EmitLink (tail);

        if (offset <= ushort.MaxValue) {
                Emit ((ushort)offset);
                return;
        }

        throw new NotSupportedException ();
}
699
// Event for the CILCompiler.
// This compiler writes nothing at the end of a branch.
public void EmitBranchEnd ()
{
        // intentionally empty
}
704
// This compiler writes nothing at the end of an alternation.
public void EmitAlternationEnd ()
{
        // intentionally empty
}
708
// Creates a fresh, empty RxLinkRef and returns it as a LinkRef.
public LinkRef NewLink ()
{
        LinkRef fresh = new RxLinkRef ();
        return fresh;
}
713
// Patches every jump recorded in link so that it targets the current
// end of the program (curpos).
//
// The link stores pairs of ints: the first of each pair is the start of
// the branching instruction, the second is the position of its offset
// slot. For each pair the distance from the instruction start to curpos
// is written into the slot as two bytes, low byte first.
//
// Throws NotSupportedException when a distance is larger than
// ushort.MaxValue; pairs handled before the failing one stay patched.
// Throws InvalidCastException when link is not an RxLinkRef.
public void ResolveLink (LinkRef link)
{
        // Direct cast rather than "as": a foreign LinkRef fails here with
        // a precise exception instead of a NullReferenceException on the
        // first member access below.
        RxLinkRef l = (RxLinkRef)link;

        for (int i = 0; i < l.current; i += 2) {
                int offset = curpos - l.offsets [i];
                if (offset > ushort.MaxValue)
                        throw new NotSupportedException ();

                int offsetpos = l.offsets [i + 1];
                program [offsetpos] = (byte)offset;             // low byte
                program [offsetpos + 1] = (byte)(offset >> 8);  // high byte
        }
}
726
727         }
728
// Machine factory for the Rx interpreter: keeps a compiled program
// together with its evaluation delegate and builds RxInterpreter
// instances from them. Gap, Mapping and NamesMapping are plain
// get/set storage on this class.
class RxInterpreterFactory : IMachineFactory {
        private byte[] program;
        private EvalDelegate eval_del;
        private IDictionary mapping;
        private string[] namesMapping;
        private int gap;

        // Stores the program and the delegate as given; no copy is made.
        public RxInterpreterFactory (byte[] program, EvalDelegate eval_del) {
                this.eval_del = eval_del;
                this.program = program;
        }

        // Builds a new interpreter over the stored program and delegate.
        public IMachine NewInstance () {
                IMachine machine = new RxInterpreter (this.program, this.eval_del);
                return machine;
        }

        // 16-bit value read from program bytes 1 and 2, low byte first.
        // NOTE(review): presumably the count operand written by
        // RxCompiler.EmitInfo at the start of the program; confirm.
        public int GroupCount {
                get {
                        int low = this.program [1];
                        int high = this.program [2];
                        return low | (high << 8);
                }
        }

        public int Gap {
                get { return this.gap; }
                set { this.gap = value; }
        }

        public IDictionary Mapping {
                get { return this.mapping; }
                set { this.mapping = value; }
        }

        public string [] NamesMapping {
                get { return this.namesMapping; }
                set { this.namesMapping = value; }
        }
}
766
767 }
768