2005-12-27 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / Commons.Xml.Relaxng / Commons.Xml.Relaxng / RelaxngGrammar.cs
1 //
2 // Commons.Xml.Relaxng.RelaxngGrammar.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
6 //
7 // 2003 Atsushi Enomoto "No rights reserved."
8 //
9 // Copyright (c) 2004 Novell Inc.
10 // All rights reserved
11 //
12
13 //
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
21 // 
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
24 // 
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 //
33 using System;
34 using System.Collections;
35 using System.IO;
36 using System.Net;
37 using System.Xml;
38 using Commons.Xml.Relaxng.Derivative;
39 using Commons.Xml.Relaxng.Rnc;
40
41 namespace Commons.Xml.Relaxng
42 {
43         public class RelaxngGrammar : RelaxngPattern
44         {
45                 // field
46                 public static string NamespaceURI =
47                         "http://relaxng.org/ns/structure/1.0";
48
49                 // object model fields
50                 string defaultNamespace;
51                 RelaxngGrammarContentList starts = new RelaxngGrammarContentList ();
52                 RelaxngGrammarContentList defs = new RelaxngGrammarContentList ();
53                 RelaxngGrammarContentList includes = new RelaxngGrammarContentList ();
54                 RelaxngGrammarContentList divs = new RelaxngGrammarContentList ();
55
56                 RelaxngDatatypeProvider provider;
57
58                 // compiled fields.
59                 RdpPattern startPattern;
60
61                 // compile cache fields.
62                 Hashtable assembledDefs = new Hashtable (); // [defName] = RelaxngDefine
63                 RelaxngPattern assembledStart;
64                 RdpPattern compiledStart;
65                 Hashtable elementReplacedDefs = new Hashtable ();
66
67                 Hashtable includedUris = new Hashtable ();
68                 RelaxngGrammar parentGrammar;
69                 Hashtable refPatterns = new Hashtable (); // key = RdpPattern of assembledDefs
70
71                 // only for checkRecursion()
72                 Hashtable checkedDefs = new Hashtable ();
73
74                 // this should be checked after its compilation finished to complete
75                 // missing-at-the-tracking patterns (especially of parent grammars).
76                 // key = RdpPattern, value = ArrayList of unresolvedPatterns.
77                 ArrayList unresolvedPatterns = new ArrayList ();
78
79                 // contents key = RdpElement and value = name of the parent define.
80                 private Hashtable ElementDefMap = new Hashtable ();
81
82                 // Public
83
84                 public RelaxngGrammar ()
85                 {
86                 }
87
88                 private void ResetCompileState ()
89                 {
90                         startPattern = null;
91                         assembledDefs.Clear ();
92                         assembledStart = null;
93                         compiledStart = null;
94                         elementReplacedDefs.Clear ();
95                         includedUris.Clear ();
96                         parentGrammar = null;
97                         refPatterns.Clear ();
98                         checkedDefs.Clear ();
99                         unresolvedPatterns.Clear ();
100                         ElementDefMap.Clear ();
101                 }
102
103                 internal RelaxngGrammar ParentGrammar {
104                         get { return parentGrammar; }
105                         set { parentGrammar = value; }
106                 }
107
108                 internal RelaxngDatatypeProvider Provider {
109                         get { return parentGrammar != null ? parentGrammar.Provider : provider; }
110                         set { provider = value; }
111                 }
112
113                 public override RelaxngPatternType PatternType {
114                         get { return RelaxngPatternType.Grammar; }
115                 }
116
117                 public string DefaultNamespace {
118                         get { return defaultNamespace; }
119                         set { defaultNamespace = value; }
120                 }
121
122                 public RelaxngGrammarContentList Starts {
123                         get { return starts; }
124                 }
125
126                 public RelaxngGrammarContentList Defines {
127                         get { return defs; }
128                 }
129
130                 public RelaxngGrammarContentList Includes {
131                         get { return includes; }
132                 }
133
134                 public RelaxngGrammarContentList Divs {
135                         get { return divs; }
136                 }
137
138                 public override void Write (XmlWriter writer)
139                 {
140                         writer.WriteStartElement ("", "grammar", RelaxngGrammar.NamespaceURI);
141                         if (defaultNamespace != null)
142                                 writer.WriteAttributeString ("ns", defaultNamespace);
143                         foreach (RelaxngStart start in Starts)
144                                 start.Write (writer);
145                         foreach (RelaxngDefine define in Defines)
146                                 define.Write (writer);
147                         foreach (RelaxngInclude include in Includes)
148                                 include.Write (writer);
149                         foreach (RelaxngDiv div in Divs)
150                                 div.Write (writer);
151                         writer.WriteEndElement ();
152                 }
153
154                 internal override void WriteRnc (RncWriter writer)
155                 {
156                         writer.WriteGrammar (this);
157                 }
158
159                 internal Hashtable IncludedUris {
160                         get { return includedUris; }
161                 }
162
163                 // Internal
164                 internal override void CheckConstraints ()
165                 {
166                         // do nothing here.
167                 }
168
169                 internal void CheckIncludeRecursion (string href)
170                 {
171                         if (this.includedUris [href] != null)
172                                 // FIXME: fill line info
173                                 throw new RelaxngException ("Include recursion found. href: " + href);
174                         if (parentGrammar != null)
175                                 parentGrammar.CheckIncludeRecursion (href);
176                 }
177
178                 // Compile from this simplified syntax to derivatives.
179                 internal override RdpPattern Compile (RelaxngGrammar grammar)
180                 {
181                         ResetCompileState ();
182
183                         parentGrammar = grammar;
184
185                         // First, process includes and divs. RELAX NG 4.1 - 4.15.
186                         ArrayList compiledDivs = new ArrayList ();
187                         foreach (RelaxngInclude inc in includes)
188                                 compiledDivs.Add (inc.Compile (this));
189                         compiledDivs.AddRange (divs);
190                         foreach (RelaxngDiv div in compiledDivs)
191                                 div.Compile (this);
192
193                         // Check constraints. RELAX NG 4.16
194                         foreach (RelaxngStart start in starts)
195                                 start.Pattern.CheckConstraints ();
196                         foreach (RelaxngDefine define in defs)
197                                 foreach (RelaxngPattern p in define.Patterns)
198                                         p.CheckConstraints ();
199
200                         // Assemble combine into the same name defines/start.
201                         // see RELAX NG 4.17.
202                         AssembleCombine ();
203
204                         // 4.18 : <grammar> must have at least one <start>.
205                         if (assembledStart == null)
206                                 throw new RelaxngException ("A grammar elements must contain at least one start element.");
207                         compiledStart = assembledStart.Compile (this);
208
209                         // Assemble all define components into top grammar and
210                         // return start patterns for descendant grammars.
211                         // see RELAX NG 4.18.
212                         CollectGrammars ();
213                         if (parentGrammar != null)
214                                 return compiledStart;
215                         assembledStart = null; // no use anymore
216
217                         // 4.19 (a) remove non-reachable defines
218 /*
219                         compiledStart.MarkReachableDefs ();
220                         ArrayList tmp = new ArrayList ();
221                         foreach (DictionaryEntry entry in this.assembledDefs)
222                                 if (!reachableDefines.ContainsKey (entry.Key))
223                                         tmp.Add (entry.Key);
224                         foreach (string key in tmp)
225                                 assembledDefs.Remove (key);
226 */
227                         // 4.19 (b) check illegal recursion
228                         CheckRecursion (compiledStart, 0);
229                         // here we collected element-replaced definitions
230                         foreach (DictionaryEntry entry in elementReplacedDefs)
231                                 assembledDefs.Add (entry.Key, entry.Value);
232                         startPattern = compiledStart;
233                         // 4.20,21 reduce notAllowed and empty.
234                         bool b;
235                         do {
236                                 b = false;
237                                 startPattern = startPattern.ReduceEmptyAndNotAllowed (ref b, new Hashtable ());
238                         } while (b);
239
240                         Hashtable ht = new Hashtable ();
241                         startPattern.setInternTable (ht);
242                         RdpNotAllowed.Instance.setInternTable (ht);
243                         RdpEmpty.Instance.setInternTable (ht);
244                         RdpText.Instance.setInternTable (ht);
245
246                         // Check Constraints: RELAX NG spec 7
247                         // 7.1.1-4, 7.3, 7.4
248                         startPattern.CheckConstraints (false, false, false, false, false, false);
249                         // 7.1.5
250                         CheckStartPatternContent (startPattern);
251
252                         // 4.19 (c) expandRef - actual replacement
253                         startPattern = compiledStart.ExpandRef (assembledDefs);
254
255                         // 7.2
256                         RdpContentType ct = startPattern.ContentType;
257
258                         // return its start pattern.
259                         IsCompiled = true;
260                         return startPattern;
261                 }
262
263                 private void CheckStartPatternContent (RdpPattern p)
264                 {
265                         switch (p.PatternType) {
266                         case RelaxngPatternType.Ref:
267                                 CheckStartPatternContent (((RdpUnresolvedRef) p).RefPattern);
268                                 break;
269                         case RelaxngPatternType.Element:
270                                 break;
271                         case RelaxngPatternType.Choice:
272                                 RdpChoice c = p as RdpChoice;
273                                 CheckStartPatternContent (c.LValue);
274                                 CheckStartPatternContent (c.RValue);
275                                 break;
276                         case RelaxngPatternType.NotAllowed:
277                                 break;
278                         default:
279                                 // FIXME: fill line info
280                                 throw new RelaxngException ("Start pattern contains an invalid content pattern.");
281                         }
282                 }
283
284                 Hashtable reachableDefines = new Hashtable ();
285
286                 // for step 4.19
287                 internal void MarkReacheableDefine (string name)
288                 {
289                         if (reachableDefines.ContainsKey (name))
290                                 return;
291                         RdpPattern p = assembledDefs [name] as RdpPattern;
292                         reachableDefines.Add (name, p);
293                         p.MarkReachableDefs ();
294                 }
295
296                 // 4.19 (b)
297                 private void CheckRecursion (RdpPattern p, int depth)
298                 {
299
300                         RdpAbstractBinary binary = p as RdpAbstractBinary;
301                         if (binary != null) {
302                                 // choice, interleave, group
303                                 CheckRecursion (binary.LValue, depth);
304                                 CheckRecursion (binary.RValue, depth);
305                                 return;
306                         }
307                         RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
308                         if (single != null) {
309                                 CheckRecursion (single.Child, depth);
310                                 return;
311                         }
312
313                         switch (p.PatternType) {
314                         case RelaxngPatternType.Ref:
315                                 // get checkRecursionDepth from table.
316                                 int checkRecursionDepth = -1;
317                                 object checkedDepth = checkedDefs [p];
318                                 if (checkedDepth != null)
319                                         checkRecursionDepth = (int) checkedDepth;
320                                 // get refPattern
321                                 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
322                                 RelaxngGrammar target = pref.TargetGrammar;
323                                 RdpPattern refPattern = pref.RefPattern;
324                                 if (refPattern == null)
325                                         // FIXME: fill line info
326                                         throw new RelaxngException ("No matching define found for " + pref.Name);
327
328                                 if (checkRecursionDepth == -1) {
329                                         checkedDefs [p] = depth;
330 /*test*/                                        if (refPattern.PatternType != RelaxngPatternType.Element)
331                                                 CheckRecursion (refPattern, depth);
332                                         checkedDefs [p] = -2;
333                                 }
334                                 else if (depth == checkRecursionDepth)
335                                         // FIXME: fill line info
336                                         throw new RelaxngException (String.Format ("Detected illegal recursion. Ref name is {0}.", pref.Name));
337
338                                 break;
339
340                         case RelaxngPatternType.Attribute:
341                                 CheckRecursion (((RdpAttribute) p).Children, depth);
342                                 break;
343
344                         case RelaxngPatternType.DataExcept:
345                                 CheckRecursion (((RdpDataExcept) p).Except, depth);
346                                 break;
347
348                         case RelaxngPatternType.Element:
349                                 RdpElement el = p as RdpElement;
350                                 CheckRecursion (el.Children, depth + 1);        // +1
351                                 break;
352                         case RelaxngPatternType.List:
353                                 CheckRecursion (((RdpList) p).Child, depth);
354                                 break;
355                         }
356                 }
357
358                 // 4.18
359                 private void CollectGrammars ()
360                 {
361                         // collect ref and parentRef for each define.
362
363                         // FIXME: This should be assembledStart.
364                         CheckReferences (compiledStart);
365                         FixupReference ();
366
367                         foreach (string name in assembledDefs.Keys) {
368                                 RdpPattern p = (RdpPattern) assembledDefs [name];
369                                 CheckReferences (p);
370                                 FixupReference ();
371                         }
372
373                         // If it is child of any other pattern:
374                         // * Remove all definitions under descendant grammars,
375                         //   replacing ref names, and
376                         // * Then return its start pattern.
377                         if (parentGrammar != null) {
378                                 // TODO: reachable check is incomplete.
379                                 foreach (string name in assembledDefs.Keys) {
380                                         ArrayList al = 
381                                                 refPatterns [assembledDefs [name] ] as ArrayList;
382                                         if (al == null)
383                                                 continue; // Not referenced.
384
385                                         // At this point, parent grammar doesn't 
386                                         // collect assembledDefs as yet
387                                         string uname = GetUniqueName (name, parentGrammar);
388                                         parentGrammar.assembledDefs [uname] = assembledDefs [name];
389                                 }
390                         }
391                 }
392
393                 private static string GetUniqueName (string name, RelaxngGrammar grammar)
394                 {
395                         foreach (RelaxngDefine def in grammar.Defines)
396                                 if (def.Name == name)
397                                         return GetUniqueName (name + '_', grammar);
398                         return name;
399                 }
400
401                 private void FixupReference ()
402                 {
403                         foreach (RdpUnresolvedRef pref in this.unresolvedPatterns) {
404                                 RdpPattern defP = assembledDefs [pref.Name] as RdpPattern;
405                                 if (defP == null)
406                                         // FIXME: fill line info
407                                         throw new RelaxngException (String.Format ("Target definition was not found: {0}", pref.Name));
408                                 ArrayList al = refPatterns [defP] as ArrayList;
409                                 if (al == null) {
410                                         al = new ArrayList ();
411                                         refPatterns [defP] = al;
412                                 }
413                                 al.Add (pref);
414                         }
415                         this.unresolvedPatterns.Clear ();
416                 }
417
418                 private void replaceDefines (string name, ArrayList al)
419                 {
420                         int idx = 0;
421                         while (true) {
422                                 string newName = "define" + idx;
423                                 if (parentGrammar.assembledDefs [newName] == null) {
424                                         parentGrammar.assembledDefs [newName] = 
425                                                 assembledDefs [name];
426                                         foreach (RdpUnresolvedRef pref in al)
427                                                 pref.Name = newName;
428                                         break;
429                                 }
430                                 idx++;
431                         }
432                 }
433
434                 // remove ref and parentRef.
435                 // add new defines for each elements.
436                 private void CheckReferences (RdpPattern p)
437                 {
438                         RdpAbstractBinary binary = p as RdpAbstractBinary;
439                         if (binary != null) {
440                                 // choice, interleave, group
441                                 CheckReferences (binary.LValue);
442                                 CheckReferences (binary.RValue);
443                                 return;
444                         }
445                         RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
446                         if (single != null) {
447                                 CheckReferences (single.Child);
448                                 return;
449                         }
450
451                         switch (p.PatternType) {
452                         case RelaxngPatternType.Ref:
453                                 // FIXME: This should not re-expand ref
454                                 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
455                                 if (pref.RefPattern != null)
456                                         break;
457
458                                 RelaxngGrammar target = pref.TargetGrammar;
459                                 if (target == null)
460                                         // FIXME: fill line info
461                                         throw new RelaxngException ("Referenced definition was not found.");
462                                 RdpPattern defP = target.assembledDefs [pref.Name] as RdpPattern;
463                                 if (defP == null)
464                                         target.unresolvedPatterns.Add (p);
465                                 else {
466                                         ArrayList al = target.refPatterns [defP] as ArrayList;
467                                         if (al == null) {
468                                                 al = new ArrayList ();
469                                                 target.refPatterns [defP] = al;
470                                         }
471                                         al.Add (p);
472                                         pref.RefPattern = defP;
473                                 }
474                                 break;
475
476                         case RelaxngPatternType.Attribute:
477                                 CheckReferences (((RdpAttribute) p).Children);
478                                 break;
479
480                         case RelaxngPatternType.DataExcept:
481                                 CheckReferences (((RdpDataExcept) p).Except);
482                                 break;
483
484                         case RelaxngPatternType.Element:
485                                 RdpElement el = p as RdpElement;
486                                 CheckReferences (el.Children);
487                                 string name = ElementDefMap [el] as string;
488                                 if (name == null) {
489                                         // add new define
490                                         int idx = 0;
491                                         string newName = "element0";
492                                         if (el.NameClass is RdpName)
493                                                 newName = ((RdpName) el.NameClass).LocalName;
494                                         while (true) {
495                                                 if (assembledDefs [newName] == null) {
496                                                         elementReplacedDefs [newName] = el.Children;
497                                                         break;
498                                                 }
499                                                 newName = "element" + ++idx;
500                                         }
501                                         ElementDefMap [el] = newName;
502                                 }
503                                 // Even though the element is replaced with ref,
504                                 // derivative of ref is RdpElement in fact...
505                                 break;
506
507                         case RelaxngPatternType.List:
508                                 CheckReferences (((RdpList) p).Child);
509                                 break;
510
511                         case RelaxngPatternType.Empty:
512                         case RelaxngPatternType.NotAllowed:
513                         case RelaxngPatternType.Text:
514                         case RelaxngPatternType.Value:
515                                 break;
516
517                         //case RelaxngPatternType.ExternalRef:
518                         //case RelaxngPatternType.Include:
519                         // Mixed, Optional, ZeroOrMore are already removed.
520                         // Choice, Group, Interleave, OneOrMore are already proceeded.
521                         }
522                 }
523
524                 #region 4.17 - Combine
525                 private void AssembleCombine ()
526                 {
527                         // calculate combines.
528                         bool haveHeadStart = false;
529                         string combineStart = null;
530                         Hashtable haveHeadDefs = new Hashtable ();
531                         Hashtable combineDefs = new Hashtable ();
532
533                         // 1.calculate combine for starts.
534                         foreach (RelaxngStart start in starts)
535                                 CheckCombine (ref haveHeadStart, 
536                                         ref combineStart, start.Combine, "start");
537                         // 2.calculate combine for defines.
538                         foreach (RelaxngDefine def in defs) {
539                                 bool haveHead = 
540                                         haveHeadDefs.ContainsKey (def.Name) ?
541                                         haveHead = (bool) haveHeadDefs [def.Name]
542                                         : false;
543                                 string combine = combineDefs [def.Name] as string;
544                                 CheckCombine (ref haveHead, ref combine,
545                                         def.Combine, String.Format ("define name={0}", def.Name));
546                                 haveHeadDefs [def.Name] = haveHead;
547                                 combineDefs [def.Name] = combine;
548                                 continue;
549                         }
550
551                         // assemble starts and defines with "combine" attribute.
552
553                         // 3.assemble starts.
554                         if (starts.Count == 0) {
555                                 if (ParentGrammar == null)
556                                         throw new RelaxngException (this, "grammar must have at least one start component.");
557                         } else {
558                                 assembledStart = ((RelaxngStart)starts [0]).Pattern;
559                                 for (int i=1; i<starts.Count; i++) {
560                                         RelaxngPattern p2 = ((RelaxngStart) starts [i]).Pattern;;
561                                         if (combineStart == "interleave") {
562                                                 RelaxngInterleave intlv = new RelaxngInterleave ();
563                                                 intlv.Patterns.Add (assembledStart);
564                                                 intlv.Patterns.Add (p2);
565                                                 assembledStart = intlv;
566                                         } else {
567                                                 RelaxngChoice c = new RelaxngChoice ();
568                                                 c.Patterns.Add (assembledStart);
569                                                 c.Patterns.Add (p2);
570                                                 assembledStart = c;
571                                         }
572                                 }
573                         }
574
575                         // 4.assemble defines
576                         foreach (RelaxngDefine def in defs) {
577                                 string combine = combineDefs [def.Name] as string;
578                                 RdpPattern p1 = 
579                                         assembledDefs [def.Name] as RdpPattern;
580                                 RdpPattern p2 = def.Compile (this);
581                                 if (p1 != null) {
582                                         if (combine == "interleave") {
583                                                 assembledDefs [def.Name] =
584                                                         new RdpInterleave (p1, p2);
585                                         } else {
586                                                 assembledDefs [def.Name] =
587                                                         new RdpChoice (p1, p2);
588                                         }
589                                 } else {
590                                         assembledDefs [def.Name] = p2;
591                                 }
592                         }
593
594                 }
595
596                 // check combine attributes.
597                 private void CheckCombine (ref bool haveHead, ref string combine, string newCombine, string targetSpec)
598                 {
599                         switch (newCombine) {
600                         case "interleave":
601                                 if (combine == "choice")
602                                         throw new RelaxngException (this, "\"combine\" was already specified \"choice\"");
603                                 else
604                                         combine = "interleave";
605                                 break;
606                         case "choice":
607                                 if (combine == "interleave")
608                                         throw new RelaxngException (this, "\"combine\" was already specified \"interleave\"");
609                                 else
610                                         combine = "choice";
611                                 break;
612                         case null:
613                                 if (haveHead)
614                                         throw new RelaxngException (this, String.Format ("There was already \"{0}\" element without \"combine\" attribute.", targetSpec));
615                                 haveHead = true;
616                                 break;
617                         }
618                 }
619                 #endregion
620         }
621 }