merge -r 60439:60440
[mono.git] / mcs / class / Commons.Xml.Relaxng / Commons.Xml.Relaxng / RelaxngGrammar.cs
1 //
2 // Commons.Xml.Relaxng.RelaxngGrammar.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
6 //
7 // 2003 Atsushi Enomoto "No rights reserved."
8 //
9 // Copyright (c) 2004 Novell Inc.
10 // All rights reserved
11 //
12
13 //
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
21 // 
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
24 // 
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 //
33 using System;
34 using System.Collections;
35 using System.IO;
36 using System.Net;
37 using System.Xml;
38 using Commons.Xml.Relaxng.Derivative;
39 using Commons.Xml.Relaxng.Rnc;
40
41 namespace Commons.Xml.Relaxng
42 {
43         public class RelaxngGrammar : RelaxngPattern
44         {
45                 // field
46                 public static string NamespaceURI =
47                         "http://relaxng.org/ns/structure/1.0";
48
49                 // object model fields
50                 string defaultNamespace;
51                 RelaxngGrammarContentList starts = new RelaxngGrammarContentList ();
52                 RelaxngGrammarContentList defs = new RelaxngGrammarContentList ();
53                 RelaxngGrammarContentList includes = new RelaxngGrammarContentList ();
54                 RelaxngGrammarContentList divs = new RelaxngGrammarContentList ();
55
56                 RelaxngDatatypeProvider provider;
57
58                 // compiled fields.
59                 RdpPattern startPattern;
60
61                 // compile cache fields.
62                 Hashtable assembledDefs = new Hashtable (); // [defName] = RelaxngDefine
63                 RelaxngPattern assembledStart;
64                 RdpPattern compiledStart;
65                 Hashtable elementReplacedDefs = new Hashtable ();
66
67                 Hashtable includedUris = new Hashtable ();
68                 RelaxngGrammar parentGrammar;
69                 Hashtable refPatterns = new Hashtable (); // key = RdpPattern of assembledDefs
70
71                 // only for checkRecursion()
72                 Hashtable checkedDefs = new Hashtable ();
73
74                 // this should be checked after its compilation finished to complete
75                 // missing-at-the-tracking patterns (especially of parent grammars).
76                 // key = RdpPattern, value = ArrayList of unresolvedPatterns.
77                 ArrayList unresolvedPatterns = new ArrayList ();
78
79                 // contents key = RdpElement and value = name of the parent define.
80                 private Hashtable ElementDefMap = new Hashtable ();
81
82                 // Public
83
84                 public RelaxngGrammar ()
85                 {
86                 }
87
88                 private void ResetCompileState ()
89                 {
90                         startPattern = null;
91                         assembledDefs.Clear ();
92                         assembledStart = null;
93                         compiledStart = null;
94                         elementReplacedDefs.Clear ();
95                         includedUris.Clear ();
96                         parentGrammar = null;
97                         refPatterns.Clear ();
98                         checkedDefs.Clear ();
99                         unresolvedPatterns.Clear ();
100                         ElementDefMap.Clear ();
101                 }
102
103                 internal RelaxngGrammar ParentGrammar {
104                         get { return parentGrammar; }
105                         set { parentGrammar = value; }
106                 }
107
108                 internal RelaxngDatatypeProvider Provider {
109                         get { return parentGrammar != null ? parentGrammar.Provider : provider; }
110                         set { provider = value; }
111                 }
112
113                 public override RelaxngPatternType PatternType {
114                         get { return RelaxngPatternType.Grammar; }
115                 }
116
117                 public string DefaultNamespace {
118                         get { return defaultNamespace; }
119                         set { defaultNamespace = value; }
120                 }
121
122                 public RelaxngGrammarContentList Starts {
123                         get { return starts; }
124                 }
125
126                 public RelaxngGrammarContentList Defines {
127                         get { return defs; }
128                 }
129
130                 public RelaxngGrammarContentList Includes {
131                         get { return includes; }
132                 }
133
134                 public RelaxngGrammarContentList Divs {
135                         get { return divs; }
136                 }
137
138                 public override void Write (XmlWriter writer)
139                 {
140                         writer.WriteStartElement ("", "grammar", RelaxngGrammar.NamespaceURI);
141                         if (defaultNamespace != null)
142                                 writer.WriteAttributeString ("ns", defaultNamespace);
143                         foreach (RelaxngStart start in Starts)
144                                 start.Write (writer);
145                         foreach (RelaxngDefine define in Defines)
146                                 define.Write (writer);
147                         foreach (RelaxngInclude include in Includes)
148                                 include.Write (writer);
149                         foreach (RelaxngDiv div in Divs)
150                                 div.Write (writer);
151                         writer.WriteEndElement ();
152                 }
153
154                 internal override void WriteRnc (RncWriter writer)
155                 {
156                         writer.WriteGrammar (this);
157                 }
158
159                 internal Hashtable IncludedUris {
160                         get { return includedUris; }
161                 }
162
163                 // Internal
164                 internal override void CheckConstraints ()
165                 {
166                         // do nothing here.
167                 }
168
169                 internal void CheckIncludeRecursion (string href)
170                 {
171                         if (this.includedUris [href] != null)
172                                 // FIXME: fill line info
173                                 throw new RelaxngException ("Include recursion found. href: " + href);
174                         if (parentGrammar != null)
175                                 parentGrammar.CheckIncludeRecursion (href);
176                 }
177
178                 // Compile from this simplified syntax to derivatives.
179                 internal override RdpPattern Compile (RelaxngGrammar grammar)
180                 {
181                         ResetCompileState ();
182
183                         parentGrammar = grammar;
184
185                         // First, process includes and divs. RELAX NG 4.1 - 4.15.
186                         ArrayList compiledDivs = new ArrayList ();
187                         foreach (RelaxngInclude inc in includes)
188                                 compiledDivs.Add (inc.Compile (this));
189                         compiledDivs.AddRange (divs);
190                         foreach (RelaxngDiv div in compiledDivs)
191                                 div.Compile (this);
192
193                         // Check constraints. RELAX NG 4.16
194                         foreach (RelaxngStart start in starts)
195                                 start.Pattern.CheckConstraints ();
196                         foreach (RelaxngDefine define in defs)
197                                 foreach (RelaxngPattern p in define.Patterns)
198                                         p.CheckConstraints ();
199
200                         // Assemble combine into the same name defines/start.
201                         // see RELAX NG 4.17.
202                         AssembleCombine ();
203
204                         // 4.18 : <grammar> must have at least one <start>.
205                         if (assembledStart == null)
206                                 throw new RelaxngException ("A grammar elements must contain at least one start element.");
207                         compiledStart = assembledStart.Compile (this);
208
209                         // Assemble all define components into top grammar and
210                         // return start patterns for descendant grammars.
211                         // see RELAX NG 4.18.
212                         CollectGrammars ();
213                         if (parentGrammar != null)
214                                 return compiledStart;
215                         assembledStart = null; // no use anymore
216
217                         // 4.19 (a) remove non-reachable defines
218 /*
219                         compiledStart.MarkReachableDefs ();
220                         ArrayList tmp = new ArrayList ();
221                         foreach (DictionaryEntry entry in this.assembledDefs)
222                                 if (!reachableDefines.ContainsKey (entry.Key))
223                                         tmp.Add (entry.Key);
224                         foreach (string key in tmp)
225                                 assembledDefs.Remove (key);
226 */
227                         // 4.19 (b) check illegal recursion
228                         CheckRecursion (compiledStart, 0);
229                         // here we collected element-replaced definitions
230                         foreach (DictionaryEntry entry in elementReplacedDefs)
231                                 assembledDefs.Add (entry.Key, entry.Value);
232                         startPattern = compiledStart;
233                         // 4.20,21 reduce notAllowed and empty.
234                         bool b;
235                         do {
236                                 b = false;
237                                 startPattern = startPattern.ReduceEmptyAndNotAllowed (ref b, new Hashtable ());
238                         } while (b);
239
240                         Hashtable ht = new Hashtable ();
241                         startPattern.setInternTable (ht);
242
243                         // Check Constraints: RELAX NG spec 7
244                         // 7.1.1-4, 7.3, 7.4
245                         startPattern.CheckConstraints (false, false, false, false, false, false);
246                         // 7.1.5
247                         CheckStartPatternContent (startPattern);
248
249                         // 4.19 (c) expandRef - actual replacement
250                         startPattern = compiledStart.ExpandRef (assembledDefs);
251
252                         // 7.2
253                         RdpContentType ct = startPattern.ContentType;
254
255                         // return its start pattern.
256                         IsCompiled = true;
257                         return startPattern;
258                 }
259
260                 private void CheckStartPatternContent (RdpPattern p)
261                 {
262                         switch (p.PatternType) {
263                         case RelaxngPatternType.Ref:
264                                 CheckStartPatternContent (((RdpUnresolvedRef) p).RefPattern);
265                                 break;
266                         case RelaxngPatternType.Element:
267                                 break;
268                         case RelaxngPatternType.Choice:
269                                 RdpChoice c = p as RdpChoice;
270                                 CheckStartPatternContent (c.LValue);
271                                 CheckStartPatternContent (c.RValue);
272                                 break;
273                         case RelaxngPatternType.NotAllowed:
274                                 break;
275                         default:
276                                 // FIXME: fill line info
277                                 throw new RelaxngException ("Start pattern contains an invalid content pattern.");
278                         }
279                 }
280
281                 Hashtable reachableDefines = new Hashtable ();
282
283                 // for step 4.19
284                 internal void MarkReacheableDefine (string name)
285                 {
286                         if (reachableDefines.ContainsKey (name))
287                                 return;
288                         RdpPattern p = assembledDefs [name] as RdpPattern;
289                         reachableDefines.Add (name, p);
290                         p.MarkReachableDefs ();
291                 }
292
293                 // 4.19 (b)
294                 private void CheckRecursion (RdpPattern p, int depth)
295                 {
296
297                         RdpAbstractBinary binary = p as RdpAbstractBinary;
298                         if (binary != null) {
299                                 // choice, interleave, group
300                                 CheckRecursion (binary.LValue, depth);
301                                 CheckRecursion (binary.RValue, depth);
302                                 return;
303                         }
304                         RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
305                         if (single != null) {
306                                 CheckRecursion (single.Child, depth);
307                                 return;
308                         }
309
310                         switch (p.PatternType) {
311                         case RelaxngPatternType.Ref:
312                                 // get checkRecursionDepth from table.
313                                 int checkRecursionDepth = -1;
314                                 object checkedDepth = checkedDefs [p];
315                                 if (checkedDepth != null)
316                                         checkRecursionDepth = (int) checkedDepth;
317                                 // get refPattern
318                                 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
319                                 RelaxngGrammar target = pref.TargetGrammar;
320                                 RdpPattern refPattern = pref.RefPattern;
321                                 if (refPattern == null)
322                                         // FIXME: fill line info
323                                         throw new RelaxngException ("No matching define found for " + pref.Name);
324
325                                 if (checkRecursionDepth == -1) {
326                                         checkedDefs [p] = depth;
327 /*test*/                                        if (refPattern.PatternType != RelaxngPatternType.Element)
328                                                 CheckRecursion (refPattern, depth);
329                                         checkedDefs [p] = -2;
330                                 }
331                                 else if (depth == checkRecursionDepth)
332                                         // FIXME: fill line info
333                                         throw new RelaxngException (String.Format ("Detected illegal recursion. Ref name is {0}.", pref.Name));
334
335                                 break;
336
337                         case RelaxngPatternType.Attribute:
338                                 CheckRecursion (((RdpAttribute) p).Children, depth);
339                                 break;
340
341                         case RelaxngPatternType.DataExcept:
342                                 CheckRecursion (((RdpDataExcept) p).Except, depth);
343                                 break;
344
345                         case RelaxngPatternType.Element:
346                                 RdpElement el = p as RdpElement;
347                                 CheckRecursion (el.Children, depth + 1);        // +1
348                                 break;
349                         case RelaxngPatternType.List:
350                                 CheckRecursion (((RdpList) p).Child, depth);
351                                 break;
352                         }
353                 }
354
355                 // 4.18
356                 private void CollectGrammars ()
357                 {
358                         // collect ref and parentRef for each define.
359
360                         // FIXME: This should be assembledStart.
361                         CheckReferences (compiledStart);
362                         FixupReference ();
363
364                         foreach (string name in assembledDefs.Keys) {
365                                 RdpPattern p = (RdpPattern) assembledDefs [name];
366                                 CheckReferences (p);
367                                 FixupReference ();
368                         }
369
370                         // If it is child of any other pattern:
371                         // * Remove all definitions under descendant grammars,
372                         //   replacing ref names, and
373                         // * Then return its start pattern.
374                         if (parentGrammar != null) {
375                                 // TODO: reachable check is incomplete.
376                                 foreach (string name in assembledDefs.Keys) {
377                                         ArrayList al = 
378                                                 refPatterns [assembledDefs [name] ] as ArrayList;
379                                         if (al == null)
380                                                 continue; // Not referenced.
381
382                                         // At this point, parent grammar doesn't 
383                                         // collect assembledDefs as yet
384                                         string uname = GetUniqueName (name, parentGrammar);
385                                         parentGrammar.assembledDefs [uname] = assembledDefs [name];
386                                 }
387                         }
388                 }
389
390                 private static string GetUniqueName (string name, RelaxngGrammar grammar)
391                 {
392                         foreach (RelaxngDefine def in grammar.Defines)
393                                 if (def.Name == name)
394                                         return GetUniqueName (name + '_', grammar);
395                         return name;
396                 }
397
398                 private void FixupReference ()
399                 {
400                         foreach (RdpUnresolvedRef pref in this.unresolvedPatterns) {
401                                 RdpPattern defP = assembledDefs [pref.Name] as RdpPattern;
402                                 if (defP == null)
403                                         // FIXME: fill line info
404                                         throw new RelaxngException (String.Format ("Target definition was not found: {0}", pref.Name));
405                                 ArrayList al = refPatterns [defP] as ArrayList;
406                                 if (al == null) {
407                                         al = new ArrayList ();
408                                         refPatterns [defP] = al;
409                                 }
410                                 al.Add (pref);
411                         }
412                         this.unresolvedPatterns.Clear ();
413                 }
414
415                 private void replaceDefines (string name, ArrayList al)
416                 {
417                         int idx = 0;
418                         while (true) {
419                                 string newName = "define" + idx;
420                                 if (parentGrammar.assembledDefs [newName] == null) {
421                                         parentGrammar.assembledDefs [newName] = 
422                                                 assembledDefs [name];
423                                         foreach (RdpUnresolvedRef pref in al)
424                                                 pref.Name = newName;
425                                         break;
426                                 }
427                                 idx++;
428                         }
429                 }
430
431                 // remove ref and parentRef.
432                 // add new defines for each elements.
433                 private void CheckReferences (RdpPattern p)
434                 {
435                         RdpAbstractBinary binary = p as RdpAbstractBinary;
436                         if (binary != null) {
437                                 // choice, interleave, group
438                                 CheckReferences (binary.LValue);
439                                 CheckReferences (binary.RValue);
440                                 return;
441                         }
442                         RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
443                         if (single != null) {
444                                 CheckReferences (single.Child);
445                                 return;
446                         }
447
448                         switch (p.PatternType) {
449                         case RelaxngPatternType.Ref:
450                                 // FIXME: This should not re-expand ref
451                                 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
452                                 if (pref.RefPattern != null)
453                                         break;
454
455                                 RelaxngGrammar target = pref.TargetGrammar;
456                                 if (target == null)
457                                         // FIXME: fill line info
458                                         throw new RelaxngException ("Referenced definition was not found.");
459                                 RdpPattern defP = target.assembledDefs [pref.Name] as RdpPattern;
460                                 if (defP == null)
461                                         target.unresolvedPatterns.Add (p);
462                                 else {
463                                         ArrayList al = target.refPatterns [defP] as ArrayList;
464                                         if (al == null) {
465                                                 al = new ArrayList ();
466                                                 target.refPatterns [defP] = al;
467                                         }
468                                         al.Add (p);
469                                         pref.RefPattern = defP;
470                                 }
471                                 break;
472
473                         case RelaxngPatternType.Attribute:
474                                 CheckReferences (((RdpAttribute) p).Children);
475                                 break;
476
477                         case RelaxngPatternType.DataExcept:
478                                 CheckReferences (((RdpDataExcept) p).Except);
479                                 break;
480
481                         case RelaxngPatternType.Element:
482                                 RdpElement el = p as RdpElement;
483                                 CheckReferences (el.Children);
484                                 string name = ElementDefMap [el] as string;
485                                 if (name == null) {
486                                         // add new define
487                                         int idx = 0;
488                                         string newName = "element0";
489                                         if (el.NameClass is RdpName)
490                                                 newName = ((RdpName) el.NameClass).LocalName;
491                                         while (true) {
492                                                 if (assembledDefs [newName] == null) {
493                                                         elementReplacedDefs [newName] = el.Children;
494                                                         break;
495                                                 }
496                                                 newName = "element" + ++idx;
497                                         }
498                                         ElementDefMap [el] = newName;
499                                 }
500                                 // Even though the element is replaced with ref,
501                                 // derivative of ref is RdpElement in fact...
502                                 break;
503
504                         case RelaxngPatternType.List:
505                                 CheckReferences (((RdpList) p).Child);
506                                 break;
507
508                         case RelaxngPatternType.Empty:
509                         case RelaxngPatternType.NotAllowed:
510                         case RelaxngPatternType.Text:
511                         case RelaxngPatternType.Value:
512                                 break;
513
514                         //case RelaxngPatternType.ExternalRef:
515                         //case RelaxngPatternType.Include:
516                         // Mixed, Optional, ZeroOrMore are already removed.
517                         // Choice, Group, Interleave, OneOrMore are already proceeded.
518                         }
519                 }
520
521                 #region 4.17 - Combine
522                 private void AssembleCombine ()
523                 {
524                         // calculate combines.
525                         bool haveHeadStart = false;
526                         string combineStart = null;
527                         Hashtable haveHeadDefs = new Hashtable ();
528                         Hashtable combineDefs = new Hashtable ();
529
530                         // 1.calculate combine for starts.
531                         foreach (RelaxngStart start in starts)
532                                 CheckCombine (ref haveHeadStart, 
533                                         ref combineStart, start.Combine, "start");
534                         // 2.calculate combine for defines.
535                         foreach (RelaxngDefine def in defs) {
536                                 bool haveHead = 
537                                         haveHeadDefs.ContainsKey (def.Name) ?
538                                         haveHead = (bool) haveHeadDefs [def.Name]
539                                         : false;
540                                 string combine = combineDefs [def.Name] as string;
541                                 CheckCombine (ref haveHead, ref combine,
542                                         def.Combine, String.Format ("define name={0}", def.Name));
543                                 haveHeadDefs [def.Name] = haveHead;
544                                 combineDefs [def.Name] = combine;
545                                 continue;
546                         }
547
548                         // assemble starts and defines with "combine" attribute.
549
550                         // 3.assemble starts.
551                         if (starts.Count == 0) {
552                                 if (ParentGrammar == null)
553                                         throw new RelaxngException (this, "grammar must have at least one start component.");
554                         } else {
555                                 assembledStart = ((RelaxngStart)starts [0]).Pattern;
556                                 for (int i=1; i<starts.Count; i++) {
557                                         RelaxngPattern p2 = ((RelaxngStart) starts [i]).Pattern;;
558                                         if (combineStart == "interleave") {
559                                                 RelaxngInterleave intlv = new RelaxngInterleave ();
560                                                 intlv.Patterns.Add (assembledStart);
561                                                 intlv.Patterns.Add (p2);
562                                                 assembledStart = intlv;
563                                         } else {
564                                                 RelaxngChoice c = new RelaxngChoice ();
565                                                 c.Patterns.Add (assembledStart);
566                                                 c.Patterns.Add (p2);
567                                                 assembledStart = c;
568                                         }
569                                 }
570                         }
571
572                         // 4.assemble defines
573                         foreach (RelaxngDefine def in defs) {
574                                 string combine = combineDefs [def.Name] as string;
575                                 RdpPattern p1 = 
576                                         assembledDefs [def.Name] as RdpPattern;
577                                 RdpPattern p2 = def.Compile (this);
578                                 if (p1 != null) {
579                                         if (combine == "interleave") {
580                                                 assembledDefs [def.Name] =
581                                                         new RdpInterleave (p1, p2);
582                                         } else {
583                                                 assembledDefs [def.Name] =
584                                                         new RdpChoice (p1, p2);
585                                         }
586                                 } else {
587                                         assembledDefs [def.Name] = p2;
588                                 }
589                         }
590
591                 }
592
593                 // check combine attributes.
594                 private void CheckCombine (ref bool haveHead, ref string combine, string newCombine, string targetSpec)
595                 {
596                         switch (newCombine) {
597                         case "interleave":
598                                 if (combine == "choice")
599                                         throw new RelaxngException (this, "\"combine\" was already specified \"choice\"");
600                                 else
601                                         combine = "interleave";
602                                 break;
603                         case "choice":
604                                 if (combine == "interleave")
605                                         throw new RelaxngException (this, "\"combine\" was already specified \"interleave\"");
606                                 else
607                                         combine = "choice";
608                                 break;
609                         case null:
610                                 if (haveHead)
611                                         throw new RelaxngException (this, String.Format ("There was already \"{0}\" element without \"combine\" attribute.", targetSpec));
612                                 haveHead = true;
613                                 break;
614                         }
615                 }
616                 #endregion
617         }
618 }