2 // Commons.Xml.Relaxng.RelaxngGrammar.cs
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
7 // 2003 Atsushi Enomoto "No rights reserved."
9 // Copyright (c) 2004 Novell Inc.
10 // All rights reserved
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Collections;
38 using Commons.Xml.Relaxng.Derivative;
39 using Commons.Xml.Relaxng.Rnc;
41 namespace Commons.Xml.Relaxng
43 public class RelaxngGrammar : RelaxngPattern
46 public static string NamespaceURI =
47 "http://relaxng.org/ns/structure/1.0";
49 // object model fields
50 string defaultNamespace;
51 RelaxngGrammarContentList starts = new RelaxngGrammarContentList ();
52 RelaxngGrammarContentList defs = new RelaxngGrammarContentList ();
53 RelaxngGrammarContentList includes = new RelaxngGrammarContentList ();
54 RelaxngGrammarContentList divs = new RelaxngGrammarContentList ();
56 RelaxngDatatypeProvider provider;
59 RdpPattern startPattern;
61 // compile cache fields.
62 Hashtable assembledDefs = new Hashtable (); // [defName] = RelaxngDefine
63 RelaxngPattern assembledStart;
64 RdpPattern compiledStart;
65 Hashtable elementReplacedDefs = new Hashtable ();
67 Hashtable includedUris = new Hashtable ();
68 RelaxngGrammar parentGrammar;
69 Hashtable refPatterns = new Hashtable (); // key = RdpPattern of assembledDefs
71 // only for checkRecursion()
72 Hashtable checkedDefs = new Hashtable ();
74 // this should be checked after its compilation finished to complete
75 // missing-at-the-tracking patterns (especially of parent grammars).
76 // key = RdpPattern, value = ArrayList of unresolvedPatterns.
77 ArrayList unresolvedPatterns = new ArrayList ();
79 // contents key = RdpElement and value = name of the parent define.
80 private Hashtable ElementDefMap = new Hashtable ();
84 public RelaxngGrammar ()
88 private void ResetCompileState ()
91 assembledDefs.Clear ();
92 assembledStart = null;
94 elementReplacedDefs.Clear ();
95 includedUris.Clear ();
99 unresolvedPatterns.Clear ();
100 ElementDefMap.Clear ();
103 internal RelaxngGrammar ParentGrammar {
104 get { return parentGrammar; }
105 set { parentGrammar = value; }
108 internal RelaxngDatatypeProvider Provider {
109 get { return parentGrammar != null ? parentGrammar.Provider : provider; }
110 set { provider = value; }
113 public override RelaxngPatternType PatternType {
114 get { return RelaxngPatternType.Grammar; }
117 public string DefaultNamespace {
118 get { return defaultNamespace; }
119 set { defaultNamespace = value; }
122 public RelaxngGrammarContentList Starts {
123 get { return starts; }
126 public RelaxngGrammarContentList Defines {
130 public RelaxngGrammarContentList Includes {
131 get { return includes; }
134 public RelaxngGrammarContentList Divs {
138 public override void Write (XmlWriter writer)
140 writer.WriteStartElement ("", "grammar", RelaxngGrammar.NamespaceURI);
141 if (defaultNamespace != null)
142 writer.WriteAttributeString ("ns", defaultNamespace);
143 foreach (RelaxngStart start in Starts)
144 start.Write (writer);
145 foreach (RelaxngDefine define in Defines)
146 define.Write (writer);
147 foreach (RelaxngInclude include in Includes)
148 include.Write (writer);
149 foreach (RelaxngDiv div in Divs)
151 writer.WriteEndElement ();
154 internal override void WriteRnc (RncWriter writer)
156 writer.WriteGrammar (this);
159 internal Hashtable IncludedUris {
160 get { return includedUris; }
164 internal override void CheckConstraints ()
169 internal void CheckIncludeRecursion (string href)
171 if (this.includedUris [href] != null)
172 // FIXME: fill line info
173 throw new RelaxngException ("Include recursion found. href: " + href);
174 if (parentGrammar != null)
175 parentGrammar.CheckIncludeRecursion (href);
178 // Compile from this simplified syntax to derivatives.
179 internal override RdpPattern Compile (RelaxngGrammar grammar)
181 ResetCompileState ();
183 parentGrammar = grammar;
185 // First, process includes and divs. RELAX NG 4.1 - 4.15.
186 ArrayList compiledDivs = new ArrayList ();
187 foreach (RelaxngInclude inc in includes)
188 compiledDivs.Add (inc.Compile (this));
189 compiledDivs.AddRange (divs);
190 foreach (RelaxngDiv div in compiledDivs)
193 // Check constraints. RELAX NG 4.16
194 foreach (RelaxngStart start in starts)
195 start.Pattern.CheckConstraints ();
196 foreach (RelaxngDefine define in defs)
197 foreach (RelaxngPattern p in define.Patterns)
198 p.CheckConstraints ();
200 // Assemble combine into the same name defines/start.
201 // see RELAX NG 4.17.
204 // 4.18 : <grammar> must have at least one <start>.
205 if (assembledStart == null)
206 throw new RelaxngException ("A grammar elements must contain at least one start element.");
207 compiledStart = assembledStart.Compile (this);
209 // Assemble all define components into top grammar and
210 // return start patterns for descendant grammars.
211 // see RELAX NG 4.18.
213 if (parentGrammar != null)
214 return compiledStart;
215 assembledStart = null; // no use anymore
217 // 4.19 (a) remove non-reachable defines
219 compiledStart.MarkReachableDefs ();
220 ArrayList tmp = new ArrayList ();
221 foreach (DictionaryEntry entry in this.assembledDefs)
222 if (!reachableDefines.ContainsKey (entry.Key))
224 foreach (string key in tmp)
225 assembledDefs.Remove (key);
227 // 4.19 (b) check illegal recursion
228 CheckRecursion (compiledStart, 0);
229 // here we collected element-replaced definitions
230 foreach (DictionaryEntry entry in elementReplacedDefs)
231 assembledDefs.Add (entry.Key, entry.Value);
232 startPattern = compiledStart;
233 // 4.20,21 reduce notAllowed and empty.
237 startPattern = startPattern.ReduceEmptyAndNotAllowed (ref b, new Hashtable ());
240 Hashtable ht = new Hashtable ();
241 startPattern.setInternTable (ht);
243 // Check Constraints: RELAX NG spec 7
245 startPattern.CheckConstraints (false, false, false, false, false, false);
247 CheckStartPatternContent (startPattern);
249 // 4.19 (c) expandRef - actual replacement
250 startPattern = compiledStart.ExpandRef (assembledDefs);
253 RdpContentType ct = startPattern.ContentType;
255 // return its start pattern.
260 private void CheckStartPatternContent (RdpPattern p)
262 switch (p.PatternType) {
263 case RelaxngPatternType.Ref:
264 CheckStartPatternContent (((RdpUnresolvedRef) p).RefPattern);
266 case RelaxngPatternType.Element:
268 case RelaxngPatternType.Choice:
269 RdpChoice c = p as RdpChoice;
270 CheckStartPatternContent (c.LValue);
271 CheckStartPatternContent (c.RValue);
273 case RelaxngPatternType.NotAllowed:
276 // FIXME: fill line info
277 throw new RelaxngException ("Start pattern contains an invalid content pattern.");
281 Hashtable reachableDefines = new Hashtable ();
284 internal void MarkReacheableDefine (string name)
286 if (reachableDefines.ContainsKey (name))
288 RdpPattern p = assembledDefs [name] as RdpPattern;
289 reachableDefines.Add (name, p);
290 p.MarkReachableDefs ();
294 private void CheckRecursion (RdpPattern p, int depth)
297 RdpAbstractBinary binary = p as RdpAbstractBinary;
298 if (binary != null) {
299 // choice, interleave, group
300 CheckRecursion (binary.LValue, depth);
301 CheckRecursion (binary.RValue, depth);
304 RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
305 if (single != null) {
306 CheckRecursion (single.Child, depth);
310 switch (p.PatternType) {
311 case RelaxngPatternType.Ref:
312 // get checkRecursionDepth from table.
313 int checkRecursionDepth = -1;
314 object checkedDepth = checkedDefs [p];
315 if (checkedDepth != null)
316 checkRecursionDepth = (int) checkedDepth;
318 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
319 RelaxngGrammar target = pref.TargetGrammar;
320 RdpPattern refPattern = pref.RefPattern;
321 if (refPattern == null)
322 // FIXME: fill line info
323 throw new RelaxngException ("No matching define found for " + pref.Name);
325 if (checkRecursionDepth == -1) {
326 checkedDefs [p] = depth;
327 /*test*/ if (refPattern.PatternType != RelaxngPatternType.Element)
328 CheckRecursion (refPattern, depth);
329 checkedDefs [p] = -2;
331 else if (depth == checkRecursionDepth)
332 // FIXME: fill line info
333 throw new RelaxngException (String.Format ("Detected illegal recursion. Ref name is {0}.", pref.Name));
337 case RelaxngPatternType.Attribute:
338 CheckRecursion (((RdpAttribute) p).Children, depth);
341 case RelaxngPatternType.DataExcept:
342 CheckRecursion (((RdpDataExcept) p).Except, depth);
345 case RelaxngPatternType.Element:
346 RdpElement el = p as RdpElement;
347 CheckRecursion (el.Children, depth + 1); // +1
349 case RelaxngPatternType.List:
350 CheckRecursion (((RdpList) p).Child, depth);
356 private void CollectGrammars ()
358 // collect ref and parentRef for each define.
360 // FIXME: This should be assembledStart.
361 CheckReferences (compiledStart);
364 foreach (string name in assembledDefs.Keys) {
365 RdpPattern p = (RdpPattern) assembledDefs [name];
370 // If it is child of any other pattern:
371 // * Remove all definitions under descendant grammars,
372 // replacing ref names, and
373 // * Then return its start pattern.
374 if (parentGrammar != null) {
375 // TODO: reachable check is incomplete.
376 foreach (string name in assembledDefs.Keys) {
378 refPatterns [assembledDefs [name] ] as ArrayList;
380 continue; // Not referenced.
382 // At this point, parent grammar doesn't
383 // collect assembledDefs as yet
384 string uname = GetUniqueName (name, parentGrammar);
385 parentGrammar.assembledDefs [uname] = assembledDefs [name];
390 private static string GetUniqueName (string name, RelaxngGrammar grammar)
392 foreach (RelaxngDefine def in grammar.Defines)
393 if (def.Name == name)
394 return GetUniqueName (name + '_', grammar);
398 private void FixupReference ()
400 foreach (RdpUnresolvedRef pref in this.unresolvedPatterns) {
401 RdpPattern defP = assembledDefs [pref.Name] as RdpPattern;
403 // FIXME: fill line info
404 throw new RelaxngException (String.Format ("Target definition was not found: {0}", pref.Name));
405 ArrayList al = refPatterns [defP] as ArrayList;
407 al = new ArrayList ();
408 refPatterns [defP] = al;
412 this.unresolvedPatterns.Clear ();
415 private void replaceDefines (string name, ArrayList al)
419 string newName = "define" + idx;
420 if (parentGrammar.assembledDefs [newName] == null) {
421 parentGrammar.assembledDefs [newName] =
422 assembledDefs [name];
423 foreach (RdpUnresolvedRef pref in al)
431 // remove ref and parentRef.
432 // add new defines for each elements.
433 private void CheckReferences (RdpPattern p)
435 RdpAbstractBinary binary = p as RdpAbstractBinary;
436 if (binary != null) {
437 // choice, interleave, group
438 CheckReferences (binary.LValue);
439 CheckReferences (binary.RValue);
442 RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
443 if (single != null) {
444 CheckReferences (single.Child);
448 switch (p.PatternType) {
449 case RelaxngPatternType.Ref:
450 // FIXME: This should not re-expand ref
451 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
452 if (pref.RefPattern != null)
455 RelaxngGrammar target = pref.TargetGrammar;
457 // FIXME: fill line info
458 throw new RelaxngException ("Referenced definition was not found.");
459 RdpPattern defP = target.assembledDefs [pref.Name] as RdpPattern;
461 target.unresolvedPatterns.Add (p);
463 ArrayList al = target.refPatterns [defP] as ArrayList;
465 al = new ArrayList ();
466 target.refPatterns [defP] = al;
469 pref.RefPattern = defP;
473 case RelaxngPatternType.Attribute:
474 CheckReferences (((RdpAttribute) p).Children);
477 case RelaxngPatternType.DataExcept:
478 CheckReferences (((RdpDataExcept) p).Except);
481 case RelaxngPatternType.Element:
482 RdpElement el = p as RdpElement;
483 CheckReferences (el.Children);
484 string name = ElementDefMap [el] as string;
488 string newName = "element0";
489 if (el.NameClass is RdpName)
490 newName = ((RdpName) el.NameClass).LocalName;
492 if (assembledDefs [newName] == null) {
493 elementReplacedDefs [newName] = el.Children;
496 newName = "element" + ++idx;
498 ElementDefMap [el] = newName;
500 // Even though the element is replaced with ref,
501 // derivative of ref is RdpElement in fact...
504 case RelaxngPatternType.List:
505 CheckReferences (((RdpList) p).Child);
508 case RelaxngPatternType.Empty:
509 case RelaxngPatternType.NotAllowed:
510 case RelaxngPatternType.Text:
511 case RelaxngPatternType.Value:
514 //case RelaxngPatternType.ExternalRef:
515 //case RelaxngPatternType.Include:
516 // Mixed, Optional, ZeroOrMore are already removed.
517 // Choice, Group, Interleave, OneOrMore are already proceeded.
521 #region 4.17 - Combine
522 private void AssembleCombine ()
524 // calculate combines.
525 bool haveHeadStart = false;
526 string combineStart = null;
527 Hashtable haveHeadDefs = new Hashtable ();
528 Hashtable combineDefs = new Hashtable ();
530 // 1.calculate combine for starts.
531 foreach (RelaxngStart start in starts)
532 CheckCombine (ref haveHeadStart,
533 ref combineStart, start.Combine, "start");
534 // 2.calculate combine for defines.
535 foreach (RelaxngDefine def in defs) {
537 haveHeadDefs.ContainsKey (def.Name) ?
538 haveHead = (bool) haveHeadDefs [def.Name]
540 string combine = combineDefs [def.Name] as string;
541 CheckCombine (ref haveHead, ref combine,
542 def.Combine, String.Format ("define name={0}", def.Name));
543 haveHeadDefs [def.Name] = haveHead;
544 combineDefs [def.Name] = combine;
548 // assemble starts and defines with "combine" attribute.
550 // 3.assemble starts.
551 if (starts.Count == 0) {
552 if (ParentGrammar == null)
553 throw new RelaxngException (this, "grammar must have at least one start component.");
555 assembledStart = ((RelaxngStart)starts [0]).Pattern;
556 for (int i=1; i<starts.Count; i++) {
557 RelaxngPattern p2 = ((RelaxngStart) starts [i]).Pattern;;
558 if (combineStart == "interleave") {
559 RelaxngInterleave intlv = new RelaxngInterleave ();
560 intlv.Patterns.Add (assembledStart);
561 intlv.Patterns.Add (p2);
562 assembledStart = intlv;
564 RelaxngChoice c = new RelaxngChoice ();
565 c.Patterns.Add (assembledStart);
572 // 4.assemble defines
573 foreach (RelaxngDefine def in defs) {
574 string combine = combineDefs [def.Name] as string;
576 assembledDefs [def.Name] as RdpPattern;
577 RdpPattern p2 = def.Compile (this);
579 if (combine == "interleave") {
580 assembledDefs [def.Name] =
581 new RdpInterleave (p1, p2);
583 assembledDefs [def.Name] =
584 new RdpChoice (p1, p2);
587 assembledDefs [def.Name] = p2;
593 // check combine attributes.
594 private void CheckCombine (ref bool haveHead, ref string combine, string newCombine, string targetSpec)
596 switch (newCombine) {
598 if (combine == "choice")
599 throw new RelaxngException (this, "\"combine\" was already specified \"choice\"");
601 combine = "interleave";
604 if (combine == "interleave")
605 throw new RelaxngException (this, "\"combine\" was already specified \"interleave\"");
611 throw new RelaxngException (this, String.Format ("There was already \"{0}\" element without \"combine\" attribute.", targetSpec));