2 // Commons.Xml.Relaxng.RelaxngGrammar.cs
\r
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
\r
7 // 2003 Atsushi Enomoto "No rights reserved."
\r
9 // Copyright (c) 2004 Novell Inc.
\r
10 // All rights reserved
\r
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Collections;
\r
38 using Commons.Xml.Relaxng.Derivative;
\r
39 using Commons.Xml.Relaxng.Rnc;
\r
41 namespace Commons.Xml.Relaxng
\r
43 public class RelaxngGrammar : RelaxngPattern
\r
// Namespace URI that Write () puts on the <grammar> element.
46 public static string NamespaceURI =
\r
47 "http://relaxng.org/ns/structure/1.0";
\r
49 // object model fields
\r
// Value of the "ns" attribute written by Write (); null means "omit it".
50 string defaultNamespace;
\r
// The four component lists exposed through Starts / Defines / Includes / Divs.
51 RelaxngGrammarContentList starts = new RelaxngGrammarContentList ();
\r
52 RelaxngGrammarContentList defs = new RelaxngGrammarContentList ();
\r
53 RelaxngGrammarContentList includes = new RelaxngGrammarContentList ();
\r
54 RelaxngGrammarContentList divs = new RelaxngGrammarContentList ();
\r
// Returned by the Provider property only when there is no parent grammar.
56 RelaxngDatatypeProvider provider;
\r
// Result of the last Compile () of a top-level grammar.
59 RdpPattern startPattern;
\r
61 // compile cache fields.
\r
62 Hashtable assembledDefs = new Hashtable (); // key = define name, value = RdpPattern (AssembleCombine () stores compiled patterns here)
\r
63 RelaxngPattern assembledStart;
\r
64 RdpPattern compiledStart;
\r
// key = generated define name, value = children pattern of the replaced element (filled by CheckReferences ()).
65 Hashtable elementReplacedDefs = new Hashtable ();
\r
// Looked up by href in CheckIncludeRecursion (); a non-null entry means the href is already being included.
67 Hashtable includedUris = new Hashtable ();
\r
68 RelaxngGrammar parentGrammar;
\r
69 Hashtable refPatterns = new Hashtable (); // key = RdpPattern of assembledDefs, value = ArrayList
\r
71 // only for checkRecursion()
\r
// key = ref pattern, value = boxed int depth (-2 once the ref has been fully checked).
72 Hashtable checkedDefs = new Hashtable ();
\r
74 // this should be checked after its compilation finished to complete
\r
75 // missing-at-the-tracking patterns (especially of parent grammars).
\r
76 // Flat list of RdpUnresolvedRef patterns; FixupReference () enumerates and then clears it.
\r
77 ArrayList unresolvedPatterns = new ArrayList ();
\r
79 // contents key = RdpElement and value = name of the parent define.
\r
80 private Hashtable ElementDefMap = new Hashtable ();
\r
// Public parameterless constructor.
84 public RelaxngGrammar ()
\r
// Throws away everything a previous compilation left behind so that
// Compile () starts from a clean state. Only compile results and caches
// are touched; the object model (starts, defs, includes, divs,
// defaultNamespace, provider) is left alone.
private void ResetCompileState ()
{
	startPattern = null;
	assembledDefs.Clear ();
	assembledStart = null;
	compiledStart = null;
	elementReplacedDefs.Clear ();
	includedUris.Clear ();
	parentGrammar = null;
	refPatterns.Clear ();
	checkedDefs.Clear ();
	unresolvedPatterns.Clear ();
	ElementDefMap.Clear ();
	// reachableDefines is filled by MarkReacheableDefine () while
	// compiling, so it is compile state as well. Without this reset the
	// names recorded by an earlier Compile () would still be reported as
	// reachable, and MarkReacheableDefine () would return early for them.
	reachableDefines.Clear ();
}
\r
// The grammar this one is nested in, or null when there is none.
// Plain accessor pair over the parentGrammar field.
internal RelaxngGrammar ParentGrammar {
	get {
		return this.parentGrammar;
	}
	set {
		this.parentGrammar = value;
	}
}
\r
// Datatype provider in effect for this grammar.
// Reading delegates to the parent grammar whenever one is set, so the
// locally stored provider is only returned when there is no parent.
// Writing always stores into the local field.
internal RelaxngDatatypeProvider Provider {
	get {
		if (parentGrammar == null)
			return provider;
		return parentGrammar.Provider;
	}
	set {
		provider = value;
	}
}
\r
// Identifies this pattern as a grammar; always RelaxngPatternType.Grammar.
public override RelaxngPatternType PatternType {
	get {
		return RelaxngPatternType.Grammar;
	}
}
\r
// Default namespace of the grammar. Write () emits it as the "ns"
// attribute when it is not null.
public string DefaultNamespace {
	get {
		return this.defaultNamespace;
	}
	set {
		this.defaultNamespace = value;
	}
}
\r
// The list holding this grammar's start components (read-only reference,
// the list itself is mutable).
public RelaxngGrammarContentList Starts {
	get {
		return this.starts;
	}
}
\r
// The list holding this grammar's define components (read-only reference,
// the list itself is mutable).
public RelaxngGrammarContentList Defines {
	get {
		return this.defs;
	}
}
\r
// The list holding this grammar's include components (read-only reference,
// the list itself is mutable).
public RelaxngGrammarContentList Includes {
	get {
		return this.includes;
	}
}
\r
// The list holding this grammar's div components (read-only reference,
// the list itself is mutable).
public RelaxngGrammarContentList Divs {
	get {
		return this.divs;
	}
}
\r
// Serializes this grammar as a <grammar> element in the RELAX NG
// namespace. The optional "ns" attribute comes first, followed by all
// start, define, include and div components, each group in list order.
public override void Write (XmlWriter writer)
{
	writer.WriteStartElement ("", "grammar", NamespaceURI);

	if (defaultNamespace != null) {
		writer.WriteAttributeString ("ns", defaultNamespace);
	}

	foreach (RelaxngStart startComponent in starts) {
		startComponent.Write (writer);
	}
	foreach (RelaxngDefine defineComponent in defs) {
		defineComponent.Write (writer);
	}
	foreach (RelaxngInclude includeComponent in includes) {
		includeComponent.Write (writer);
	}
	foreach (RelaxngDiv divComponent in divs) {
		divComponent.Write (writer);
	}

	writer.WriteEndElement ();
}
\r
// Compact-syntax output: hands this grammar to the RncWriter, which does
// the actual writing.
internal override void WriteRnc (RncWriter writer)
{
	RelaxngGrammar self = this;
	writer.WriteGrammar (self);
}
\r
// Table consulted by CheckIncludeRecursion (); exposes the includedUris
// field so that other types in the assembly can register entries.
internal Hashtable IncludedUris {
	get {
		return this.includedUris;
	}
}
\r
// Intentionally empty: a grammar performs no constraint check of its own
// in this override.
internal override void CheckConstraints ()
{
	// nothing to do here.
}
\r
// Verifies that href is not already registered in includedUris of this
// grammar or of any grammar up the parent chain.
// Throws RelaxngException as soon as a grammar in the chain has a
// non-null entry for href.
internal void CheckIncludeRecursion (string href)
{
	// Walk from this grammar up to the outermost one.
	for (RelaxngGrammar current = this; current != null; current = current.parentGrammar) {
		if (current.includedUris [href] == null)
			continue;
		// FIXME: fill line info
		throw new RelaxngException ("Include recursion found. href: " + href);
	}
}
\r
178 // Compile from this simplified syntax to derivatives.
\r
// Compiles this grammar into derivative patterns.
// The argument is stored as the parent grammar. For a nested grammar
// (parentGrammar != null) the compiled start is returned right after
// CollectGrammars (); the remaining steps only run for a top-level grammar.
179 internal override RdpPattern Compile (RelaxngGrammar grammar)
\r
// Drop whatever an earlier compilation cached (this also nulls parentGrammar,
// which is why it is assigned only afterwards).
181 ResetCompileState ();
\r
183 parentGrammar = grammar;
\r
185 // First, process includes and divs. RELAX NG 4.1 - 4.15.
\r
186 ArrayList compiledDivs = new ArrayList ();
\r
// Each include compiles to an object that is processed as a RelaxngDiv below.
187 foreach (RelaxngInclude inc in includes)
\r
188 compiledDivs.Add (inc.Compile (this));
\r
189 compiledDivs.AddRange (divs);
\r
190 foreach (RelaxngDiv div in compiledDivs)
\r
191 div.Compile (this);
\r
193 // Check constraints. RELAX NG 4.16
\r
194 foreach (RelaxngStart start in starts)
\r
195 start.Pattern.CheckConstraints ();
\r
196 foreach (RelaxngDefine define in defs)
\r
197 foreach (RelaxngPattern p in define.Patterns)
\r
198 p.CheckConstraints ();
\r
200 // Assemble combine into the same name defines/start.
\r
201 // see RELAX NG 4.17.
\r
202 AssembleCombine ();
\r
204 // FIXME: It should not return NotAllowed
\r
205 if (assembledStart != null)
\r
206 compiledStart = assembledStart.Compile (this);
\r
// NOTE(review): presumably the alternative branch of the check above
// (no assembled start) - confirm.
208 return RdpNotAllowed.Instance;
\r
210 // Assemble all define components into top grammar and
\r
211 // return start patterns for descendant grammars.
\r
212 // see RELAX NG 4.18.
\r
213 CollectGrammars ();
\r
214 if (parentGrammar != null)
\r
215 return compiledStart;
\r
216 assembledStart = null; // no use anymore
\r
218 // 4.19 (a) remove non-reachable defines
\r
220 compiledStart.MarkReachableDefs ();
\r
// Keys are collected first because entries cannot be removed from
// assembledDefs while it is being enumerated.
221 ArrayList tmp = new ArrayList ();
\r
222 foreach (DictionaryEntry entry in this.assembledDefs)
\r
223 if (!reachableDefines.ContainsKey (entry.Key))
\r
224 tmp.Add (entry.Key);
\r
225 foreach (string key in tmp)
\r
226 assembledDefs.Remove (key);
\r
228 // 4.19 (b) check illegal recursion
\r
229 CheckRecursion (compiledStart, 0);
\r
230 // here we collected element-replaced definitions
\r
// Hashtable.Add throws if a key already exists in assembledDefs.
231 foreach (DictionaryEntry entry in elementReplacedDefs)
\r
232 assembledDefs.Add (entry.Key, entry.Value);
\r
233 startPattern = compiledStart;
\r
234 // 4.20,21 reduce notAllowed and empty.
\r
238 startPattern = startPattern.ReduceEmptyAndNotAllowed (ref b, new Hashtable ());
\r
// One intern table is shared by the start pattern and the three singleton patterns.
241 Hashtable ht = new Hashtable ();
\r
242 startPattern.setInternTable (ht);
\r
243 RdpNotAllowed.Instance.setInternTable (ht);
\r
244 RdpEmpty.Instance.setInternTable (ht);
\r
245 RdpText.Instance.setInternTable (ht);
\r
247 // Check Constraints: RELAX NG spec 7
\r
248 // 7.1.1-4, 7.3, 7.4
\r
249 startPattern.CheckConstraints (false, false, false, false, false, false);
\r
// Throws RelaxngException when the start pattern contains anything other
// than ref, element, choice or notAllowed.
251 CheckStartPatternContent (startPattern);
\r
// NOTE(review): ct is never read in the visible code; presumably the
// ContentType getter is evaluated for a validation side effect - confirm.
254 RdpContentType ct = startPattern.ContentType;
\r
256 // 4.19 (c) expandRef - actual replacement
\r
// NOTE(review): this starts again from compiledStart, not from the reduced
// startPattern assigned above - confirm that this is intended.
257 startPattern = compiledStart.ExpandRef (assembledDefs);
\r
259 // return its start pattern.
\r
261 return startPattern;
\r
// Validates what a start pattern may consist of: element and notAllowed
// are accepted as they are, a ref is followed to its RefPattern, and both
// branches of a choice are checked recursively.
// Any other pattern type raises RelaxngException.
private void CheckStartPatternContent (RdpPattern p)
{
	RelaxngPatternType kind = p.PatternType;

	// Leaves that are always acceptable.
	if (kind == RelaxngPatternType.Element || kind == RelaxngPatternType.NotAllowed)
		return;

	if (kind == RelaxngPatternType.Ref) {
		CheckStartPatternContent (((RdpUnresolvedRef) p).RefPattern);
		return;
	}

	if (kind == RelaxngPatternType.Choice) {
		RdpChoice choice = p as RdpChoice;
		CheckStartPatternContent (choice.LValue);
		CheckStartPatternContent (choice.RValue);
		return;
	}

	// FIXME: fill line info
	throw new RelaxngException ("Start pattern contains an invalid content pattern.");
}
\r
// Filled by MarkReacheableDefine (): key = define name, value = the pattern
// found for that name in assembledDefs.
285 Hashtable reachableDefines = new Hashtable ();
\r
// Records the define called name as reachable and then lets its pattern
// mark whatever it references in turn. A name that is already recorded is
// ignored, which also stops the marking from looping on cyclic references.
internal void MarkReacheableDefine (string name)
{
	if (!reachableDefines.ContainsKey (name)) {
		RdpPattern definition = assembledDefs [name] as RdpPattern;
		// Register before descending so that a cycle back to this name ends here.
		reachableDefines.Add (name, definition);
		definition.MarkReachableDefs ();
	}
}
\r
// Walks the pattern tree below p looking for illegal recursion (the
// caller labels this step RELAX NG 4.19 (b)).
// depth grows by one only when descending into the children of an
// element. A ref that is met again at the very depth recorded when it
// was first entered has recursed without passing through an element,
// and raises RelaxngException. A ref without a RefPattern raises
// RelaxngException as well.
private void CheckRecursion (RdpPattern p, int depth)
{
	// choice, interleave, group
	RdpAbstractBinary pair = p as RdpAbstractBinary;
	if (pair != null) {
		CheckRecursion (pair.LValue, depth);
		CheckRecursion (pair.RValue, depth);
		return;
	}

	RdpAbstractSingleContent wrapper = p as RdpAbstractSingleContent;
	if (wrapper != null) {
		CheckRecursion (wrapper.Child, depth);
		return;
	}

	RelaxngPatternType kind = p.PatternType;
	if (kind == RelaxngPatternType.Attribute) {
		CheckRecursion (((RdpAttribute) p).Children, depth);
	} else if (kind == RelaxngPatternType.DataExcept) {
		CheckRecursion (((RdpDataExcept) p).Except, depth);
	} else if (kind == RelaxngPatternType.Element) {
		RdpElement element = p as RdpElement;
		// Entering an element is the only thing that increases the depth.
		CheckRecursion (element.Children, depth + 1);
	} else if (kind == RelaxngPatternType.List) {
		CheckRecursion (((RdpList) p).Child, depth);
	} else if (kind == RelaxngPatternType.Ref) {
		// Depth stored for this ref, or -1 when it was never visited.
		object recorded = checkedDefs [p];
		int recordedDepth = recorded == null ? -1 : (int) recorded;

		RdpUnresolvedRef reference = p as RdpUnresolvedRef;
		RelaxngGrammar target = reference.TargetGrammar;
		RdpPattern referenced = reference.RefPattern;
		if (referenced == null) {
			// FIXME: fill line info
			throw new RelaxngException ("No matching define found for " + reference.Name);
		}

		if (recordedDepth == -1) {
			// Mark as "being checked" at the current depth.
			checkedDefs [p] = depth;
			/*test*/
			if (referenced.PatternType != RelaxngPatternType.Element) {
				CheckRecursion (referenced, depth);
			}
			// Done: -2 can never equal a real depth (depth starts at 0
			// and only grows), nor the "unvisited" marker -1.
			checkedDefs [p] = -2;
		} else if (recordedDepth == depth) {
			// FIXME: fill line info
			throw new RelaxngException (String.Format ("Detected illegal recursion. Ref name is {0}.", reference.Name));
		}
	}
}
\r
// Runs CheckReferences () over the compiled start and over every assembled
// define. For a nested grammar (parentGrammar != null) the defines are then
// copied into the parent's assembledDefs under a name obtained from
// GetUniqueName ().
360 private void CollectGrammars ()
\r
362 // collect ref and parentRef for each define.
\r
364 // FIXME: This should be assembledStart.
\r
365 CheckReferences (compiledStart);
\r
368 foreach (string name in assembledDefs.Keys) {
\r
369 RdpPattern p = (RdpPattern) assembledDefs [name];
\r
370 CheckReferences (p);
\r
374 // If it is child of any other pattern:
\r
375 // * Remove all definitions under descendant grammars,
\r
376 // replacing ref names, and
\r
377 // * Then return its start pattern.
\r
378 if (parentGrammar != null) {
\r
379 // TODO: reachable check is incomplete.
\r
380 foreach (string name in assembledDefs.Keys) {
\r
// refPatterns is keyed by the define's pattern; the value is the ArrayList
// maintained by CheckReferences () / FixupReference ().
382 refPatterns [assembledDefs [name] ] as ArrayList;
\r
384 continue; // Not referenced.
\r
386 // At this point, parent grammar doesn't
\r
387 // collect assembledDefs as yet
\r
// NOTE(review): GetUniqueName () compares against parentGrammar.Defines only,
// not against names already placed in parentGrammar.assembledDefs - confirm
// that two nested grammars cannot overwrite each other here.
388 string uname = GetUniqueName (name, parentGrammar);
\r
389 parentGrammar.assembledDefs [uname] = assembledDefs [name];
\r
// Returns name, extended with as many trailing '_' characters as it takes
// until no define in grammar.Defines carries the resulting name.
private static string GetUniqueName (string name, RelaxngGrammar grammar)
{
	string candidate = name;
	bool taken;
	do {
		taken = false;
		foreach (RelaxngDefine existing in grammar.Defines) {
			if (existing.Name == candidate) {
				taken = true;
				break;
			}
		}
		// A clash means: lengthen the name and scan all defines again.
		if (taken)
			candidate = candidate + '_';
	} while (taken);
	return candidate;
}
\r
// Goes through every RdpUnresolvedRef queued in unresolvedPatterns, looks up
// its target in assembledDefs by name, makes sure refPatterns has an
// ArrayList for that target, and finally empties the queue.
402 private void FixupReference ()
\r
404 foreach (RdpUnresolvedRef pref in this.unresolvedPatterns) {
\r
405 RdpPattern defP = assembledDefs [pref.Name] as RdpPattern;
\r
// NOTE(review): presumably raised only when defP is null - confirm.
407 // FIXME: fill line info
\r
408 throw new RelaxngException (String.Format ("Target definition was not found: {0}", pref.Name));
\r
409 ArrayList al = refPatterns [defP] as ArrayList;
\r
// Create and register the list for this define (presumably only when none
// exists yet - confirm).
411 al = new ArrayList ();
\r
412 refPatterns [defP] = al;
\r
416 this.unresolvedPatterns.Clear ();
\r
// Moves the define called name into the parent grammar under a generated
// name of the form "define" + idx that is still free there, and renames
// every RdpUnresolvedRef in al to that new name.
// NOTE(review): the declaration and stepping of idx are not visible here -
// confirm how the candidate names are enumerated.
// Dereferences parentGrammar without a null check.
419 private void replaceDefines (string name, ArrayList al)
\r
423 string newName = "define" + idx;
\r
424 if (parentGrammar.assembledDefs [newName] == null) {
\r
425 parentGrammar.assembledDefs [newName] =
\r
426 assembledDefs [name];
\r
427 foreach (RdpUnresolvedRef pref in al)
\r
428 pref.Name = newName;
\r
435 // remove ref and parentRef.
\r
436 // add new defines for each elements.
\r
// Recursively visits the pattern tree below p. For a ref it looks up the
// target define in the ref's TargetGrammar and links it through
// pref.RefPattern; for an element it picks a define name and records it in
// ElementDefMap / elementReplacedDefs.
437 private void CheckReferences (RdpPattern p)
\r
439 RdpAbstractBinary binary = p as RdpAbstractBinary;
\r
440 if (binary != null) {
\r
441 // choice, interleave, group
\r
442 CheckReferences (binary.LValue);
\r
443 CheckReferences (binary.RValue);
\r
446 RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
\r
447 if (single != null) {
\r
448 CheckReferences (single.Child);
\r
452 switch (p.PatternType) {
\r
453 case RelaxngPatternType.Ref:
\r
454 // FIXME: This should not re-expand ref
\r
455 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
\r
// A ref whose RefPattern is already set has been handled before.
456 if (pref.RefPattern != null)
\r
459 RelaxngGrammar target = pref.TargetGrammar;
\r
460 if (target == null)
\r
461 // FIXME: fill line info
\r
462 throw new RelaxngException ("Referenced definition was not found.");
\r
463 RdpPattern defP = target.assembledDefs [pref.Name] as RdpPattern;
\r
// Queue the ref on the target grammar for FixupReference ()
// (presumably only when defP is null - confirm).
465 target.unresolvedPatterns.Add (p);
\r
467 ArrayList al = target.refPatterns [defP] as ArrayList;
\r
469 al = new ArrayList ();
\r
470 target.refPatterns [defP] = al;
\r
473 pref.RefPattern = defP;
\r
477 case RelaxngPatternType.Attribute:
\r
478 CheckReferences (((RdpAttribute) p).Children);
\r
481 case RelaxngPatternType.DataExcept:
\r
482 CheckReferences (((RdpDataExcept) p).Except);
\r
485 case RelaxngPatternType.Element:
\r
486 RdpElement el = p as RdpElement;
\r
487 CheckReferences (el.Children);
\r
// Only elements that have no define name assigned yet get one.
488 string name = ElementDefMap [el] as string;
\r
489 if (name == null) {
\r
// Preferred name: the element's local name when its name class is a
// plain RdpName, otherwise "element0".
492 string newName = "element0";
\r
493 if (el.NameClass is RdpName)
\r
494 newName = ((RdpName) el.NameClass).LocalName;
\r
// The name is accepted once assembledDefs has no entry for it.
// NOTE(review): only assembledDefs is consulted, not elementReplacedDefs -
// confirm that two elements cannot end up sharing a generated name.
496 if (assembledDefs [newName] == null) {
\r
497 elementReplacedDefs [newName] = el.Children;
\r
// Otherwise try "element1", "element2", ... (idx is declared on a line not
// visible here).
500 newName = "element" + ++idx;
\r
502 ElementDefMap [el] = newName;
\r
504 // Even though the element is replaced with ref,
\r
505 // derivative of ref is RdpElement in fact...
\r
508 case RelaxngPatternType.List:
\r
509 CheckReferences (((RdpList) p).Child);
\r
// Leaf patterns: nothing to traverse.
512 case RelaxngPatternType.Empty:
\r
513 case RelaxngPatternType.NotAllowed:
\r
514 case RelaxngPatternType.Text:
\r
515 case RelaxngPatternType.Value:
\r
518 //case RelaxngPatternType.ExternalRef:
\r
519 //case RelaxngPatternType.Include:
\r
520 // Mixed, Optional, ZeroOrMore are already removed.
\r
521 // Choice, Group, Interleave, OneOrMore are already processed.
\r
525 #region 4.17 - Combine
\r
// Implements the "combine" step: validates the combine attributes of all
// start and define components through CheckCombine (), then merges the
// starts into assembledStart and the compiled defines of the same name into
// assembledDefs, using interleave when the effective combine value is
// "interleave" and choice otherwise.
526 private void AssembleCombine ()
\r
528 // calculate combines.
\r
529 bool haveHeadStart = false;
\r
530 string combineStart = null;
\r
// Per define name: whether a component without "combine" was seen, and the
// combine method chosen so far.
531 Hashtable haveHeadDefs = new Hashtable ();
\r
532 Hashtable combineDefs = new Hashtable ();
\r
534 // 1.calculate combine for starts.
\r
535 foreach (RelaxngStart start in starts)
\r
536 CheckCombine (ref haveHeadStart,
\r
537 ref combineStart, start.Combine, "start");
\r
538 // 2.calculate combine for defines.
\r
539 foreach (RelaxngDefine def in defs) {
\r
// Restore the state accumulated for this name by earlier defines.
541 haveHeadDefs.ContainsKey (def.Name) ?
\r
542 haveHead = (bool) haveHeadDefs [def.Name]
\r
544 string combine = combineDefs [def.Name] as string;
\r
545 CheckCombine (ref haveHead, ref combine,
\r
546 def.Combine, String.Format ("define name={0}", def.Name));
\r
// CheckCombine () updates both values through ref; store them back.
547 haveHeadDefs [def.Name] = haveHead;
\r
548 combineDefs [def.Name] = combine;
\r
552 // assemble starts and defines with "combine" attribute.
\r
554 // 3.assemble starts.
\r
555 if (starts.Count == 0) {
\r
// Only a grammar without a parent is required to have a start component.
556 if (ParentGrammar == null)
\r
557 throw new RelaxngException (this, "grammar must have at least one start component.");
\r
// Fold the remaining starts into the first one, left to right.
559 assembledStart = ((RelaxngStart)starts [0]).Pattern;
\r
560 for (int i=1; i<starts.Count; i++) {
\r
561 RelaxngPattern p2 = ((RelaxngStart) starts [i]).Pattern;;
\r
562 if (combineStart == "interleave") {
\r
563 RelaxngInterleave intlv = new RelaxngInterleave ();
\r
564 intlv.Patterns.Add (assembledStart);
\r
565 intlv.Patterns.Add (p2);
\r
566 assembledStart = intlv;
\r
568 RelaxngChoice c = new RelaxngChoice ();
\r
569 c.Patterns.Add (assembledStart);
\r
570 c.Patterns.Add (p2);
\r
571 assembledStart = c;
\r
576 // 4.assemble defines
\r
577 foreach (RelaxngDefine def in defs) {
\r
578 string combine = combineDefs [def.Name] as string;
\r
// Pattern assembled so far for this name, if any.
580 assembledDefs [def.Name] as RdpPattern;
\r
581 RdpPattern p2 = def.Compile (this);
\r
583 if (combine == "interleave") {
\r
584 assembledDefs [def.Name] =
\r
585 new RdpInterleave (p1, p2);
\r
587 assembledDefs [def.Name] =
\r
588 new RdpChoice (p1, p2);
\r
// First define with this name: store its compiled pattern as is.
591 assembledDefs [def.Name] = p2;
\r
597 // check combine attributes.
\r
598 private void CheckCombine (ref bool haveHead, ref string combine, string newCombine, string targetSpec)
\r
600 switch (newCombine) {
\r
602 if (combine == "choice")
\r
603 throw new RelaxngException (this, "\"combine\" was already specified \"choice\"");
\r
605 combine = "interleave";
\r
608 if (combine == "interleave")
\r
609 throw new RelaxngException (this, "\"combine\" was already specified \"interleave\"");
\r
611 combine = "choice";
\r
615 throw new RelaxngException (this, String.Format ("There was already \"{0}\" element without \"combine\" attribute.", targetSpec));
\r