2 // Commons.Xml.Relaxng.RelaxngGrammar.cs
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
7 // 2003 Atsushi Enomoto "No rights reserved."
9 // Copyright (c) 2004 Novell Inc.
10 // All rights reserved
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 using System.Collections;
38 using Commons.Xml.Relaxng.Derivative;
39 using Commons.Xml.Relaxng.Rnc;
41 namespace Commons.Xml.Relaxng
43 public class RelaxngGrammar : RelaxngPattern
46 public static string NamespaceURI =
47 "http://relaxng.org/ns/structure/1.0";
49 // Parser condition: it is used to resolve "included" source
52 // object model fields
53 string defaultNamespace;
54 RelaxngGrammarContentList starts = new RelaxngGrammarContentList ();
55 RelaxngGrammarContentList defs = new RelaxngGrammarContentList ();
56 RelaxngGrammarContentList includes = new RelaxngGrammarContentList ();
57 RelaxngGrammarContentList divs = new RelaxngGrammarContentList ();
60 RdpPattern startPattern;
62 // compile cache fields.
63 Hashtable assembledDefs = new Hashtable (); // [defName] = RelaxngDefine
64 RelaxngPattern assembledStart;
65 RdpPattern compiledStart;
66 Hashtable elementReplacedDefs = new Hashtable ();
68 Hashtable includedUris = new Hashtable ();
69 RelaxngGrammar parentGrammar;
70 Hashtable refPatterns = new Hashtable (); // key = RdpPattern of assembledDefs
72 // only for checkRecursion()
73 Hashtable checkedDefs = new Hashtable ();
75 // this should be checked after its compilation finished to complete
76 // missing-at-the-tracking patterns (especially of parent grammars).
77 // key = RdpPattern, value = ArrayList of unresolvedPatterns.
78 ArrayList unresolvedPatterns = new ArrayList ();
80 // contents key = RdpElement and value = name of the parent define.
81 private Hashtable ElementDefMap = new Hashtable ();
85 public RelaxngGrammar ()
89 private void ResetCompileState ()
92 assembledDefs.Clear ();
93 assembledStart = null;
95 elementReplacedDefs.Clear ();
96 includedUris.Clear ();
100 unresolvedPatterns.Clear ();
101 ElementDefMap.Clear ();
104 internal bool IsSourceCompactSyntax {
105 get { return isSourceRnc; }
106 set { isSourceRnc = value; }
109 internal RelaxngGrammar ParentGrammar {
110 get { return parentGrammar; }
111 set { parentGrammar = value; }
114 internal RelaxngDatatypeProvider Provider {
115 get { return (base.DataProvider == null) ? (parentGrammar != null ? parentGrammar.Provider : base.DataProvider) : base.DataProvider; }
116 set { base.DataProvider = value; }
119 public override RelaxngPatternType PatternType {
120 get { return RelaxngPatternType.Grammar; }
123 public string DefaultNamespace {
124 get { return defaultNamespace; }
125 set { defaultNamespace = value; }
128 public RelaxngGrammarContentList Starts {
129 get { return starts; }
132 public RelaxngGrammarContentList Defines {
136 public RelaxngGrammarContentList Includes {
137 get { return includes; }
140 public RelaxngGrammarContentList Divs {
144 public override void Write (XmlWriter writer)
146 writer.WriteStartElement ("", "grammar", RelaxngGrammar.NamespaceURI);
147 if (defaultNamespace != null)
148 writer.WriteAttributeString ("ns", defaultNamespace);
149 foreach (RelaxngStart start in Starts)
150 start.Write (writer);
151 foreach (RelaxngDefine define in Defines)
152 define.Write (writer);
153 foreach (RelaxngInclude include in Includes)
154 include.Write (writer);
155 foreach (RelaxngDiv div in Divs)
157 writer.WriteEndElement ();
160 internal override void WriteRnc (RncWriter writer)
162 writer.WriteGrammar (this);
165 internal Hashtable IncludedUris {
166 get { return includedUris; }
170 internal override void CheckConstraints ()
175 internal void CheckIncludeRecursion (string href)
177 if (this.includedUris [href] != null)
178 // FIXME: fill line info
179 throw new RelaxngException ("Include recursion found. href: " + href);
180 if (parentGrammar != null)
181 parentGrammar.CheckIncludeRecursion (href);
184 // Compile from this simplified syntax to derivatives.
185 internal override RdpPattern Compile (RelaxngGrammar grammar)
187 ResetCompileState ();
189 parentGrammar = grammar;
191 // First, process includes and divs. RELAX NG 4.1 - 4.15.
192 ArrayList compiledDivs = new ArrayList ();
193 foreach (RelaxngInclude inc in includes)
194 compiledDivs.Add (inc.Compile (this));
195 compiledDivs.AddRange (divs);
196 foreach (RelaxngDiv div in compiledDivs)
199 // Check constraints. RELAX NG 4.16
200 foreach (RelaxngStart start in starts)
201 start.Pattern.CheckConstraints ();
202 foreach (RelaxngDefine define in defs)
203 foreach (RelaxngPattern p in define.Patterns)
204 p.CheckConstraints ();
206 // Assemble combine into the same name defines/start.
207 // see RELAX NG 4.17.
210 // 4.18 : <grammar> must have at least one <start>.
211 if (assembledStart == null)
212 throw new RelaxngException ("A grammar elements must contain at least one start element.");
213 compiledStart = assembledStart.Compile (this);
215 // Assemble all define components into top grammar and
216 // return start patterns for descendant grammars.
217 // see RELAX NG 4.18.
219 if (parentGrammar != null)
220 return compiledStart;
221 assembledStart = null; // no use anymore
223 // 4.19 (a) remove non-reachable defines
225 compiledStart.MarkReachableDefs ();
226 ArrayList tmp = new ArrayList ();
227 foreach (DictionaryEntry entry in this.assembledDefs)
228 if (!reachableDefines.ContainsKey (entry.Key))
230 foreach (string key in tmp)
231 assembledDefs.Remove (key);
233 // 4.19 (b) check illegal recursion
234 CheckRecursion (compiledStart, 0);
235 // here we collected element-replaced definitions
236 foreach (DictionaryEntry entry in elementReplacedDefs)
237 assembledDefs.Add (entry.Key, entry.Value);
238 startPattern = compiledStart;
239 // 4.20,21 reduce notAllowed and empty.
243 startPattern = startPattern.ReduceEmptyAndNotAllowed (ref b, new Hashtable ());
246 Hashtable ht = new Hashtable ();
247 startPattern.setInternTable (ht);
249 // Check Constraints: RELAX NG spec 7
251 startPattern.CheckConstraints (false, false, false, false, false, false);
253 CheckStartPatternContent (startPattern);
255 // 4.19 (c) expandRef - actual replacement
256 startPattern = compiledStart.ExpandRef (assembledDefs);
259 RdpContentType ct = startPattern.ContentType;
261 // return its start pattern.
266 private void CheckStartPatternContent (RdpPattern p)
268 switch (p.PatternType) {
269 case RelaxngPatternType.Ref:
270 CheckStartPatternContent (((RdpUnresolvedRef) p).RefPattern);
272 case RelaxngPatternType.Element:
274 case RelaxngPatternType.Choice:
275 RdpChoice c = p as RdpChoice;
276 CheckStartPatternContent (c.LValue);
277 CheckStartPatternContent (c.RValue);
279 case RelaxngPatternType.NotAllowed:
282 // FIXME: fill line info
283 throw new RelaxngException ("Start pattern contains an invalid content pattern.");
287 Hashtable reachableDefines = new Hashtable ();
290 internal void MarkReacheableDefine (string name)
292 if (reachableDefines.ContainsKey (name))
294 RdpPattern p = assembledDefs [name] as RdpPattern;
295 reachableDefines.Add (name, p);
296 p.MarkReachableDefs ();
300 private void CheckRecursion (RdpPattern p, int depth)
303 RdpAbstractBinary binary = p as RdpAbstractBinary;
304 if (binary != null) {
305 // choice, interleave, group
306 CheckRecursion (binary.LValue, depth);
307 CheckRecursion (binary.RValue, depth);
310 RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
311 if (single != null) {
312 CheckRecursion (single.Child, depth);
316 switch (p.PatternType) {
317 case RelaxngPatternType.Ref:
318 // get checkRecursionDepth from table.
319 int checkRecursionDepth = -1;
320 object checkedDepth = checkedDefs [p];
321 if (checkedDepth != null)
322 checkRecursionDepth = (int) checkedDepth;
324 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
325 RelaxngGrammar target = pref.TargetGrammar;
326 RdpPattern refPattern = pref.RefPattern;
327 if (refPattern == null)
328 // FIXME: fill line info
329 throw new RelaxngException ("No matching define found for " + pref.Name);
331 if (checkRecursionDepth == -1) {
332 checkedDefs [p] = depth;
333 /*test*/ if (refPattern.PatternType != RelaxngPatternType.Element)
334 CheckRecursion (refPattern, depth);
335 checkedDefs [p] = -2;
337 else if (depth == checkRecursionDepth)
338 // FIXME: fill line info
339 throw new RelaxngException (String.Format ("Detected illegal recursion. Ref name is {0}.", pref.Name));
343 case RelaxngPatternType.Attribute:
344 CheckRecursion (((RdpAttribute) p).Children, depth);
347 case RelaxngPatternType.DataExcept:
348 CheckRecursion (((RdpDataExcept) p).Except, depth);
351 case RelaxngPatternType.Element:
352 RdpElement el = p as RdpElement;
353 CheckRecursion (el.Children, depth + 1); // +1
355 case RelaxngPatternType.List:
356 CheckRecursion (((RdpList) p).Child, depth);
362 private void CollectGrammars ()
364 // collect ref and parentRef for each define.
366 // FIXME: This should be assembledStart.
367 CheckReferences (compiledStart);
370 foreach (string name in assembledDefs.Keys) {
371 RdpPattern p = (RdpPattern) assembledDefs [name];
376 // If it is child of any other pattern:
377 // * Remove all definitions under descendant grammars,
378 // replacing ref names, and
379 // * Then return its start pattern.
380 if (parentGrammar != null) {
381 // TODO: reachable check is incomplete.
382 foreach (string name in assembledDefs.Keys) {
384 refPatterns [assembledDefs [name] ] as ArrayList;
386 continue; // Not referenced.
388 // At this point, parent grammar doesn't
389 // collect assembledDefs as yet
390 string uname = GetUniqueName (name, parentGrammar);
391 parentGrammar.assembledDefs [uname] = assembledDefs [name];
396 private static string GetUniqueName (string name, RelaxngGrammar grammar)
398 foreach (RelaxngDefine def in grammar.Defines)
399 if (def.Name == name)
400 return GetUniqueName (name + '_', grammar);
404 private void FixupReference ()
406 foreach (RdpUnresolvedRef pref in this.unresolvedPatterns) {
407 RdpPattern defP = assembledDefs [pref.Name] as RdpPattern;
409 // FIXME: fill line info
410 throw new RelaxngException (String.Format ("Target definition was not found: {0}", pref.Name));
411 ArrayList al = refPatterns [defP] as ArrayList;
413 al = new ArrayList ();
414 refPatterns [defP] = al;
418 this.unresolvedPatterns.Clear ();
421 private void replaceDefines (string name, ArrayList al)
425 string newName = "define" + idx;
426 if (parentGrammar.assembledDefs [newName] == null) {
427 parentGrammar.assembledDefs [newName] =
428 assembledDefs [name];
429 foreach (RdpUnresolvedRef pref in al)
437 // remove ref and parentRef.
438 // add new defines for each elements.
439 private void CheckReferences (RdpPattern p)
441 RdpAbstractBinary binary = p as RdpAbstractBinary;
442 if (binary != null) {
443 // choice, interleave, group
444 CheckReferences (binary.LValue);
445 CheckReferences (binary.RValue);
448 RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
449 if (single != null) {
450 CheckReferences (single.Child);
454 switch (p.PatternType) {
455 case RelaxngPatternType.Ref:
456 // FIXME: This should not re-expand ref
457 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
458 if (pref.RefPattern != null)
461 RelaxngGrammar target = pref.TargetGrammar;
463 // FIXME: fill line info
464 throw new RelaxngException ("Referenced definition was not found.");
465 RdpPattern defP = target.assembledDefs [pref.Name] as RdpPattern;
467 target.unresolvedPatterns.Add (p);
469 ArrayList al = target.refPatterns [defP] as ArrayList;
471 al = new ArrayList ();
472 target.refPatterns [defP] = al;
475 pref.RefPattern = defP;
479 case RelaxngPatternType.Attribute:
480 CheckReferences (((RdpAttribute) p).Children);
483 case RelaxngPatternType.DataExcept:
484 CheckReferences (((RdpDataExcept) p).Except);
487 case RelaxngPatternType.Element:
488 RdpElement el = p as RdpElement;
489 CheckReferences (el.Children);
490 string name = ElementDefMap [el] as string;
494 string newName = "element0";
495 if (el.NameClass is RdpName)
496 newName = ((RdpName) el.NameClass).LocalName;
498 if (assembledDefs [newName] == null) {
499 elementReplacedDefs [newName] = el.Children;
502 newName = "element" + ++idx;
504 ElementDefMap [el] = newName;
506 // Even though the element is replaced with ref,
507 // derivative of ref is RdpElement in fact...
510 case RelaxngPatternType.List:
511 CheckReferences (((RdpList) p).Child);
514 case RelaxngPatternType.Empty:
515 case RelaxngPatternType.NotAllowed:
516 case RelaxngPatternType.Text:
517 case RelaxngPatternType.Value:
520 //case RelaxngPatternType.ExternalRef:
521 //case RelaxngPatternType.Include:
522 // Mixed, Optional, ZeroOrMore are already removed.
523 // Choice, Group, Interleave, OneOrMore are already proceeded.
527 #region 4.17 - Combine
528 private void AssembleCombine ()
530 // calculate combines.
531 bool haveHeadStart = false;
532 string combineStart = null;
533 Hashtable haveHeadDefs = new Hashtable ();
534 Hashtable combineDefs = new Hashtable ();
536 // 1.calculate combine for starts.
537 foreach (RelaxngStart start in starts)
538 CheckCombine (ref haveHeadStart,
539 ref combineStart, start.Combine, "start");
540 // 2.calculate combine for defines.
541 foreach (RelaxngDefine def in defs) {
543 haveHeadDefs.ContainsKey (def.Name) ?
544 haveHead = (bool) haveHeadDefs [def.Name]
546 string combine = combineDefs [def.Name] as string;
547 CheckCombine (ref haveHead, ref combine,
548 def.Combine, String.Format ("define name={0}", def.Name));
549 haveHeadDefs [def.Name] = haveHead;
550 combineDefs [def.Name] = combine;
554 // assemble starts and defines with "combine" attribute.
556 // 3.assemble starts.
557 if (starts.Count == 0) {
558 if (ParentGrammar == null)
559 throw new RelaxngException (this, "grammar must have at least one start component.");
561 assembledStart = ((RelaxngStart)starts [0]).Pattern;
562 for (int i=1; i<starts.Count; i++) {
563 RelaxngPattern p2 = ((RelaxngStart) starts [i]).Pattern;;
564 if (combineStart == "interleave") {
565 RelaxngInterleave intlv = new RelaxngInterleave ();
566 intlv.Patterns.Add (assembledStart);
567 intlv.Patterns.Add (p2);
568 assembledStart = intlv;
570 RelaxngChoice c = new RelaxngChoice ();
571 c.Patterns.Add (assembledStart);
578 // 4.assemble defines
579 foreach (RelaxngDefine def in defs) {
580 string combine = combineDefs [def.Name] as string;
582 assembledDefs [def.Name] as RdpPattern;
583 RdpPattern p2 = def.Compile (this);
585 if (combine == "interleave") {
586 assembledDefs [def.Name] =
587 new RdpInterleave (p1, p2);
589 assembledDefs [def.Name] =
590 new RdpChoice (p1, p2);
593 assembledDefs [def.Name] = p2;
599 // check combine attributes.
600 private void CheckCombine (ref bool haveHead, ref string combine, string newCombine, string targetSpec)
602 switch (newCombine) {
604 if (combine == "choice")
605 throw new RelaxngException (this, "\"combine\" was already specified \"choice\"");
607 combine = "interleave";
610 if (combine == "interleave")
611 throw new RelaxngException (this, "\"combine\" was already specified \"interleave\"");
617 throw new RelaxngException (this, String.Format ("There was already \"{0}\" element without \"combine\" attribute.", targetSpec));