Merge pull request #2545 from ermshiperete/Xamarin-24974
[mono.git] / mcs / class / Commons.Xml.Relaxng / Commons.Xml.Relaxng / RelaxngGrammar.cs
1 //
2 // Commons.Xml.Relaxng.RelaxngGrammar.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
6 //
7 // 2003 Atsushi Enomoto "No rights reserved."
8 //
9 // Copyright (c) 2004 Novell Inc.
10 // All rights reserved
11 //
12
13 //
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
21 // 
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
24 // 
25 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 //
33 using System;
34 using System.Collections;
35 using System.IO;
36 using System.Net;
37 using System.Xml;
38 using Commons.Xml.Relaxng.Derivative;
39 using Commons.Xml.Relaxng.Rnc;
40
41 namespace Commons.Xml.Relaxng
42 {
43         public class RelaxngGrammar : RelaxngPattern
44         {
45                 // field
46                 public static string NamespaceURI =
47                         "http://relaxng.org/ns/structure/1.0";
48
49                 // Parser condition: it is used to resolve "included" source
50                 bool isSourceRnc;
51
52                 // object model fields
53                 string defaultNamespace;
54                 RelaxngGrammarContentList starts = new RelaxngGrammarContentList ();
55                 RelaxngGrammarContentList defs = new RelaxngGrammarContentList ();
56                 RelaxngGrammarContentList includes = new RelaxngGrammarContentList ();
57                 RelaxngGrammarContentList divs = new RelaxngGrammarContentList ();
58
59                 // compiled fields.
60                 RdpPattern startPattern;
61
62                 // compile cache fields.
63                 Hashtable assembledDefs = new Hashtable (); // [defName] = RelaxngDefine
64                 RelaxngPattern assembledStart;
65                 RdpPattern compiledStart;
66                 Hashtable elementReplacedDefs = new Hashtable ();
67
68                 Hashtable includedUris = new Hashtable ();
69                 RelaxngGrammar parentGrammar;
70                 Hashtable refPatterns = new Hashtable (); // key = RdpPattern of assembledDefs
71
72                 // only for checkRecursion()
73                 Hashtable checkedDefs = new Hashtable ();
74
75                 // this should be checked after its compilation finished to complete
76                 // missing-at-the-tracking patterns (especially of parent grammars).
77                 // key = RdpPattern, value = ArrayList of unresolvedPatterns.
78                 ArrayList unresolvedPatterns = new ArrayList ();
79
80                 // contents key = RdpElement and value = name of the parent define.
81                 private Hashtable ElementDefMap = new Hashtable ();
82
83                 // Public
84
85                 public RelaxngGrammar ()
86                 {
87                 }
88
89                 private void ResetCompileState ()
90                 {
91                         startPattern = null;
92                         assembledDefs.Clear ();
93                         assembledStart = null;
94                         compiledStart = null;
95                         elementReplacedDefs.Clear ();
96                         includedUris.Clear ();
97                         parentGrammar = null;
98                         refPatterns.Clear ();
99                         checkedDefs.Clear ();
100                         unresolvedPatterns.Clear ();
101                         ElementDefMap.Clear ();
102                 }
103
104                 internal bool IsSourceCompactSyntax {
105                         get { return isSourceRnc; }
106                         set { isSourceRnc = value; }
107                 }
108
109                 internal RelaxngGrammar ParentGrammar {
110                         get { return parentGrammar; }
111                         set { parentGrammar = value; }
112                 }
113
114                 internal RelaxngDatatypeProvider Provider {
115                         get { return (base.DataProvider == null) ? (parentGrammar != null ? parentGrammar.Provider : base.DataProvider) : base.DataProvider; }
116                         set { base.DataProvider = value; }
117                 }
118
119                 public override RelaxngPatternType PatternType {
120                         get { return RelaxngPatternType.Grammar; }
121                 }
122
123                 public string DefaultNamespace {
124                         get { return defaultNamespace; }
125                         set { defaultNamespace = value; }
126                 }
127
128                 public RelaxngGrammarContentList Starts {
129                         get { return starts; }
130                 }
131
132                 public RelaxngGrammarContentList Defines {
133                         get { return defs; }
134                 }
135
136                 public RelaxngGrammarContentList Includes {
137                         get { return includes; }
138                 }
139
140                 public RelaxngGrammarContentList Divs {
141                         get { return divs; }
142                 }
143
144                 public override void Write (XmlWriter writer)
145                 {
146                         writer.WriteStartElement ("", "grammar", RelaxngGrammar.NamespaceURI);
147                         if (defaultNamespace != null)
148                                 writer.WriteAttributeString ("ns", defaultNamespace);
149                         foreach (RelaxngStart start in Starts)
150                                 start.Write (writer);
151                         foreach (RelaxngDefine define in Defines)
152                                 define.Write (writer);
153                         foreach (RelaxngInclude include in Includes)
154                                 include.Write (writer);
155                         foreach (RelaxngDiv div in Divs)
156                                 div.Write (writer);
157                         writer.WriteEndElement ();
158                 }
159
160                 internal override void WriteRnc (RncWriter writer)
161                 {
162                         writer.WriteGrammar (this);
163                 }
164
165                 internal Hashtable IncludedUris {
166                         get { return includedUris; }
167                 }
168
169                 // Internal
170                 internal override void CheckConstraints ()
171                 {
172                         // do nothing here.
173                 }
174
175                 internal void CheckIncludeRecursion (string href)
176                 {
177                         if (this.includedUris [href] != null)
178                                 // FIXME: fill line info
179                                 throw new RelaxngException ("Include recursion found. href: " + href);
180                         if (parentGrammar != null)
181                                 parentGrammar.CheckIncludeRecursion (href);
182                 }
183
184                 // Compile from this simplified syntax to derivatives.
185                 internal override RdpPattern Compile (RelaxngGrammar grammar)
186                 {
187                         ResetCompileState ();
188
189                         parentGrammar = grammar;
190
191                         // First, process includes and divs. RELAX NG 4.1 - 4.15.
192                         ArrayList compiledDivs = new ArrayList ();
193                         foreach (RelaxngInclude inc in includes)
194                                 compiledDivs.Add (inc.Compile (this));
195                         compiledDivs.AddRange (divs);
196                         foreach (RelaxngDiv div in compiledDivs)
197                                 div.Compile (this);
198
199                         // Check constraints. RELAX NG 4.16
200                         foreach (RelaxngStart start in starts)
201                                 start.Pattern.CheckConstraints ();
202                         foreach (RelaxngDefine define in defs)
203                                 foreach (RelaxngPattern p in define.Patterns)
204                                         p.CheckConstraints ();
205
206                         // Assemble combine into the same name defines/start.
207                         // see RELAX NG 4.17.
208                         AssembleCombine ();
209
210                         // 4.18 : <grammar> must have at least one <start>.
211                         if (assembledStart == null)
212                                 throw new RelaxngException ("A grammar elements must contain at least one start element.");
213                         compiledStart = assembledStart.Compile (this);
214
215                         // Assemble all define components into top grammar and
216                         // return start patterns for descendant grammars.
217                         // see RELAX NG 4.18.
218                         CollectGrammars ();
219                         if (parentGrammar != null)
220                                 return compiledStart;
221                         assembledStart = null; // no use anymore
222
223                         // 4.19 (a) remove non-reachable defines
224 /*
225                         compiledStart.MarkReachableDefs ();
226                         ArrayList tmp = new ArrayList ();
227                         foreach (DictionaryEntry entry in this.assembledDefs)
228                                 if (!reachableDefines.ContainsKey (entry.Key))
229                                         tmp.Add (entry.Key);
230                         foreach (string key in tmp)
231                                 assembledDefs.Remove (key);
232 */
233                         // 4.19 (b) check illegal recursion
234                         CheckRecursion (compiledStart, 0);
235                         // here we collected element-replaced definitions
236                         foreach (DictionaryEntry entry in elementReplacedDefs)
237                                 assembledDefs.Add (entry.Key, entry.Value);
238                         startPattern = compiledStart;
239                         // 4.20,21 reduce notAllowed and empty.
240                         bool b;
241                         do {
242                                 b = false;
243                                 startPattern = startPattern.ReduceEmptyAndNotAllowed (ref b, new Hashtable ());
244                         } while (b);
245
246                         Hashtable ht = new Hashtable ();
247                         startPattern.setInternTable (ht);
248
249                         // Check Constraints: RELAX NG spec 7
250                         // 7.1.1-4, 7.3, 7.4
251                         startPattern.CheckConstraints (false, false, false, false, false, false);
252                         // 7.1.5
253                         CheckStartPatternContent (startPattern);
254
255                         // 4.19 (c) expandRef - actual replacement
256                         startPattern = compiledStart.ExpandRef (assembledDefs);
257
258                         // 7.2
259                         RdpContentType ct = startPattern.ContentType;
260
261                         // return its start pattern.
262                         IsCompiled = true;
263                         return startPattern;
264                 }
265
266                 private void CheckStartPatternContent (RdpPattern p)
267                 {
268                         switch (p.PatternType) {
269                         case RelaxngPatternType.Ref:
270                                 CheckStartPatternContent (((RdpUnresolvedRef) p).RefPattern);
271                                 break;
272                         case RelaxngPatternType.Element:
273                                 break;
274                         case RelaxngPatternType.Choice:
275                                 RdpChoice c = p as RdpChoice;
276                                 CheckStartPatternContent (c.LValue);
277                                 CheckStartPatternContent (c.RValue);
278                                 break;
279                         case RelaxngPatternType.NotAllowed:
280                                 break;
281                         default:
282                                 // FIXME: fill line info
283                                 throw new RelaxngException ("Start pattern contains an invalid content pattern.");
284                         }
285                 }
286
287                 Hashtable reachableDefines = new Hashtable ();
288
289                 // for step 4.19
290                 internal void MarkReacheableDefine (string name)
291                 {
292                         if (reachableDefines.ContainsKey (name))
293                                 return;
294                         RdpPattern p = assembledDefs [name] as RdpPattern;
295                         reachableDefines.Add (name, p);
296                         p.MarkReachableDefs ();
297                 }
298
299                 // 4.19 (b)
300                 private void CheckRecursion (RdpPattern p, int depth)
301                 {
302
303                         RdpAbstractBinary binary = p as RdpAbstractBinary;
304                         if (binary != null) {
305                                 // choice, interleave, group
306                                 CheckRecursion (binary.LValue, depth);
307                                 CheckRecursion (binary.RValue, depth);
308                                 return;
309                         }
310                         RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
311                         if (single != null) {
312                                 CheckRecursion (single.Child, depth);
313                                 return;
314                         }
315
316                         switch (p.PatternType) {
317                         case RelaxngPatternType.Ref:
318                                 // get checkRecursionDepth from table.
319                                 int checkRecursionDepth = -1;
320                                 object checkedDepth = checkedDefs [p];
321                                 if (checkedDepth != null)
322                                         checkRecursionDepth = (int) checkedDepth;
323                                 // get refPattern
324                                 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
325                                 RelaxngGrammar target = pref.TargetGrammar;
326                                 RdpPattern refPattern = pref.RefPattern;
327                                 if (refPattern == null)
328                                         // FIXME: fill line info
329                                         throw new RelaxngException ("No matching define found for " + pref.Name);
330
331                                 if (checkRecursionDepth == -1) {
332                                         checkedDefs [p] = depth;
333 /*test*/                                        if (refPattern.PatternType != RelaxngPatternType.Element)
334                                                 CheckRecursion (refPattern, depth);
335                                         checkedDefs [p] = -2;
336                                 }
337                                 else if (depth == checkRecursionDepth)
338                                         // FIXME: fill line info
339                                         throw new RelaxngException (String.Format ("Detected illegal recursion. Ref name is {0}.", pref.Name));
340
341                                 break;
342
343                         case RelaxngPatternType.Attribute:
344                                 CheckRecursion (((RdpAttribute) p).Children, depth);
345                                 break;
346
347                         case RelaxngPatternType.DataExcept:
348                                 CheckRecursion (((RdpDataExcept) p).Except, depth);
349                                 break;
350
351                         case RelaxngPatternType.Element:
352                                 RdpElement el = p as RdpElement;
353                                 CheckRecursion (el.Children, depth + 1);        // +1
354                                 break;
355                         case RelaxngPatternType.List:
356                                 CheckRecursion (((RdpList) p).Child, depth);
357                                 break;
358                         }
359                 }
360
361                 // 4.18
362                 private void CollectGrammars ()
363                 {
364                         // collect ref and parentRef for each define.
365
366                         // FIXME: This should be assembledStart.
367                         CheckReferences (compiledStart);
368                         FixupReference ();
369
370                         foreach (string name in assembledDefs.Keys) {
371                                 RdpPattern p = (RdpPattern) assembledDefs [name];
372                                 CheckReferences (p);
373                                 FixupReference ();
374                         }
375
376                         // If it is child of any other pattern:
377                         // * Remove all definitions under descendant grammars,
378                         //   replacing ref names, and
379                         // * Then return its start pattern.
380                         if (parentGrammar != null) {
381                                 // TODO: reachable check is incomplete.
382                                 foreach (string name in assembledDefs.Keys) {
383                                         ArrayList al = 
384                                                 refPatterns [assembledDefs [name] ] as ArrayList;
385                                         if (al == null)
386                                                 continue; // Not referenced.
387
388                                         // At this point, parent grammar doesn't 
389                                         // collect assembledDefs as yet
390                                         string uname = GetUniqueName (name, parentGrammar);
391                                         parentGrammar.assembledDefs [uname] = assembledDefs [name];
392                                 }
393                         }
394                 }
395
396                 private static string GetUniqueName (string name, RelaxngGrammar grammar)
397                 {
398                         foreach (RelaxngDefine def in grammar.Defines)
399                                 if (def.Name == name)
400                                         return GetUniqueName (name + '_', grammar);
401                         return name;
402                 }
403
404                 private void FixupReference ()
405                 {
406                         foreach (RdpUnresolvedRef pref in this.unresolvedPatterns) {
407                                 RdpPattern defP = assembledDefs [pref.Name] as RdpPattern;
408                                 if (defP == null)
409                                         // FIXME: fill line info
410                                         throw new RelaxngException (String.Format ("Target definition was not found: {0}", pref.Name));
411                                 ArrayList al = refPatterns [defP] as ArrayList;
412                                 if (al == null) {
413                                         al = new ArrayList ();
414                                         refPatterns [defP] = al;
415                                 }
416                                 al.Add (pref);
417                         }
418                         this.unresolvedPatterns.Clear ();
419                 }
420
421                 private void replaceDefines (string name, ArrayList al)
422                 {
423                         int idx = 0;
424                         while (true) {
425                                 string newName = "define" + idx;
426                                 if (parentGrammar.assembledDefs [newName] == null) {
427                                         parentGrammar.assembledDefs [newName] = 
428                                                 assembledDefs [name];
429                                         foreach (RdpUnresolvedRef pref in al)
430                                                 pref.Name = newName;
431                                         break;
432                                 }
433                                 idx++;
434                         }
435                 }
436
437                 // remove ref and parentRef.
438                 // add new defines for each elements.
439                 private void CheckReferences (RdpPattern p)
440                 {
441                         RdpAbstractBinary binary = p as RdpAbstractBinary;
442                         if (binary != null) {
443                                 // choice, interleave, group
444                                 CheckReferences (binary.LValue);
445                                 CheckReferences (binary.RValue);
446                                 return;
447                         }
448                         RdpAbstractSingleContent single = p as RdpAbstractSingleContent;
449                         if (single != null) {
450                                 CheckReferences (single.Child);
451                                 return;
452                         }
453
454                         switch (p.PatternType) {
455                         case RelaxngPatternType.Ref:
456                                 // FIXME: This should not re-expand ref
457                                 RdpUnresolvedRef pref = p as RdpUnresolvedRef;
458                                 if (pref.RefPattern != null)
459                                         break;
460
461                                 RelaxngGrammar target = pref.TargetGrammar;
462                                 if (target == null)
463                                         // FIXME: fill line info
464                                         throw new RelaxngException ("Referenced definition was not found.");
465                                 RdpPattern defP = target.assembledDefs [pref.Name] as RdpPattern;
466                                 if (defP == null)
467                                         target.unresolvedPatterns.Add (p);
468                                 else {
469                                         ArrayList al = target.refPatterns [defP] as ArrayList;
470                                         if (al == null) {
471                                                 al = new ArrayList ();
472                                                 target.refPatterns [defP] = al;
473                                         }
474                                         al.Add (p);
475                                         pref.RefPattern = defP;
476                                 }
477                                 break;
478
479                         case RelaxngPatternType.Attribute:
480                                 CheckReferences (((RdpAttribute) p).Children);
481                                 break;
482
483                         case RelaxngPatternType.DataExcept:
484                                 CheckReferences (((RdpDataExcept) p).Except);
485                                 break;
486
487                         case RelaxngPatternType.Element:
488                                 RdpElement el = p as RdpElement;
489                                 CheckReferences (el.Children);
490                                 string name = ElementDefMap [el] as string;
491                                 if (name == null) {
492                                         // add new define
493                                         int idx = 0;
494                                         string newName = "element0";
495                                         if (el.NameClass is RdpName)
496                                                 newName = ((RdpName) el.NameClass).LocalName;
497                                         while (true) {
498                                                 if (assembledDefs [newName] == null) {
499                                                         elementReplacedDefs [newName] = el.Children;
500                                                         break;
501                                                 }
502                                                 newName = "element" + ++idx;
503                                         }
504                                         ElementDefMap [el] = newName;
505                                 }
506                                 // Even though the element is replaced with ref,
507                                 // derivative of ref is RdpElement in fact...
508                                 break;
509
510                         case RelaxngPatternType.List:
511                                 CheckReferences (((RdpList) p).Child);
512                                 break;
513
514                         case RelaxngPatternType.Empty:
515                         case RelaxngPatternType.NotAllowed:
516                         case RelaxngPatternType.Text:
517                         case RelaxngPatternType.Value:
518                                 break;
519
520                         //case RelaxngPatternType.ExternalRef:
521                         //case RelaxngPatternType.Include:
522                         // Mixed, Optional, ZeroOrMore are already removed.
523                         // Choice, Group, Interleave, OneOrMore are already proceeded.
524                         }
525                 }
526
527                 #region 4.17 - Combine
528                 private void AssembleCombine ()
529                 {
530                         // calculate combines.
531                         bool haveHeadStart = false;
532                         string combineStart = null;
533                         Hashtable haveHeadDefs = new Hashtable ();
534                         Hashtable combineDefs = new Hashtable ();
535
536                         // 1.calculate combine for starts.
537                         foreach (RelaxngStart start in starts)
538                                 CheckCombine (ref haveHeadStart, 
539                                         ref combineStart, start.Combine, "start");
540                         // 2.calculate combine for defines.
541                         foreach (RelaxngDefine def in defs) {
542                                 bool haveHead = 
543                                         haveHeadDefs.ContainsKey (def.Name) ?
544                                         haveHead = (bool) haveHeadDefs [def.Name]
545                                         : false;
546                                 string combine = combineDefs [def.Name] as string;
547                                 CheckCombine (ref haveHead, ref combine,
548                                         def.Combine, String.Format ("define name={0}", def.Name));
549                                 haveHeadDefs [def.Name] = haveHead;
550                                 combineDefs [def.Name] = combine;
551                                 continue;
552                         }
553
554                         // assemble starts and defines with "combine" attribute.
555
556                         // 3.assemble starts.
557                         if (starts.Count == 0) {
558                                 if (ParentGrammar == null)
559                                         throw new RelaxngException (this, "grammar must have at least one start component.");
560                         } else {
561                                 assembledStart = ((RelaxngStart)starts [0]).Pattern;
562                                 for (int i=1; i<starts.Count; i++) {
563                                         RelaxngPattern p2 = ((RelaxngStart) starts [i]).Pattern;;
564                                         if (combineStart == "interleave") {
565                                                 RelaxngInterleave intlv = new RelaxngInterleave ();
566                                                 intlv.Patterns.Add (assembledStart);
567                                                 intlv.Patterns.Add (p2);
568                                                 assembledStart = intlv;
569                                         } else {
570                                                 RelaxngChoice c = new RelaxngChoice ();
571                                                 c.Patterns.Add (assembledStart);
572                                                 c.Patterns.Add (p2);
573                                                 assembledStart = c;
574                                         }
575                                 }
576                         }
577
578                         // 4.assemble defines
579                         foreach (RelaxngDefine def in defs) {
580                                 string combine = combineDefs [def.Name] as string;
581                                 RdpPattern p1 = 
582                                         assembledDefs [def.Name] as RdpPattern;
583                                 RdpPattern p2 = def.Compile (this);
584                                 if (p1 != null) {
585                                         if (combine == "interleave") {
586                                                 assembledDefs [def.Name] =
587                                                         new RdpInterleave (p1, p2);
588                                         } else {
589                                                 assembledDefs [def.Name] =
590                                                         new RdpChoice (p1, p2);
591                                         }
592                                 } else {
593                                         assembledDefs [def.Name] = p2;
594                                 }
595                         }
596
597                 }
598
599                 // check combine attributes.
600                 private void CheckCombine (ref bool haveHead, ref string combine, string newCombine, string targetSpec)
601                 {
602                         switch (newCombine) {
603                         case "interleave":
604                                 if (combine == "choice")
605                                         throw new RelaxngException (this, "\"combine\" was already specified \"choice\"");
606                                 else
607                                         combine = "interleave";
608                                 break;
609                         case "choice":
610                                 if (combine == "interleave")
611                                         throw new RelaxngException (this, "\"combine\" was already specified \"interleave\"");
612                                 else
613                                         combine = "choice";
614                                 break;
615                         case null:
616                                 if (haveHead)
617                                         throw new RelaxngException (this, String.Format ("There was already \"{0}\" element without \"combine\" attribute.", targetSpec));
618                                 haveHead = true;
619                                 break;
620                         }
621                 }
622                 #endregion
623         }
624 }