2005-05-22 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System.XML / System.Xml.Schema / XmlSchemaInference.cs
1 //
2 // XmlSchemaInference.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C)2004 Novell Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 #if NET_2_0
32
33 using System;
34 using System.Collections;
35 using System.Xml;
36 using System.Xml.Schema;
37
38 using QName = System.Xml.XmlQualifiedName;
39 using Form = System.Xml.Schema.XmlSchemaForm;
40 using Use = System.Xml.Schema.XmlSchemaUse;
41 using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
42 using SOMObject = System.Xml.Schema.XmlSchemaObject;
43 using Element = System.Xml.Schema.XmlSchemaElement;
44 using Attr = System.Xml.Schema.XmlSchemaAttribute;
45 using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
46 using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
47 using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
48 using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
49 using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
50 using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
51 using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
52 using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
53 using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
54 using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
55 using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
56 using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
57 using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
58 using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
59 using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
60 using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
61 using Particle = System.Xml.Schema.XmlSchemaParticle;
62 using Sequence = System.Xml.Schema.XmlSchemaSequence;
63 using Choice = System.Xml.Schema.XmlSchemaChoice;
64
65
66 namespace System.Xml.Schema
67 {
// Public entry point for schema inference. It stores two InferenceOption
// settings and forwards the actual work to XsdInference.Process.
[MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
public class XmlSchemaInference
{
        // Inference mode used by both the Occurrence and the
        // TypeInference settings.
        public enum InferenceOption {
                Restricted,
                Relaxed,
        }

        // Both settings start out as Restricted.
        InferenceOption occurrence = InferenceOption.Restricted;
        InferenceOption typeInference = InferenceOption.Restricted;

        public XmlSchemaInference ()
        {
        }

        // Occurrence setting; Relaxed is forwarded to the inference
        // engine as laxOccurrence = true.
        public InferenceOption Occurrence {
                get { return occurrence; }
                set { occurrence = value; }
        }

        // Type inference setting; Relaxed is forwarded to the inference
        // engine as laxTypeInference = true.
        public InferenceOption TypeInference {
                // Read the backing field. Returning the property itself
                // here would recurse without end.
                get { return typeInference; }
                set { typeInference = value; }
        }

        // Infers a schema from xmlReader into a fresh XmlSchemaSet.
        public XmlSchemaSet InferSchema (XmlReader xmlReader)
        {
                return InferSchema (xmlReader, new XmlSchemaSet ());
        }

        // Infers a schema from xmlReader, working against the given
        // schema set, and returns the set produced by the engine.
        public XmlSchemaSet InferSchema (XmlReader xmlReader,
                XmlSchemaSet schemas)
        {
                return XsdInference.Process (xmlReader, schemas,
                        occurrence == InferenceOption.Relaxed,
                        typeInference == InferenceOption.Relaxed);
        }
}
106
107         class XsdInference
108         {
// Builds an XsdInference over the given reader and schema set, runs it
// once and hands back the schema set held by that instance.
public static XmlSchemaSet Process (XmlReader xmlReader,
        XmlSchemaSet schemas,
        bool laxOccurrence,
        bool laxTypeInference)
{
        XsdInference worker = new XsdInference (
                xmlReader,
                schemas,
                laxOccurrence,
                laxTypeInference);
        worker.Run ();
        return worker.schemas;
}
119
// Namespace URIs that the inference code compares against.
public const string NamespaceXml = "http://www.w3.org/XML/1998/namespace";
public const string NamespaceXmlns = "http://www.w3.org/2000/xmlns/";
public const string XdtNamespace = "http://www.w3.org/2003/11/xpath-datatypes";

// Qualified names of simple types, all in the XML Schema namespace.
static readonly QName QNameString = new QName ("string", XmlSchema.Namespace);
static readonly QName QNameBoolean = new QName ("boolean", XmlSchema.Namespace);
static readonly QName QNameAnyType = new QName ("anyType", XmlSchema.Namespace);

// Integer types, signed and unsigned.
static readonly QName QNameByte = new QName ("byte", XmlSchema.Namespace);
static readonly QName QNameUByte = new QName ("unsignedByte", XmlSchema.Namespace);
static readonly QName QNameShort = new QName ("short", XmlSchema.Namespace);
static readonly QName QNameUShort = new QName ("unsignedShort", XmlSchema.Namespace);
static readonly QName QNameInt = new QName ("int", XmlSchema.Namespace);
static readonly QName QNameUInt = new QName ("unsignedInt", XmlSchema.Namespace);
static readonly QName QNameLong = new QName ("long", XmlSchema.Namespace);
static readonly QName QNameULong = new QName ("unsignedLong", XmlSchema.Namespace);

// Other numeric types.
static readonly QName QNameDecimal = new QName ("decimal", XmlSchema.Namespace);
// NOTE(review): "unsignedDecimal" does not look like a built-in XSD
// datatype name; confirm how this name is used before relying on it.
static readonly QName QNameUDecimal = new QName ("unsignedDecimal", XmlSchema.Namespace);
static readonly QName QNameDouble = new QName ("double", XmlSchema.Namespace);
static readonly QName QNameFloat = new QName ("float", XmlSchema.Namespace);

// Date and time types.
static readonly QName QNameDateTime = new QName ("dateTime", XmlSchema.Namespace);
static readonly QName QNameDuration = new QName ("duration", XmlSchema.Namespace);
179
// Reader that supplies the instance document being inferred from.
XmlReader source;
// Schema set that is read and extended during inference.
XmlSchemaSet schemas;
// Flags forwarded from the public Occurrence / TypeInference settings.
bool laxOccurrence;
bool laxTypeInference;

// Lookup tables, created empty for each inference run.
Hashtable newElements = new Hashtable ();
Hashtable newAttributes = new Hashtable ();

// Private: instances are only created through Process.
private XsdInference (XmlReader xmlReader,
        XmlSchemaSet schemas,
        bool laxOccurrence,
        bool laxTypeInference)
{
        source = xmlReader;
        this.schemas = schemas;
        this.laxTypeInference = laxTypeInference;
        this.laxOccurrence = laxOccurrence;
}
198
// Drives one inference pass: compiles the schema set, positions the
// reader on the top-level element, infers (or merges into) the matching
// global element declaration, then compiles the schema set again.
// Throws ArgumentException when the reader content is not an element.
private void Run ()
{
        // XmlSchemaSet need to be compiled.
        schemas.Compile ();

        // move to top-level element
        source.MoveToContent ();
        if (source.NodeType != XmlNodeType.Element)
                throw new ArgumentException ("Argument XmlReader content is expected to be an element.");

        QName qname = new QName (source.LocalName,
                source.NamespaceURI);

        // Reuse an existing global declaration when there is one;
        // otherwise create it and infer it as a new definition.
        Element el = GetGlobalElement (qname);
        bool isNew = el == null;
        if (isNew)
                el = CreateGlobalElement (qname);
        InferElement (el, qname.Namespace, isNew);

        // finally compile again.
        schemas.Compile ();
}
223
// Adds the schema for the xml: namespace to the schema set unless the
// set already holds at least one schema for that namespace.
private void IncludeXmlAttributes ()
{
        bool alreadyPresent = schemas.Schemas (NamespaceXml).Count != 0;
        if (alreadyPresent)
                return;

        // FIXME: do it from resources.
        schemas.Add (NamespaceXml, "http://www.w3.org/2001/xml.xsd");
}
231
// Infers the declaration of el from the element the reader is currently
// positioned on: first its attributes, then its content. When el is only
// a reference (ref='...'), the work is redirected to the referenced
// declaration instead.
private void InferElement (Element el, string ns, bool isNew)
{
        // Element reference: resolve (or create) the target and infer
        // that one instead of the referencing particle.
        if (el.RefName != QName.Empty) {
                Element target = GetGlobalElement (el.RefName);
                bool targetIsNew = isNew;
                if (target == null) {
                        target = CreateElement (el.RefName);
                        targetIsNew = true;
                }
                InferElement (target, ns, targetIsNew);
                return;
        }

        // Attributes; afterwards step back onto the owning element.
        if (source.MoveToFirstAttribute ()) {
                InferAttributes (el, ns, isNew);
                source.MoveToElement ();
        }

        // Content
        if (!source.IsEmptyElement) {
                InferContent (el, ns, isNew);
                source.ReadEndElement ();
        } else {
                InferAsEmptyElement (el, ns, isNew);
                source.Read ();
                source.MoveToContent ();
        }

        // Nothing was inferred for the type: fall back to xs:string.
        bool untyped = el.SchemaType == null &&
                el.SchemaTypeName == QName.Empty;
        if (untyped)
                el.SchemaTypeName = QNameString;
}
267
268                 #region Attribute Inference
269
// Builds a QName -> attribute table from an attribute list. Attribute
// references are keyed by their RefName, local definitions by their
// name with an empty namespace. Any item that is not a plain attribute
// is reported through Error.
private Hashtable CollectAttrTable (SOMList attList)
{
        Hashtable byName = new Hashtable ();
        foreach (XmlSchemaObject item in attList) {
                Attr decl = item as Attr;
                if (decl == null)
                        throw Error (item, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", item.GetType ()));

                QName key = decl.RefName != QName.Empty
                        ? decl.RefName
                        : new QName (decl.Name, "");
                byName.Add (key, decl);
        }
        return byName;
}
286
// Walks the attributes of the current element (the reader must already
// be on the first attribute) and merges them into el's type:
//  - xsi:nil marks the element nillable; other xsi:* attributes and
//    xmlns declarations are skipped;
//  - unknown attributes are added as new definitions;
//  - known, non-reference attributes get their type re-inferred;
//  - definitions that did not show up this time become optional.
private void InferAttributes (Element el, string ns, bool isNew)
{
        // The element needs a complexType to carry attributes; it is
        // resolved lazily, on the first attribute that really counts.
        ComplexType ct = null;
        SOMList declared = null;
        Hashtable pending = null;

        do {
                string attrNs = source.NamespaceURI;
                if (attrNs == XmlSchema.InstanceNamespace) {
                        if (source.LocalName == "nil")
                                el.IsNillable = true;
                        // all other xsi:* atts are ignored
                        continue;
                }
                if (attrNs == NamespaceXmlns)
                        continue;
                if (attrNs == NamespaceXml &&
                        schemas.Schemas (NamespaceXml).Count == 0)
                        IncludeXmlAttributes ();

                if (ct == null) {
                        ct = ToComplexType (el);
                        declared = GetAttributes (ct);
                        pending = CollectAttrTable (declared);
                }

                QName attrName = new QName (
                        source.LocalName, source.NamespaceURI);
                Attr known = pending [attrName] as Attr;
                if (known == null) {
                        declared.Add (InferNewAttribute (
                                attrName, isNew));
                        continue;
                }

                // Seen in this instance, so no longer pending.
                pending.Remove (attrName);
                bool isReference = known.RefName != null &&
                        known.RefName != QName.Empty;
                if (!isReference)
                        InferMergedAttribute (known);
        } while (source.MoveToNextAttribute ());

        // Whatever is still pending did not appear on this element:
        // mark those definitions as optional.
        if (pending == null)
                return;
        foreach (Attr missing in pending.Values)
                missing.Use = Use.Optional;
}
336
// Produces the attribute particle to add to a type for an attribute
// that the type does not declare yet. A namespaced attribute becomes a
// reference to a global attribute (created or merged as needed); an
// unqualified one becomes a local definition typed from the current
// reader value.
private XmlSchemaAttribute InferNewAttribute (
        QName attrName, bool isNewTypeDefinition)
{
        bool mergedRequired = false;
        Attr result;

        if (attrName.Namespace.Length == 0) {
                // local attribute
                result = new Attr ();
                result.Name = attrName.Name;
                result.SchemaTypeName =
                        InferSimpleType (source.Value);
        } else {
                // global attribute; might be already defined.
                Attr global = GetGlobalAttribute (attrName) as Attr;
                if (global != null) {
                        InferMergedAttribute (global);
                        mergedRequired = global.Use == Use.Required;
                } else {
                        global = CreateGlobalAttribute (attrName);
                        global.SchemaTypeName =
                                InferSimpleType (source.Value);
                }
                // The type itself only holds a reference to it.
                result = new Attr ();
                result.RefName = attrName;
        }

        bool required = !laxOccurrence &&
                (isNewTypeDefinition || mergedRequired);
        result.Use = required ? Use.Required : Use.Optional;

        return result;
}
371
// Checks the current reader value against attr's type name and, if the
// value does not fit, relaxes the type. Any inline type on the
// attribute is dropped in favour of the resulting type name.
private void InferMergedAttribute (Attr attr)
{
        QName merged = InferMergedType (source.Value,
                attr.SchemaTypeName);
        attr.SchemaTypeName = merged;
        attr.SchemaType = null;
}
380
// Returns typeName when value parses as that built-in simple type.
// Otherwise walks up the base types and returns the first one that
// accepts the value; xs:string is the answer when typeName is not a
// built-in simple type or when the walk runs out of named base types.
private QName InferMergedType (string value, QName typeName)
{
        SimpleType candidate = XmlSchemaType.GetBuiltInSimpleType (
                typeName);
        if (candidate == null) // not a built-in type name
                return QNameString;

        QName current = typeName;
        for (;;) {
                try {
                        candidate.Datatype.ParseValue (value,
                                source.NameTable,
                                source as IXmlNamespaceResolver);
                        return current;
                } catch {
                        // Value rejected: retry with the base type.
                        candidate = candidate.BaseXmlSchemaType
                                as XmlSchemaSimpleType;
                        current = candidate != null ?
                                candidate.QualifiedName : QName.Empty;
                }
                if (current == QName.Empty)
                        return QNameString;
        }
}
403
// Returns the attribute collection that applies to ct: the type's own
// Attributes when it has no content model, otherwise the Attributes of
// the extension/restriction inside its simple or complex content model.
// Reports through Error when the content model has neither an extension
// nor a restriction, or is of an unexpected kind.
private SOMList GetAttributes (ComplexType ct)
{
        if (ct.ContentModel == null)
                return ct.Attributes;

        SimpleModel sc = ct.ContentModel as SimpleModel;
        if (sc != null) {
                SimpleExt sce = sc.Content as SimpleExt;
                if (sce != null)
                        return sce.Attributes;
                SimpleRst scr = sc.Content as SimpleRst;
                if (scr != null)
                        return scr.Attributes;
                throw Error (sc, "Invalid simple content model.");
        }

        ComplexModel cc = ct.ContentModel as ComplexModel;
        if (cc != null) {
                ComplexExt cce = cc.Content as ComplexExt;
                if (cce != null)
                        return cce.Attributes;
                ComplexRst ccr = cc.Content as ComplexRst;
                if (ccr != null)
                        return ccr.Attributes;
                // This branch handles complex content, so say so.
                throw Error (cc, "Invalid complex content model.");
        }

        // cc is always null here, so report against the type itself.
        throw Error (ct, "Invalid complexType. Should not happen.");
}
433
// Returns the complexType to use for el. An existing inline or global
// complexType is returned as is. Otherwise a new inline complexType
// replaces the element's current type information and, where the
// element had a simple type, wraps that type as simple content.
private ComplexType ToComplexType (Element el)
{
        QName typeName = el.SchemaTypeName;
        XmlSchemaType inlineType = el.SchemaType;

        // 1. element type is complex.
        ComplexType existing = inlineType as ComplexType;
        if (existing != null)
                return existing;

        // 2. reference to global complexType.
        XmlSchemaType globalType = schemas.GlobalTypes [typeName]
                as XmlSchemaType;
        existing = globalType as ComplexType;
        if (existing != null)
                return existing;

        // From here on the element gets a brand new inline type and
        // loses its type name.
        ComplexType created = new ComplexType ();
        el.SchemaType = created;
        el.SchemaTypeName = QName.Empty;

        // 3. base type name is xs:anyType or no specification.
        // <xs:complexType />
        if (typeName == QNameAnyType ||
                (inlineType == null && typeName == QName.Empty))
                return created;

        SimpleModel model = new SimpleModel ();
        created.ContentModel = model;

        // 4. type is an inline simpleType
        //    -> simple content restriction based on that type.
        SimpleType inlineSimple = inlineType as SimpleType;
        if (inlineSimple != null) {
                SimpleRst restriction = new SimpleRst ();
                restriction.BaseType = inlineSimple;
                model.Content = restriction;
                return created;
        }

        SimpleExt extension = new SimpleExt ();
        model.Content = extension;

        // 5. type name points to primitive type, or
        // 6. type name points to global simpleType
        //    -> simple extension of that named type.
        bool isBuiltIn =
                XmlSchemaType.GetBuiltInSimpleType (typeName) != null;
        if (isBuiltIn || globalType is SimpleType) {
                extension.BaseTypeName = typeName;
                return created;
        }

        throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
}
495
496                 #endregion
497
498                 #region Element Type
499
// Adjusts el's type so that an empty element is acceptable. For an
// inline complexType, the simple/complex content model is relaxed or
// the particle's minOccurs is set to 0. For an inline simpleType, the
// type is replaced by an emptiable one. Elements with neither kind of
// inline type are left untouched.
private void InferAsEmptyElement (Element el, string ns,
        bool isNew)
{
        ComplexType ct = el.SchemaType as ComplexType;
        if (ct == null) {
                SimpleType st = el.SchemaType as SimpleType;
                if (st == null)
                        return;

                st = MakeBaseTypeAsEmptiable (st);
                string typeNs = st.QualifiedName.Namespace;
                if (typeNs == XmlSchema.Namespace ||
                        typeNs == XdtNamespace)
                        // Named type from the schema or xdt
                        // namespace: refer to it by name.
                        el.SchemaTypeName = st.QualifiedName;
                else
                        el.SchemaType = st;
                return;
        }

        SimpleModel sm = ct.ContentModel as SimpleModel;
        if (sm != null) {
                ToEmptiableSimpleContent (sm, isNew);
                return;
        }

        ComplexModel cm = ct.ContentModel as ComplexModel;
        if (cm != null) {
                ToEmptiableComplexContent (cm, isNew);
                return;
        }

        // No content model: make the particle itself optional.
        if (ct.Particle != null)
                ct.Particle.MinOccurs = 0;
}
537
// Returns a simple type that accepts the empty string in place of st.
//  - A type named in the XML Schema or xdt namespace is replaced by the
//    built-in xs:string type.
//  - A restriction loses its length/minLength facets, and its base is
//    made emptiable too: an inline base type recursively, a named base
//    by switching the base type name to xs:string.
//  - Any other content (union/list) is returned unchanged.
private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
{
        switch (st.QualifiedName.Namespace) {
        case XmlSchema.Namespace:
        case XdtNamespace:
                // If a primitive type
                return XmlSchemaType.GetBuiltInSimpleType (
                        XmlTypeCode.String);
        }

        SimpleTypeRst str = st.Content as SimpleTypeRst;
        if (str == null)
                return st; // union/list can have empty string value.

        // Collect first and remove afterwards, since the facet
        // collection is being enumerated. The list is always created,
        // so a restriction with no length facets is handled without a
        // null dereference.
        ArrayList lengthFacets = new ArrayList ();
        foreach (SchemaFacet f in str.Facets) {
                if (f is LengthFacet || f is MinLengthFacet)
                        lengthFacets.Add (f);
        }
        foreach (SchemaFacet f in lengthFacets)
                str.Facets.Remove (f);

        if (str.BaseType != null)
                // Recurse into the inline base type. Passing st again
                // here would never terminate.
                str.BaseType =
                        MakeBaseTypeAsEmptiable (str.BaseType);
        else
                // It might have a reference to an
                // external simple type, but there is
                // no assurance that any of those
                // external types allow an empty
                // string. So just set base type as
                // xs:string.
                str.BaseTypeName = QNameString;

        return st;
}
575
// Relaxes a simple content model so that empty content is valid: the
// base of its extension or restriction becomes xs:string (a restriction
// also drops its inline base type). Any other content is reported
// through Error.
private void ToEmptiableSimpleContent (
        SimpleModel sm, bool isNew)
{
        SimpleExt extension = sm.Content as SimpleExt;
        if (extension != null) {
                extension.BaseTypeName = QNameString;
                return;
        }

        SimpleRst restriction = sm.Content as SimpleRst;
        if (restriction == null)
                throw Error (sm, "Invalid simple content model was passed.");
        restriction.BaseTypeName = QNameString;
        restriction.BaseType = null;
}
591
// Relaxes a complex content model so that empty content is valid: the
// particle of its extension or restriction gets minOccurs = 0. When
// there is no particle but the base type name refers to something other
// than xs:anyType, the case is reported through Error as unsupported.
private void ToEmptiableComplexContent (
        ComplexModel cm, bool isNew)
{
        ComplexExt ce = cm.Content as ComplexExt;
        if (ce != null) {
                RelaxComplexContentParticle (ce, ce.Particle,
                        ce.BaseTypeName,
                        "Complex type content extension has a reference to an external component that is not supported.");
                return;
        }

        ComplexRst cr = cm.Content as ComplexRst;
        if (cr == null)
                throw Error (cm, "Invalid complex content model was passed.");
        // The message names the restriction; it used to repeat the
        // extension wording.
        RelaxComplexContentParticle (cr, cr.Particle,
                cr.BaseTypeName,
                "Complex type content restriction has a reference to an external component that is not supported.");
}

// Shared part of ToEmptiableComplexContent: makes the particle optional,
// or reports the given message when only an external base is present.
private void RelaxComplexContentParticle (SOMObject owner,
        Particle particle, QName baseTypeName, string message)
{
        if (particle != null)
                particle.MinOccurs = 0;
        else if (baseTypeName != null &&
                baseTypeName != QName.Empty &&
                baseTypeName != QNameAnyType)
                throw Error (owner, message);
}
618
// Reads past the start tag and dispatches on the first content node:
// an immediate end tag is treated as empty content, an element starts
// complex content inference, and text is inferred as text content
// (followed by complex content when an element comes next). A
// whitespace node is skipped by retrying. Other node types are ignored.
private void InferContent (Element el, string ns, bool isNew)
{
        source.Read ();
        source.MoveToContent ();

        XmlNodeType kind = source.NodeType;
        if (kind == XmlNodeType.EndElement) {
                InferAsEmptyElement (el, ns, isNew);
                return;
        }
        if (kind == XmlNodeType.Whitespace) {
                InferContent (el, ns, isNew); // skip and retry
                return;
        }

        bool isText = kind == XmlNodeType.Text ||
                kind == XmlNodeType.CDATA ||
                kind == XmlNodeType.SignificantWhitespace;
        if (isText) {
                InferTextContent (el, isNew);
                source.MoveToContent ();
        }

        // Reached either directly or after leading text.
        if (source.NodeType == XmlNodeType.Element)
                InferComplexContent (el, ns, isNew);
}
643
644                 private void InferComplexContent (Element el, string ns,
645                         bool isNew)
646                 {
647                         ComplexType ct = ToComplexType (el);
648                         ToComplexContentType (ct);
649
650                         int position = 0;
651                         bool consumed = false;
652
653                         do {
654                                 switch (source.NodeType) {
655                                 case XmlNodeType.Element:
656                                         Sequence s = PopulateSequence (ct);
657                                         Choice c = s.Items.Count > 0 ?
658                                                 s.Items [0] as Choice :
659                                                 null;
660                                         if (c != null)
661                                                 ProcessLax (c, ns);
662                                         else
663                                                 ProcessSequence (ct, s, ns,
664                                                         ref position,
665                                                         ref consumed,
666                                                         isNew);
667                                         source.MoveToContent ();
668                                         break;
669                                 case XmlNodeType.Text:
670                                 case XmlNodeType.CDATA:
671                                 case XmlNodeType.SignificantWhitespace:
672                                         MarkAsMixed (ct);
673                                         source.ReadString ();
674                                         source.MoveToContent ();
675                                         break;
676                                 case XmlNodeType.EndElement:
677                                         return; // finished
678                                 case XmlNodeType.None:
679                                         throw new NotImplementedException ("Internal Error: Should not happen.");
680                                 }
681                         } while (true);
682                 }
683
684                 private void InferTextContent (Element el, bool isNew)
685                 {
686                         string value = source.ReadString ();
687                         if (el.SchemaType == null) {
688                                 if (el.SchemaTypeName == QName.Empty) {
689                                         // no type information -> infer type
690                                         if (isNew)
691                                                 el.SchemaTypeName =
692                                                         InferSimpleType (
693                                                         value);
694                                         else
695                                                 el.SchemaTypeName =
696                                                         QNameString;
697                                         return;
698                                 }
699                                 switch (el.SchemaTypeName.Namespace) {
700                                 case XmlSchema.Namespace:
701                                 case XdtNamespace:
702                                         // existing primitive type
703                                         el.SchemaTypeName = InferMergedType (
704                                                 value, el.SchemaTypeName);
705                                         break;
706                                 default:
707                                         ComplexType ct = schemas.GlobalTypes [
708                                                 el.SchemaTypeName]
709                                                 as ComplexType;
710                                         // If it is complex, then just set
711                                         // mixed='true' (type cannot be set.)
712                                         // If it is simple, then we cannot
713                                         // make sure that string value is
714                                         // valid. So just set as xs:string.
715                                         if (ct != null)
716                                                 MarkAsMixed (ct);
717                                         else
718                                                 el.SchemaTypeName = QNameString;
719                                         break;
720                                 }
721                                 return;
722                         }
723                         // simpleType
724                         SimpleType st = el.SchemaType as SimpleType;
725                         if (st != null) {
726                                 // If simple, then (described above)
727                                 el.SchemaType = null;
728                                 el.SchemaTypeName = QNameString;
729                                 return;
730                         }
731
732                         // complexType
733                         ComplexType ect = el.SchemaType as ComplexType;
734
735                         SimpleModel sm = ect.ContentModel as SimpleModel;
736                         if (sm == null) {
737                                 // - ComplexContent
738                                 MarkAsMixed (ect);
739                                 return;
740                         }
741
742                         // - SimpleContent
743                         SimpleExt se = sm.Content as SimpleExt;
744                         if (se != null)
745                                 se.BaseTypeName = InferMergedType (value,
746                                         se.BaseTypeName);
747                         SimpleRst sr = sm.Content as SimpleRst;
748                         if (sr != null) {
749                                 sr.BaseTypeName = InferMergedType (value,
750                                         sr.BaseTypeName);
751                                 sr.BaseType = null;
752                         }
753                 }
754
755                 private void MarkAsMixed (ComplexType ct)
756                 {
757                         ComplexModel cm = ct.ContentModel as ComplexModel;
758                         if (cm != null)
759                                 cm.IsMixed = true;
760                         else
761                                 ct.IsMixed = true;
762                 }
763
764                 #endregion
765
766                 #region Particles
767
768                 private void ProcessLax (Choice c, string ns)
769                 {
770                         foreach (Particle p in c.Items) {
771                                 Element el = p as Element;
772                                 if (el == null)
773                                         throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here."));
774                                 if (ElementMatches (el, ns)) {
775                                         InferElement (el, ns, false);
776                                         return;
777                                 }
778                         }
779                         // append a new element particle to lax term.
780                         Element nel = new Element ();
781                         if (source.NamespaceURI == ns)
782                                 nel.Name = source.LocalName;
783                         else
784                                 nel.RefName = new QName (source.LocalName,
785                                         source.NamespaceURI);
786                         InferElement (nel, source.NamespaceURI, true);
787                         c.Items.Add (nel);
788                 }
789
790                 private bool ElementMatches (Element el, string ns)
791                 {
792                         bool matches = false;
793                         if (el.RefName != QName.Empty) {
794                                 if (el.RefName.Name == source.LocalName &&
795                                         el.RefName.Namespace ==
796                                         source.NamespaceURI)
797                                         matches = true;
798                         }
799                         else if (el.Name == source.LocalName &&
800                                 ns == source.NamespaceURI)
801                                         matches = true;
802                         return matches;
803                 }
804
805                 private void ProcessSequence (ComplexType ct, Sequence s,
806                         string ns, ref int position, ref bool consumed,
807                         bool isNew)
808                 {
809                         for (int i = 0; i < position; i++) {
810                                 Element iel = s.Items [i] as Element;
811                                 if (ElementMatches (iel, ns)) {
812                                         // Sequence element type violation
813                                         // might happen (might not, but we
814                                         // cannot backtrack here). So switch
815                                         // to sequence of choice* here.
816                                         ProcessLax (ToSequenceOfChoice (s), ns);
817                                         return;
818                                 }
819                         }
820
821                         if (s.Items.Count <= position) {
822                                 QName name = new QName (source.LocalName,
823                                         source.NamespaceURI);
824                                 Element nel = CreateElement (name);
825                                 if (laxOccurrence)
826                                         nel.MinOccurs = 0;
827                                 InferElement (nel, ns, true);
828                                 if (ns == name.Namespace)
829                                         s.Items.Add (nel);
830                                 else {
831                                         Element re = new Element ();
832                                         if (laxOccurrence)
833                                                 re.MinOccurs = 0;
834                                         re.RefName = name;
835                                         s.Items.Add (re);
836                                 }
837                                 consumed = true;
838                                 return;
839                         }
840                         Element el = s.Items [position] as Element;
841                         if (el == null)
842                                 throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));
843                         bool matches = ElementMatches (el, ns);
844                         if (matches) {
845                                 if (consumed)
846                                         el.MaxOccursString = "unbounded";
847                                 InferElement (el, source.NamespaceURI, false);
848                                 source.MoveToContent ();
849                                 switch (source.NodeType) {
850                                 case XmlNodeType.None:
851                                         if (source.NodeType ==
852                                                 XmlNodeType.Element)
853                                                 goto case XmlNodeType.Element;
854                                         else if (source.NodeType ==
855                                                 XmlNodeType.EndElement)
856                                                 goto case XmlNodeType.EndElement;
857                                         break;
858                                 case XmlNodeType.Element:
859                                         ProcessSequence (ct, s, ns, ref position,
860                                                 ref consumed, isNew);
861                                         break;
862                                 case XmlNodeType.Text:
863                                 case XmlNodeType.CDATA:
864                                 case XmlNodeType.SignificantWhitespace:
865                                         MarkAsMixed (ct);
866                                         source.ReadString ();
867                                         goto case XmlNodeType.None;
868                                 case XmlNodeType.Whitespace:
869                                         source.ReadString ();
870                                         goto case XmlNodeType.None;
871                                 case XmlNodeType.EndElement:
872                                         return;
873                                 default:
874                                         source.Read ();
875                                         break;
876                                 }
877                         }
878                         else {
879                                 if (consumed) {
880                                         position++;
881                                         consumed = false;
882                                         ProcessSequence (ct, s, ns,
883                                                 ref position, ref consumed,
884                                                 isNew);
885                                 }
886                                 else
887                                         ProcessLax (ToSequenceOfChoice (s), ns);
888                         }
889                 }
890
891                 // Note that it does not return the changed sequence.
892                 private Choice ToSequenceOfChoice (Sequence s)
893                 {
894                         Choice c = new Choice ();
895                         if (laxOccurrence)
896                                 c.MinOccurs = 0;
897                         c.MaxOccursString = "unbounded";
898                         foreach (Particle p in s.Items)
899                                 c.Items.Add (p);
900                         s.Items.Clear ();
901                         s.Items.Add (c);
902                         return c;
903                 }
904
905                 // It makes complexType not to have Simple content model.
906                 private void ToComplexContentType (ComplexType type)
907                 {
908                         SimpleModel sm = type.ContentModel as SimpleModel;
909                         if (sm == null)
910                                 return;
911
912                         SOMList atts = GetAttributes (type);
913                         foreach (SOMObject o in atts)
914                                 type.Attributes.Add (o);
915                         // FIXME: need to copy AnyAttribute.
916                         // (though not considered right now)
917                         type.ContentModel = null;
918                         type.IsMixed = true;
919                 }
920
921                 private Sequence PopulateSequence (ComplexType ct)
922                 {
923                         Particle p = PopulateParticle (ct);
924                         Sequence s = p as Sequence;
925                         if (s != null)
926                                 return s;
927                         else
928                                 throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", p));
929                 }
930
931                 private Sequence CreateSequence ()
932                 {
933                         Sequence s = new Sequence ();
934                         if (laxOccurrence)
935                                 s.MinOccurs = 0;
936                         return s;
937                 }
938
939                 private Particle PopulateParticle (ComplexType ct)
940                 {
941                         if (ct.ContentModel == null) {
942                                 if (ct.Particle == null)
943                                         ct.Particle = CreateSequence ();
944                                 return ct.Particle;
945                         }
946                         ComplexModel cm = ct.ContentModel as ComplexModel;
947                         if (cm != null) {
948                                 ComplexExt  ce = cm.Content as ComplexExt;
949                                 if (ce != null) {
950                                         if (ce.Particle == null)
951                                                 ce.Particle = CreateSequence ();
952                                         return ce.Particle;
953                                 }
954                                 ComplexRst cr = cm.Content as ComplexRst;
955                                 if (cr != null) {
956                                         if (cr.Particle == null)
957                                                 cr.Particle = CreateSequence ();
958                                         return cr.Particle;
959                                 }
960                         }
961                         throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
962                 }
963
964                 #endregion
965
966                 #region String Value
967
968                 // primitive type inference.
969                 // When running lax type inference, it just returns xs:string.
970                 private QName InferSimpleType (string value)
971                 {
972                         if (laxTypeInference)
973                                 return QNameString;
974
975                         switch (value) {
976                         // 0 and 1 are not infered as byte unlike MS.XSDInfer
977 //                      case "0":
978 //                      case "1":
979                         case "true":
980                         case "false":
981                                 return QNameBoolean;
982                         }
983                         try {
984                                 long dec = XmlConvert.ToInt64 (value);
985                                 if (byte.MinValue <= dec && dec <= byte.MaxValue)
986                                         return QNameUByte;
987                                 if (sbyte.MinValue <= dec && dec <= sbyte.MaxValue)
988                                         return QNameByte;
989                                 if (ushort.MinValue <= dec && dec <= ushort.MaxValue)
990                                         return QNameUShort;
991                                 if (short.MinValue <= dec && dec <= short.MaxValue)
992                                         return QNameShort;
993                                 if (uint.MinValue <= dec && dec <= uint.MaxValue)
994                                         return QNameUInt;
995                                 if (int.MinValue <= dec && dec <= int.MaxValue)
996                                         return QNameInt;
997                                 return QNameLong;
998                         } catch (Exception) {
999                         }
1000                         try {
1001                                 XmlConvert.ToUInt64 (value);
1002                                 return QNameULong;
1003                         } catch (Exception) {
1004                         }
1005                         try {
1006                                 XmlConvert.ToDecimal (value);
1007                                 return QNameDecimal;
1008                         } catch (Exception) {
1009                         }
1010                         try {
1011                                 double dbl = XmlConvert.ToDouble (value);
1012                                 if (float.MinValue <= dbl &&
1013                                         dbl <= float.MaxValue)
1014                                         return QNameFloat;
1015                                 else
1016                                         return QNameDouble;
1017                         } catch (Exception) {
1018                         }
1019                         try {
1020                                 // FIXME: also try DateTimeSerializationMode
1021                                 // and gYearMonth
1022                                 XmlConvert.ToDateTime (value);
1023                                 return QNameDateTime;
1024                         } catch (Exception) {
1025                         }
1026                         try {
1027                                 XmlConvert.ToTimeSpan (value);
1028                                 return QNameDuration;
1029                         } catch (Exception) {
1030                         }
1031
1032                         // xs:string
1033                         return QNameString;
1034                 }
1035
1036                 #endregion
1037
1038                 #region Utilities
1039
1040                 private Element GetGlobalElement (QName name)
1041                 {
1042                         Element el = newElements [name] as Element;
1043                         if (el == null)
1044                                 el = schemas.GlobalElements [name] as Element;
1045                         return el;
1046                 }
1047
1048                 private Attr GetGlobalAttribute (QName name)
1049                 {
1050                         Attr a = newElements [name] as Attr;
1051                         if (a == null)
1052                                 a = schemas.GlobalAttributes [name] as Attr;
1053                         return a;
1054                 }
1055
1056                 private Element CreateElement (QName name)
1057                 {
1058                         Element el = new Element ();
1059                         el.Name = name.Name;
1060                         return el;
1061                 }
1062
1063                 private Element CreateGlobalElement (QName name)
1064                 {
1065                         Element el = CreateElement (name);
1066                         XmlSchema schema = PopulateSchema (name.Namespace);
1067                         schema.Items.Add (el);
1068                         newElements.Add (name, el);
1069                         return el;
1070                 }
1071
1072                 private Attr CreateGlobalAttribute (QName name)
1073                 {
1074                         Attr attr = new Attr ();
1075                         XmlSchema schema = PopulateSchema (name.Namespace);
1076                         attr.Name = name.Name;
1077                         schema.Items.Add (attr);
1078                         newAttributes.Add (name, attr);
1079                         return attr;
1080                 }
1081
1082                 // Note that the return value never assures that all the
1083                 // components in the parameter ns must reside in it.
1084                 private XmlSchema PopulateSchema (string ns)
1085                 {
1086                         ICollection list = schemas.Schemas (ns);
1087                         if (list.Count > 0) {
1088                                 IEnumerator e = list.GetEnumerator ();
1089                                 e.MoveNext ();
1090                                 return (XmlSchema) e.Current;
1091                         }
1092                         XmlSchema s = new XmlSchema ();
1093                         if (ns != null && ns.Length > 0)
1094                                 s.TargetNamespace = ns;
1095                         s.ElementFormDefault = Form.Qualified;
1096                         s.AttributeFormDefault = Form.Unqualified;
1097                         schemas.Add (s);
1098                         return s;
1099                 }
1100
1101                 private XmlSchemaInferenceException Error (
1102                         XmlSchemaObject sourceObj,
1103                         string message)
1104                 {
1105                         // This override is mainly for schema component error.
1106                         return Error (sourceObj, false, message);
1107                 }
1108
1109                 private XmlSchemaInferenceException Error (
1110                         XmlSchemaObject sourceObj,
1111                         bool useReader,
1112                         string message)
1113                 {
1114                         string msg = String.Concat (
1115                                 message,
1116                                 sourceObj != null ?
1117                                         String.Format (". Related schema component is {0}",
1118                                                 sourceObj.SourceUri,
1119                                                 sourceObj.LineNumber,
1120                                                 sourceObj.LinePosition) :
1121                                         String.Empty,
1122                                 useReader ?
1123                                         String.Format (". {0}", source.BaseURI) :
1124                                         String.Empty);
1125
1126                         IXmlLineInfo li = source as IXmlLineInfo;
1127                         if (useReader && li != null)
1128                                 return new XmlSchemaInferenceException (
1129                                         msg, null, li.LineNumber,
1130                                         li.LinePosition);
1131                         else
1132                                 return new XmlSchemaInferenceException (msg);
1133                 }
1134
1135                 #endregion
1136         }
1137 }
1138
1139 #endif