2005-01-31 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mcs / class / System.XML / System.Xml.Schema / XmlSchemaInference.cs
1 //
2 // XmlSchemaInference.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C)2004 Novell Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 #if NET_2_0
32
33 using System;
34 using System.Collections;
35 using System.Xml;
36 using System.Xml.Schema;
37
38 using QName = System.Xml.XmlQualifiedName;
39 using Form = System.Xml.Schema.XmlSchemaForm;
40 using Use = System.Xml.Schema.XmlSchemaUse;
41 using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
42 using SOMObject = System.Xml.Schema.XmlSchemaObject;
43 using Element = System.Xml.Schema.XmlSchemaElement;
44 using Attr = System.Xml.Schema.XmlSchemaAttribute;
45 using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
46 using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
47 using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
48 using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
49 using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
50 using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
51 using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
52 using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
53 using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
54 using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
55 using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
56 using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
57 using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
58 using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
59 using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
60 using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
61 using Particle = System.Xml.Schema.XmlSchemaParticle;
62 using Sequence = System.Xml.Schema.XmlSchemaSequence;
63 using Choice = System.Xml.Schema.XmlSchemaChoice;
64
65
66 namespace System.Xml.Schema
67 {
68         [MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
69         public class XmlSchemaInference
70         {
71                 public enum InferenceOption {
72                         Restricted,
73                         Relaxed,
74                 }
75
76                 InferenceOption occurrence = InferenceOption.Restricted;
77                 InferenceOption typeInference = InferenceOption.Restricted;
78
79                 public XmlSchemaInference ()
80                 {
81                 }
82
83                 public InferenceOption Occurrence {
84                         get { return occurrence; }
85                         set { occurrence = value; }
86                 }
87
88                 public InferenceOption TypeInference {
89                         get { return TypeInference; }
90                         set { typeInference = value; }
91                 }
92
93                 public XmlSchemaSet InferSchema (XmlReader xmlReader)
94                 {
95                         return InferSchema (xmlReader, new XmlSchemaSet ());
96                 }
97
98                 public XmlSchemaSet InferSchema (XmlReader xmlReader,
99                         XmlSchemaSet schemas)
100                 {
101                         return XsdInference.Process (xmlReader, schemas,
102                                 occurrence == InferenceOption.Relaxed,
103                                 typeInference == InferenceOption.Relaxed);
104                 }
105         }
106
107         class XsdInference
108         {
109                 public static XmlSchemaSet Process (XmlReader xmlReader, 
110                         XmlSchemaSet schemas,
111                         bool laxOccurence,
112                         bool laxTypeInference)
113                 {
114                         XsdInference impl = new XsdInference (xmlReader,
115                                 schemas, laxOccurence, laxTypeInference);
116                         impl.Run ();
117                         return impl.schemas;
118                 }
119
120                 public const string NamespaceXml =
121                         "http://www.w3.org/XML/1998/namespace";
122
123                 public const string NamespaceXmlns =
124                         "http://www.w3.org/2000/xmlns/";
125
126                 public const string XdtNamespace =
127                         "http://www.w3.org/2003/11/xpath-datatypes";
128
129                 static readonly QName QNameString = new QName (
130                         "string", XmlSchema.Namespace);
131
132                 static readonly QName QNameBoolean = new QName (
133                         "boolean", XmlSchema.Namespace);
134
135                 static readonly QName QNameAnyType = new QName (
136                         "anyType", XmlSchema.Namespace);
137
138                 static readonly QName QNameByte = new QName (
139                         "byte", XmlSchema.Namespace);
140
141                 static readonly QName QNameUByte = new QName (
142                         "unsignedByte", XmlSchema.Namespace);
143
144                 static readonly QName QNameShort = new QName (
145                         "short", XmlSchema.Namespace);
146
147                 static readonly QName QNameUShort = new QName (
148                         "unsignedShort", XmlSchema.Namespace);
149
150                 static readonly QName QNameInt = new QName (
151                         "int", XmlSchema.Namespace);
152
153                 static readonly QName QNameUInt = new QName (
154                         "unsignedInt", XmlSchema.Namespace);
155
156                 static readonly QName QNameLong = new QName (
157                         "long", XmlSchema.Namespace);
158
159                 static readonly QName QNameULong = new QName (
160                         "unsignedLong", XmlSchema.Namespace);
161
162                 static readonly QName QNameDecimal = new QName (
163                         "decimal", XmlSchema.Namespace);
164
165                 static readonly QName QNameUDecimal = new QName (
166                         "unsignedDecimal", XmlSchema.Namespace);
167
168                 static readonly QName QNameDouble = new QName (
169                         "double", XmlSchema.Namespace);
170
171                 static readonly QName QNameFloat = new QName (
172                         "float", XmlSchema.Namespace);
173
174                 static readonly QName QNameDateTime = new QName (
175                         "dateTime", XmlSchema.Namespace);
176
177                 static readonly QName QNameDuration = new QName (
178                         "duration", XmlSchema.Namespace);
179
180                 XmlReader source;
181                 XmlSchemaSet schemas;
182                 bool laxOccurence;
183                 bool laxTypeInference;
184
185                 Hashtable newElements = new Hashtable ();
186                 Hashtable newAttributes = new Hashtable ();
187
188                 private XsdInference (XmlReader xmlReader, 
189                         XmlSchemaSet schemas, 
190                         bool laxOccurence, 
191                         bool laxTypeInference)
192                 {
193                         this.source = xmlReader;
194                         this.schemas = schemas;
195                         this.laxOccurence = laxOccurence;
196                         this.laxTypeInference = laxTypeInference;
197                 }
198
199                 private void Run ()
200                 {
201                         // XmlSchemaSet need to be compiled.
202                         schemas.Compile ();
203
204                         // move to top-level element
205                         source.MoveToContent ();
206                         int depth = source.Depth;
207                         if (source.NodeType != XmlNodeType.Element)
208                                 throw new ArgumentException ("Argument XmlReader content is expected to be an element.");
209
210                         QName qname = new QName (source.LocalName,
211                                 source.NamespaceURI);
212                         Element el = GetGlobalElement (qname);
213                         if (el == null) {
214                                 el = CreateGlobalElement (qname);
215                                 InferElement (el, qname.Namespace, true);
216                         }
217                         else
218                                 InferElement (el, qname.Namespace, false);
219
220                         // finally compile again.
221                         schemas.Compile ();
222                 }
223
224                 private void IncludeXmlAttributes ()
225                 {
226                         if (schemas.Schemas (NamespaceXml).Count == 0)
227                                 // FIXME: do it from resources.
228                                 schemas.Add (NamespaceXml, 
229                                         "http://www.w3.org/2001/xml.xsd");
230                 }
231
232                 private void InferElement (Element el, string ns, bool isNew)
233                 {
234                         // Quick check for reference to another definition
235                         // (i.e. element ref='...' that should be redirected)
236                         if (el.RefName != QName.Empty) {
237                                 Element body = GetGlobalElement (el.RefName);
238                                 if (body == null) {
239                                         body = CreateElement (el.RefName);
240                                         InferElement (body, ns, true);
241                                 }
242                                 else
243                                         InferElement (body, ns, isNew);
244                                 return;
245                         }
246
247                         // Attributes
248                         if (source.MoveToFirstAttribute ()) {
249                                 InferAttributes (el, ns, isNew);
250                                 source.MoveToElement ();
251                         }
252
253                         // Content
254                         if (source.IsEmptyElement) {
255                                 InferAsEmptyElement (el, ns, isNew);
256                                 source.Read ();
257                                 source.MoveToContent ();
258                         }
259                         else {
260                                 InferContent (el, ns, isNew);
261                                 source.ReadEndElement ();
262                         }
263                         if (el.SchemaType == null &&
264                                 el.SchemaTypeName == QName.Empty)
265                                 el.SchemaTypeName = QNameString;
266                 }
267
268                 #region Attribute Inference
269
270                 private Hashtable CollectAttrTable (SOMList attList)
271                 {
272                         // get attribute definition table.
273                         Hashtable table = new Hashtable ();
274                         foreach (XmlSchemaObject obj in attList) {
275                                 Attr attr = obj as Attr;
276                                 if (attr == null)
277                                         throw Error (obj, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", obj.GetType ()));
278                                 if (attr.RefName != QName.Empty)
279                                         table.Add (attr.RefName, attr);
280                                 else
281                                         table.Add (new QName (attr.Name, ""),
282                                                 attr);
283                         }
284                         return table;
285                 }
286
287                 private void InferAttributes (Element el, string ns, bool isNew)
288                 {
289                         // Now this element is going to have complexType.
290                         // It currently not, then we have to replace it.
291                         ComplexType ct = null;
292                         SOMList attList = null;
293                         Hashtable table = null;
294
295                         do {
296                                 switch (source.NamespaceURI) {
297                                 case NamespaceXml:
298                                         if (schemas.Schemas (
299                                                 NamespaceXml) .Count == 0)
300                                                 IncludeXmlAttributes ();
301                                         break;
302                                 case XmlSchema.InstanceNamespace:
303                                         if (source.LocalName == "nil")
304                                                 el.IsNillable = true;
305                                         // all other xsi:* atts are ignored
306                                         continue;
307                                 case NamespaceXmlns:
308                                         continue;
309                                 }
310                                 if (ct == null) {
311                                         ct = ToComplexType (el);
312                                         attList = GetAttributes (ct);
313                                         table = CollectAttrTable (attList);
314                                 }
315                                 QName attrName = new QName (
316                                         source.LocalName, source.NamespaceURI);
317                                 Attr attr = table [attrName] as Attr;
318                                 if (attr == null) {
319                                         attList.Add (InferNewAttribute (
320                                                 attrName, isNew));
321                                 } else {
322                                         table.Remove (attrName);
323                                         if (attr.RefName != null &&
324                                                 attr.RefName != QName.Empty)
325                                                 continue; // just a reference
326                                         InferMergedAttribute (attr);
327                                 }
328                         } while (source.MoveToNextAttribute ());
329
330                         // mark all attr definitions that did not appear
331                         // as optional.
332                         if (table != null)
333                                 foreach (Attr attr in table.Values)
334                                         attr.Use = Use.Optional;
335                 }
336
337                 private XmlSchemaAttribute InferNewAttribute (
338                         QName attrName, bool isNewTypeDefinition)
339                 {
340                         Attr attr = null;
341                         bool mergedRequired = false;
342                         if (attrName.Namespace.Length > 0) {
343                                 // global attribute; might be already defined.
344                                 attr = GetGlobalAttribute (attrName) as Attr;
345                                 if (attr == null) {
346                                         attr = CreateGlobalAttribute (attrName);
347                                         attr.SchemaTypeName =
348                                                 InferSimpleType (source.Value);
349                                 } else {
350                                         InferMergedAttribute (attr);
351                                         mergedRequired =
352                                                 attr.Use == Use.Required;
353                                 }
354                                 attr = new Attr ();
355                                 attr.RefName = attrName;
356                         } else {
357                                 // local attribute
358                                 attr = new Attr ();
359                                 attr.Name = attrName.Name;
360                                 attr.SchemaTypeName =
361                                         InferSimpleType (source.Value);
362                         }
363                         if (!laxOccurence &&
364                                 (isNewTypeDefinition || mergedRequired))
365                                 attr.Use = Use.Required;
366                         else
367                                 attr.Use = Use.Optional;
368
369                         return attr;
370                 }
371
372                 // validate string value agains attr and 
373                 // if invalid, then relax the type.
374                 private void InferMergedAttribute (Attr attr)
375                 {
376                         attr.SchemaTypeName = InferMergedType (source.Value,
377                                 attr.SchemaTypeName);
378                         attr.SchemaType = null;
379                 }
380
381                 private QName InferMergedType (string value, QName typeName)
382                 {
383                         // examine value against specified type and
384                         // if unacceptable, then return a relaxed type.
385
386                         SimpleType st = XmlSchemaType.GetBuiltInSimpleType (
387                                 typeName);
388                         if (st == null) // non-primitive type => see above.
389                                 return QNameString;
390                         do {
391                                 try {
392                                         st.Datatype.ParseValue (value,
393                                                 source.NameTable,
394                                                 source as IXmlNamespaceResolver);
395                                         return typeName;
396                                 } catch {
397                                         st = st.BaseXmlSchemaType as XmlSchemaSimpleType;
398                                         typeName = st != null ? st.QualifiedName : QName.Empty;
399                                 }
400                         } while (typeName != QName.Empty);
401                         return QNameString;
402                 }
403
404                 private SOMList GetAttributes (ComplexType ct)
405                 {
406                         if (ct.ContentModel == null)
407                                 return ct.Attributes;
408
409                         SimpleModel sc = ct.ContentModel as SimpleModel;
410                         if (sc != null) {
411                                 SimpleExt sce = sc.Content as SimpleExt;
412                                 if (sce != null)
413                                         return sce.Attributes;
414                                 SimpleRst scr = sc.Content as SimpleRst;
415                                 if (scr != null)
416                                         return scr.Attributes;
417                                 else
418                                         throw Error (sc, "Invalid simple content model.");
419                         }
420                         ComplexModel cc = ct.ContentModel as ComplexModel;
421                         if (cc != null) {
422                                 ComplexExt cce = cc.Content as ComplexExt;
423                                 if (cce != null)
424                                         return cce.Attributes;
425                                 ComplexRst ccr = cc.Content as ComplexRst;
426                                 if (ccr != null)
427                                         return ccr.Attributes;
428                                 else
429                                         throw Error (cc, "Invalid simple content model.");
430                         }
431                         throw Error (cc, "Invalid complexType. Should not happen.");
432                 }
433
434                 private ComplexType ToComplexType (Element el)
435                 {
436                         QName name = el.SchemaTypeName;
437                         XmlSchemaType type = el.SchemaType;
438
439                         // 1. element type is complex.
440                         ComplexType ct = type as ComplexType;
441                         if (ct != null)
442                                 return ct;
443
444                         // 2. reference to global complexType.
445                         XmlSchemaType globalType = schemas.GlobalTypes [name]
446                                 as XmlSchemaType;
447                         ct = globalType as ComplexType;
448                         if (ct != null)
449                                 return ct;
450
451                         ct = new ComplexType ();
452                         el.SchemaType = ct;
453                         el.SchemaTypeName = QName.Empty;
454
455                         // 3. base type name is xs:anyType or no specification.
456                         // <xs:complexType />
457                         if (name == QNameAnyType)
458                                 return ct;
459                         else if (type == null && name == QName.Empty)
460                                 return ct;
461
462                         SimpleModel sc = new SimpleModel ();
463                         ct.ContentModel = sc;
464
465                         // 4. type is simpleType
466                         //    -> extension of existing simple type.
467                         SimpleType st = type as SimpleType;
468                         if (st != null) {
469                                 SimpleRst scr = new SimpleRst ();
470                                 scr.BaseType = st;
471                                 sc.Content = scr;
472                                 return ct;
473                         }
474
475                         SimpleExt sce = new SimpleExt ();
476                         sc.Content = sce;
477
478                         // 5. type name points to primitive type
479                         //    -> simple extension of a primitive type
480                         st = XmlSchemaType.GetBuiltInSimpleType (name);
481                         if (st != null) {
482                                 sce.BaseTypeName = name;
483                                 return ct;
484                         }
485
486                         // 6. type name points to global simpleType.
487                         st = globalType as SimpleType;
488                         if (st != null) {
489                                 sce.BaseTypeName = name;
490                                 return ct;
491                         }
492
493                         throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
494                 }
495
496                 #endregion
497
498                 #region Element Type
499
500                 private void InferAsEmptyElement (Element el, string ns,
501                         bool isNew)
502                 {
503                         ComplexType ct = el.SchemaType as ComplexType;
504                         if (ct != null) {
505                                 SimpleModel sm =
506                                         ct.ContentModel as SimpleModel;
507                                 if (sm != null) {
508                                         ToEmptiableSimpleContent (sm, isNew);
509                                         return;
510                                 }
511
512                                 ComplexModel cm = ct.ContentModel
513                                         as ComplexModel;
514                                 if (cm != null) {
515                                         ToEmptiableComplexContent (cm, isNew);
516                                         return;
517                                 }
518
519                                 if (ct.Particle != null)
520                                         ct.Particle.MinOccurs = 0;
521                                 return;
522                         }
523                         SimpleType st = el.SchemaType as SimpleType;
524                         if (st != null) {
525                                 st = MakeBaseTypeAsEmptiable (st);
526                                 switch (st.QualifiedName.Namespace) {
527                                 case XmlSchema.Namespace:
528                                 case XdtNamespace:
529                                         el.SchemaTypeName = st.QualifiedName;
530                                         break;
531                                 default:
532                                         el.SchemaType =st;
533                                         break;
534                                 }
535                         }
536                 }
537
538                 private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
539                 {
540                         switch (st.QualifiedName.Namespace) {
541                         case XmlSchema.Namespace:
542                         case XdtNamespace:
543                                 // If a primitive type
544                                 return XmlSchemaType.GetBuiltInSimpleType (
545                                         XmlTypeCode.String);
546                         }
547                         SimpleTypeRst str = st.Content as SimpleTypeRst;
548                         if (str != null) {
549                                 ArrayList al = null;
550                                 foreach (SchemaFacet f in str.Facets) {
551                                         if (f is LengthFacet ||
552                                                 f is MinLengthFacet) {
553                                                 if (al == null)
554                                                         al = new ArrayList ();
555                                                 al.Add (f);
556                                         }
557                                 }
558                                 foreach (SchemaFacet f in al)
559                                         str.Facets.Remove (f);
560                                 if (str.BaseType != null)
561                                         str.BaseType =
562                                                 MakeBaseTypeAsEmptiable (st);
563                                 else
564                                         // It might have a reference to an
565                                         // external simple type, but there is
566                                         // no assurance that any of those
567                                         // external types allow an empty
568                                         // string. So just set base type as
569                                         // xs:string.
570                                         str.BaseTypeName = QNameString;
571                         } // union/list can have empty string value.
572
573                         return st;
574                 }
575
576                 private void ToEmptiableSimpleContent (
577                         SimpleModel sm, bool isNew)
578                 {
579                         SimpleExt se = sm.Content as SimpleExt;
580                         if (se != null)
581                                 se.BaseTypeName = QNameString;
582                         else {
583                                 SimpleRst sr = sm.Content
584                                         as SimpleRst;
585                                 if (sr == null)
586                                         throw Error (sm, "Invalid simple content model was passed.");
587                                 sr.BaseTypeName = QNameString;
588                                 sr.BaseType = null;
589                         }
590                 }
591
592                 private void ToEmptiableComplexContent (
593                         ComplexModel cm, bool isNew)
594                 {
595                         ComplexExt ce = cm.Content
596                                 as ComplexExt;
597                         if (ce != null) {
598                                 if (ce.Particle != null)
599                                         ce.Particle.MinOccurs = 0;
600                                 else if (ce.BaseTypeName != null &&
601                                         ce.BaseTypeName != QName.Empty &&
602                                         ce.BaseTypeName != QNameAnyType)
603                                         throw Error (ce, "Complex type content extension has a reference to an external component that is not supported.");
604                         }
605                         else {
606                                 ComplexRst cr = cm.Content
607                                         as ComplexRst;
608                                 if (cr == null)
609                                         throw Error (cm, "Invalid complex content model was passed.");
610                                 if (cr.Particle != null)
611                                         cr.Particle.MinOccurs = 0;
612                                 else if (cr.BaseTypeName != null &&
613                                         cr.BaseTypeName != QName.Empty &&
614                                         cr.BaseTypeName != QNameAnyType)
615                                         throw Error (cr, "Complex type content extension has a reference to an external component that is not supported.");
616                         }
617                 }
618
619                 private void InferContent (Element el, string ns, bool isNew)
620                 {
621                         source.Read ();
622                         source.MoveToContent ();
623                         switch (source.NodeType) {
624                         case XmlNodeType.EndElement:
625                                 InferAsEmptyElement (el, ns, isNew);
626                                 break;
627                         case XmlNodeType.Element:
628                                 InferComplexContent (el, ns, isNew);
629                                 break;
630                         case XmlNodeType.Text:
631                         case XmlNodeType.CDATA:
632                         case XmlNodeType.SignificantWhitespace:
633                                 InferTextContent (el, isNew);
634                                 source.MoveToContent ();
635                                 if (source.NodeType == XmlNodeType.Element)
636                                         goto case XmlNodeType.Element;
637                                 break;
638                         case XmlNodeType.Whitespace:
639                                 InferContent (el, ns, isNew); // skip and retry
640                                 break;
641                         }
642                 }
643
644                 private void InferComplexContent (Element el, string ns,
645                         bool isNew)
646                 {
647                         ComplexType ct = ToComplexType (el);
648                         ToComplexContentType (ct);
649
650                         int position = 0;
651                         bool consumed = false;
652
653                         do {
654                                 switch (source.NodeType) {
655                                 case XmlNodeType.Element:
656                                         Sequence s = PopulateSequence (ct);
657                                         Choice c = s.Items.Count > 0 ?
658                                                 s.Items [0] as Choice :
659                                                 null;
660                                         if (c != null)
661                                                 ProcessLax (c, ns);
662                                         else
663                                                 ProcessSequence (ct, s, ns,
664                                                         ref position,
665                                                         ref consumed,
666                                                         isNew);
667                                         source.MoveToContent ();
668                                         break;
669                                 case XmlNodeType.Text:
670                                 case XmlNodeType.CDATA:
671                                 case XmlNodeType.SignificantWhitespace:
672                                         MarkAsMixed (ct);
673                                         source.ReadString ();
674                                         source.MoveToContent ();
675                                         break;
676                                 case XmlNodeType.EndElement:
677                                         return; // finished
678                                 case XmlNodeType.None:
679                                         throw new NotImplementedException ("Internal Error: Should not happen.");
680                                 }
681                         } while (true);
682                 }
683
684                 private void InferTextContent (Element el, bool isNew)
685                 {
686                         string value = source.ReadString ();
687                         if (el.SchemaType == null) {
688                                 if (el.SchemaTypeName == QName.Empty) {
689                                         // no type information -> infer type
690                                         if (isNew)
691                                                 el.SchemaTypeName =
692                                                         InferSimpleType (
693                                                         value);
694                                         else
695                                                 el.SchemaTypeName =
696                                                         QNameString;
697                                         return;
698                                 }
699                                 switch (el.SchemaTypeName.Namespace) {
700                                 case XmlSchema.Namespace:
701                                 case XdtNamespace:
702                                         // existing primitive type
703                                         el.SchemaTypeName = InferMergedType (
704                                                 value, el.SchemaTypeName);
705                                         break;
706                                 default:
707                                         ComplexType ct = schemas.GlobalTypes [
708                                                 el.SchemaTypeName]
709                                                 as ComplexType;
710                                         // If it is complex, then just set
711                                         // mixed='true' (type cannot be set.)
712                                         // If it is simple, then we cannot
713                                         // make sure that string value is
714                                         // valid. So just set as xs:string.
715                                         if (ct != null)
716                                                 MarkAsMixed (ct);
717                                         else
718                                                 el.SchemaTypeName = QNameString;
719                                         break;
720                                 }
721                                 return;
722                         }
723                         // simpleType
724                         SimpleType st = el.SchemaType as SimpleType;
725                         if (st != null) {
726                                 // If simple, then (described above)
727                                 el.SchemaType = null;
728                                 el.SchemaTypeName = QNameString;
729                                 return;
730                         }
731
732                         // complexType
733                         ComplexType ect = el.SchemaType as ComplexType;
734
735                         SimpleModel sm = ect.ContentModel as SimpleModel;
736                         if (sm == null) {
737                                 // - ComplexContent
738                                 MarkAsMixed (ect);
739                                 return;
740                         }
741
742                         // - SimpleContent
743                         SimpleExt se = sm.Content as SimpleExt;
744                         if (se != null)
745                                 se.BaseTypeName = InferMergedType (value,
746                                         se.BaseTypeName);
747                         SimpleRst sr = sm.Content as SimpleRst;
748                         if (sr != null) {
749                                 sr.BaseTypeName = InferMergedType (value,
750                                         sr.BaseTypeName);
751                                 sr.BaseType = null;
752                         }
753                 }
754
755                 private void MarkAsMixed (ComplexType ct)
756                 {
757                         ComplexModel cm = ct.ContentModel as ComplexModel;
758                         if (cm != null)
759                                 cm.IsMixed = true;
760                         else
761                                 ct.IsMixed = true;
762                 }
763
764                 #endregion
765
766                 #region Particles
767
768                 private void ProcessLax (Choice c, string ns)
769                 {
770                         foreach (Particle p in c.Items) {
771                                 Element el = p as Element;
772                                 if (el == null)
773                                         throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here."));
774                                 if (ElementMatches (el, ns)) {
775                                         InferElement (el, ns, false);
776                                         return;
777                                 }
778                         }
779                         // append a new element particle to lax term.
780                         Element nel = new Element ();
781                         if (source.NamespaceURI == ns)
782                                 nel.Name = source.LocalName;
783                         else
784                                 nel.RefName = new QName (source.LocalName,
785                                         source.NamespaceURI);
786                         InferElement (nel, source.NamespaceURI, true);
787                         c.Items.Add (nel);
788                 }
789
790                 private bool ElementMatches (Element el, string ns)
791                 {
792                         bool matches = false;
793                         if (el.RefName != QName.Empty) {
794                                 if (el.RefName.Name == source.LocalName &&
795                                         el.RefName.Namespace ==
796                                         source.NamespaceURI)
797                                         matches = true;
798                         }
799                         else if (el.Name == source.LocalName &&
800                                 ns == source.NamespaceURI)
801                                         matches = true;
802                         return matches;
803                 }
804
805                 private void ProcessSequence (ComplexType ct, Sequence s,
806                         string ns, ref int position, ref bool consumed,
807                         bool isNew)
808                 {
809                         for (int i = 0; i < position; i++) {
810                                 Element iel = s.Items [i] as Element;
811                                 if (ElementMatches (iel, ns)) {
812                                         // Sequence element type violation
813                                         // might happen (might not, but we
814                                         // cannot backtrack here). So switch
815                                         // to sequence of choice* here.
816                                         ProcessLax (ToSequenceOfChoice (s), ns);
817                                         return;
818                                 }
819                         }
820
821                         if (s.Items.Count <= position) {
822                                 QName name = new QName (source.LocalName,
823                                         source.NamespaceURI);
824                                 Element nel = CreateElement (name);
825                                 InferElement (nel, ns, true);
826                                 if (ns == name.Namespace)
827                                         s.Items.Add (nel);
828                                 else {
829                                         Element re = new Element ();
830                                         re.RefName = name;
831                                         s.Items.Add (re);
832                                 }
833                                 consumed = true;
834                                 return;
835                         }
836                         Element el = s.Items [position] as Element;
837                         if (el == null)
838                                 throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));
839                         bool matches = ElementMatches (el, ns);
840                         if (matches) {
841                                 if (consumed)
842                                         el.MaxOccursString = "unbounded";
843                                 InferElement (el, source.NamespaceURI, false);
844                                 source.MoveToContent ();
845                                 switch (source.NodeType) {
846                                 case XmlNodeType.None:
847                                         if (source.NodeType ==
848                                                 XmlNodeType.Element)
849                                                 goto case XmlNodeType.Element;
850                                         else if (source.NodeType ==
851                                                 XmlNodeType.EndElement)
852                                                 goto case XmlNodeType.EndElement;
853                                         break;
854                                 case XmlNodeType.Element:
855                                         ProcessSequence (ct, s, ns, ref position,
856                                                 ref consumed, isNew);
857                                         break;
858                                 case XmlNodeType.Text:
859                                 case XmlNodeType.CDATA:
860                                 case XmlNodeType.SignificantWhitespace:
861                                         MarkAsMixed (ct);
862                                         source.ReadString ();
863                                         goto case XmlNodeType.None;
864                                 case XmlNodeType.Whitespace:
865                                         source.ReadString ();
866                                         goto case XmlNodeType.None;
867                                 case XmlNodeType.EndElement:
868                                         return;
869                                 default:
870                                         source.Read ();
871                                         break;
872                                 }
873                         }
874                         else {
875                                 if (consumed) {
876                                         position++;
877                                         consumed = false;
878                                         ProcessSequence (ct, s, ns,
879                                                 ref position, ref consumed,
880                                                 isNew);
881                                 }
882                                 else
883                                         ProcessLax (ToSequenceOfChoice (s), ns);
884                         }
885                 }
886
887                 // Note that it does not return the changed sequence.
888                 private Choice ToSequenceOfChoice (Sequence s)
889                 {
890                         Choice c = new Choice ();
891                         if (laxOccurence)
892                                 c.MinOccurs = 0;
893                         c.MaxOccursString = "unbounded";
894                         foreach (Particle p in s.Items)
895                                 c.Items.Add (p);
896                         s.Items.Clear ();
897                         s.Items.Add (c);
898                         return c;
899                 }
900
901                 // It makes complexType not to have Simple content model.
902                 private void ToComplexContentType (ComplexType type)
903                 {
904                         SimpleModel sm = type.ContentModel as SimpleModel;
905                         if (sm == null)
906                                 return;
907
908                         SOMList atts = GetAttributes (type);
909                         foreach (SOMObject o in atts)
910                                 type.Attributes.Add (o);
911                         // FIXME: need to copy AnyAttribute.
912                         // (though not considered right now)
913                         type.ContentModel = null;
914                         type.IsMixed = true;
915                 }
916
917                 private Sequence PopulateSequence (ComplexType ct)
918                 {
919                         Particle p = PopulateParticle (ct);
920                         Sequence s = p as Sequence;
921                         if (s != null)
922                                 return s;
923                         else
924                                 throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", p));
925                 }
926
927                 private Sequence CreateSequence ()
928                 {
929                         Sequence s = new Sequence ();
930                         if (laxOccurence)
931                                 s.MinOccurs = 0;
932                         return s;
933                 }
934
935                 private Particle PopulateParticle (ComplexType ct)
936                 {
937                         if (ct.ContentModel == null) {
938                                 if (ct.Particle == null)
939                                         ct.Particle = CreateSequence ();
940                                 return ct.Particle;
941                         }
942                         ComplexModel cm = ct.ContentModel as ComplexModel;
943                         if (cm != null) {
944                                 ComplexExt  ce = cm.Content as ComplexExt;
945                                 if (ce != null) {
946                                         if (ce.Particle == null)
947                                                 ce.Particle = CreateSequence ();
948                                         return ce.Particle;
949                                 }
950                                 ComplexRst cr = cm.Content as ComplexRst;
951                                 if (cr != null) {
952                                         if (cr.Particle == null)
953                                                 cr.Particle = CreateSequence ();
954                                         return cr.Particle;
955                                 }
956                         }
957                         throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
958                 }
959
960                 #endregion
961
962                 #region String Value
963
964                 // primitive type inference.
965                 // When running lax type inference, it just returns xs:string.
966                 private QName InferSimpleType (string value)
967                 {
968                         if (laxTypeInference)
969                                 return QNameString;
970
971                         switch (value) {
972                         // 0 and 1 are not infered as byte unlike MS.XSDInfer
973 //                      case "0":
974 //                      case "1":
975                         case "true":
976                         case "false":
977                                 return QNameBoolean;
978                         }
979                         try {
980                                 long dec = XmlConvert.ToInt64 (value);
981                                 if (byte.MinValue <= dec && dec <= byte.MaxValue)
982                                         return QNameUByte;
983                                 if (sbyte.MinValue <= dec && dec <= sbyte.MaxValue)
984                                         return QNameByte;
985                                 if (ushort.MinValue <= dec && dec <= ushort.MaxValue)
986                                         return QNameUShort;
987                                 if (short.MinValue <= dec && dec <= short.MaxValue)
988                                         return QNameShort;
989                                 if (uint.MinValue <= dec && dec <= uint.MaxValue)
990                                         return QNameUInt;
991                                 if (int.MinValue <= dec && dec <= int.MaxValue)
992                                         return QNameInt;
993                                 return QNameLong;
994                         } catch (Exception) {
995                         }
996                         try {
997                                 XmlConvert.ToUInt64 (value);
998                                 return QNameULong;
999                         } catch (Exception) {
1000                         }
1001                         try {
1002                                 XmlConvert.ToDecimal (value);
1003                                 return QNameDecimal;
1004                         } catch (Exception) {
1005                         }
1006                         try {
1007                                 double dbl = XmlConvert.ToDouble (value);
1008                                 if (float.MinValue <= dbl &&
1009                                         dbl <= float.MaxValue)
1010                                         return QNameFloat;
1011                                 else
1012                                         return QNameDouble;
1013                         } catch (Exception) {
1014                         }
1015                         try {
1016                                 // FIXME: also try DateTimeSerializationMode
1017                                 // and gYearMonth
1018                                 XmlConvert.ToDateTime (value);
1019                                 return QNameDateTime;
1020                         } catch (Exception) {
1021                         }
1022                         try {
1023                                 XmlConvert.ToTimeSpan (value);
1024                                 return QNameDuration;
1025                         } catch (Exception) {
1026                         }
1027
1028                         // xs:string
1029                         return QNameString;
1030                 }
1031
1032                 #endregion
1033
1034                 #region Utilities
1035
1036                 private Element GetGlobalElement (QName name)
1037                 {
1038                         Element el = newElements [name] as Element;
1039                         if (el == null)
1040                                 el = schemas.GlobalElements [name] as Element;
1041                         return el;
1042                 }
1043
1044                 private Attr GetGlobalAttribute (QName name)
1045                 {
1046                         Attr a = newElements [name] as Attr;
1047                         if (a == null)
1048                                 a = schemas.GlobalAttributes [name] as Attr;
1049                         return a;
1050                 }
1051
1052                 private Element CreateElement (QName name)
1053                 {
1054                         Element el = new Element ();
1055                         el.Name = name.Name;
1056                         return el;
1057                 }
1058
1059                 private Element CreateGlobalElement (QName name)
1060                 {
1061                         Element el = CreateElement (name);
1062                         XmlSchema schema = PopulateSchema (name.Namespace);
1063                         schema.Items.Add (el);
1064                         newElements.Add (name, el);
1065                         return el;
1066                 }
1067
1068                 private Attr CreateGlobalAttribute (QName name)
1069                 {
1070                         Attr attr = new Attr ();
1071                         XmlSchema schema = PopulateSchema (name.Namespace);
1072                         attr.Name = name.Name;
1073                         schema.Items.Add (attr);
1074                         newAttributes.Add (name, attr);
1075                         return attr;
1076                 }
1077
1078                 // Note that the return value never assures that all the
1079                 // components in the parameter ns must reside in it.
1080                 private XmlSchema PopulateSchema (string ns)
1081                 {
1082                         ICollection list = schemas.Schemas (ns);
1083                         if (list.Count > 0) {
1084                                 IEnumerator e = list.GetEnumerator ();
1085                                 e.MoveNext ();
1086                                 return (XmlSchema) e.Current;
1087                         }
1088                         XmlSchema s = new XmlSchema ();
1089                         if (ns != null && ns.Length > 0)
1090                                 s.TargetNamespace = ns;
1091                         s.ElementFormDefault = Form.Qualified;
1092                         s.AttributeFormDefault = Form.Unqualified;
1093                         schemas.Add (s);
1094                         return s;
1095                 }
1096
1097                 private XmlSchemaInferenceException Error (
1098                         XmlSchemaObject sourceObj,
1099                         string message)
1100                 {
1101                         // This override is mainly for schema component error.
1102                         return Error (sourceObj, false, message);
1103                 }
1104
1105                 private XmlSchemaInferenceException Error (
1106                         XmlSchemaObject sourceObj,
1107                         bool useReader,
1108                         string message)
1109                 {
1110                         string msg = String.Concat (
1111                                 message,
1112                                 sourceObj != null ?
1113                                         String.Format (". Related schema component is {0}",
1114                                                 sourceObj.SourceUri,
1115                                                 sourceObj.LineNumber,
1116                                                 sourceObj.LinePosition) :
1117                                         String.Empty,
1118                                 useReader ?
1119                                         String.Format (". {0}", source.BaseURI) :
1120                                         String.Empty);
1121
1122                         IXmlLineInfo li = source as IXmlLineInfo;
1123                         if (useReader && li != null)
1124                                 return new XmlSchemaInferenceException (
1125                                         msg, null, li.LineNumber,
1126                                         li.LinePosition);
1127                         else
1128                                 return new XmlSchemaInferenceException (msg);
1129                 }
1130
1131                 #endregion
1132         }
1133 }
1134
1135 #endif