2004-11-22 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System.XML / System.Xml.Schema / XmlSchemaInference.cs
1 //
2 // XmlSchemaInference.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C)2004 Novell Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 #if NET_2_0
32
33 using System;
34 using System.Collections;
35 using System.Xml;
36 using System.Xml.Schema;
37
38 using QName = System.Xml.XmlQualifiedName;
39 using Form = System.Xml.Schema.XmlSchemaForm;
40 using Use = System.Xml.Schema.XmlSchemaUse;
41 using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
42 using SOMObject = System.Xml.Schema.XmlSchemaObject;
43 using Element = System.Xml.Schema.XmlSchemaElement;
44 using Attr = System.Xml.Schema.XmlSchemaAttribute;
45 using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
46 using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
47 using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
48 using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
49 using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
50 using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
51 using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
52 using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
53 using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
54 using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
55 using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
56 using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
57 using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
58 using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
59 using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
60 using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
61 using Particle = System.Xml.Schema.XmlSchemaParticle;
62 using Sequence = System.Xml.Schema.XmlSchemaSequence;
63 using Choice = System.Xml.Schema.XmlSchemaChoice;
64
65
66 namespace System.Xml.Schema
67 {
68         [MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
69         public class XmlSchemaInference
70         {
71                 public enum InferenceOption {
72                         Restricted,
73                         Relaxed,
74                 }
75
76                 InferenceOption occurrence = InferenceOption.Restricted;
77                 InferenceOption typeInference = InferenceOption.Restricted;
78
79                 public XmlSchemaInference ()
80                 {
81                 }
82
83                 public InferenceOption Occurrence {
84                         get { return occurrence; }
85                         set { occurrence = value; }
86                 }
87
88                 public InferenceOption TypeInference {
89                         get { return TypeInference; }
90                         set { typeInference = value; }
91                 }
92
93                 public XmlSchemaSet InferSchema (XmlReader xmlReader)
94                 {
95                         return InferSchema (xmlReader, new XmlSchemaSet ());
96                 }
97
98                 public XmlSchemaSet InferSchema (XmlReader xmlReader,
99                         XmlSchemaSet schemas)
100                 {
101                         return XsdInference.Process (xmlReader, schemas,
102                                 occurrence == InferenceOption.Relaxed,
103                                 typeInference == InferenceOption.Relaxed);
104                 }
105         }
106
107         class XsdInference
108         {
109                 public static XmlSchemaSet Process (XmlReader xmlReader, 
110                         XmlSchemaSet schemas,
111                         bool laxOccurence,
112                         bool laxTypeInference)
113                 {
114                         XsdInference impl = new XsdInference (xmlReader,
115                                 schemas, laxOccurence, laxTypeInference);
116                         impl.Run ();
117                         return impl.schemas;
118                 }
119
120                 public const string NamespaceXml =
121                         "http://www.w3.org/XML/1998/namespace";
122
123                 public const string NamespaceXmlns =
124                         "http://www.w3.org/2000/xmlns/";
125
126                 public const string XdtNamespace =
127                         "http://www.w3.org/2003/11/xpath-datatypes";
128
129                 static readonly QName QNameString = new QName (
130                         "string", XmlSchema.Namespace);
131
132                 static readonly QName QNameBoolean = new QName (
133                         "boolean", XmlSchema.Namespace);
134
135                 static readonly QName QNameAnyType = new QName (
136                         "anyType", XmlSchema.Namespace);
137
138                 static readonly QName QNameByte = new QName (
139                         "byte", XmlSchema.Namespace);
140
141                 static readonly QName QNameUByte = new QName (
142                         "unsignedByte", XmlSchema.Namespace);
143
144                 static readonly QName QNameShort = new QName (
145                         "short", XmlSchema.Namespace);
146
147                 static readonly QName QNameUShort = new QName (
148                         "unsignedShort", XmlSchema.Namespace);
149
150                 static readonly QName QNameInt = new QName (
151                         "int", XmlSchema.Namespace);
152
153                 static readonly QName QNameUInt = new QName (
154                         "unsignedInt", XmlSchema.Namespace);
155
156                 static readonly QName QNameLong = new QName (
157                         "long", XmlSchema.Namespace);
158
159                 static readonly QName QNameULong = new QName (
160                         "unsignedLong", XmlSchema.Namespace);
161
162                 static readonly QName QNameDecimal = new QName (
163                         "decimal", XmlSchema.Namespace);
164
165                 static readonly QName QNameUDecimal = new QName (
166                         "unsignedDecimal", XmlSchema.Namespace);
167
168                 static readonly QName QNameDouble = new QName (
169                         "double", XmlSchema.Namespace);
170
171                 static readonly QName QNameFloat = new QName (
172                         "float", XmlSchema.Namespace);
173
174                 static readonly QName QNameDateTime = new QName (
175                         "dateTime", XmlSchema.Namespace);
176
177                 static readonly QName QNameDuration = new QName (
178                         "duration", XmlSchema.Namespace);
179
180                 XmlReader source;
181                 XmlSchemaSet schemas;
182                 bool laxOccurence;
183                 bool laxTypeInference;
184
185                 Hashtable newElements = new Hashtable ();
186                 Hashtable newAttributes = new Hashtable ();
187
188                 private XsdInference (XmlReader xmlReader, 
189                         XmlSchemaSet schemas, 
190                         bool laxOccurence, 
191                         bool laxTypeInference)
192                 {
193                         this.source = xmlReader;
194                         this.schemas = schemas;
195                         this.laxOccurence = laxOccurence;
196                         this.laxTypeInference = laxTypeInference;
197                 }
198
199                 private void Run ()
200                 {
201                         // XmlSchemaSet need to be compiled.
202                         schemas.Compile ();
203
204                         // move to top-level element
205                         source.MoveToContent ();
206                         int depth = source.Depth;
207                         if (source.NodeType != XmlNodeType.Element)
208                                 throw new ArgumentException ("Argument XmlReader content is expected to be an element.");
209
210                         QName qname = new QName (source.LocalName,
211                                 source.NamespaceURI);
212                         Element el = GetGlobalElement (qname);
213                         if (el == null) {
214                                 el = CreateGlobalElement (qname);
215                                 InferElement (el, qname.Namespace, true);
216                         }
217                         else
218                                 InferElement (el, qname.Namespace, false);
219                 }
220
221                 private void IncludeXmlAttributes ()
222                 {
223                         if (schemas.Schemas (NamespaceXml).Count == 0)
224                                 // FIXME: do it from resources.
225                                 schemas.Add (NamespaceXml, 
226                                         "http://www.w3.org/2001/xml.xsd");
227                 }
228
229                 private void InferElement (Element el, string ns, bool isNew)
230                 {
231                         // Quick check for reference to another definition
232                         // (i.e. element ref='...' that should be redirected)
233                         if (el.RefName != QName.Empty) {
234                                 Element body = GetGlobalElement (el.RefName);
235                                 if (body == null) {
236                                         body = CreateElement (el.RefName);
237                                         InferElement (body, ns, true);
238                                 }
239                                 else
240                                         InferElement (body, ns, isNew);
241                                 return;
242                         }
243
244                         // Attributes
245                         if (source.MoveToFirstAttribute ()) {
246                                 InferAttributes (el, ns, isNew);
247                                 source.MoveToElement ();
248                         }
249
250                         // Content
251                         if (source.IsEmptyElement) {
252                                 InferAsEmptyElement (el, ns, isNew);
253                                 source.Read ();
254                                 source.MoveToContent ();
255                         }
256                         else {
257                                 InferContent (el, ns, isNew);
258                                 source.ReadEndElement ();
259                         }
260                         if (el.SchemaType == null &&
261                                 el.SchemaTypeName == QName.Empty)
262                                 el.SchemaTypeName = QNameString;
263                 }
264
265                 #region Attribute Inference
266
267                 private Hashtable CollectAttrTable (SOMList attList)
268                 {
269                         // get attribute definition table.
270                         Hashtable table = new Hashtable ();
271                         foreach (XmlSchemaObject obj in attList) {
272                                 Attr attr = obj as Attr;
273                                 if (attr == null)
274                                         throw Error (obj, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", obj.GetType ()));
275                                 if (attr.RefName != QName.Empty)
276                                         table.Add (attr.RefName, attr);
277                                 else
278                                         table.Add (new QName (attr.Name, ""),
279                                                 attr);
280                         }
281                         return table;
282                 }
283
284                 private void InferAttributes (Element el, string ns, bool isNew)
285                 {
286                         // Now this element is going to have complexType.
287                         // It currently not, then we have to replace it.
288                         ComplexType ct = null;
289                         SOMList attList = null;
290                         Hashtable table = null;
291
292                         do {
293                                 switch (source.NamespaceURI) {
294                                 case NamespaceXml:
295                                         if (schemas.Schemas (
296                                                 NamespaceXml) .Count == 0)
297                                                 IncludeXmlAttributes ();
298                                         break;
299                                 case XmlSchema.InstanceNamespace:
300                                         if (source.LocalName == "nil")
301                                                 el.IsNillable = true;
302                                         // all other xsi:* atts are ignored
303                                         continue;
304                                 case NamespaceXmlns:
305                                         continue;
306                                 }
307                                 if (ct == null) {
308                                         ct = ToComplexType (el);
309                                         attList = GetAttributes (ct);
310                                         table = CollectAttrTable (attList);
311                                 }
312                                 QName attrName = new QName (
313                                         source.LocalName, source.NamespaceURI);
314                                 Attr attr = table [attrName] as Attr;
315                                 if (attr == null) {
316                                         attList.Add (InferNewAttribute (
317                                                 attrName, isNew));
318                                 } else {
319                                         table.Remove (attrName);
320                                         if (attr.RefName != null &&
321                                                 attr.RefName != QName.Empty)
322                                                 continue; // just a reference
323                                         InferMergedAttribute (attr);
324                                 }
325                         } while (source.MoveToNextAttribute ());
326
327                         // mark all attr definitions that did not appear
328                         // as optional.
329                         if (table != null)
330                                 foreach (Attr attr in table.Values)
331                                         attr.Use = Use.Optional;
332                 }
333
334                 private XmlSchemaAttribute InferNewAttribute (
335                         QName attrName, bool isNewTypeDefinition)
336                 {
337                         Attr attr = null;
338                         bool mergedRequired = false;
339                         if (attrName.Namespace.Length > 0) {
340                                 // global attribute; might be already defined.
341                                 attr = GetGlobalAttribute (attrName) as Attr;
342                                 if (attr == null) {
343                                         attr = CreateGlobalAttribute (attrName);
344                                         attr.SchemaTypeName =
345                                                 InferSimpleType (source.Value);
346                                 } else {
347                                         InferMergedAttribute (attr);
348                                         mergedRequired =
349                                                 attr.Use == Use.Required;
350                                 }
351                                 attr = new Attr ();
352                                 attr.RefName = attrName;
353                         } else {
354                                 // local attribute
355                                 attr = new Attr ();
356                                 attr.Name = attrName.Name;
357                                 attr.SchemaTypeName =
358                                         InferSimpleType (source.Value);
359                         }
360                         if (!laxOccurence &&
361                                 (isNewTypeDefinition || mergedRequired))
362                                 attr.Use = Use.Required;
363                         else
364                                 attr.Use = Use.Optional;
365
366                         return attr;
367                 }
368
369                 // validate string value agains attr and 
370                 // if invalid, then relax the type.
371                 private void InferMergedAttribute (Attr attr)
372                 {
373                         attr.SchemaTypeName = InferMergedType (source.Value,
374                                 attr.SchemaTypeName);
375                         attr.SchemaType = null;
376                 }
377
378                 private QName InferMergedType (string value, QName typeName)
379                 {
380                         // examine value against specified type and
381                         // if unacceptable, then return a relaxed type.
382
383                         SimpleType st = XmlSchemaType.GetBuiltInSimpleType (
384                                 typeName);
385                         if (st == null) // non-primitive type => see above.
386                                 return QNameString;
387                         do {
388                                 try {
389                                         st.Datatype.ParseValue (value,
390                                                 source.NameTable,
391                                                 source as IXmlNamespaceResolver);
392                                         return typeName;
393                                 } catch {
394                                         st = st.BaseXmlSchemaType as XmlSchemaSimpleType;
395                                         typeName = st != null ? st.QualifiedName : QName.Empty;
396                                 }
397                         } while (typeName != QName.Empty);
398                         return QNameString;
399                 }
400
401                 private SOMList GetAttributes (ComplexType ct)
402                 {
403                         if (ct.ContentModel == null)
404                                 return ct.Attributes;
405
406                         SimpleModel sc = ct.ContentModel as SimpleModel;
407                         if (sc != null) {
408                                 SimpleExt sce = sc.Content as SimpleExt;
409                                 if (sce != null)
410                                         return sce.Attributes;
411                                 SimpleRst scr = sc.Content as SimpleRst;
412                                 if (scr != null)
413                                         return scr.Attributes;
414                                 else
415                                         throw Error (sc, "Invalid simple content model.");
416                         }
417                         ComplexModel cc = ct.ContentModel as ComplexModel;
418                         if (cc != null) {
419                                 ComplexExt cce = cc.Content as ComplexExt;
420                                 if (cce != null)
421                                         return cce.Attributes;
422                                 ComplexRst ccr = cc.Content as ComplexRst;
423                                 if (ccr != null)
424                                         return ccr.Attributes;
425                                 else
426                                         throw Error (cc, "Invalid simple content model.");
427                         }
428                         throw Error (cc, "Invalid complexType. Should not happen.");
429                 }
430
431                 private ComplexType ToComplexType (Element el)
432                 {
433                         QName name = el.SchemaTypeName;
434                         XmlSchemaType type = el.SchemaType;
435
436                         // 1. element type is complex.
437                         ComplexType ct = type as ComplexType;
438                         if (ct != null)
439                                 return ct;
440
441                         // 2. reference to global complexType.
442                         XmlSchemaType globalType = schemas.GlobalTypes [name]
443                                 as XmlSchemaType;
444                         ct = globalType as ComplexType;
445                         if (ct != null)
446                                 return ct;
447
448                         ct = new ComplexType ();
449                         el.SchemaType = ct;
450                         el.SchemaTypeName = QName.Empty;
451
452                         // 3. base type name is xs:anyType or no specification.
453                         // <xs:complexType />
454                         if (name == QNameAnyType)
455                                 return ct;
456                         else if (type == null && name == QName.Empty)
457                                 return ct;
458
459                         SimpleModel sc = new SimpleModel ();
460                         ct.ContentModel = sc;
461
462                         // 4. type is simpleType
463                         //    -> extension of existing simple type.
464                         SimpleType st = type as SimpleType;
465                         if (st != null) {
466                                 SimpleRst scr = new SimpleRst ();
467                                 scr.BaseType = st;
468                                 sc.Content = scr;
469                                 return ct;
470                         }
471
472                         SimpleExt sce = new SimpleExt ();
473                         sc.Content = sce;
474
475                         // 5. type name points to primitive type
476                         //    -> simple extension of a primitive type
477                         st = XmlSchemaType.GetBuiltInSimpleType (name);
478                         if (st != null) {
479                                 sce.BaseTypeName = name;
480                                 return ct;
481                         }
482
483                         // 6. type name points to global simpleType.
484                         st = globalType as SimpleType;
485                         if (st != null) {
486                                 sce.BaseTypeName = name;
487                                 return ct;
488                         }
489
490                         throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
491                 }
492
493                 #endregion
494
495                 #region Element Type
496
497                 private void InferAsEmptyElement (Element el, string ns,
498                         bool isNew)
499                 {
500                         ComplexType ct = el.SchemaType as ComplexType;
501                         if (ct != null) {
502                                 SimpleModel sm =
503                                         ct.ContentModel as SimpleModel;
504                                 if (sm != null) {
505                                         ToEmptiableSimpleContent (sm, isNew);
506                                         return;
507                                 }
508
509                                 ComplexModel cm = ct.ContentModel
510                                         as ComplexModel;
511                                 if (cm != null) {
512                                         ToEmptiableComplexContent (cm, isNew);
513                                         return;
514                                 }
515
516                                 if (ct.Particle != null)
517                                         ct.Particle.MinOccurs = 0;
518                                 return;
519                         }
520                         SimpleType st = el.SchemaType as SimpleType;
521                         if (st != null) {
522                                 st = MakeBaseTypeAsEmptiable (st);
523                                 switch (st.QualifiedName.Namespace) {
524                                 case XmlSchema.Namespace:
525                                 case XdtNamespace:
526                                         el.SchemaTypeName = st.QualifiedName;
527                                         break;
528                                 default:
529                                         el.SchemaType =st;
530                                         break;
531                                 }
532                         }
533                 }
534
535                 private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
536                 {
537                         switch (st.QualifiedName.Namespace) {
538                         case XmlSchema.Namespace:
539                         case XdtNamespace:
540                                 // If a primitive type
541                                 return XmlSchemaType.GetBuiltInSimpleType (
542                                         XmlTypeCode.String);
543                         }
544                         SimpleTypeRst str = st.Content as SimpleTypeRst;
545                         if (str != null) {
546                                 ArrayList al = null;
547                                 foreach (SchemaFacet f in str.Facets) {
548                                         if (f is LengthFacet ||
549                                                 f is MinLengthFacet) {
550                                                 if (al == null)
551                                                         al = new ArrayList ();
552                                                 al.Add (f);
553                                         }
554                                 }
555                                 foreach (SchemaFacet f in al)
556                                         str.Facets.Remove (f);
557                                 if (str.BaseType != null)
558                                         str.BaseType =
559                                                 MakeBaseTypeAsEmptiable (st);
560                                 else
561                                         // It might have a reference to an
562                                         // external simple type, but there is
563                                         // no assurance that any of those
564                                         // external types allow an empty
565                                         // string. So just set base type as
566                                         // xs:string.
567                                         str.BaseTypeName = QNameString;
568                         } // union/list can have empty string value.
569
570                         return st;
571                 }
572
573                 private void ToEmptiableSimpleContent (
574                         SimpleModel sm, bool isNew)
575                 {
576                         SimpleExt se = sm.Content as SimpleExt;
577                         if (se != null)
578                                 se.BaseTypeName = QNameString;
579                         else {
580                                 SimpleRst sr = sm.Content
581                                         as SimpleRst;
582                                 if (sr == null)
583                                         throw Error (sm, "Invalid simple content model was passed.");
584                                 sr.BaseTypeName = QNameString;
585                                 sr.BaseType = null;
586                         }
587                 }
588
589                 private void ToEmptiableComplexContent (
590                         ComplexModel cm, bool isNew)
591                 {
592                         ComplexExt ce = cm.Content
593                                 as ComplexExt;
594                         if (ce != null) {
595                                 if (ce.Particle != null)
596                                         ce.Particle.MinOccurs = 0;
597                                 else if (ce.BaseTypeName != null &&
598                                         ce.BaseTypeName != QName.Empty &&
599                                         ce.BaseTypeName != QNameAnyType)
600                                         throw Error (ce, "Complex type content extension has a reference to an external component that is not supported.");
601                         }
602                         else {
603                                 ComplexRst cr = cm.Content
604                                         as ComplexRst;
605                                 if (cr == null)
606                                         throw Error (cm, "Invalid complex content model was passed.");
607                                 if (cr.Particle != null)
608                                         cr.Particle.MinOccurs = 0;
609                                 else if (cr.BaseTypeName != null &&
610                                         cr.BaseTypeName != QName.Empty &&
611                                         cr.BaseTypeName != QNameAnyType)
612                                         throw Error (cr, "Complex type content extension has a reference to an external component that is not supported.");
613                         }
614                 }
615
// Reads past the current node and infers the content of 'el' from the
// first content node that follows:
//  - EndElement: delegated to InferAsEmptyElement.
//  - Element: delegated to InferComplexContent.
//  - Text, CDATA, SignificantWhitespace: delegated to InferTextContent;
//    when an element start follows the text, inference continues with
//    InferComplexContent.
//  - Whitespace: skipped, and the next node is examined instead.
// Any other node type leaves 'el' untouched.
private void InferContent (Element el, string ns, bool isNew)
{
        XmlNodeType kind;
        // Keep advancing while only plain whitespace is found.
        do {
                source.Read ();
                source.MoveToContent ();
                kind = source.NodeType;
        } while (kind == XmlNodeType.Whitespace);

        if (kind == XmlNodeType.EndElement) {
                InferAsEmptyElement (el, ns, isNew);
                return;
        }

        bool startsWithText = kind == XmlNodeType.Text ||
                kind == XmlNodeType.CDATA ||
                kind == XmlNodeType.SignificantWhitespace;
        if (startsWithText) {
                InferTextContent (el, isNew);
                source.MoveToContent ();
                // Text may be followed by child elements.
                kind = source.NodeType;
        }

        if (kind == XmlNodeType.Element)
                InferComplexContent (el, ns, isNew);
}
640
// Infers child-element (and possibly mixed) content for 'el'.
// The complex type obtained from ToComplexType is first stripped of any
// simple content model, then content nodes are consumed one by one
// until the reader is positioned on an EndElement.
// Throws NotImplementedException when the reader reports
// XmlNodeType.None.
private void InferComplexContent (Element el, string ns,
        bool isNew)
{
        ComplexType type = ToComplexType (el);
        ToComplexContentType (type);

        // Cursor state handed to ProcessSequence across iterations.
        int index = 0;
        bool used = false;

        while (true) {
                XmlNodeType kind = source.NodeType;

                if (kind == XmlNodeType.EndElement)
                        return; // finished
                if (kind == XmlNodeType.None)
                        throw new NotImplementedException ("Internal Error: Should not happen.");

                if (kind == XmlNodeType.Element) {
                        Sequence seq = PopulateSequence (type);
                        // A sequence whose first item is a choice is
                        // handled as a lax term.
                        Choice lax = null;
                        if (seq.Items.Count > 0)
                                lax = seq.Items [0] as Choice;
                        if (lax == null)
                                ProcessSequence (type, seq, ns,
                                        ref index, ref used, isNew);
                        else
                                ProcessLax (lax, ns);
                        source.MoveToContent ();
                }
                else if (kind == XmlNodeType.Text ||
                        kind == XmlNodeType.CDATA ||
                        kind == XmlNodeType.SignificantWhitespace) {
                        // Character data between elements.
                        MarkAsMixed (type);
                        source.ReadString ();
                        source.MoveToContent ();
                }
                // Any other node type is not consumed here.
        }
}
680
// Reads the text at the reader position and folds it into the type
// information already attached to 'el':
//  - no type at all: a primitive type is inferred for a new element,
//    otherwise the type name becomes QNameString;
//  - a named type in the XML Schema or XDT namespace: merged via
//    InferMergedType;
//  - any other named type: a global complex type is marked as mixed,
//    anything else is replaced by QNameString;
//  - an inline simple type: dropped in favor of QNameString;
//  - an inline complex type: its simple content base type is merged,
//    or it is marked as mixed when it has no simple content.
private void InferTextContent (Element el, bool isNew)
{
        string text = source.ReadString ();

        if (el.SchemaType != null) {
                if (el.SchemaType is SimpleType) {
                        // We cannot make sure that the string value
                        // is valid for it, so just set as xs:string.
                        el.SchemaType = null;
                        el.SchemaTypeName = QNameString;
                        return;
                }

                // complexType
                ComplexType inlineType = el.SchemaType as ComplexType;
                SimpleModel simple =
                        inlineType.ContentModel as SimpleModel;
                if (simple == null) {
                        // - ComplexContent
                        MarkAsMixed (inlineType);
                        return;
                }

                // - SimpleContent
                SimpleExt ext = simple.Content as SimpleExt;
                if (ext != null)
                        ext.BaseTypeName = InferMergedType (text,
                                ext.BaseTypeName);
                SimpleRst rst = simple.Content as SimpleRst;
                if (rst != null) {
                        rst.BaseTypeName = InferMergedType (text,
                                rst.BaseTypeName);
                        rst.BaseType = null;
                }
                return;
        }

        if (el.SchemaTypeName == QName.Empty) {
                // no type information -> infer type
                if (isNew)
                        el.SchemaTypeName = InferSimpleType (text);
                else
                        el.SchemaTypeName = QNameString;
                return;
        }

        string typeNs = el.SchemaTypeName.Namespace;
        if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace) {
                // existing primitive type
                el.SchemaTypeName = InferMergedType (text,
                        el.SchemaTypeName);
                return;
        }

        // If the referenced type is complex, then just set mixed='true'
        // (the type cannot be set). If it is simple, then we cannot
        // make sure that the string value is valid, so just set as
        // xs:string.
        ComplexType global =
                schemas.GlobalTypes [el.SchemaTypeName] as ComplexType;
        if (global == null)
                el.SchemaTypeName = QNameString;
        else
                MarkAsMixed (global);
}
751
// Flags 'ct' as mixed content. When the type carries a complex content
// model the flag is set on that model; otherwise it is set on the type
// itself.
private void MarkAsMixed (ComplexType ct)
{
        ComplexModel model = ct.ContentModel as ComplexModel;
        if (model == null) {
                ct.IsMixed = true;
                return;
        }
        model.IsMixed = true;
}
760
761                 #endregion
762
763                 #region Particles
764
// Infers the element at the reader position against the lax term 'c'.
// Each item of the choice must be an element; the first one matching
// the current reader element (see ElementMatches) is re-inferred in
// place. When none matches, a new element particle is inferred and
// appended to the choice: declared by name when the reader namespace
// equals 'ns', otherwise as a reference to the qualified name.
// Throws the exception built by Error when the choice contains a
// particle that is not an element.
private void ProcessLax (Choice c, string ns)
{
        foreach (Particle p in c.Items) {
                Element el = p as Element;
                if (el == null)
                        // The offending particle is passed to fill {0};
                        // without it String.Format itself would throw.
                        throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here.", p));
                if (ElementMatches (el, ns)) {
                        InferElement (el, ns, false);
                        return;
                }
        }
        // append a new element particle to lax term.
        Element nel = new Element ();
        if (source.NamespaceURI == ns)
                nel.Name = source.LocalName;
        else
                nel.RefName = new QName (source.LocalName,
                        source.NamespaceURI);
        InferElement (nel, source.NamespaceURI, true);
        c.Items.Add (nel);
}
786
// Tells whether the schema element 'el' describes the element the
// reader is positioned on. An element with a non-empty RefName is
// compared by that qualified name; otherwise its Name and the supplied
// namespace 'ns' are compared with the reader's local name and
// namespace URI.
private bool ElementMatches (Element el, string ns)
{
        if (el.RefName != QName.Empty)
                return el.RefName.Name == source.LocalName &&
                        el.RefName.Namespace == source.NamespaceURI;

        return el.Name == source.LocalName &&
                ns == source.NamespaceURI;
}
801
// Infers the element at the reader position against the sequence 's'
// of the complex type 'ct'.
//  position - index of the sequence item currently being matched.
//  consumed - whether the item at 'position' has already been used.
// Outline:
//  1. If the element matches an item before 'position', the sequence
//     is turned into a sequence of choice and handled by ProcessLax.
//  2. If 'position' is past the end, a new item is inferred and added.
//  3. If the item at 'position' matches, it is re-inferred (and made
//     unbounded when already consumed), then the following node is
//     examined.
//  4. Otherwise the cursor moves to the next item when the current one
//     was consumed, or the sequence is turned into a choice.
private void ProcessSequence (ComplexType ct, Sequence s,
        string ns, ref int position, ref bool consumed,
        bool isNew)
{
        int scanned = 0;
        while (scanned < position) {
                Element earlier = s.Items [scanned] as Element;
                if (ElementMatches (earlier, ns)) {
                        // Sequence element type violation might
                        // happen (might not, but we cannot backtrack
                        // here). So switch to sequence of choice*.
                        ProcessLax (ToSequenceOfChoice (s), ns);
                        return;
                }
                scanned++;
        }

        if (position >= s.Items.Count) {
                // Nothing left to match against: add a new item.
                QName qname = new QName (source.LocalName,
                        source.NamespaceURI);
                Element created = CreateElement (qname);
                InferElement (created, ns, true);
                if (ns != qname.Namespace) {
                        // Different namespace: add a reference instead.
                        Element reference = new Element ();
                        reference.RefName = qname;
                        s.Items.Add (reference);
                }
                else
                        s.Items.Add (created);
                consumed = true;
                return;
        }

        Element current = s.Items [position] as Element;
        if (current == null)
                throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));

        if (!ElementMatches (current, ns)) {
                if (!consumed) {
                        ProcessLax (ToSequenceOfChoice (s), ns);
                        return;
                }
                // Current item is done; retry with the next one.
                position++;
                consumed = false;
                ProcessSequence (ct, s, ns, ref position,
                        ref consumed, isNew);
                return;
        }

        if (consumed)
                current.MaxOccursString = "unbounded";
        InferElement (current, source.NamespaceURI, false);
        source.MoveToContent ();

        switch (source.NodeType) {
        case XmlNodeType.Text:
        case XmlNodeType.CDATA:
        case XmlNodeType.SignificantWhitespace:
                MarkAsMixed (ct);
                source.ReadString ();
                break;
        case XmlNodeType.Whitespace:
                source.ReadString ();
                break;
        case XmlNodeType.None:
        case XmlNodeType.Element:
        case XmlNodeType.EndElement:
                break;
        default:
                source.Read ();
                return;
        }

        // Continue with the next sibling element, if the reader is
        // positioned on one now.
        if (source.NodeType == XmlNodeType.Element)
                ProcessSequence (ct, s, ns, ref position,
                        ref consumed, isNew);
}
883
// Rewrites the sequence 's' in place so that its only item is an
// unbounded choice holding all the former items (with minOccurs 0 when
// laxOccurence is set).
// Note that the return value is the new choice, not the changed
// sequence.
private Choice ToSequenceOfChoice (Sequence s)
{
        Choice wrapper = new Choice ();
        if (laxOccurence)
                wrapper.MinOccurs = 0;
        wrapper.MaxOccursString = "unbounded";

        // Move every existing particle under the choice.
        for (int i = 0; i < s.Items.Count; i++)
                wrapper.Items.Add ((Particle) s.Items [i]);

        s.Items.Clear ();
        s.Items.Add (wrapper);
        return wrapper;
}
897
// Makes the complexType no longer have a simple content model.
// The objects returned by GetAttributes are added to the type's own
// attribute list, the content model is removed and the type is
// marked as mixed. A type without simple content is left unchanged.
private void ToComplexContentType (ComplexType type)
{
        if (!(type.ContentModel is SimpleModel))
                return;

        foreach (SOMObject attribute in GetAttributes (type))
                type.Attributes.Add (attribute);
        // FIXME: need to copy AnyAttribute.
        // (though not considered right now)
        type.ContentModel = null;
        type.IsMixed = true;
}
913
// Returns the content particle of 'ct' (see PopulateParticle) as a
// sequence. Throws the exception built by Error when the particle is
// of any other kind.
private Sequence PopulateSequence (ComplexType ct)
{
        Particle particle = PopulateParticle (ct);
        Sequence sequence = particle as Sequence;
        if (sequence == null)
                throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", particle));
        return sequence;
}
923
// Creates an empty sequence particle; its minOccurs is set to 0 when
// laxOccurence is set.
private Sequence CreateSequence ()
{
        Sequence created = new Sequence ();
        if (!laxOccurence)
                return created;
        created.MinOccurs = 0;
        return created;
}
931
// Returns the content particle of 'ct', creating an empty sequence in
// the appropriate place when there is none yet:
//  - directly on the type when it has no content model;
//  - on the extension or restriction of its complex content model.
// Throws the exception built by Error for any other content model.
private Particle PopulateParticle (ComplexType ct)
{
        if (ct.ContentModel == null) {
                if (ct.Particle == null)
                        ct.Particle = CreateSequence ();
                return ct.Particle;
        }

        ComplexModel model = ct.ContentModel as ComplexModel;
        XmlSchemaContent content = model != null ? model.Content : null;

        ComplexExt extension = content as ComplexExt;
        if (extension != null) {
                if (extension.Particle == null)
                        extension.Particle = CreateSequence ();
                return extension.Particle;
        }

        ComplexRst restriction = content as ComplexRst;
        if (restriction != null) {
                if (restriction.Particle == null)
                        restriction.Particle = CreateSequence ();
                return restriction.Particle;
        }

        throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
}
956
957                 #endregion
958
959                 #region String Value
960
// Primitive type inference for a text value.
// When running lax type inference, it just returns QNameString.
// Otherwise candidates are tried in this order and the first one that
// accepts the value wins: boolean literal, 64-bit signed integer
// (narrowed to the smallest range that holds it), unsigned 64-bit
// integer, decimal, double (reported as float when within float
// range), date-time, duration. QNameString is the fallback.
private QName InferSimpleType (string value)
{
        if (laxTypeInference)
                return QNameString;

        // "0" and "1" are deliberately not matched here; they fall
        // through to the integer checks below.
        if (value == "true" || value == "false")
                return QNameBoolean;

        try {
                long number = XmlConvert.ToInt64 (value);
                if (number >= byte.MinValue && number <= byte.MaxValue)
                        return QNameUByte;
                if (number >= sbyte.MinValue && number <= sbyte.MaxValue)
                        return QNameByte;
                if (number >= ushort.MinValue && number <= ushort.MaxValue)
                        return QNameUShort;
                if (number >= short.MinValue && number <= short.MaxValue)
                        return QNameShort;
                if (number >= uint.MinValue && number <= uint.MaxValue)
                        return QNameUInt;
                if (number >= int.MinValue && number <= int.MaxValue)
                        return QNameInt;
                return QNameLong;
        } catch (Exception) {
                // not a signed 64-bit integer; try the next candidate
        }

        try {
                XmlConvert.ToUInt64 (value);
                return QNameULong;
        } catch (Exception) {
                // not an unsigned 64-bit integer either
        }

        try {
                XmlConvert.ToDecimal (value);
                return QNameDecimal;
        } catch (Exception) {
                // not a decimal
        }

        try {
                double real = XmlConvert.ToDouble (value);
                bool fitsFloat = real >= float.MinValue &&
                        real <= float.MaxValue;
                if (fitsFloat)
                        return QNameFloat;
                return QNameDouble;
        } catch (Exception) {
                // not a floating point number
        }

        try {
                // FIXME: also try DateTimeSerializationMode
                // and gYearMonth
                XmlConvert.ToDateTime (value);
                return QNameDateTime;
        } catch (Exception) {
                // not a date-time
        }

        try {
                XmlConvert.ToTimeSpan (value);
                return QNameDuration;
        } catch (Exception) {
                // not a duration
        }

        // xs:string
        return QNameString;
}
1028
1029                 #endregion
1030
1031                 #region Utilities
1032
// Looks up a global element by qualified name, first among the
// entries of newElements and then among the global elements of the
// schema set. Returns null when neither has it.
private Element GetGlobalElement (QName name)
{
        Element pending = newElements [name] as Element;
        if (pending != null)
                return pending;
        return schemas.GlobalElements [name] as Element;
}
1040
// Looks up a global attribute by qualified name, first among the
// entries of newAttributes (where CreateGlobalAttribute registers
// them) and then among the global attributes of the schema set.
// Returns null when neither has it.
private Attr GetGlobalAttribute (QName name)
{
        // newAttributes, not newElements: the latter only holds
        // elements, so the lookup there could never yield an attribute.
        Attr a = newAttributes [name] as Attr;
        if (a == null)
                a = schemas.GlobalAttributes [name] as Attr;
        return a;
}
1048
// Creates a schema element named after the local part of 'name'.
// The namespace part of 'name' is not used here.
private Element CreateElement (QName name)
{
        Element created = new Element ();
        string localName = name.Name;
        created.Name = localName;
        return created;
}
1055
// Creates an element for 'name', adds it to the items of the schema
// obtained from PopulateSchema for the name's namespace, and records
// it in newElements under 'name'.
private Element CreateGlobalElement (QName name)
{
        Element created = CreateElement (name);
        PopulateSchema (name.Namespace).Items.Add (created);
        newElements.Add (name, created);
        return created;
}
1064
// Creates an attribute named after the local part of 'name', adds it
// to the items of the schema obtained from PopulateSchema for the
// name's namespace, and records it in newAttributes under 'name'.
private Attr CreateGlobalAttribute (QName name)
{
        Attr created = new Attr ();
        XmlSchema owner = PopulateSchema (name.Namespace);
        created.Name = name.Name;
        owner.Items.Add (created);
        newAttributes.Add (name, created);
        return created;
}
1074
// Returns a schema for the namespace 'ns': the first one the schema
// set reports for it, or a newly created one (elementFormDefault
// qualified, attributeFormDefault unqualified) that is added to the
// set. The target namespace is only assigned when 'ns' is non-empty.
// Note that the return value never assures that all the components
// in the parameter ns must reside in it.
private XmlSchema PopulateSchema (string ns)
{
        IEnumerator existing = schemas.Schemas (ns).GetEnumerator ();
        if (existing.MoveNext ())
                return (XmlSchema) existing.Current;

        XmlSchema created = new XmlSchema ();
        bool hasNamespace = ns != null && ns.Length > 0;
        if (hasNamespace)
                created.TargetNamespace = ns;
        created.ElementFormDefault = Form.Qualified;
        created.AttributeFormDefault = Form.Unqualified;
        schemas.Add (created);
        return created;
}
1093
// Builds (does not throw) an inference exception about a schema
// component. This overload is mainly for schema component errors, so
// reader information is never included.
private XmlSchemaInferenceException Error (
        XmlSchemaObject sourceObj,
        string message)
{
        const bool useReader = false;
        return Error (sourceObj, useReader, message);
}
1101
// Builds (does not throw) an inference exception.
//  sourceObj - related schema component, or null; when given, its
//              source URI, line number and line position are appended
//              to the message.
//  useReader - when true, the reader's base URI is appended to the
//              message and, if the reader exposes IXmlLineInfo, its
//              line number and position are stored in the exception.
//  message   - the leading part of the exception message.
private XmlSchemaInferenceException Error (
        XmlSchemaObject sourceObj,
        bool useReader,
        string message)
{
        // All three location values are formatted; previously only the
        // URI had a placeholder and the line information was dropped.
        string msg = String.Concat (
                message,
                sourceObj != null ?
                        String.Format (". Related schema component is {0}, line {1}, position {2}",
                                sourceObj.SourceUri,
                                sourceObj.LineNumber,
                                sourceObj.LinePosition) :
                        String.Empty,
                useReader ?
                        String.Format (". {0}", source.BaseURI) :
                        String.Empty);

        IXmlLineInfo li = source as IXmlLineInfo;
        if (useReader && li != null)
                return new XmlSchemaInferenceException (
                        msg, null, li.LineNumber,
                        li.LinePosition);
        else
                return new XmlSchemaInferenceException (msg);
}
1127
1128                 #endregion
1129         }
1130 }
1131
1132 #endif