New test.
[mono.git] / mcs / class / System.XML / System.Xml.Schema / XmlSchemaInference.cs
1 //
2 // XmlSchemaInference.cs
3 //
4 // Author:
5 //      Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C)2004 Novell Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30
31 #if NET_2_0
32
33 using System;
34 using System.Collections;
35 using System.Xml;
36 using System.Xml.Schema;
37
38 using QName = System.Xml.XmlQualifiedName;
39 using Form = System.Xml.Schema.XmlSchemaForm;
40 using Use = System.Xml.Schema.XmlSchemaUse;
41 using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
42 using SOMObject = System.Xml.Schema.XmlSchemaObject;
43 using Import = System.Xml.Schema.XmlSchemaImport;
44 using Element = System.Xml.Schema.XmlSchemaElement;
45 using Attr = System.Xml.Schema.XmlSchemaAttribute;
46 using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
47 using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
48 using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
49 using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
50 using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
51 using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
52 using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
53 using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
54 using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
55 using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
56 using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
57 using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
58 using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
59 using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
60 using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
61 using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
62 using Particle = System.Xml.Schema.XmlSchemaParticle;
63 using Sequence = System.Xml.Schema.XmlSchemaSequence;
64 using Choice = System.Xml.Schema.XmlSchemaChoice;
65
66
67 namespace System.Xml.Schema
68 {
[MonoTODO]
// FIXME:
// - merge primitive types
// - infer gYearMonth too
// - in some cases sequence should contain element whose minOccurs=0
//    (no obvious rules right now)
// - reject some non-supported schema components
//
// Public facade for schema inference. It only stores the two
// inference options and forwards the actual work to
// XsdInference.Process.
public sealed class XmlSchemaInference
{
        // Strictness level applied to an inference aspect.
        public enum InferenceOption {
                Restricted,
                Relaxed,
        }

        InferenceOption occurrence = InferenceOption.Restricted;
        InferenceOption typeInference = InferenceOption.Restricted;

        public XmlSchemaInference ()
        {
        }

        // Option forwarded to XsdInference.Process as "laxOccurrence"
        // (true when set to Relaxed).
        public InferenceOption Occurrence {
                get { return occurrence; }
                set { occurrence = value; }
        }

        // Option forwarded to XsdInference.Process as
        // "laxTypeInference" (true when set to Relaxed).
        public InferenceOption TypeInference {
                // Must read the backing field: returning the property
                // itself recursed forever and overflowed the stack.
                get { return typeInference; }
                set { typeInference = value; }
        }

        // Infers schemas from xmlReader into a fresh XmlSchemaSet.
        public XmlSchemaSet InferSchema (XmlReader xmlReader)
        {
                return InferSchema (xmlReader, new XmlSchemaSet ());
        }

        // Infers schemas from xmlReader, refining the supplied
        // schema set, and returns the set produced by the worker.
        public XmlSchemaSet InferSchema (XmlReader xmlReader,
                XmlSchemaSet schemas)
        {
                return XsdInference.Process (xmlReader, schemas,
                        occurrence == InferenceOption.Relaxed,
                        typeInference == InferenceOption.Relaxed);
        }
}
113
114         class XsdInference
115         {
// Static entry point: builds a one-shot XsdInference worker over
// the given reader and schema set, runs it, and hands back the
// schema set the worker holds.
public static XmlSchemaSet Process (XmlReader xmlReader, 
        XmlSchemaSet schemas,
        bool laxOccurrence,
        bool laxTypeInference)
{
        XsdInference worker = new XsdInference (
                xmlReader,
                schemas,
                laxOccurrence,
                laxTypeInference);
        worker.Run ();
        XmlSchemaSet result = worker.schemas;
        return result;
}
126
// Namespace URIs that the inference code compares against.
public const string NamespaceXml = "http://www.w3.org/XML/1998/namespace";
public const string NamespaceXmlns = "http://www.w3.org/2000/xmlns/";
public const string XdtNamespace = "http://www.w3.org/2003/11/xpath-datatypes";

// Qualified names of types in the XmlSchema.Namespace namespace.
static readonly QName QNameString = new QName ("string", XmlSchema.Namespace);
static readonly QName QNameBoolean = new QName ("boolean", XmlSchema.Namespace);
static readonly QName QNameAnyType = new QName ("anyType", XmlSchema.Namespace);

// Integral types, signed and unsigned.
static readonly QName QNameByte = new QName ("byte", XmlSchema.Namespace);
static readonly QName QNameUByte = new QName ("unsignedByte", XmlSchema.Namespace);
static readonly QName QNameShort = new QName ("short", XmlSchema.Namespace);
static readonly QName QNameUShort = new QName ("unsignedShort", XmlSchema.Namespace);
static readonly QName QNameInt = new QName ("int", XmlSchema.Namespace);
static readonly QName QNameUInt = new QName ("unsignedInt", XmlSchema.Namespace);
static readonly QName QNameLong = new QName ("long", XmlSchema.Namespace);
static readonly QName QNameULong = new QName ("unsignedLong", XmlSchema.Namespace);

// Non-integral numeric types.
static readonly QName QNameDecimal = new QName ("decimal", XmlSchema.Namespace);
// NOTE(review): XML Schema does not appear to define a built-in
// "unsignedDecimal" type -- confirm how this name is used.
static readonly QName QNameUDecimal = new QName ("unsignedDecimal", XmlSchema.Namespace);
static readonly QName QNameDouble = new QName ("double", XmlSchema.Namespace);
static readonly QName QNameFloat = new QName ("float", XmlSchema.Namespace);

// Date and time types.
static readonly QName QNameDateTime = new QName ("dateTime", XmlSchema.Namespace);
static readonly QName QNameDuration = new QName ("duration", XmlSchema.Namespace);
// Reader the instance document is pulled from.
XmlReader source;
// Schema set that is compiled in Run and returned by Process.
XmlSchemaSet schemas;
// Option flags passed in by Process.
bool laxOccurrence;
bool laxTypeInference;

// NOTE(review): presumably track components created during this
// run -- their use is outside this part of the file.
Hashtable newElements = new Hashtable ();
Hashtable newAttributes = new Hashtable ();

// Stores the reader, schema set and option flags; no work is done
// until Run is called.
private XsdInference (XmlReader xmlReader, 
        XmlSchemaSet schemas, 
        bool laxOccurrence, 
        bool laxTypeInference)
{
        this.laxTypeInference = laxTypeInference;
        this.laxOccurrence = laxOccurrence;
        this.schemas = schemas;
        source = xmlReader;
}
205
// Drives one inference pass: compiles the schema set, positions
// the reader on the top-level element, finds or creates the
// matching global element declaration and infers its definition.
// Throws ArgumentException when the reader content is not an
// element.
private void Run ()
{
        // XmlSchemaSet need to be compiled.
        schemas.Compile ();

        // move to top-level element
        source.MoveToContent ();
        if (source.NodeType != XmlNodeType.Element)
                throw new ArgumentException ("Argument XmlReader content is expected to be an element.");

        QName qname = new QName (source.LocalName,
                source.NamespaceURI);
        Element el = GetGlobalElement (qname);
        // A declaration that had to be created is inferred as "new".
        bool isNew = el == null;
        if (isNew)
                el = CreateGlobalElement (qname);
        InferElement (el, qname.Namespace, isNew);

        // FIXME: compile again.
//      foreach (XmlSchema schema in schemas.Schemas ())
//              schemas.Reprocess (schema);
}
231
// For every schema registered under the "current" namespace, adds
// an import of the "import" namespace unless that schema already
// has an import with the same namespace.
private void AddImport (string current, string import)
{
        foreach (XmlSchema target in schemas.Schemas (current)) {
                bool alreadyImported = false;
                foreach (XmlSchemaExternal ext in target.Includes) {
                        Import existing = ext as Import;
                        if (existing == null ||
                                existing.Namespace != import)
                                continue;
                        alreadyImported = true;
                }
                if (!alreadyImported) {
                        Import added = new Import ();
                        added.Namespace = import;
                        target.Includes.Add (added);
                }
        }
}
249
// Adds the schema for the XML namespace to the schema set when no
// schema is registered for that namespace yet.
private void IncludeXmlAttributes ()
{
        bool alreadyPresent =
                schemas.Schemas (NamespaceXml).Count != 0;
        if (alreadyPresent)
                return;

        // FIXME: do it from resources.
        schemas.Add (NamespaceXml, "http://www.w3.org/2001/xml.xsd");
}
257
// Infers the definition of "el" from the element the reader is
// currently positioned on: attributes first, then content. An
// element that still has neither an inline type nor a type name
// afterwards is typed as xs:string.
private void InferElement (Element el, string ns, bool isNew)
{
        // An element that is only a reference (ref='...') is
        // redirected to the referenced definition, which is created
        // when no global element with that name exists.
        if (el.RefName != QName.Empty) {
                bool targetIsNew = isNew;
                Element target = GetGlobalElement (el.RefName);
                if (target == null) {
                        target = CreateElement (el.RefName);
                        targetIsNew = true;
                }
                InferElement (target, ns, targetIsNew);
                return;
        }

        // Attributes; the reader is moved back to the element
        // afterwards.
        if (source.MoveToFirstAttribute ()) {
                InferAttributes (el, ns, isNew);
                source.MoveToElement ();
        }

        // Content
        if (!source.IsEmptyElement) {
                InferContent (el, ns, isNew);
                source.ReadEndElement ();
        } else {
                InferAsEmptyElement (el, ns, isNew);
                source.Read ();
                source.MoveToContent ();
        }

        bool untyped = el.SchemaType == null &&
                el.SchemaTypeName == QName.Empty;
        if (untyped)
                el.SchemaTypeName = QNameString;
}
293
294                 #region Attribute Inference
295
// Builds a lookup table of the attribute definitions in attList.
// A referencing attribute is keyed by its RefName; any other
// attribute by its Name with an empty namespace. Items that are
// not direct attribute definitions raise an error.
private Hashtable CollectAttrTable (SOMList attList)
{
        Hashtable byName = new Hashtable ();
        foreach (XmlSchemaObject item in attList) {
                Attr definition = item as Attr;
                if (definition == null)
                        throw Error (item, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", item.GetType ()));

                QName key;
                if (definition.RefName != QName.Empty)
                        key = definition.RefName;
                else
                        key = new QName (definition.Name, "");
                byName.Add (key, definition);
        }
        return byName;
}
312
// Walks the attributes of the current element (the reader must be
// on its first attribute) and merges each one into the complex
// type of "el". Known definitions that do not occur on this
// element are marked optional at the end.
private void InferAttributes (Element el, string ns, bool isNew)
{
        // The complex type, its attribute list and the lookup table
        // are resolved lazily, on the first attribute that is not
        // skipped.
        ComplexType ct = null;
        SOMList attList = null;
        Hashtable table = null;

        do {
                string attrNs = source.NamespaceURI;

                if (attrNs == XmlSchema.InstanceNamespace) {
                        // xsi:nil marks the element nillable;
                        // all other xsi:* atts are ignored
                        if (source.LocalName == "nil")
                                el.IsNillable = true;
                        continue;
                }
                if (attrNs == NamespaceXmlns)
                        continue;
                if (attrNs == NamespaceXml &&
                        schemas.Schemas (NamespaceXml).Count == 0)
                        IncludeXmlAttributes ();

                if (ct == null) {
                        ct = ToComplexType (el);
                        attList = GetAttributes (ct);
                        table = CollectAttrTable (attList);
                }

                QName attrName = new QName (
                        source.LocalName, source.NamespaceURI);
                Attr known = table [attrName] as Attr;
                if (known != null) {
                        table.Remove (attrName);
                        bool isReference = known.RefName != null &&
                                known.RefName != QName.Empty;
                        // a plain reference needs no merging
                        if (!isReference)
                                InferMergedAttribute (known);
                } else {
                        attList.Add (InferNewAttribute (
                                attrName, isNew, ns));
                }
        } while (source.MoveToNextAttribute ());

        // Whatever is still in the table did not appear on this
        // element, so it becomes optional.
        if (table != null) {
                foreach (Attr missing in table.Values)
                        missing.Use = Use.Optional;
        }
}
362
// Creates the attribute entry to add to a type for an attribute
// that has no definition there yet. A namespace-qualified
// attribute becomes a reference to a global attribute (created or
// merged as needed, with an import added); an unqualified one
// becomes a local definition typed from the current value.
private XmlSchemaAttribute InferNewAttribute (
        QName attrName, bool isNewTypeDefinition, string ns)
{
        bool mergedRequired = false;
        Attr result;

        if (attrName.Namespace.Length == 0) {
                // local attribute
                result = new Attr ();
                result.Name = attrName.Name;
                result.SchemaTypeName =
                        InferSimpleType (source.Value);
        } else {
                // global attribute; might be already defined.
                Attr global = GetGlobalAttribute (attrName) as Attr;
                if (global != null) {
                        InferMergedAttribute (global);
                        mergedRequired = global.Use == Use.Required;
                } else {
                        global = CreateGlobalAttribute (attrName);
                        global.SchemaTypeName =
                                InferSimpleType (source.Value);
                }
                result = new Attr ();
                result.RefName = attrName;
                AddImport (ns, attrName.Namespace);
        }

        bool required = !laxOccurrence &&
                (isNewTypeDefinition || mergedRequired);
        result.Use = required ? Use.Required : Use.Optional;

        return result;
}
398
// Validates the current attribute value against the type name of
// attr and, if it does not fit, relaxes the type. Any inline type
// on the attribute is dropped in favour of the type name.
private void InferMergedAttribute (Attr attr)
{
        QName merged = InferMergedType (source.Value,
                attr.SchemaTypeName);
        attr.SchemaTypeName = merged;
        attr.SchemaType = null;
}
407
// Returns a type name that accepts "value": typeName itself when
// the value parses under that built-in type, otherwise the first
// base type up the chain that parses it. Falls back to xs:string
// when typeName is not a built-in simple type or no type in the
// chain accepts the value.
private QName InferMergedType (string value, QName typeName)
{
        SimpleType candidate =
                XmlSchemaType.GetBuiltInSimpleType (typeName);
        if (candidate == null)
                return QNameString;

        QName current = typeName;
        for (;;) {
                try {
                        candidate.Datatype.ParseValue (value,
                                source.NameTable,
                                source as IXmlNamespaceResolver);
                        return current;
                } catch {
                        // A parse failure means "does not fit";
                        // step up to the base simple type.
                        candidate = candidate.BaseXmlSchemaType as XmlSchemaSimpleType;
                        if (candidate == null)
                                current = QName.Empty;
                        else
                                current = candidate.QualifiedName;
                }
                if (current == QName.Empty)
                        break;
        }
        return QNameString;
}
430
// Returns the attribute collection that belongs to ct: the type's
// own collection when it has no content model, otherwise the
// collection of the extension or restriction inside its simple or
// complex content model. Raises an error when the content model
// holds neither an extension nor a restriction.
private SOMList GetAttributes (ComplexType ct)
{
        if (ct.ContentModel == null)
                return ct.Attributes;

        SimpleModel sc = ct.ContentModel as SimpleModel;
        if (sc != null) {
                SimpleExt sce = sc.Content as SimpleExt;
                if (sce != null)
                        return sce.Attributes;
                SimpleRst scr = sc.Content as SimpleRst;
                if (scr != null)
                        return scr.Attributes;
                throw Error (sc, "Invalid simple content model.");
        }

        ComplexModel cc = ct.ContentModel as ComplexModel;
        if (cc != null) {
                ComplexExt cce = cc.Content as ComplexExt;
                if (cce != null)
                        return cce.Attributes;
                ComplexRst ccr = cc.Content as ComplexRst;
                if (ccr != null)
                        return ccr.Attributes;
                // This branch is about complex content; the
                // message used to say "simple".
                throw Error (cc, "Invalid complex content model.");
        }

        // Both casts failed, so sc and cc are null here; report the
        // complex type itself as the offending component.
        throw Error (ct, "Invalid complexType. Should not happen.");
}
460
// Returns the complex type that describes "el". An existing inline
// or global complex type is returned as is. Otherwise a new inline
// complex type replaces the element's current typing; when the
// element was simple-typed, the new type carries simple content
// based on that former type.
private ComplexType ToComplexType (Element el)
{
        QName typeName = el.SchemaTypeName;
        XmlSchemaType inlineType = el.SchemaType;

        // 1. the element already has an inline complex type.
        ComplexType existing = inlineType as ComplexType;
        if (existing != null)
                return existing;

        // 2. the type name refers to a global complex type.
        XmlSchemaType globalType =
                schemas.GlobalTypes [typeName] as XmlSchemaType;
        existing = globalType as ComplexType;
        if (existing != null)
                return existing;

        // From here on the element gets a fresh inline type and
        // loses its type name.
        ComplexType created = new ComplexType ();
        el.SchemaType = created;
        el.SchemaTypeName = QName.Empty;

        // 3. xs:anyType, or no typing at all: <xs:complexType />
        bool untyped = inlineType == null &&
                typeName == QName.Empty;
        if (typeName == QNameAnyType || untyped)
                return created;

        SimpleModel content = new SimpleModel ();
        created.ContentModel = content;

        // 4. inline simple type
        //    -> simple content restriction whose base is that type.
        SimpleType simple = inlineType as SimpleType;
        if (simple != null) {
                SimpleRst restriction = new SimpleRst ();
                restriction.BaseType = simple;
                content.Content = restriction;
                return created;
        }

        SimpleExt extension = new SimpleExt ();
        content.Content = extension;

        // 5. the type name is a built-in simple type, or
        // 6. the type name refers to a global simple type
        //    -> simple content extension of that named type.
        if (XmlSchemaType.GetBuiltInSimpleType (typeName) != null ||
                globalType is SimpleType) {
                extension.BaseTypeName = typeName;
                return created;
        }

        throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
}
522
523                 #endregion
524
525                 #region Element Type
526
// Relaxes the inline type of "el" so that an empty occurrence of
// the element is allowed. Simple and complex content models are
// handed to the matching ToEmptiable* helper, a bare particle gets
// minOccurs=0, and an inline simple type is relaxed through
// MakeBaseTypeAsEmptiable. An element without an inline type is
// left untouched.
private void InferAsEmptyElement (Element el, string ns,
        bool isNew)
{
        ComplexType ct = el.SchemaType as ComplexType;
        if (ct != null) {
                SimpleModel sm =
                        ct.ContentModel as SimpleModel;
                if (sm != null) {
                        ToEmptiableSimpleContent (sm, isNew);
                        return;
                }

                ComplexModel cm = ct.ContentModel
                        as ComplexModel;
                if (cm != null) {
                        ToEmptiableComplexContent (cm, isNew);
                        return;
                }

                if (ct.Particle != null)
                        ct.Particle.MinOccurs = 0;
                return;
        }

        SimpleType st = el.SchemaType as SimpleType;
        if (st == null)
                return;

        st = MakeBaseTypeAsEmptiable (st);
        switch (st.QualifiedName.Namespace) {
        case XmlSchema.Namespace:
        case XdtNamespace:
                // The relaxed type is referred to by name, so the
                // inline type must go: an element declaration
                // cannot carry both a type name and an inline type.
                el.SchemaTypeName = st.QualifiedName;
                el.SchemaType = null;
                break;
        default:
                el.SchemaType = st;
                break;
        }
}
564
// Returns a simple type that accepts the empty string in place of
// "st". A type whose name lives in the XML Schema or XDT namespace
// is replaced by the built-in string type. A restriction has its
// length/minLength facets removed and its base relaxed (an inline
// base recursively, a named base by switching to xs:string); the
// same "st" instance is then returned. Other content (union/list)
// is returned unchanged.
private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
{
        switch (st.QualifiedName.Namespace) {
        case XmlSchema.Namespace:
        case XdtNamespace:
                // If a primitive type
                return XmlSchemaType.GetBuiltInSimpleType (
                        XmlTypeCode.String);
        }

        SimpleTypeRst str = st.Content as SimpleTypeRst;
        if (str != null) {
                // Collect first, remove afterwards: the facet
                // collection cannot be modified while enumerated.
                // The list is always allocated so the removal loop
                // is safe when no such facet exists (it used to be
                // null in that case).
                ArrayList lengthFacets = new ArrayList ();
                foreach (SchemaFacet f in str.Facets) {
                        if (f is LengthFacet ||
                                f is MinLengthFacet)
                                lengthFacets.Add (f);
                }
                foreach (SchemaFacet f in lengthFacets)
                        str.Facets.Remove (f);

                if (str.BaseType != null)
                        // Recurse into the inline base type, not
                        // into "st" itself, which would never
                        // terminate.
                        str.BaseType =
                                MakeBaseTypeAsEmptiable (str.BaseType);
                else
                        // It might have a reference to an
                        // external simple type, but there is
                        // no assurance that any of those
                        // external types allow an empty
                        // string. So just set base type as
                        // xs:string.
                        str.BaseTypeName = QNameString;
        } // union/list can have empty string value.

        return st;
}
602
// Rebases a simple content model on xs:string so that empty text
// is accepted. For a restriction the inline base type is cleared
// as well. Content that is neither an extension nor a restriction
// raises an error.
private void ToEmptiableSimpleContent (
        SimpleModel sm, bool isNew)
{
        SimpleExt extension = sm.Content as SimpleExt;
        if (extension != null) {
                extension.BaseTypeName = QNameString;
                return;
        }

        SimpleRst restriction = sm.Content as SimpleRst;
        if (restriction == null)
                throw Error (sm, "Invalid simple content model was passed.");
        restriction.BaseTypeName = QNameString;
        restriction.BaseType = null;
}
618
// Makes a complex content model accept an empty element by setting
// minOccurs=0 on the particle of its extension or restriction.
// When there is no particle, a base type name other than
// xs:anyType is rejected as unsupported. Content that is neither
// an extension nor a restriction raises an error.
private void ToEmptiableComplexContent (
        ComplexModel cm, bool isNew)
{
        ComplexExt ce = cm.Content
                as ComplexExt;
        if (ce != null) {
                if (ce.Particle != null)
                        ce.Particle.MinOccurs = 0;
                else if (ce.BaseTypeName != null &&
                        ce.BaseTypeName != QName.Empty &&
                        ce.BaseTypeName != QNameAnyType)
                        throw Error (ce, "Complex type content extension has a reference to an external component that is not supported.");
                return;
        }

        ComplexRst cr = cm.Content
                as ComplexRst;
        if (cr == null)
                throw Error (cm, "Invalid complex content model was passed.");
        if (cr.Particle != null)
                cr.Particle.MinOccurs = 0;
        else if (cr.BaseTypeName != null &&
                cr.BaseTypeName != QName.Empty &&
                cr.BaseTypeName != QNameAnyType)
                // This branch handles a restriction; the message
                // used to name the extension case.
                throw Error (cr, "Complex type content restriction has a reference to an external component that is not supported.");
}
645
646                 private void InferContent (Element el, string ns, bool isNew)
647                 {
648                         source.Read ();
649                         source.MoveToContent ();
650                         switch (source.NodeType) {
651                         case XmlNodeType.EndElement:
652                                 InferAsEmptyElement (el, ns, isNew);
653                                 break;
654                         case XmlNodeType.Element:
655                                 InferComplexContent (el, ns, isNew);
656                                 break;
657                         case XmlNodeType.Text:
658                         case XmlNodeType.CDATA:
659                         case XmlNodeType.SignificantWhitespace:
660                                 InferTextContent (el, isNew);
661                                 source.MoveToContent ();
662                                 if (source.NodeType == XmlNodeType.Element)
663                                         goto case XmlNodeType.Element;
664                                 break;
665                         case XmlNodeType.Whitespace:
666                                 InferContent (el, ns, isNew); // skip and retry
667                                 break;
668                         }
669                 }
670
671                 private void InferComplexContent (Element el, string ns,
672                         bool isNew)
673                 {
674                         ComplexType ct = ToComplexType (el);
675                         ToComplexContentType (ct);
676
677                         int position = 0;
678                         bool consumed = false;
679
680                         do {
681                                 switch (source.NodeType) {
682                                 case XmlNodeType.Element:
683                                         Sequence s = PopulateSequence (ct);
684                                         Choice c = s.Items.Count > 0 ?
685                                                 s.Items [0] as Choice :
686                                                 null;
687                                         if (c != null)
688                                                 ProcessLax (c, ns);
689                                         else
690                                                 ProcessSequence (ct, s, ns,
691                                                         ref position,
692                                                         ref consumed,
693                                                         isNew);
694                                         source.MoveToContent ();
695                                         break;
696                                 case XmlNodeType.Text:
697                                 case XmlNodeType.CDATA:
698                                 case XmlNodeType.SignificantWhitespace:
699                                         MarkAsMixed (ct);
700                                         source.ReadString ();
701                                         source.MoveToContent ();
702                                         break;
703                                 case XmlNodeType.EndElement:
704                                         return; // finished
705                                 case XmlNodeType.None:
706                                         throw new NotImplementedException ("Internal Error: Should not happen.");
707                                 }
708                         } while (true);
709                 }
710
711                 private void InferTextContent (Element el, bool isNew)
712                 {
713                         string value = source.ReadString ();
714                         if (el.SchemaType == null) {
715                                 if (el.SchemaTypeName == QName.Empty) {
716                                         // no type information -> infer type
717                                         if (isNew)
718                                                 el.SchemaTypeName =
719                                                         InferSimpleType (
720                                                         value);
721                                         else
722                                                 el.SchemaTypeName =
723                                                         QNameString;
724                                         return;
725                                 }
726                                 switch (el.SchemaTypeName.Namespace) {
727                                 case XmlSchema.Namespace:
728                                 case XdtNamespace:
729                                         // existing primitive type
730                                         el.SchemaTypeName = InferMergedType (
731                                                 value, el.SchemaTypeName);
732                                         break;
733                                 default:
734                                         ComplexType ct = schemas.GlobalTypes [
735                                                 el.SchemaTypeName]
736                                                 as ComplexType;
737                                         // If it is complex, then just set
738                                         // mixed='true' (type cannot be set.)
739                                         // If it is simple, then we cannot
740                                         // make sure that string value is
741                                         // valid. So just set as xs:string.
742                                         if (ct != null)
743                                                 MarkAsMixed (ct);
744                                         else
745                                                 el.SchemaTypeName = QNameString;
746                                         break;
747                                 }
748                                 return;
749                         }
750                         // simpleType
751                         SimpleType st = el.SchemaType as SimpleType;
752                         if (st != null) {
753                                 // If simple, then (described above)
754                                 el.SchemaType = null;
755                                 el.SchemaTypeName = QNameString;
756                                 return;
757                         }
758
759                         // complexType
760                         ComplexType ect = el.SchemaType as ComplexType;
761
762                         SimpleModel sm = ect.ContentModel as SimpleModel;
763                         if (sm == null) {
764                                 // - ComplexContent
765                                 MarkAsMixed (ect);
766                                 return;
767                         }
768
769                         // - SimpleContent
770                         SimpleExt se = sm.Content as SimpleExt;
771                         if (se != null)
772                                 se.BaseTypeName = InferMergedType (value,
773                                         se.BaseTypeName);
774                         SimpleRst sr = sm.Content as SimpleRst;
775                         if (sr != null) {
776                                 sr.BaseTypeName = InferMergedType (value,
777                                         sr.BaseTypeName);
778                                 sr.BaseType = null;
779                         }
780                 }
781
782                 private void MarkAsMixed (ComplexType ct)
783                 {
784                         ComplexModel cm = ct.ContentModel as ComplexModel;
785                         if (cm != null)
786                                 cm.IsMixed = true;
787                         else
788                                 ct.IsMixed = true;
789                 }
790
791                 #endregion
792
793                 #region Particles
794
795                 private void ProcessLax (Choice c, string ns)
796                 {
797                         foreach (Particle p in c.Items) {
798                                 Element el = p as Element;
799                                 if (el == null)
800                                         throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here."));
801                                 if (ElementMatches (el, ns)) {
802                                         InferElement (el, ns, false);
803                                         return;
804                                 }
805                         }
806                         // append a new element particle to lax term.
807                         Element nel = new Element ();
808                         if (source.NamespaceURI == ns)
809                                 nel.Name = source.LocalName;
810                         else {
811                                 nel.RefName = new QName (source.LocalName,
812                                         source.NamespaceURI);
813                                 AddImport (ns, source.NamespaceURI);
814                         }
815                         InferElement (nel, source.NamespaceURI, true);
816                         c.Items.Add (nel);
817                 }
818
819                 private bool ElementMatches (Element el, string ns)
820                 {
821                         bool matches = false;
822                         if (el.RefName != QName.Empty) {
823                                 if (el.RefName.Name == source.LocalName &&
824                                         el.RefName.Namespace ==
825                                         source.NamespaceURI)
826                                         matches = true;
827                         }
828                         else if (el.Name == source.LocalName &&
829                                 ns == source.NamespaceURI)
830                                         matches = true;
831                         return matches;
832                 }
833
834                 private void ProcessSequence (ComplexType ct, Sequence s,
835                         string ns, ref int position, ref bool consumed,
836                         bool isNew)
837                 {
838                         for (int i = 0; i < position; i++) {
839                                 Element iel = s.Items [i] as Element;
840                                 if (ElementMatches (iel, ns)) {
841                                         // Sequence element type violation
842                                         // might happen (might not, but we
843                                         // cannot backtrack here). So switch
844                                         // to sequence of choice* here.
845                                         ProcessLax (ToSequenceOfChoice (s), ns);
846                                         return;
847                                 }
848                         }
849
850                         if (s.Items.Count <= position) {
851                                 QName name = new QName (source.LocalName,
852                                         source.NamespaceURI);
853                                 Element nel = CreateElement (name);
854                                 if (laxOccurrence)
855                                         nel.MinOccurs = 0;
856                                 InferElement (nel, ns, true);
857                                 if (ns == name.Namespace)
858                                         s.Items.Add (nel);
859                                 else {
860                                         Element re = new Element ();
861                                         if (laxOccurrence)
862                                                 re.MinOccurs = 0;
863                                         re.RefName = name;
864                                         AddImport (ns, name.Namespace);
865                                         s.Items.Add (re);
866                                 }
867                                 consumed = true;
868                                 return;
869                         }
870                         Element el = s.Items [position] as Element;
871                         if (el == null)
872                                 throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));
873                         bool matches = ElementMatches (el, ns);
874                         if (matches) {
875                                 if (consumed)
876                                         el.MaxOccursString = "unbounded";
877                                 InferElement (el, source.NamespaceURI, false);
878                                 source.MoveToContent ();
879                                 switch (source.NodeType) {
880                                 case XmlNodeType.None:
881                                         if (source.NodeType ==
882                                                 XmlNodeType.Element)
883                                                 goto case XmlNodeType.Element;
884                                         else if (source.NodeType ==
885                                                 XmlNodeType.EndElement)
886                                                 goto case XmlNodeType.EndElement;
887                                         break;
888                                 case XmlNodeType.Element:
889                                         ProcessSequence (ct, s, ns, ref position,
890                                                 ref consumed, isNew);
891                                         break;
892                                 case XmlNodeType.Text:
893                                 case XmlNodeType.CDATA:
894                                 case XmlNodeType.SignificantWhitespace:
895                                         MarkAsMixed (ct);
896                                         source.ReadString ();
897                                         goto case XmlNodeType.None;
898                                 case XmlNodeType.Whitespace:
899                                         source.ReadString ();
900                                         goto case XmlNodeType.None;
901                                 case XmlNodeType.EndElement:
902                                         return;
903                                 default:
904                                         source.Read ();
905                                         break;
906                                 }
907                         }
908                         else {
909                                 if (consumed) {
910                                         position++;
911                                         consumed = false;
912                                         ProcessSequence (ct, s, ns,
913                                                 ref position, ref consumed,
914                                                 isNew);
915                                 }
916                                 else
917                                         ProcessLax (ToSequenceOfChoice (s), ns);
918                         }
919                 }
920
921                 // Note that it does not return the changed sequence.
922                 private Choice ToSequenceOfChoice (Sequence s)
923                 {
924                         Choice c = new Choice ();
925                         if (laxOccurrence)
926                                 c.MinOccurs = 0;
927                         c.MaxOccursString = "unbounded";
928                         foreach (Particle p in s.Items)
929                                 c.Items.Add (p);
930                         s.Items.Clear ();
931                         s.Items.Add (c);
932                         return c;
933                 }
934
935                 // It makes complexType not to have Simple content model.
936                 private void ToComplexContentType (ComplexType type)
937                 {
938                         SimpleModel sm = type.ContentModel as SimpleModel;
939                         if (sm == null)
940                                 return;
941
942                         SOMList atts = GetAttributes (type);
943                         foreach (SOMObject o in atts)
944                                 type.Attributes.Add (o);
945                         // FIXME: need to copy AnyAttribute.
946                         // (though not considered right now)
947                         type.ContentModel = null;
948                         type.IsMixed = true;
949                 }
950
951                 private Sequence PopulateSequence (ComplexType ct)
952                 {
953                         Particle p = PopulateParticle (ct);
954                         Sequence s = p as Sequence;
955                         if (s != null)
956                                 return s;
957                         else
958                                 throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", p));
959                 }
960
961                 private Sequence CreateSequence ()
962                 {
963                         Sequence s = new Sequence ();
964                         if (laxOccurrence)
965                                 s.MinOccurs = 0;
966                         return s;
967                 }
968
969                 private Particle PopulateParticle (ComplexType ct)
970                 {
971                         if (ct.ContentModel == null) {
972                                 if (ct.Particle == null)
973                                         ct.Particle = CreateSequence ();
974                                 return ct.Particle;
975                         }
976                         ComplexModel cm = ct.ContentModel as ComplexModel;
977                         if (cm != null) {
978                                 ComplexExt  ce = cm.Content as ComplexExt;
979                                 if (ce != null) {
980                                         if (ce.Particle == null)
981                                                 ce.Particle = CreateSequence ();
982                                         return ce.Particle;
983                                 }
984                                 ComplexRst cr = cm.Content as ComplexRst;
985                                 if (cr != null) {
986                                         if (cr.Particle == null)
987                                                 cr.Particle = CreateSequence ();
988                                         return cr.Particle;
989                                 }
990                         }
991                         throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
992                 }
993
994                 #endregion
995
996                 #region String Value
997
998                 // primitive type inference.
999                 // When running lax type inference, it just returns xs:string.
1000                 private QName InferSimpleType (string value)
1001                 {
1002                         if (laxTypeInference)
1003                                 return QNameString;
1004
1005                         switch (value) {
1006                         // 0 and 1 are not infered as byte unlike MS.XSDInfer
1007 //                      case "0":
1008 //                      case "1":
1009                         case "true":
1010                         case "false":
1011                                 return QNameBoolean;
1012                         }
1013                         try {
1014                                 long dec = XmlConvert.ToInt64 (value);
1015                                 if (byte.MinValue <= dec && dec <= byte.MaxValue)
1016                                         return QNameUByte;
1017                                 if (sbyte.MinValue <= dec && dec <= sbyte.MaxValue)
1018                                         return QNameByte;
1019                                 if (ushort.MinValue <= dec && dec <= ushort.MaxValue)
1020                                         return QNameUShort;
1021                                 if (short.MinValue <= dec && dec <= short.MaxValue)
1022                                         return QNameShort;
1023                                 if (uint.MinValue <= dec && dec <= uint.MaxValue)
1024                                         return QNameUInt;
1025                                 if (int.MinValue <= dec && dec <= int.MaxValue)
1026                                         return QNameInt;
1027                                 return QNameLong;
1028                         } catch (Exception) {
1029                         }
1030                         try {
1031                                 XmlConvert.ToUInt64 (value);
1032                                 return QNameULong;
1033                         } catch (Exception) {
1034                         }
1035                         try {
1036                                 XmlConvert.ToDecimal (value);
1037                                 return QNameDecimal;
1038                         } catch (Exception) {
1039                         }
1040                         try {
1041                                 double dbl = XmlConvert.ToDouble (value);
1042                                 if (float.MinValue <= dbl &&
1043                                         dbl <= float.MaxValue)
1044                                         return QNameFloat;
1045                                 else
1046                                         return QNameDouble;
1047                         } catch (Exception) {
1048                         }
1049                         try {
1050                                 // FIXME: also try DateTimeSerializationMode
1051                                 // and gYearMonth
1052                                 XmlConvert.ToDateTime (value);
1053                                 return QNameDateTime;
1054                         } catch (Exception) {
1055                         }
1056                         try {
1057                                 XmlConvert.ToTimeSpan (value);
1058                                 return QNameDuration;
1059                         } catch (Exception) {
1060                         }
1061
1062                         // xs:string
1063                         return QNameString;
1064                 }
1065
1066                 #endregion
1067
1068                 #region Utilities
1069
1070                 private Element GetGlobalElement (QName name)
1071                 {
1072                         Element el = newElements [name] as Element;
1073                         if (el == null)
1074                                 el = schemas.GlobalElements [name] as Element;
1075                         return el;
1076                 }
1077
1078                 private Attr GetGlobalAttribute (QName name)
1079                 {
1080                         Attr a = newElements [name] as Attr;
1081                         if (a == null)
1082                                 a = schemas.GlobalAttributes [name] as Attr;
1083                         return a;
1084                 }
1085
1086                 private Element CreateElement (QName name)
1087                 {
1088                         Element el = new Element ();
1089                         el.Name = name.Name;
1090                         return el;
1091                 }
1092
1093                 private Element CreateGlobalElement (QName name)
1094                 {
1095                         Element el = CreateElement (name);
1096                         XmlSchema schema = PopulateSchema (name.Namespace);
1097                         schema.Items.Add (el);
1098                         newElements.Add (name, el);
1099                         return el;
1100                 }
1101
1102                 private Attr CreateGlobalAttribute (QName name)
1103                 {
1104                         Attr attr = new Attr ();
1105                         XmlSchema schema = PopulateSchema (name.Namespace);
1106                         attr.Name = name.Name;
1107                         schema.Items.Add (attr);
1108                         newAttributes.Add (name, attr);
1109                         return attr;
1110                 }
1111
1112                 // Note that the return value never assures that all the
1113                 // components in the parameter ns must reside in it.
1114                 private XmlSchema PopulateSchema (string ns)
1115                 {
1116                         ICollection list = schemas.Schemas (ns);
1117                         if (list.Count > 0) {
1118                                 IEnumerator e = list.GetEnumerator ();
1119                                 e.MoveNext ();
1120                                 return (XmlSchema) e.Current;
1121                         }
1122                         XmlSchema s = new XmlSchema ();
1123                         if (ns != null && ns.Length > 0)
1124                                 s.TargetNamespace = ns;
1125                         s.ElementFormDefault = Form.Qualified;
1126                         s.AttributeFormDefault = Form.Unqualified;
1127                         schemas.Add (s);
1128                         return s;
1129                 }
1130
1131                 private XmlSchemaInferenceException Error (
1132                         XmlSchemaObject sourceObj,
1133                         string message)
1134                 {
1135                         // This override is mainly for schema component error.
1136                         return Error (sourceObj, false, message);
1137                 }
1138
1139                 private XmlSchemaInferenceException Error (
1140                         XmlSchemaObject sourceObj,
1141                         bool useReader,
1142                         string message)
1143                 {
1144                         string msg = String.Concat (
1145                                 message,
1146                                 sourceObj != null ?
1147                                         String.Format (". Related schema component is {0}",
1148                                                 sourceObj.SourceUri,
1149                                                 sourceObj.LineNumber,
1150                                                 sourceObj.LinePosition) :
1151                                         String.Empty,
1152                                 useReader ?
1153                                         String.Format (". {0}", source.BaseURI) :
1154                                         String.Empty);
1155
1156                         IXmlLineInfo li = source as IXmlLineInfo;
1157                         if (useReader && li != null)
1158                                 return new XmlSchemaInferenceException (
1159                                         msg, null, li.LineNumber,
1160                                         li.LinePosition);
1161                         else
1162                                 return new XmlSchemaInferenceException (msg);
1163                 }
1164
1165                 #endregion
1166         }
1167 }
1168
1169 #endif