2009-05-25 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System.ServiceModel / Mono.Xml.XPath / DTMXPathDocumentBuilder2.cs
1 //
2 // Mono.Xml.XPath.DTMXPathDocumentBuilder2
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // (C)2004 Novell Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 #pragma warning disable 618
31
32 using System;
33 using System.Collections;
34 using System.IO;
35 using System.Xml;
36 using System.Xml.Schema;
37 using System.Xml.XPath;
38
namespace Mono.Xml.XPath
{
	// Reads an XmlReader to its end and fills the node, attribute, namespace
	// and string-pool arrays that CreateDocument () passes to
	// DTMXPathDocument2. Links between entries are integer indexes into
	// those arrays. Index 0 of each array is a dummy entry, so a link value
	// of 0 means "none".
	//
	// The whole build happens inside the constructor; afterwards only
	// CreateDocument () is meaningful.
#if OUTSIDE_SYSTEM_XML
	public
#else
	internal
#endif
	class DTMXPathDocumentBuilder2
	{
		// Initial size of the node and attribute arrays when the caller
		// does not specify one.
		const int DefaultCapacity = 200;

		// Builds from the document at the given URL, discarding
		// insignificant whitespace.
		public DTMXPathDocumentBuilder2 (string url)
			: this (url, XmlSpace.None, DefaultCapacity)
		{
		}

		// Builds from the document at the given URL. Whitespace nodes are
		// kept only when space is XmlSpace.Preserve.
		public DTMXPathDocumentBuilder2 (string url, XmlSpace space)
			: this (url, space, DefaultCapacity)
		{
		}

		// Builds from the document at the given URL. The XmlTextReader
		// opened here is always closed, even when the build fails.
		public DTMXPathDocumentBuilder2 (string url, XmlSpace space, int defaultCapacity)
		{
			XmlReader r = null;
			try {
				r = new XmlTextReader (url);
				Init (r, space, defaultCapacity);
			} finally {
				if (r != null)
					r.Close ();
			}
		}

		// Builds from an existing reader, discarding insignificant
		// whitespace. The reader is not closed by this class.
		public DTMXPathDocumentBuilder2 (XmlReader reader)
			: this (reader, XmlSpace.None, DefaultCapacity)
		{
		}

		// Builds from an existing reader. Whitespace nodes are kept only
		// when space is XmlSpace.Preserve.
		public DTMXPathDocumentBuilder2 (XmlReader reader, XmlSpace space)
			: this (reader, space, DefaultCapacity)
		{
		}

		// Builds from an existing reader.
		// defaultCapacity is the initial length of the node and attribute
		// arrays; it must not be negative (zero is allowed, the arrays
		// then grow on first use).
		// Throws ArgumentNullException when reader is null and
		// ArgumentOutOfRangeException when defaultCapacity is negative.
		public DTMXPathDocumentBuilder2 (XmlReader reader, XmlSpace space, int defaultCapacity)
		{
			Init (reader, space, defaultCapacity);
		}

		// Shared constructor body: validates arguments, allocates the
		// tables and runs the build.
		private void Init (XmlReader reader, XmlSpace space, int defaultCapacity)
		{
			if (reader == null)
				throw new ArgumentNullException ("reader");
			if (defaultCapacity < 0)
				throw new ArgumentOutOfRangeException ("defaultCapacity");

			this.xmlReader = reader;
			// Both casts yield null when the reader does not support the
			// feature; the users of these fields check for null.
			this.validatingReader = reader as XmlValidatingReader;
			lineInfo = reader as IXmlLineInfo;
			this.xmlSpace = space;
			this.nameTable = reader.NameTable;
			nodeCapacity = defaultCapacity;
			attributeCapacity = nodeCapacity;
			nsCapacity = 10;
			idTable = new Hashtable ();

			nodes = new DTMXPathLinkedNode2 [nodeCapacity];
			attributes = new DTMXPathAttributeNode2 [attributeCapacity];
			namespaces = new DTMXPathNamespaceNode2 [nsCapacity];
			atomicStringPool = new string [20];
			nonAtomicStringPool = new string [20];

			Compile ();
		}

		// Source reader; set to null once Compile () has finished.
		XmlReader xmlReader;
		// The same reader viewed as a validating reader, or null.
		XmlValidatingReader validatingReader;
		XmlSpace xmlSpace;
		XmlNameTable nameTable;
		// The same reader viewed as a line-info provider, or null.
		IXmlLineInfo lineInfo;
		int nodeCapacity;
		int attributeCapacity;
		int nsCapacity;

		// Linked Node
		DTMXPathLinkedNode2 [] nodes;

		// Attribute
		DTMXPathAttributeNode2 [] attributes;

		// NamespaceNode
		DTMXPathNamespaceNode2 [] namespaces;

		// String pool
		string [] atomicStringPool;
		int atomicIndex;
		string [] nonAtomicStringPool;
		int nonAtomicIndex;

		// idTable [string value] -> int nodeId
		Hashtable idTable;

		// Index of the most recently written entry in each table.
		int nodeIndex;
		int attributeIndex;
		int nsIndex;

		// for attribute processing; should be reset per each element.
		bool hasAttributes;
		bool hasLocalNs;
		int attrIndexAtStart;
		int nsIndexAtStart;

		// Head of the namespace chain that is in scope at the current
		// reader position.
		int lastNsInScope;
		// True when the reader already sits on the next unprocessed node,
		// so Read () must not advance it again.
		bool skipRead = false;

		// Node indexes of the currently open elements; entry 0 is the root.
		int [] parentStack = new int [10];
		int parentStackIndex = 0;

		// Wraps the tables built by the constructor in a DTMXPathDocument2.
		public DTMXPathDocument2 CreateDocument ()
		{
			return new DTMXPathDocument2 (nameTable,
				nodes,
				attributes,
				namespaces,
				atomicStringPool,
				nonAtomicStringPool,
				idTable
			);
		}

		// Runs the whole build: writes the dummy and root entries, reads
		// the reader to its end, then trims every table to the used size
		// and releases the reader.
		// The constructor already calls this once. Calling it again throws
		// InvalidOperationException, because the reader is gone and the
		// first statements would otherwise overwrite existing entries.
		public void Compile ()
		{
			if (xmlReader == null)
				throw new InvalidOperationException ("The document has already been built; the source XmlReader is no longer available.");

			// string pool index 0 to 3 are fixed.
			atomicStringPool [0] = nonAtomicStringPool [0] = "";
			atomicStringPool [1] = nonAtomicStringPool [1] = null;
			atomicStringPool [2] = nonAtomicStringPool [2] = XmlNamespaces.XML;
			atomicStringPool [3] = nonAtomicStringPool [3] = XmlNamespaces.XMLNS;
			atomicIndex = nonAtomicIndex = 4;

			// index 0 is dummy. No node (including Root) is assigned to this index
			// So that we can easily compare index != 0 instead of index < 0.
			// (Difference between jnz or jbe in 80x86.)
			AddNode (0, 0, 0, XPathNodeType.All, 0, false, 0, 0, 0, 0, 0, 0, 0, 0);
			nodeIndex++;
			AddAttribute (0, 0, 0, 0, 0, 0, 0);
			AddNsNode (0, 0, 0, 0);
			nsIndex++;
			// Namespace entry 1 is the implicit "xml" prefix, declared on
			// the root node (node 1).
			AddNsNode (1, AtomicIndex ("xml"), AtomicIndex (XmlNamespaces.XML), 0);

			// add root.
			AddNode (0, 0, 0, XPathNodeType.Root, AtomicIndex (xmlReader.BaseURI), false, 0, 0, 0, 0, 0, 1, 0, 0);

			this.nodeIndex = 1;
			this.lastNsInScope = 1;
			parentStack [0] = nodeIndex;

			// LAMESPEC: it should not read more than one top-level element,
			// but .NET does, so we keep reading until EOF. See bug #81932
			while (!xmlReader.EOF && parentStackIndex >= 0)
				Read ();

			// Trim every table to the number of entries actually used.
			SetNodeArrayLength (nodeIndex + 1);
			SetAttributeArrayLength (attributeIndex + 1);
			SetNsArrayLength (nsIndex + 1);

			string [] newArr = new string [atomicIndex];
			Array.Copy (atomicStringPool, newArr, atomicIndex);
			atomicStringPool = newArr;

			newArr = new string [nonAtomicIndex];
			Array.Copy (nonAtomicStringPool, newArr, nonAtomicIndex);
			nonAtomicStringPool = newArr;

			xmlReader = null;	// It is no more required.
		}

		// Consumes one node from the reader (or the node it already sits
		// on when skipRead is set) and records it in the tables. A run of
		// adjacent text, CDATA and whitespace nodes is merged into one
		// entry.
		// Throws InvalidOperationException once the build has finished.
		public void Read ()
		{
			if (xmlReader == null)
				throw new InvalidOperationException ("The document has already been built; the source XmlReader is no longer available.");

			if (!skipRead)
				if (!xmlReader.Read ())
					return;
			skipRead = false;
			int parent = parentStack [parentStackIndex];
			int prevSibling = nodeIndex;

			// First pass: reserve the next node index and link it to its
			// previous sibling or parent. Node types that produce no entry
			// return from here.
			switch (xmlReader.NodeType) {
			case XmlNodeType.Element:
			case XmlNodeType.CDATA:
			case XmlNodeType.SignificantWhitespace:
			case XmlNodeType.Comment:
			case XmlNodeType.Text:
			case XmlNodeType.ProcessingInstruction:
				if (parent == nodeIndex)
					// The last written node is the parent itself,
					// so the new node is its first child.
					prevSibling = 0;
				else
					// Climb from the last written node to the
					// ancestor that is a direct child of parent.
					while (nodes [prevSibling].Parent != parent)
						prevSibling = nodes [prevSibling].Parent;

				nodeIndex++;

				if (prevSibling != 0)
					nodes [prevSibling].NextSibling = nodeIndex;
				if (parentStack [parentStackIndex] == nodeIndex - 1)
					nodes [parent].FirstChild = nodeIndex;
				break;
			case XmlNodeType.Whitespace:
				if (xmlSpace == XmlSpace.Preserve)
					goto case XmlNodeType.Text;
				else
					goto default;
			case XmlNodeType.EndElement:
				// Leave the element: drop its namespace declarations
				// from scope and pop it from the parent stack.
				int endedNode = parentStack [parentStackIndex];
				AdjustLastNsInScope (endedNode);
				parentStackIndex--;
				return;
			default:
				// No operations. Doctype, EntityReference, 
				return;
			}

			string value = null;
			XPathNodeType nodeType = XPathNodeType.Text;

			// Second pass: write the entry at the reserved index.
			switch (xmlReader.NodeType) {
			case XmlNodeType.Element:
				ProcessElement (parent, prevSibling);
				break;
			case XmlNodeType.SignificantWhitespace:
				nodeType = XPathNodeType.SignificantWhitespace;
				goto case XmlNodeType.Text;
			case XmlNodeType.Whitespace:
				nodeType = XPathNodeType.Whitespace;
				goto case XmlNodeType.Text;
			case XmlNodeType.CDATA:
			case XmlNodeType.Text:
				// Comment and ProcessingInstruction jump here with
				// value already set; text-like nodes arrive with
				// value == null and get it filled in below.
				AddNode (parent,
					0,
					prevSibling,
					nodeType,
					AtomicIndex (xmlReader.BaseURI),
					xmlReader.IsEmptyElement,
					AtomicIndex (xmlReader.LocalName),	// for PI
					AtomicIndex (xmlReader.NamespaceURI),
					AtomicIndex (xmlReader.Prefix),
					value == null ? 0 : NonAtomicIndex (value),
					AtomicIndex (xmlReader.XmlLang),
					nsIndex,
					lineInfo != null ? lineInfo.LineNumber : 0,
					lineInfo != null ? lineInfo.LinePosition : 0);
				// this code is tricky, but after sequential
				// Read() invocation, xmlReader is moved to
				// next node.
				if (value == null) {
					bool loop = true;
					value = String.Empty;
					// The merged node type is the "strongest" one
					// seen: Text > SignificantWhitespace > Whitespace.
					XPathNodeType type = XPathNodeType.Whitespace;
					do {
						switch (xmlReader.NodeType) {
						case XmlNodeType.Text:
						case XmlNodeType.CDATA:
							type = XPathNodeType.Text;
							goto case XmlNodeType.Whitespace;
						case XmlNodeType.SignificantWhitespace:
							if (type == XPathNodeType.Whitespace)
								type = XPathNodeType.SignificantWhitespace;
							goto case XmlNodeType.Whitespace;
						case XmlNodeType.Whitespace:
							// Plain whitespace contributes to the
							// value only in Preserve mode.
							if (xmlReader.NodeType != XmlNodeType.Whitespace || xmlSpace == XmlSpace.Preserve)
								value += xmlReader.Value;
							loop = xmlReader.Read ();
							// The reader now sits on a node that
							// the next Read () call must process.
							skipRead = true;
							continue;
						default:
							loop = false;
							break;
						}
					} while (loop);
					nodes [nodeIndex].Value = NonAtomicIndex (value);
					nodes [nodeIndex].NodeType = type;
				}
				break;
			case XmlNodeType.Comment:
				value = xmlReader.Value;
				nodeType = XPathNodeType.Comment;
				goto case XmlNodeType.Text;
			case XmlNodeType.ProcessingInstruction:
				value = xmlReader.Value;
				nodeType = XPathNodeType.ProcessingInstruction;
				goto case XmlNodeType.Text;
			}
		}

		// Writes the element the reader sits on, then its namespace
		// declarations and attributes, and finally opens it as the current
		// parent unless it is an empty element.
		private void ProcessElement (int parent, int previousSibling)
		{
			WriteStartElement (parent, previousSibling);

			// process namespaces and attributes.
			if (xmlReader.MoveToFirstAttribute ()) {
				do {
					string prefix = xmlReader.Prefix;
					string ns = xmlReader.NamespaceURI;
					if (ns == XmlNamespaces.XMLNS)
						// An unprefixed declaration attribute is
						// recorded under the empty prefix, a
						// prefixed one under its local name.
						ProcessNamespace ((prefix == null || prefix == String.Empty) ? "" : xmlReader.LocalName, xmlReader.Value);
					else
						ProcessAttribute (prefix, xmlReader.LocalName, ns, xmlReader.Value);

				} while (xmlReader.MoveToNextAttribute ());
				xmlReader.MoveToElement ();
			}

			CloseStartElement ();
		}

		// Resets the per-element attribute/namespace bookkeeping and takes
		// the previous sibling's namespace declarations out of scope.
		private void PrepareStartElement (int previousSibling)
		{
			hasAttributes = false;
			hasLocalNs = false;
			attrIndexAtStart = attributeIndex;
			nsIndexAtStart = nsIndex;
			AdjustLastNsInScope (previousSibling);
		}

		// Moves lastNsInScope along the chain past every namespace entry
		// that was declared on the element with index target.
		private void AdjustLastNsInScope (int target)
		{
			while (namespaces [lastNsInScope].DeclaredElement == target) {
				lastNsInScope = namespaces [lastNsInScope].NextNamespace;
			}
		}

		// Writes the node entry for the element the reader sits on. Its
		// namespace link starts as the chain currently in scope.
		private void WriteStartElement (int parent, int previousSibling)
		{
			PrepareStartElement (previousSibling);

			AddNode (parent,
				0, // dummy:firstAttribute
				previousSibling,
				XPathNodeType.Element,
				AtomicIndex (xmlReader.BaseURI),
				xmlReader.IsEmptyElement,
				AtomicIndex (xmlReader.LocalName),
				AtomicIndex (xmlReader.NamespaceURI),
				AtomicIndex (xmlReader.Prefix),
				0,	// Element has no internal value.
				AtomicIndex (xmlReader.XmlLang),
				lastNsInScope,
				lineInfo != null ? lineInfo.LineNumber : 0,
				lineInfo != null ? lineInfo.LinePosition : 0);

		}

		// Finishes the start tag: links the first attribute and the last
		// local namespace declaration to the element and, unless the
		// element is empty, pushes it on the parent stack.
		private void CloseStartElement ()
		{
			if (attrIndexAtStart != attributeIndex)
				nodes [nodeIndex].FirstAttribute = attrIndexAtStart + 1;
			if (nsIndexAtStart != nsIndex) {
				nodes [nodeIndex].FirstNamespace = nsIndex;
				// Declarations of an empty element never come into
				// scope for later nodes.
				if (!xmlReader.IsEmptyElement)
					lastNsInScope = nsIndex;
			}

			if (!nodes [nodeIndex].IsEmptyElement) {
				parentStackIndex++;
				if (parentStack.Length == parentStackIndex) {
					int [] tmp = new int [parentStackIndex * 2];
					Array.Copy (parentStack, tmp, parentStackIndex);
					parentStack = tmp;
				}
				parentStack [parentStackIndex] = nodeIndex;
			}
		}

		// Appends a namespace entry for the current element. It is chained
		// to the previous declaration on the same element, or, for the
		// first one, to the chain the element started with.
		private void ProcessNamespace (string prefix, string ns)
		{
			int nextTmp = hasLocalNs ?
				nsIndex : nodes [nodeIndex].FirstNamespace;

			nsIndex++;

			this.AddNsNode (nodeIndex,
				AtomicIndex (prefix),
				AtomicIndex (ns),
				nextTmp);
			hasLocalNs = true;
		}

		// Appends an attribute entry for the current element and, when the
		// validating reader types it as an ID, registers its value in
		// idTable.
		private void ProcessAttribute (string prefix, string localName, string ns, string value)
		{
			attributeIndex ++;

			this.AddAttribute (nodeIndex,
				AtomicIndex (localName),
				AtomicIndex (ns), 
				prefix != null ? AtomicIndex (prefix) : 0, 
				NonAtomicIndex (value),
				lineInfo != null ? lineInfo.LineNumber : 0,
				lineInfo != null ? lineInfo.LinePosition : 0);
			if (hasAttributes)
				attributes [attributeIndex - 1].NextAttribute = attributeIndex;
			else
				hasAttributes = true;

			// Identity infoset
			if (validatingReader != null) {
				XmlSchemaDatatype dt = validatingReader.SchemaType as XmlSchemaDatatype;
				if (dt == null) {
					XmlSchemaType xsType = validatingReader.SchemaType as XmlSchemaType;
					if (xsType != null)
						dt = xsType.Datatype;
				}
				// An invalid document may repeat an ID value when the
				// reader reports validation errors to a handler instead
				// of throwing. Hashtable.Add would throw on the repeat,
				// so the first element in document order keeps the ID.
				if (dt != null && dt.TokenizedType == XmlTokenizedType.ID && !idTable.ContainsKey (value))
					idTable.Add (value, nodeIndex);
			}
		}

		// Returns the pool index of s, adding it when it is not there yet.
		// Entries are matched by reference, not by content, so a string
		// is found again only when the caller passes the same instance.
		private int AtomicIndex (string s)
		{
			if (s == "")
				return 0;
			if (s == null)
				return 1;
			int i = 2;
			for (; i < atomicIndex; i++)
				if (Object.ReferenceEquals (s, atomicStringPool [i]))
					return i;

			if (atomicIndex == atomicStringPool.Length) {
				string [] newArr = new string [atomicIndex * 2];
				Array.Copy (atomicStringPool, newArr, atomicIndex);
				atomicStringPool = newArr;
			}
			atomicStringPool [atomicIndex] = s;
			return atomicIndex++;
		}

		// Returns a pool index for the value string s. Only the first 100
		// entries are searched for an equal string; beyond that the string
		// is simply appended, so the pool may contain duplicates.
		private int NonAtomicIndex (string s)
		{
			if (s == "")
				return 0;
			if (s == null)
				return 1;
			int i = 2;

			// Here we don't compare all the entries (sometimes it
			// goes extremely slow).
			int max = nonAtomicIndex < 100 ? nonAtomicIndex : 100;
			for (; i < max; i++)
				if (s == nonAtomicStringPool [i])
					return i;

			if (nonAtomicIndex == nonAtomicStringPool.Length) {
				string [] newArr = new string [nonAtomicIndex * 2];
				Array.Copy (nonAtomicStringPool, newArr, nonAtomicIndex);
				nonAtomicStringPool = newArr;
			}
			nonAtomicStringPool [nonAtomicIndex] = s;
			return nonAtomicIndex++;
		}

		// Replaces the node array with one of exactly size entries,
		// keeping as many existing entries as fit.
		private void SetNodeArrayLength (int size)
		{
			DTMXPathLinkedNode2 [] newArr = new DTMXPathLinkedNode2 [size];
			Array.Copy (nodes, newArr, System.Math.Min (size, nodes.Length));
			nodes = newArr;
		}

		// Replaces the attribute array with one of exactly size entries,
		// keeping as many existing entries as fit.
		private void SetAttributeArrayLength (int size)
		{
			DTMXPathAttributeNode2 [] newArr = 
				new DTMXPathAttributeNode2 [size];
			Array.Copy (attributes, newArr, System.Math.Min (size, attributes.Length));
			attributes = newArr;
		}

		// Replaces the namespace array with one of exactly size entries,
		// keeping as many existing entries as fit.
		private void SetNsArrayLength (int size)
		{
			DTMXPathNamespaceNode2 [] newArr =
				new DTMXPathNamespaceNode2 [size];
			Array.Copy (namespaces, newArr, System.Math.Min (size, namespaces.Length));
			namespaces = newArr;
		}

		// Writes a node entry at the current nodeIndex (the caller advances
		// the index), growing the array when needed.
		// Here followings are skipped: firstChild, nextSibling, 
		public void AddNode (int parent, int firstAttribute, int previousSibling, XPathNodeType nodeType, int baseUri, bool isEmptyElement, int localName, int ns, int prefix, int value, int xmlLang, int namespaceNode, int lineNumber, int linePosition)
		{
			if (nodes.Length < nodeIndex + 1) {
				// Quadruple, but always make room for the slot being
				// written: a capacity of 0 would otherwise stay 0.
				nodeCapacity = System.Math.Max (nodeCapacity * 4, nodeIndex + 1);
				SetNodeArrayLength (nodeCapacity);
			}

#if DTM_CLASS
			nodes [nodeIndex] = new DTMXPathLinkedNode2 ();
#endif
			nodes [nodeIndex].FirstChild = 0;		// dummy
			nodes [nodeIndex].Parent = parent;
			nodes [nodeIndex].FirstAttribute = firstAttribute;
			nodes [nodeIndex].PreviousSibling = previousSibling;
			nodes [nodeIndex].NextSibling = 0;	// dummy
			nodes [nodeIndex].NodeType = nodeType;
			nodes [nodeIndex].BaseURI = baseUri;
			nodes [nodeIndex].IsEmptyElement = isEmptyElement;
			nodes [nodeIndex].LocalName = localName;
			nodes [nodeIndex].NamespaceURI = ns;
			nodes [nodeIndex].Prefix = prefix;
			nodes [nodeIndex].Value = value;
			nodes [nodeIndex].XmlLang = xmlLang;
			nodes [nodeIndex].FirstNamespace = namespaceNode;
			nodes [nodeIndex].LineNumber = lineNumber;
			nodes [nodeIndex].LinePosition = linePosition;
		}

		// Writes an attribute entry at the current attributeIndex (the
		// caller advances the index), growing the array when needed.
		// Followings are skipped: nextAttribute,
		public void AddAttribute (int ownerElement, int localName, int ns, int prefix, int value, int lineNumber, int linePosition)
		{
			if (attributes.Length < attributeIndex + 1) {
				// See AddNode: growth must cover the slot being written.
				attributeCapacity = System.Math.Max (attributeCapacity * 4, attributeIndex + 1);
				SetAttributeArrayLength (attributeCapacity);
			}

#if DTM_CLASS
			attributes [attributeIndex] = new DTMXPathAttributeNode2 ();
#endif
			attributes [attributeIndex].OwnerElement = ownerElement;
			attributes [attributeIndex].LocalName = localName;
			attributes [attributeIndex].NamespaceURI = ns;
			attributes [attributeIndex].Prefix = prefix;
			attributes [attributeIndex].Value = value;
			attributes [attributeIndex].LineNumber = lineNumber;
			attributes [attributeIndex].LinePosition = linePosition;
		}

		// Writes a namespace entry at the current nsIndex (the caller
		// advances the index), growing the array when needed.
		// Followings are skipped: nextNsNode (may be next attribute in the same element, or ancestors' nsNode)
		public void AddNsNode (int declaredElement, int name, int ns, int nextNs)
		{
			if (namespaces.Length < nsIndex + 1) {
				// See AddNode: growth must cover the slot being written.
				nsCapacity = System.Math.Max (nsCapacity * 4, nsIndex + 1);
				SetNsArrayLength (nsCapacity);
			}

#if DTM_CLASS
			namespaces [nsIndex] = new DTMXPathNamespaceNode2 ();
#endif
			namespaces [nsIndex].DeclaredElement = declaredElement;
			namespaces [nsIndex].Name = name;
			namespaces [nsIndex].Namespace = ns;
			namespaces [nsIndex].NextNamespace = nextNs;
		}
	}
}
577