2006-07-27 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System.XML / Mono.Xml.XPath / DTMXPathDocumentBuilder2.cs
1 //
2 // Mono.Xml.XPath.DTMXPathDocumentBuilder2
3 //
4 // Author:
5 //      Atsushi Enomoto  <atsushi@ximian.com>
6 //
7 // (C)2004 Novell Inc.
8 //
9
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 // 
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 // 
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 using System;
31 using System.Collections;
32 using System.IO;
33 using System.Xml;
34 using System.Xml.Schema;
35 using System.Xml.XPath;
36
37 namespace Mono.Xml.XPath
38 {
39
40 #if OUTSIDE_SYSTEM_XML
41         public
42 #else
43         internal
44 #endif
45         class DTMXPathDocumentBuilder2
46         {
47                 public DTMXPathDocumentBuilder2 (string url)
48                         : this (url, XmlSpace.None, 200)
49                 {
50                 }
51
52                 public DTMXPathDocumentBuilder2 (string url, XmlSpace space)
53                         : this (url, space, 200)
54                 {
55                 }
56
57                 public DTMXPathDocumentBuilder2 (string url, XmlSpace space, int defaultCapacity)
58                 {
59                         XmlReader r = null;
60                         try {
61                                 r = new XmlTextReader (url);
62                                 Init (r, space, defaultCapacity);
63                         } finally {
64                                 if (r != null)
65                                         r.Close ();
66                         }
67                 }
68
69                 public DTMXPathDocumentBuilder2 (XmlReader reader)
70                         : this (reader, XmlSpace.None, 200)
71                 {
72                 }
73
74                 public DTMXPathDocumentBuilder2 (XmlReader reader, XmlSpace space)
75                         : this (reader, space, 200)
76                 {
77                 }
78
79                 public DTMXPathDocumentBuilder2 (XmlReader reader, XmlSpace space, int defaultCapacity)
80                 {
81                         Init (reader, space, defaultCapacity);
82                 }
83
84                 private void Init (XmlReader reader, XmlSpace space, int defaultCapacity)
85                 {
86                         this.xmlReader = reader;
87                         this.validatingReader = reader as XmlValidatingReader;
88                         lineInfo = reader as IXmlLineInfo;
89                         this.xmlSpace = space;
90                         this.nameTable = reader.NameTable;
91                         nodeCapacity = defaultCapacity;
92                         attributeCapacity = nodeCapacity;
93                         nsCapacity = 10;
94                         idTable = new Hashtable ();
95
96                         nodes = new DTMXPathLinkedNode2 [nodeCapacity];
97                         attributes = new DTMXPathAttributeNode2 [attributeCapacity];
98                         namespaces = new DTMXPathNamespaceNode2 [nsCapacity];
99                         atomicStringPool = new string [20];
100                         nonAtomicStringPool = new string [20];
101
102                         Compile ();
103                 }
104                 
105                 XmlReader xmlReader;
106                 XmlValidatingReader validatingReader;
107                 XmlSpace xmlSpace;
108                 XmlNameTable nameTable;
109                 IXmlLineInfo lineInfo;
110                 int nodeCapacity;
111                 int attributeCapacity;
112                 int nsCapacity;
113
114                 // Linked Node
115                 DTMXPathLinkedNode2 [] nodes;
116
117                 // Attribute
118                 DTMXPathAttributeNode2 [] attributes;
119
120                 // NamespaceNode
121                 DTMXPathNamespaceNode2 [] namespaces;
122
123                 // String pool
124                 string [] atomicStringPool;
125                 int atomicIndex;
126                 string [] nonAtomicStringPool;
127                 int nonAtomicIndex;
128
129                 // idTable [string value] -> int nodeId
130                 Hashtable idTable;
131
132                 int nodeIndex;
133                 int attributeIndex;
134                 int nsIndex;
135
136                 // for attribute processing; should be reset per each element.
137                 bool hasAttributes;
138                 bool hasLocalNs;
139                 int attrIndexAtStart;
140                 int nsIndexAtStart;
141
142                 int lastNsInScope;
143                 bool skipRead = false;
144
145                 int [] parentStack = new int [10];
146                 int parentStackIndex = 0;
147
148                 public DTMXPathDocument2 CreateDocument ()
149                 {
150                         return new DTMXPathDocument2 (nameTable,
151                                 nodes,
152                                 attributes,
153                                 namespaces,
154                                 atomicStringPool,
155                                 nonAtomicStringPool,
156                                 idTable
157                         );
158                 }
159
160                 public void Compile ()
161                 {
162                         // string pool index 0 to 3 are fixed.
163                         atomicStringPool [0] = nonAtomicStringPool [0] = "";
164                         atomicStringPool [1] = nonAtomicStringPool [1] = null;
165                         atomicStringPool [2] = nonAtomicStringPool [2] = XmlNamespaces.XML;
166                         atomicStringPool [3] = nonAtomicStringPool [3] = XmlNamespaces.XMLNS;
167                         atomicIndex = nonAtomicIndex = 4;
168
169                         // index 0 is dummy. No node (including Root) is assigned to this index
170                         // So that we can easily compare index != 0 instead of index < 0.
171                         // (Difference between jnz or jbe in 80x86.)
172                         AddNode (0, 0, 0, XPathNodeType.All, 0, false, 0, 0, 0, 0, 0, 0, 0, 0);
173                         nodeIndex++;
174                         AddAttribute (0, 0, 0, 0, 0, 0, 0);
175                         AddNsNode (0, 0, 0, 0);
176                         nsIndex++;
177                         AddNsNode (1, AtomicIndex ("xml"), AtomicIndex (XmlNamespaces.XML), 0);
178
179                         // add root.
180                         AddNode (0, 0, 0, XPathNodeType.Root, AtomicIndex (xmlReader.BaseURI), false, 0, 0, 0, 0, 0, 1, 0, 0);
181
182                         this.nodeIndex = 1;
183                         this.lastNsInScope = 1;
184                         parentStack [0] = nodeIndex;
185
186                         while (!xmlReader.EOF)
187                                 Read ();
188                         SetNodeArrayLength (nodeIndex + 1);
189                         SetAttributeArrayLength (attributeIndex + 1);
190                         SetNsArrayLength (nsIndex + 1);
191
192                         string [] newArr = new string [atomicIndex];
193                         Array.Copy (atomicStringPool, newArr, atomicIndex);
194                         atomicStringPool = newArr;
195
196                         newArr = new string [nonAtomicIndex];
197                         Array.Copy (nonAtomicStringPool, newArr, nonAtomicIndex);
198                         nonAtomicStringPool = newArr;
199
200                         xmlReader = null;       // It is no more required.
201                 }
202
203                 public void Read ()
204                 {
205                         if (!skipRead)
206                                 if (!xmlReader.Read ())
207                                         return;
208                         skipRead = false;
209                         int parent = parentStack [parentStackIndex];
210                         int prevSibling = nodeIndex;
211
212                         switch (xmlReader.NodeType) {
213                         case XmlNodeType.Element:
214                         case XmlNodeType.CDATA:
215                         case XmlNodeType.SignificantWhitespace:
216                         case XmlNodeType.Comment:
217                         case XmlNodeType.Text:
218                         case XmlNodeType.ProcessingInstruction:
219                                 if (parent == nodeIndex)
220                                         prevSibling = 0;
221                                 else
222                                         while (nodes [prevSibling].Parent != parent)
223                                                 prevSibling = nodes [prevSibling].Parent;
224
225                                 nodeIndex++;
226
227                                 if (prevSibling != 0)
228                                         nodes [prevSibling].NextSibling = nodeIndex;
229                                 if (parentStack [parentStackIndex] == nodeIndex - 1)
230                                         nodes [parent].FirstChild = nodeIndex;
231                                 break;
232                         case XmlNodeType.Whitespace:
233                                 if (xmlSpace == XmlSpace.Preserve)
234                                         goto case XmlNodeType.Text;
235                                 else
236                                         goto default;
237                         case XmlNodeType.EndElement:
238                                 parentStackIndex--;
239                                 return;
240                         default:
241                                 // No operations. Doctype, EntityReference, 
242                                 return;
243                         }
244
245                         string value = null;
246                         XPathNodeType nodeType = XPathNodeType.Text;
247
248                         switch (xmlReader.NodeType) {
249                         case XmlNodeType.Element:
250                                 ProcessElement (parent, prevSibling);
251                                 break;
252                         case XmlNodeType.SignificantWhitespace:
253                                 nodeType = XPathNodeType.SignificantWhitespace;
254                                 goto case XmlNodeType.Text;
255                         case XmlNodeType.Whitespace:
256                                 nodeType = XPathNodeType.Whitespace;
257                                 goto case XmlNodeType.Text;
258                         case XmlNodeType.CDATA:
259                         case XmlNodeType.Text:
260                                 AddNode (parent,
261                                         0,
262                                         prevSibling,
263                                         nodeType,
264                                         AtomicIndex (xmlReader.BaseURI),
265                                         xmlReader.IsEmptyElement,
266                                         AtomicIndex (xmlReader.LocalName),      // for PI
267                                         AtomicIndex (xmlReader.NamespaceURI),   // for PI
268                                         AtomicIndex (xmlReader.Prefix),
269                                         value == null ? 0 : NonAtomicIndex (value),
270                                         AtomicIndex (xmlReader.XmlLang),
271                                         nsIndex,
272                                         lineInfo != null ? lineInfo.LineNumber : 0,
273                                         lineInfo != null ? lineInfo.LinePosition : 0);
274                                 // this code is tricky, but after sequential
275                                 // Read() invokation, xmlReader is moved to
276                                 // next node.
277                                 if (value == null) {
278                                         bool loop = true;
279                                         value = String.Empty;
280                                         XPathNodeType type = XPathNodeType.Whitespace;
281                                         do {
282                                                 switch (xmlReader.NodeType) {
283                                                 case XmlNodeType.Text:
284                                                 case XmlNodeType.CDATA:
285                                                         type = XPathNodeType.Text;
286                                                         goto case XmlNodeType.Whitespace;
287                                                 case XmlNodeType.SignificantWhitespace:
288                                                         if (type == XPathNodeType.Whitespace)
289                                                                 type = XPathNodeType.SignificantWhitespace;
290                                                         goto case XmlNodeType.Whitespace;
291                                                 case XmlNodeType.Whitespace:
292                                                         if (xmlReader.NodeType != XmlNodeType.Whitespace || xmlSpace == XmlSpace.Preserve)
293                                                                 value += xmlReader.Value;
294                                                         loop = xmlReader.Read ();
295                                                         skipRead = true;
296                                                         continue;
297                                                 default:
298                                                         loop = false;
299                                                         break;
300                                                 }
301                                         } while (loop);
302                                         nodes [nodeIndex].Value = NonAtomicIndex (value);
303                                         nodes [nodeIndex].NodeType = type;
304                                 }
305                                 break;
306                         case XmlNodeType.Comment:
307                                 value = xmlReader.Value;
308                                 nodeType = XPathNodeType.Comment;
309                                 goto case XmlNodeType.Text;
310                         case XmlNodeType.ProcessingInstruction:
311                                 value = xmlReader.Value;
312                                 nodeType = XPathNodeType.ProcessingInstruction;
313                                 goto case XmlNodeType.Text;
314                         }
315                 }
316
317                 private void ProcessElement (int parent, int previousSibling)
318                 {
319                         WriteStartElement (parent, previousSibling);
320
321                         // process namespaces and attributes.
322                         if (xmlReader.MoveToFirstAttribute ()) {
323                                 do {
324                                         string prefix = xmlReader.Prefix;
325                                         string ns = xmlReader.NamespaceURI;
326                                         if (ns == XmlNamespaces.XMLNS)
327                                                 ProcessNamespace ((prefix == null || prefix == String.Empty) ? "" : xmlReader.LocalName, xmlReader.Value);
328                                         else
329                                                 ProcessAttribute (prefix, xmlReader.LocalName, ns, xmlReader.Value);
330
331                                 } while (xmlReader.MoveToNextAttribute ());
332                                 xmlReader.MoveToElement ();
333                         }
334
335                         CloseStartElement ();
336                 }
337
338                 private void PrepareStartElement (int previousSibling)
339                 {
340                         hasAttributes = false;
341                         hasLocalNs = false;
342                         attrIndexAtStart = attributeIndex;
343                         nsIndexAtStart = nsIndex;
344
345                         while (namespaces [lastNsInScope].DeclaredElement == previousSibling) {
346                                 lastNsInScope = namespaces [lastNsInScope].NextNamespace;
347                         }
348                 }
349
350                 private void WriteStartElement (int parent, int previousSibling)
351                 {
352                         PrepareStartElement (previousSibling);
353
354                         AddNode (parent,
355                                 0, // dummy:firstAttribute
356                                 previousSibling,
357                                 XPathNodeType.Element,
358                                 AtomicIndex (xmlReader.BaseURI),
359                                 xmlReader.IsEmptyElement,
360                                 AtomicIndex (xmlReader.LocalName),
361                                 AtomicIndex (xmlReader.NamespaceURI),
362                                 AtomicIndex (xmlReader.Prefix),
363                                 0,      // Element has no internal value.
364                                 AtomicIndex (xmlReader.XmlLang),
365                                 lastNsInScope,
366                                 lineInfo != null ? lineInfo.LineNumber : 0,
367                                 lineInfo != null ? lineInfo.LinePosition : 0);
368
369                 }
370
371                 private void CloseStartElement ()
372                 {
373                         if (attrIndexAtStart != attributeIndex)
374                                 nodes [nodeIndex].FirstAttribute = attrIndexAtStart + 1;
375                         if (nsIndexAtStart != nsIndex) {
376                                 nodes [nodeIndex].FirstNamespace = nsIndex;
377                                 if (!xmlReader.IsEmptyElement)
378                                         lastNsInScope = nsIndex;
379                         }
380
381                         if (!nodes [nodeIndex].IsEmptyElement) {
382                                 parentStackIndex++;
383                                 if (parentStack.Length == parentStackIndex) {
384                                         int [] tmp = new int [parentStackIndex * 2];
385                                         Array.Copy (parentStack, tmp, parentStackIndex);
386                                         parentStack = tmp;
387                                 }
388                                 parentStack [parentStackIndex] = nodeIndex;
389                         }
390                 }
391
392                 private void ProcessNamespace (string prefix, string ns)
393                 {
394                         int nextTmp = hasLocalNs ?
395                                 nsIndex : nodes [nodeIndex].FirstNamespace;
396
397                         nsIndex++;
398
399                         this.AddNsNode (nodeIndex,
400                                 AtomicIndex (prefix),
401                                 AtomicIndex (ns),
402                                 nextTmp);
403                         hasLocalNs = true;
404                 }
405
406                 private void ProcessAttribute (string prefix, string localName, string ns, string value)
407                 {
408                         attributeIndex ++;
409
410                         this.AddAttribute (nodeIndex,
411                                 AtomicIndex (localName),
412                                 AtomicIndex (ns), 
413                                 prefix != null ? AtomicIndex (prefix) : 0, 
414                                 NonAtomicIndex (value),
415                                 lineInfo != null ? lineInfo.LineNumber : 0,
416                                 lineInfo != null ? lineInfo.LinePosition : 0);
417                         if (hasAttributes)
418                                 attributes [attributeIndex - 1].NextAttribute = attributeIndex;
419                         else
420                                 hasAttributes = true;
421
422                         // Identity infoset
423                         if (validatingReader != null) {
424                                 XmlSchemaDatatype dt = validatingReader.SchemaType as XmlSchemaDatatype;
425                                 if (dt == null) {
426                                         XmlSchemaType xsType = validatingReader.SchemaType as XmlSchemaType;
427                                         if (xsType != null)
428                                                 dt = xsType.Datatype;
429                                 }
430                                 if (dt != null && dt.TokenizedType == XmlTokenizedType.ID)
431                                         idTable.Add (value, nodeIndex);
432                         }
433                 }
434
435                 private int AtomicIndex (string s)
436                 {
437                         if (s == "")
438                                 return 0;
439                         if (s == null)
440                                 return 1;
441                         int i = 2;
442                         for (; i < atomicIndex; i++)
443                                 if (Object.ReferenceEquals (s, atomicStringPool [i]))
444                                         return i;
445
446                         if (atomicIndex == atomicStringPool.Length) {
447                                 string [] newArr = new string [atomicIndex * 2];
448                                 Array.Copy (atomicStringPool, newArr, atomicIndex);
449                                 atomicStringPool = newArr;
450                         }
451                         atomicStringPool [atomicIndex] = s;
452                         return atomicIndex++;
453                 }
454
455                 private int NonAtomicIndex (string s)
456                 {
457                         if (s == "")
458                                 return 0;
459                         if (s == null)
460                                 return 1;
461                         int i = 2;
462
463                         // Here we don't compare all the entries (sometimes it
464                         // goes extremely slow).
465                         int max = nonAtomicIndex < 100 ? nonAtomicIndex : 100;
466                         for (; i < max; i++)
467                                 if (s == nonAtomicStringPool [i])
468                                         return i;
469
470                         if (nonAtomicIndex == nonAtomicStringPool.Length) {
471                                 string [] newArr = new string [nonAtomicIndex * 2];
472                                 Array.Copy (nonAtomicStringPool, newArr, nonAtomicIndex);
473                                 nonAtomicStringPool = newArr;
474                         }
475                         nonAtomicStringPool [nonAtomicIndex] = s;
476                         return nonAtomicIndex++;
477                 }
478
479                 private void SetNodeArrayLength (int size)
480                 {
481                         DTMXPathLinkedNode2 [] newArr = new DTMXPathLinkedNode2 [size];
482                         Array.Copy (nodes, newArr, System.Math.Min (size, nodes.Length));
483                         nodes = newArr;
484                 }
485
486                 private void SetAttributeArrayLength (int size)
487                 {
488                         DTMXPathAttributeNode2 [] newArr = 
489                                 new DTMXPathAttributeNode2 [size];
490                         Array.Copy (attributes, newArr, System.Math.Min (size, attributes.Length));
491                         attributes = newArr;
492                 }
493
494                 private void SetNsArrayLength (int size)
495                 {
496                         DTMXPathNamespaceNode2 [] newArr =
497                                 new DTMXPathNamespaceNode2 [size];
498                         Array.Copy (namespaces, newArr, System.Math.Min (size, namespaces.Length));
499                         namespaces = newArr;
500                 }
501
502                 // Here followings are skipped: firstChild, nextSibling, 
503                 public void AddNode (int parent, int firstAttribute, int previousSibling, XPathNodeType nodeType, int baseUri, bool isEmptyElement, int localName, int ns, int prefix, int value, int xmlLang, int namespaceNode, int lineNumber, int linePosition)
504                 {
505                         if (nodes.Length < nodeIndex + 1) {
506                                 nodeCapacity *= 4;
507                                 SetNodeArrayLength (nodeCapacity);
508                         }
509
510 #if DTM_CLASS
511                         nodes [nodeIndex] = new DTMXPathLinkedNode2 ();
512 #endif
513                         nodes [nodeIndex].FirstChild = 0;               // dummy
514                         nodes [nodeIndex].Parent = parent;
515                         nodes [nodeIndex].FirstAttribute = firstAttribute;
516                         nodes [nodeIndex].PreviousSibling = previousSibling;
517                         nodes [nodeIndex].NextSibling = 0;      // dummy
518                         nodes [nodeIndex].NodeType = nodeType;
519                         nodes [nodeIndex].BaseURI = baseUri;
520                         nodes [nodeIndex].IsEmptyElement = isEmptyElement;
521                         nodes [nodeIndex].LocalName = localName;
522                         nodes [nodeIndex].NamespaceURI = ns;
523                         nodes [nodeIndex].Prefix = prefix;
524                         nodes [nodeIndex].Value = value;
525                         nodes [nodeIndex].XmlLang = xmlLang;
526                         nodes [nodeIndex].FirstNamespace = namespaceNode;
527                         nodes [nodeIndex].LineNumber = lineNumber;
528                         nodes [nodeIndex].LinePosition = linePosition;
529                 }
530
531                 // Followings are skipped: nextAttribute,
532                 public void AddAttribute (int ownerElement, int localName, int ns, int prefix, int value, int lineNumber, int linePosition)
533                 {
534                         if (attributes.Length < attributeIndex + 1) {
535                                 attributeCapacity *= 4;
536                                 SetAttributeArrayLength (attributeCapacity);
537                         }
538
539 #if DTM_CLASS
540                         attributes [attributeIndex] = new DTMXPathAttributeNode2 ();
541 #endif
542                         attributes [attributeIndex].OwnerElement = ownerElement;
543                         attributes [attributeIndex].LocalName = localName;
544                         attributes [attributeIndex].NamespaceURI = ns;
545                         attributes [attributeIndex].Prefix = prefix;
546                         attributes [attributeIndex].Value = value;
547                         attributes [attributeIndex].LineNumber = lineNumber;
548                         attributes [attributeIndex].LinePosition = linePosition;
549                 }
550
551                 // Followings are skipped: nextNsNode (may be next attribute in the same element, or ancestors' nsNode)
552                 public void AddNsNode (int declaredElement, int name, int ns, int nextNs)
553                 {
554                         if (namespaces.Length < nsIndex + 1) {
555                                 nsCapacity *= 4;
556                                 SetNsArrayLength (nsCapacity);
557                         }
558
559 #if DTM_CLASS
560                         namespaces [nsIndex] = new DTMXPathNamespaceNode2 ();
561 #endif
562                         namespaces [nsIndex].DeclaredElement = declaredElement;
563                         namespaces [nsIndex].Name = name;
564                         namespaces [nsIndex].Namespace = ns;
565                         namespaces [nsIndex].NextNamespace = nextNs;
566                 }
567         }
568 }
569