2005-05-05 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mcs / class / System.XML / Mono.Xml.XPath / DTMXPathDocumentBuilder.cs
1 //
2 // Mono.Xml.XPath.DTMXPathDocumentBuilder
3 //
4 // Author:
5 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
6 //
7 // (C) 2003 Atsushi Enomoto
8 //
9 //#define DTM_CLASS
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Xml;
35 using System.Xml.Schema;
36 using System.Xml.XPath;
37
38 namespace Mono.Xml.XPath
39 {
40
41 #if OUTSIDE_SYSTEM_XML
42         public
43 #else
44         internal
45 #endif
46         class DTMXPathDocumentBuilder
47         {
// Convenience overload: reads the XML at url with XmlSpace.None and
// an initial table capacity of 200.
public DTMXPathDocumentBuilder (string url)
        : this (url, XmlSpace.None)
{
}

// Convenience overload: reads the XML at url with the given
// whitespace handling and an initial table capacity of 200.
public DTMXPathDocumentBuilder (string url, XmlSpace space)
        : this (url, space, 200)
{
}
57
// Opens an XmlTextReader on url, builds all tables from it, and
// closes that reader afterwards whether or not the build succeeded.
public DTMXPathDocumentBuilder (string url, XmlSpace space, int defaultCapacity)
{
        // If the reader cannot even be created there is nothing to
        // close, so construction happens outside the try block.
        XmlReader source = new XmlTextReader (url);
        try {
                Init (source, space, defaultCapacity);
        } finally {
                source.Close ();
        }
}
69
// Convenience overload: consumes reader with XmlSpace.None and an
// initial table capacity of 200.
public DTMXPathDocumentBuilder (XmlReader reader)
        : this (reader, XmlSpace.None)
{
}

// Convenience overload: consumes reader with the given whitespace
// handling and an initial table capacity of 200.
public DTMXPathDocumentBuilder (XmlReader reader, XmlSpace space)
        : this (reader, space, 200)
{
}

// Builds all tables from a caller-supplied reader. The reader is
// not closed here; it remains the caller's to dispose of.
public DTMXPathDocumentBuilder (XmlReader reader, XmlSpace space, int defaultCapacity)
{
        this.Init (reader, space, defaultCapacity);
}
84
// Shared constructor logic: remembers the reader and its optional
// capabilities, allocates the node, attribute and namespace tables,
// then compiles the entire input via Compile().
//
// reader          - source of XML nodes; must not be null.
// space           - XmlSpace.Preserve keeps plain Whitespace nodes,
//                   any other value drops them (see Read()).
// defaultCapacity - initial size of the node and attribute tables;
//                   values below 1 are treated as 1.
//
// Throws ArgumentNullException when reader is null.
private void Init (XmlReader reader, XmlSpace space, int defaultCapacity)
{
        if (reader == null)
                throw new ArgumentNullException ("reader");

        this.xmlReader = reader;
        // Optional capabilities: both stay null when the reader does
        // not provide them.
        this.validatingReader = reader as XmlValidatingReader;
        lineInfo = reader as IXmlLineInfo;
        this.xmlSpace = space;
        this.nameTable = reader.NameTable;

        // AddNode()/AddAttribute() grow their tables by multiplying the
        // capacity by 4. That never makes progress from 0, and a
        // negative size cannot be allocated at all, so enforce a
        // minimum of one slot.
        nodeCapacity = defaultCapacity > 0 ? defaultCapacity : 1;
        attributeCapacity = nodeCapacity;
        nsCapacity = 10;
        idTable = new Hashtable ();

        nodes = new DTMXPathLinkedNode [nodeCapacity];
        attributes = new DTMXPathAttributeNode [attributeCapacity];
        namespaces = new DTMXPathNamespaceNode [nsCapacity];

        Compile ();
}
103                 
// Input and settings captured by Init().
XmlReader xmlReader;
XmlValidatingReader validatingReader;   // null unless the reader is one
IXmlLineInfo lineInfo;                  // null unless the reader exposes it
XmlSpace xmlSpace;
XmlNameTable nameTable;

// Current allocation sizes of the three tables below.
int nodeCapacity;
int attributeCapacity;
int nsCapacity;

// Tables handed to DTMXPathDocument by CreateDocument().
DTMXPathLinkedNode [] nodes;            // linked nodes
DTMXPathAttributeNode [] attributes;    // attribute nodes
DTMXPathNamespaceNode [] namespaces;    // namespace nodes

// Maps an ID attribute value (string) to the owning node index (int).
Hashtable idTable;

// Index of the most recently written slot in each table.
int nodeIndex;
int attributeIndex;
int nsIndex;

// Per-element scratch state for attribute processing; reset for
// every start tag.
bool hasAttributes;
bool hasLocalNs;
int attrIndexAtStart;
int nsIndexAtStart;

// Head of the chain of namespace nodes currently in scope.
int lastNsInScope;
// True when the reader already sits on a node Read() has not handled.
bool skipRead;

// Node indexes of the currently open elements, innermost last.
int [] parentStack = new int [10];
int parentStackIndex;
140
// Wraps the name table, the three node tables and the ID table in
// a new DTMXPathDocument. The arrays are passed as they are, not
// copied.
public DTMXPathDocument CreateDocument ()
{
        DTMXPathDocument document = new DTMXPathDocument (
                nameTable, nodes, attributes, namespaces, idTable);
        return document;
}
150
// Reads the whole input and fills the node, attribute and namespace
// tables, then trims each table to the number of entries used and
// releases the reader.
//
// Init() calls this once from every constructor. A later call finds
// the reader already released and returns without touching the
// finished tables. Previously a second call overwrote table slots
// and then failed on the null reader.
public void Compile ()
{
        if (xmlReader == null)
                return; // already compiled

        // index 0 is dummy. No node (including Root) is assigned to this index
        // So that we can easily compare index != 0 instead of index < 0.
        // (Difference between jnz or jbe in 80x86.)
        AddNode (0, 0, 0, XPathNodeType.All, "", false, "", "", "", "", "", 0, 0, 0);
        nodeIndex++;
        AddAttribute (0, null, null, null, null, 0, 0);
        AddNsNode (0, null, null, 0);
        nsIndex++;
        // Namespace slot 1 is the built-in "xml" prefix, attached to
        // the root node (index 1).
        AddNsNode (1, "xml", XmlNamespaces.XML, 0);

        // add root.
        AddNode (0, 0, 0, XPathNodeType.Root, xmlReader.BaseURI, false, "", "", "", "", "", 1, 0, 0);

        this.nodeIndex = 1;
        this.lastNsInScope = 1;
        parentStack [0] = nodeIndex;

        while (!xmlReader.EOF)
                Read ();

        // Shrink every table to exactly the slots that were written.
        SetNodeArrayLength (nodeIndex + 1);
        SetAttributeArrayLength (attributeIndex + 1);
        SetNsArrayLength (nsIndex + 1);

        xmlReader = null;       // It is no longer required.
}
178
// Processes one node from the reader and appends it to the node
// table.
//
// The reader is advanced first unless the previous call already
// left it on an unprocessed node (skipRead); when the reader has no
// more nodes the call does nothing. Elements, text, CDATA,
// significant whitespace, comments and processing instructions
// each produce a node; plain whitespace does so only under
// XmlSpace.Preserve. An end tag closes the current parent. Every
// other node type is ignored.
public void Read ()
{
        if (!skipRead)
                if (!xmlReader.Read ())
                        return;
        skipRead = false;
        int parent = parentStack [parentStackIndex];
        int prevSibling = nodeIndex;

        switch (xmlReader.NodeType) {
        case XmlNodeType.Element:
        case XmlNodeType.CDATA:
        case XmlNodeType.SignificantWhitespace:
        case XmlNodeType.Comment:
        case XmlNodeType.Text:
        case XmlNodeType.ProcessingInstruction:
                // The last node written is either the parent itself
                // (the new node is its first child) or lies at or
                // below the previous sibling; climb the Parent links
                // until a direct child of the parent is reached.
                if (parent == nodeIndex)
                        prevSibling = 0;
                else
                        while (nodes [prevSibling].Parent != parent)
                                prevSibling = nodes [prevSibling].Parent;

                nodeIndex++;

                if (prevSibling != 0)
                        nodes [prevSibling].NextSibling = nodeIndex;
                if (parentStack [parentStackIndex] == nodeIndex - 1)
                        nodes [parent].FirstChild = nodeIndex;
                break;
        case XmlNodeType.Whitespace:
                if (xmlSpace == XmlSpace.Preserve)
                        goto case XmlNodeType.Text;
                else
                        goto default;
        case XmlNodeType.EndElement:
                // Namespace declarations made on the element that just
                // ended must not remain in scope for the nodes that
                // follow it, so unwind the in-scope chain past them.
                // The stack bottom is the root node, which is never
                // closed by an end tag, hence the index check.
                if (parentStackIndex > 0)
                        while (namespaces [lastNsInScope].DeclaredElement == parent)
                                lastNsInScope = namespaces [lastNsInScope].NextNamespace;
                parentStackIndex--;
                return;
        default:
                // No operations. Doctype, EntityReference, etc.
                return;
        }

        string value = null;
        XPathNodeType nodeType = XPathNodeType.Text;

        switch (xmlReader.NodeType) {
        case XmlNodeType.Element:
                ProcessElement (parent, prevSibling);
                break;
        case XmlNodeType.SignificantWhitespace:
                nodeType = XPathNodeType.SignificantWhitespace;
                goto case XmlNodeType.Text;
        case XmlNodeType.Whitespace:
                nodeType = XPathNodeType.Whitespace;
                goto case XmlNodeType.Text;
        case XmlNodeType.CDATA:
        case XmlNodeType.Text:
                // Comments and processing instructions arrive here with
                // their value already set; character data arrives with
                // value == null and is completed below.
                AddNode (parent,
                        0,
                        prevSibling,
                        nodeType,
                        xmlReader.BaseURI,
                        xmlReader.IsEmptyElement,
                        xmlReader.LocalName,    // for PI
                        xmlReader.NamespaceURI, // for PI
                        xmlReader.Prefix,
                        value,
                        xmlReader.XmlLang,
                        nsIndex,
                        lineInfo != null ? lineInfo.LineNumber : 0,
                        lineInfo != null ? lineInfo.LinePosition : 0);
                // This code is tricky: consecutive character-data nodes
                // are merged into the single node added above, so after
                // the loop the reader is already positioned on the next
                // node. skipRead tells the following Read() call not to
                // advance again.
                if (value == null) {
                        bool loop = true;
                        value = String.Empty;
                        // Strongest kind seen so far in the run:
                        // Whitespace < SignificantWhitespace < Text.
                        XPathNodeType type = XPathNodeType.Whitespace;
                        do {
                                switch (xmlReader.NodeType) {
                                case XmlNodeType.Text:
                                case XmlNodeType.CDATA:
                                        type = XPathNodeType.Text;
                                        goto case XmlNodeType.Whitespace;
                                case XmlNodeType.SignificantWhitespace:
                                        if (type == XPathNodeType.Whitespace)
                                                type = XPathNodeType.SignificantWhitespace;
                                        goto case XmlNodeType.Whitespace;
                                case XmlNodeType.Whitespace:
                                        // Plain whitespace is consumed but only
                                        // kept under XmlSpace.Preserve.
                                        if (xmlReader.NodeType != XmlNodeType.Whitespace || xmlSpace == XmlSpace.Preserve)
                                                value += xmlReader.Value;
                                        loop = xmlReader.Read ();
                                        skipRead = true;
                                        continue;
                                default:
                                        loop = false;
                                        break;
                                }
                        } while (loop);
                        nodes [nodeIndex].Value = value;
                        nodes [nodeIndex].NodeType = type;
                }
                break;
        case XmlNodeType.Comment:
                value = xmlReader.Value;
                nodeType = XPathNodeType.Comment;
                goto case XmlNodeType.Text;
        case XmlNodeType.ProcessingInstruction:
                value = xmlReader.Value;
                nodeType = XPathNodeType.ProcessingInstruction;
                goto case XmlNodeType.Text;
        }
}
292
// Writes the element the reader is positioned on, then walks its
// attributes: those in the xmlns namespace become namespace nodes,
// all others become attribute nodes. Finishes with
// CloseStartElement().
private void ProcessElement (int parent, int previousSibling)
{
        WriteStartElement (parent, previousSibling);

        bool onAttribute = xmlReader.MoveToFirstAttribute ();
        if (!onAttribute) {
                CloseStartElement ();
                return;
        }

        while (onAttribute) {
                string attrPrefix = xmlReader.Prefix;
                string attrNs = xmlReader.NamespaceURI;
                if (attrNs != XmlNamespaces.XMLNS) {
                        ProcessAttribute (attrPrefix, xmlReader.LocalName, attrNs, xmlReader.Value);
                } else {
                        // An unprefixed declaration is the default
                        // namespace and is stored under the empty name.
                        string declaredPrefix = "";
                        if (attrPrefix != null && attrPrefix != String.Empty)
                                declaredPrefix = xmlReader.LocalName;
                        ProcessNamespace (declaredPrefix, xmlReader.Value);
                }
                onAttribute = xmlReader.MoveToNextAttribute ();
        }

        // Return from the last attribute to the owning element.
        xmlReader.MoveToElement ();

        CloseStartElement ();
}
313
// Resets the per-element attribute/namespace bookkeeping and moves
// lastNsInScope past any namespace nodes that were declared on the
// previous sibling.
private void PrepareStartElement (int previousSibling)
{
        attrIndexAtStart = attributeIndex;
        nsIndexAtStart = nsIndex;
        hasAttributes = false;
        hasLocalNs = false;

        int scope = lastNsInScope;
        while (namespaces [scope].DeclaredElement == previousSibling)
                scope = namespaces [scope].NextNamespace;
        lastNsInScope = scope;
}
325
// Adds the Element node for the start tag the reader is positioned
// on. Its first attribute is left at 0 and its first namespace at
// the current in-scope chain head; CloseStartElement() corrects
// both once the attributes have been processed.
private void WriteStartElement (int parent, int previousSibling)
{
        PrepareStartElement (previousSibling);

        // Position is recorded as 0/0 when the reader has no line info.
        int line = 0;
        int column = 0;
        if (lineInfo != null) {
                line = lineInfo.LineNumber;
                column = lineInfo.LinePosition;
        }

        AddNode (parent,
                0,      // placeholder for firstAttribute
                previousSibling,
                XPathNodeType.Element,
                xmlReader.BaseURI,
                xmlReader.IsEmptyElement,
                xmlReader.LocalName,
                xmlReader.NamespaceURI,
                xmlReader.Prefix,
                "",     // an element carries no value of its own
                xmlReader.XmlLang,
                lastNsInScope,
                line,
                column);
}
346
// Completes the element written by WriteStartElement(): links its
// first attribute and first namespace node if any were added, and
// pushes it on the parent stack unless it is an empty element.
private void CloseStartElement ()
{
        bool addedAttributes = attributeIndex != attrIndexAtStart;
        if (addedAttributes)
                nodes [nodeIndex].FirstAttribute = attrIndexAtStart + 1;

        bool addedNamespaces = nsIndex != nsIndexAtStart;
        if (addedNamespaces) {
                nodes [nodeIndex].FirstNamespace = nsIndex;
                // Declarations on an empty element have no content to
                // apply to, so the in-scope chain is left alone.
                if (!xmlReader.IsEmptyElement)
                        lastNsInScope = nsIndex;
        }

        if (nodes [nodeIndex].IsEmptyElement)
                return;

        parentStackIndex++;
        if (parentStackIndex == parentStack.Length) {
                // Stack is full: double it, keeping the existing entries.
                int [] grown = new int [parentStackIndex * 2];
                Array.Copy (parentStack, grown, parentStackIndex);
                parentStack = grown;
        }
        parentStack [parentStackIndex] = nodeIndex;
}
367
// Appends a namespace node declared on the current element. The
// new node chains to the element's previous local declaration if
// there is one, otherwise to the namespace node the element was
// created with.
private void ProcessNamespace (string prefix, string ns)
{
        int next;
        if (hasLocalNs)
                next = nsIndex;
        else
                next = nodes [nodeIndex].FirstNamespace;

        nsIndex++;
        AddNsNode (nodeIndex, prefix, ns, next);

        hasLocalNs = true;
}
381
// Appends an attribute node owned by the current element and links
// it after the element's previous attribute, if any. When a
// validating reader reports the attribute's type as ID, the value
// is also registered in idTable against the current element.
//
// prefix may be null and is then stored as the empty string.
private void ProcessAttribute (string prefix, string localName, string ns, string value)
{
        attributeIndex ++;

        this.AddAttribute (nodeIndex,
                localName,
                ns, 
                prefix != null ? prefix : String.Empty, 
                value,
                lineInfo != null ? lineInfo.LineNumber : 0,
                lineInfo != null ? lineInfo.LinePosition : 0);
        if (hasAttributes)
                attributes [attributeIndex - 1].NextAttribute = attributeIndex;
        else
                hasAttributes = true;

        // Identity infoset
        if (validatingReader == null)
                return;

        // SchemaType is either a datatype itself or a schema type
        // that carries one.
        XmlSchemaDatatype dt = validatingReader.SchemaType as XmlSchemaDatatype;
        if (dt == null) {
                XmlSchemaType xsType = validatingReader.SchemaType as XmlSchemaType;
                if (xsType != null)
                        dt = xsType.Datatype;
        }
        if (dt == null || dt.TokenizedType != XmlTokenizedType.ID)
                return;

        // An invalid document can repeat an ID value (for instance
        // when validation errors are being ignored). Hashtable.Add
        // throws on an existing key, so keep the first element that
        // used the value and skip later ones.
        if (!idTable.ContainsKey (value))
                idTable.Add (value, nodeIndex);
}
410
// Replaces the node table with one of exactly size entries,
// carrying over as many leading entries as fit.
private void SetNodeArrayLength (int size)
{
        DTMXPathLinkedNode [] resized = new DTMXPathLinkedNode [size];
        int kept = nodes.Length;
        if (size < kept)
                kept = size;
        Array.Copy (nodes, 0, resized, 0, kept);
        nodes = resized;
}
417
// Replaces the attribute table with one of exactly size entries,
// carrying over as many leading entries as fit.
private void SetAttributeArrayLength (int size)
{
        DTMXPathAttributeNode [] resized = new DTMXPathAttributeNode [size];
        int kept = attributes.Length;
        if (size < kept)
                kept = size;
        Array.Copy (attributes, 0, resized, 0, kept);
        attributes = resized;
}
425
// Replaces the namespace table with one of exactly size entries,
// carrying over as many leading entries as fit.
private void SetNsArrayLength (int size)
{
        DTMXPathNamespaceNode [] resized = new DTMXPathNamespaceNode [size];
        int kept = namespaces.Length;
        if (size < kept)
                kept = size;
        Array.Copy (namespaces, 0, resized, 0, kept);
        namespaces = resized;
}
433
// Writes a node into the slot at nodeIndex, enlarging the table
// first when that slot lies beyond its end. The caller chooses the
// slot by setting nodeIndex beforehand; this method does not
// advance it. FirstChild and NextSibling are not taken as arguments
// and are reset to 0 here.
public void AddNode (int parent, int firstAttribute, int previousSibling, XPathNodeType nodeType, string baseUri, bool isEmptyElement, string localName, string ns, string prefix, string value, string xmlLang, int namespaceNode, int lineNumber, int linePosition)
{
        if (nodeIndex >= nodes.Length) {
                nodeCapacity = nodeCapacity * 4;
                SetNodeArrayLength (nodeCapacity);
        }

        int slot = nodeIndex;
#if DTM_CLASS
        nodes [slot] = new DTMXPathLinkedNode ();
#endif
        // Tree links.
        nodes [slot].Parent = parent;
        nodes [slot].FirstChild = 0;            // filled in later
        nodes [slot].PreviousSibling = previousSibling;
        nodes [slot].NextSibling = 0;           // filled in later
        nodes [slot].FirstAttribute = firstAttribute;
        nodes [slot].FirstNamespace = namespaceNode;

        // Node content.
        nodes [slot].NodeType = nodeType;
        nodes [slot].LocalName = localName;
        nodes [slot].NamespaceURI = ns;
        nodes [slot].Prefix = prefix;
        nodes [slot].Value = value;
        nodes [slot].IsEmptyElement = isEmptyElement;
        nodes [slot].BaseURI = baseUri;
        nodes [slot].XmlLang = xmlLang;

        // Source position.
        nodes [slot].LineNumber = lineNumber;
        nodes [slot].LinePosition = linePosition;
}
462
// Writes an attribute into the slot at attributeIndex, enlarging
// the table first when that slot lies beyond its end. The caller
// chooses the slot by setting attributeIndex beforehand.
// NextAttribute is not set here.
public void AddAttribute (int ownerElement, string localName, string ns, string prefix, string value, int lineNumber, int linePosition)
{
        if (attributeIndex >= attributes.Length) {
                attributeCapacity = attributeCapacity * 4;
                SetAttributeArrayLength (attributeCapacity);
        }

        int slot = attributeIndex;
#if DTM_CLASS
        attributes [slot] = new DTMXPathAttributeNode ();
#endif
        attributes [slot].OwnerElement = ownerElement;
        attributes [slot].Prefix = prefix;
        attributes [slot].LocalName = localName;
        attributes [slot].NamespaceURI = ns;
        attributes [slot].Value = value;
        attributes [slot].LineNumber = lineNumber;
        attributes [slot].LinePosition = linePosition;
}
482
// Writes a namespace node into the slot at nsIndex, enlarging the
// table first when that slot lies beyond its end. The caller
// chooses the slot by setting nsIndex beforehand. nextNs is the
// next node in the chain: another declaration on the same element
// or a namespace node inherited from an ancestor.
public void AddNsNode (int declaredElement, string name, string ns, int nextNs)
{
        if (nsIndex >= namespaces.Length) {
                nsCapacity = nsCapacity * 4;
                SetNsArrayLength (nsCapacity);
        }

        int slot = nsIndex;
#if DTM_CLASS
        namespaces [slot] = new DTMXPathNamespaceNode ();
#endif
        namespaces [slot].Name = name;
        namespaces [slot].Namespace = ns;
        namespaces [slot].DeclaredElement = declaredElement;
        namespaces [slot].NextNamespace = nextNs;
}
499         }
500 }
501