2004-12-03 Marek Safar <marek.safar@seznam.cz>
[mono.git] / mcs / class / System.XML / Mono.Xml.XPath / DTMXPathDocumentBuilder.cs
1 //
2 // Mono.Xml.XPath.DTMXPathDocumentBuilder
3 //
4 // Author:
5 //      Atsushi Enomoto (ginga@kit.hi-ho.ne.jp)
6 //
7 // (C) 2003 Atsushi Enomoto
8 //
9 //#define DTM_CLASS
10
11 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 // 
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 // 
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 //
31 using System;
32 using System.Collections;
33 using System.IO;
34 using System.Xml;
35 using System.Xml.Schema;
36 using System.Xml.XPath;
37
38 namespace Mono.Xml.XPath
39 {
40
#if OUTSIDE_SYSTEM_XML
        public
#else
        internal
#endif
        class DTMXPathDocumentBuilder
        {
                //
                // Reads an XmlReader to its end and stores what it sees in three
                // flat tables (linked nodes, attributes and namespace nodes) plus
                // an ID lookup table. CreateDocument() hands those tables to a
                // DTMXPathDocument.
                //
                // The constructors run the whole build (Init() calls Compile()),
                // so a constructed instance is ready for CreateDocument().
                //

                // Initial size of the node and attribute tables when the caller
                // does not specify one.
                const int DefaultNodeCapacity = 200;

                // Builds from the document at the given URL without preserving
                // insignificant white space.
                public DTMXPathDocumentBuilder (string url)
                        : this (url, XmlSpace.None, DefaultNodeCapacity)
                {
                }

                // Builds from the document at the given URL. White space nodes are
                // kept only when space is XmlSpace.Preserve.
                public DTMXPathDocumentBuilder (string url, XmlSpace space)
                        : this (url, space, DefaultNodeCapacity)
                {
                }

                // Builds from the document at the given URL. The XmlTextReader
                // opened here is owned by this constructor and is always closed,
                // even when the build throws.
                public DTMXPathDocumentBuilder (string url, XmlSpace space, int defaultCapacity)
                {
                        XmlReader r = null;
                        try {
                                r = new XmlTextReader (url);
                                Init (r, space, defaultCapacity);
                        } finally {
                                if (r != null)
                                        r.Close ();
                        }
                }

                // Builds from an existing reader. The reader is consumed but not
                // closed by this class.
                public DTMXPathDocumentBuilder (XmlReader reader)
                        : this (reader, XmlSpace.None, DefaultNodeCapacity)
                {
                }

                // Builds from an existing reader. White space nodes are kept only
                // when space is XmlSpace.Preserve.
                public DTMXPathDocumentBuilder (XmlReader reader, XmlSpace space)
                        : this (reader, space, DefaultNodeCapacity)
                {
                }

                // Builds from an existing reader, using defaultCapacity as the
                // initial size of the node and attribute tables.
                public DTMXPathDocumentBuilder (XmlReader reader, XmlSpace space, int defaultCapacity)
                {
                        Init (reader, space, defaultCapacity);
                }

                // Captures the reader and its optional facets (validation, line
                // info), allocates the tables and runs the build.
                private void Init (XmlReader reader, XmlSpace space, int defaultCapacity)
                {
                        // Fail with a descriptive exception instead of a
                        // NullReferenceException on reader.NameTable below.
                        if (reader == null)
                                throw new ArgumentNullException ("reader");

                        this.xmlReader = reader;
                        this.validatingReader = reader as XmlValidatingReader;
                        lineInfo = reader as IXmlLineInfo;
                        this.xmlSpace = space;
                        this.nameTable = reader.NameTable;
                        nodeCapacity = defaultCapacity;
                        attributeCapacity = nodeCapacity;
                        idTable = new Hashtable ();

                        nodes = new DTMXPathLinkedNode [nodeCapacity];
                        attributes = new DTMXPathAttributeNode [attributeCapacity];
                        // Starts empty; AddNsNode() grows it on first use.
                        namespaces = new DTMXPathNamespaceNode [0];

                        Compile ();
                }

                // Source reader; set to null once Compile() has finished.
                XmlReader xmlReader;
                // Non-null only when the source reader is an XmlValidatingReader;
                // used to detect ID-typed attributes.
                XmlValidatingReader validatingReader;
                XmlSpace xmlSpace;
                XmlNameTable nameTable;
                // Non-null only when the source reader implements IXmlLineInfo.
                IXmlLineInfo lineInfo;
                int nodeCapacity = DefaultNodeCapacity;
                int attributeCapacity = DefaultNodeCapacity;
                int nsCapacity = 10;

                // Linked Node
                DTMXPathLinkedNode [] nodes;

                // Attribute
                DTMXPathAttributeNode [] attributes;

                // NamespaceNode
                DTMXPathNamespaceNode [] namespaces;

                // idTable [string value] -> int nodeId
                Hashtable idTable;

                // Index of the most recently written entry in each table.
                int nodeIndex;
                int attributeIndex;
                int nsIndex;
                // Index of the element whose first child is expected next, or -1
                // when the next node is not a first child.
                int parentForFirstChild;

                // for attribute processing; should be reset per each element.
                int firstAttributeIndex;
                int lastNsIndexInCurrent;
                int attrIndexAtStart;
                int nsIndexAtStart;

                int prevSibling;
                // Head of the namespace chain that is in scope at the current
                // position.
                int lastNsInScope;
                // True when the reader has already been advanced (by ReadString())
                // and Read() must not advance it again.
                bool skipRead = false;

                // Wraps the tables built so far in a DTMXPathDocument.
                public DTMXPathDocument CreateDocument ()
                {
                        return new DTMXPathDocument (nameTable,
                                nodes,
                                attributes,
                                namespaces,
                                idTable
                        );
                }

                // Runs the whole build: writes the dummy and root entries, consumes
                // the reader until EOF and trims the tables to their used length.
                // The reader reference is dropped at the end, so this method (and
                // Read()) cannot be used again afterwards.
                public void Compile ()
                {
                        // index 0 is dummy. No node (including Root) is assigned to this index
                        // So that we can easily compare index != 0 instead of index < 0.
                        // (Difference between jnz or jbe in 80x86.)
                        AddNode (0, 0, 0, 0, XPathNodeType.All, "", false, "", "", "", "", "", 0, 0, 0);
                        nodeIndex++;
                        AddAttribute (0, null, null, null, null, 0, 0);
                        AddNsNode (0, null, null, 0);
                        nsIndex++;
                        // The implicit "xml" prefix, declared on the root (node 1).
                        AddNsNode (1, "xml", XmlNamespaces.XML, 0);

                        // add root. Its depth is -1 so that it sits above the
                        // reader's top-level nodes (depth 0).
                        AddNode (0, 0, 0, -1, XPathNodeType.Root, xmlReader.BaseURI, false, "", "", "", "", "", 1, 0, 0);

                        this.nodeIndex = 1;
                        this.lastNsInScope = 1;
                        this.parentForFirstChild = nodeIndex;

                        while (!xmlReader.EOF)
                                Read ();
                        SetNodeArrayLength (nodeIndex + 1);
                        SetAttributeArrayLength (attributeIndex + 1);
                        SetNsArrayLength (nsIndex + 1);

                        xmlReader = null;       // It is no more required.
                }

                // Consumes one reader node (or the node the reader was left on by a
                // previous ReadString() call) and, when it is a node kind the
                // document keeps, appends it to the node table and links it to its
                // parent and previous sibling.
                public void Read ()
                {
                        if (!skipRead)
                                if (!xmlReader.Read ())
                                        return;
                        skipRead = false;
                        int parent = nodeIndex;

                        if (nodes [nodeIndex].Depth >= xmlReader.Depth) {
                                // if not, then current node is parent.
                                while (xmlReader.Depth <= nodes [parent].Depth)
                                        parent = nodes [parent].Parent;
                        }

                        prevSibling = nodeIndex;
                        switch (xmlReader.NodeType) {
                        case XmlNodeType.Element:
                        case XmlNodeType.CDATA:
                        case XmlNodeType.SignificantWhitespace:
                        case XmlNodeType.Comment:
                        case XmlNodeType.Text:
                        case XmlNodeType.ProcessingInstruction:
                                if (parentForFirstChild >= 0)
                                        prevSibling = 0;
                                else
                                        // Climb from the last written node to the
                                        // one at the same depth as the new node.
                                        while (nodes [prevSibling].Depth != xmlReader.Depth)
                                                prevSibling = nodes [prevSibling].Parent;

                                nodeIndex++;

                                if (prevSibling != 0)
                                        nodes [prevSibling].NextSibling = nodeIndex;
                                if (parentForFirstChild >= 0)
                                        nodes [parent].FirstChild = nodeIndex;
                                break;
                        case XmlNodeType.Whitespace:
                                if (xmlSpace == XmlSpace.Preserve)
                                        goto case XmlNodeType.Text;
                                else
                                        goto default;
                        case XmlNodeType.EndElement:
                                parentForFirstChild = -1;
                                return;
                        default:
                                // No operations. Doctype, EntityReference, 
                                return;
                        }

                        parentForFirstChild = -1;       // Might be changed in ProcessElement().

                        string value = null;

                        // Map the reader's character-data node kinds onto the
                        // corresponding XPath node types. Comment and PI override
                        // this below.
                        XPathNodeType nodeType;
                        switch (xmlReader.NodeType) {
                        case XmlNodeType.Whitespace:
                                nodeType = XPathNodeType.Whitespace;
                                break;
                        case XmlNodeType.SignificantWhitespace:
                                nodeType = XPathNodeType.SignificantWhitespace;
                                break;
                        default:
                                nodeType = XPathNodeType.Text;
                                break;
                        }

                        switch (xmlReader.NodeType) {
                        case XmlNodeType.Element:
                                ProcessElement (parent, prevSibling);
                                break;
                        case XmlNodeType.CDATA:
                        case XmlNodeType.SignificantWhitespace:
                        case XmlNodeType.Text:
                        case XmlNodeType.Whitespace:
                                if (value == null)
                                        skipRead = true;
                                AddNode (parent,
                                        0,
                                        prevSibling,
                                        xmlReader.Depth,
                                        nodeType,
                                        xmlReader.BaseURI,
                                        xmlReader.IsEmptyElement,
                                        xmlReader.LocalName,    // for PI
                                        xmlReader.NamespaceURI, // for PI
                                        xmlReader.Prefix,
                                        value,
                                        xmlReader.XmlLang,
                                        nsIndex,
                                        lineInfo != null ? lineInfo.LineNumber : 0,
                                        lineInfo != null ? lineInfo.LinePosition : 0);
                                // this code is tricky, but after ReadString() invokation,
                                // xmlReader is moved to next node!!
                                if (value == null) {
                                        string text = xmlReader.ReadString ();
                                        nodes [nodeIndex].Value = text;
                                        // ReadString() concatenates adjacent text,
                                        // CDATA and white space nodes. If a node
                                        // that started as white space picked up
                                        // real character data, it is a text node.
                                        if (nodeType != XPathNodeType.Text && !IsWhitespaceOnly (text))
                                                nodes [nodeIndex].NodeType = XPathNodeType.Text;
                                }
                                break;
                        case XmlNodeType.Comment:
                                value = xmlReader.Value;
                                nodeType = XPathNodeType.Comment;
                                goto case XmlNodeType.Text;
                        case XmlNodeType.ProcessingInstruction:
                                value = xmlReader.Value;
                                nodeType = XPathNodeType.ProcessingInstruction;
                                goto case XmlNodeType.Text;
                        }
                }

                // Returns true when text is null, empty, or made up only of the XML
                // white space characters (space, tab, CR, LF).
                private static bool IsWhitespaceOnly (string text)
                {
                        if (text == null)
                                return true;
                        for (int i = 0; i < text.Length; i++) {
                                switch (text [i]) {
                                case ' ':
                                case '\t':
                                case '\r':
                                case '\n':
                                        continue;
                                default:
                                        return false;
                                }
                        }
                        return true;
                }

                // Writes the element the reader is positioned on, followed by its
                // namespace declarations and attributes. The reader is moved back
                // to the element before returning.
                private void ProcessElement (int parent, int previousSibling)
                {
                        WriteStartElement (parent, previousSibling);

                        // process namespaces and attributes.
                        if (xmlReader.MoveToFirstAttribute ()) {
                                do {
                                        string prefix = xmlReader.Prefix;
                                        string ns = xmlReader.NamespaceURI;
                                        if (ns == XmlNamespaces.XMLNS)
                                                // A declaration without prefix is the
                                                // default namespace and is stored
                                                // under the empty name.
                                                ProcessNamespace ((prefix == null || prefix == String.Empty) ? "" : xmlReader.LocalName, xmlReader.Value);
                                        else
                                                ProcessAttribute (prefix, xmlReader.LocalName, ns, xmlReader.Value);

                                } while (xmlReader.MoveToNextAttribute ());
                                xmlReader.MoveToElement ();
                        }

                        CloseStartElement ();
                }

                // Resets the per-element bookkeeping and drops from the in-scope
                // namespace chain every declaration that does not belong to an
                // ancestor of the element about to be written. Must be called
                // while the reader is positioned on that element.
                //
                // previousSibling is kept for signature compatibility; the depth
                // test below also covers declarations made on the previous sibling.
                private void PrepareStartElement (int previousSibling)
                {
                        firstAttributeIndex = 0;
                        lastNsIndexInCurrent = 0;
                        attrIndexAtStart = attributeIndex;
                        nsIndexAtStart = nsIndex;

                        // Ancestors of the new element all have a smaller depth, so
                        // any declaration made at this depth or deeper belongs to an
                        // element that is already closed (the previous sibling, one
                        // of its descendants, or an earlier empty element). The
                        // "xml" node is declared on the root (depth -1) and ends the
                        // loop; the check against 0 guards the dummy entry.
                        int depth = xmlReader.Depth;
                        while (lastNsInScope != 0 &&
                                nodes [namespaces [lastNsInScope].DeclaredElement].Depth >= depth) {
                                lastNsInScope = namespaces [lastNsInScope].NextNamespace;
                        }
                }

                // Appends the node entry for the element the reader is positioned
                // on. FirstAttribute and FirstNamespace are finalized later by
                // CloseStartElement().
                private void WriteStartElement (int parent, int previousSibling)
                {
                        PrepareStartElement (previousSibling);

                        AddNode (parent,
                                0, // dummy:firstAttribute
                                previousSibling,
                                xmlReader.Depth,
                                XPathNodeType.Element,
                                xmlReader.BaseURI,
                                xmlReader.IsEmptyElement,
                                xmlReader.LocalName,
                                xmlReader.NamespaceURI,
                                xmlReader.Prefix,
                                "",     // Element has no internal value.
                                xmlReader.XmlLang,
                                lastNsInScope,
                                lineInfo != null ? lineInfo.LineNumber : 0,
                                lineInfo != null ? lineInfo.LinePosition : 0);

                }

                // Finalizes the current element once its attributes have been
                // read: records its first attribute and namespace node (if any)
                // and, unless it is an empty element, marks it as the parent
                // awaiting its first child.
                private void CloseStartElement ()
                {
                        if (attrIndexAtStart != attributeIndex)
                                nodes [nodeIndex].FirstAttribute = attrIndexAtStart + 1;
                        if (nsIndexAtStart != nsIndex) {
                                nodes [nodeIndex].FirstNamespace = nsIndex;
                                lastNsInScope = nsIndex;
                        }

                        if (!nodes [nodeIndex].IsEmptyElement)
                                parentForFirstChild = nodeIndex;
                }

                // Appends a namespace node declared on the current element. The
                // new node links to the previous declaration on the same element,
                // or, for the first one, to the chain inherited at element start.
                private void ProcessNamespace (string prefix, string ns)
                {
                        nsIndex++;

                        int nextTmp = lastNsIndexInCurrent == 0 ? nodes [nodeIndex].FirstNamespace : lastNsIndexInCurrent;

                        this.AddNsNode (nodeIndex,
                                prefix,
                                ns,
                                nextTmp);
                        lastNsIndexInCurrent = nsIndex;
                }

                // Appends an attribute of the current element, links it after the
                // element's previous attribute and, when a validating reader
                // reports it as ID-typed, registers its value in idTable.
                private void ProcessAttribute (string prefix, string localName, string ns, string value)
                {
                        attributeIndex ++;

                        this.AddAttribute (nodeIndex,
                                localName,
                                ns, 
                                prefix != null ? prefix : String.Empty, 
                                value,
                                lineInfo != null ? lineInfo.LineNumber : 0,
                                lineInfo != null ? lineInfo.LinePosition : 0);
                        if (firstAttributeIndex == 0)
                                firstAttributeIndex = attributeIndex;
                        else
                                attributes [attributeIndex - 1].NextAttribute = attributeIndex;

                        // Identity infoset
                        if (validatingReader != null) {
                                XmlSchemaDatatype dt = validatingReader.SchemaType as XmlSchemaDatatype;
                                if (dt == null) {
                                        XmlSchemaType xsType = validatingReader.SchemaType as XmlSchemaType;
                                        if (xsType != null)
                                                dt = xsType.Datatype;
                                }
                                // Hashtable.Add() throws on a duplicate key. A
                                // repeated ID value can reach this point when the
                                // reader reports validation errors through an event
                                // handler, so keep the first element in document
                                // order and ignore later ones.
                                if (dt != null && dt.TokenizedType == XmlTokenizedType.ID
                                        && !idTable.ContainsKey (value))
                                        idTable.Add (value, nodeIndex);
                        }
                }

                // Replaces the node table with one of the given size, keeping as
                // many existing entries as fit.
                private void SetNodeArrayLength (int size)
                {
                        DTMXPathLinkedNode [] newArr = new DTMXPathLinkedNode [size];
                        Array.Copy (nodes, newArr, System.Math.Min (size, nodes.Length));
                        nodes = newArr;
                }

                // Replaces the attribute table with one of the given size, keeping
                // as many existing entries as fit.
                private void SetAttributeArrayLength (int size)
                {
                        DTMXPathAttributeNode [] newArr = 
                                new DTMXPathAttributeNode [size];
                        Array.Copy (attributes, newArr, System.Math.Min (size, attributes.Length));
                        attributes = newArr;
                }

                // Replaces the namespace table with one of the given size, keeping
                // as many existing entries as fit.
                private void SetNsArrayLength (int size)
                {
                        DTMXPathNamespaceNode [] newArr =
                                new DTMXPathNamespaceNode [size];
                        Array.Copy (namespaces, newArr, System.Math.Min (size, namespaces.Length));
                        namespaces = newArr;
                }

                // Writes a node entry at the current nodeIndex (the caller advances
                // nodeIndex beforehand), growing the table when needed.
                // Here followings are skipped: firstChild, nextSibling, 
                public void AddNode (int parent, int firstAttribute, int previousSibling, int depth, XPathNodeType nodeType, string baseUri, bool isEmptyElement, string localName, string ns, string prefix, string value, string xmlLang, int namespaceNode, int lineNumber, int linePosition)
                {
                        int required = nodeIndex + 1;
                        if (nodes.Length < required) {
                                nodeCapacity *= 4;
                                // A capacity of 0 never grows by multiplication.
                                if (nodeCapacity < required)
                                        nodeCapacity = required;
                                SetNodeArrayLength (nodeCapacity);
                        }

#if DTM_CLASS
                        nodes [nodeIndex] = new DTMXPathLinkedNode ();
#endif
                        nodes [nodeIndex].FirstChild = 0;               // dummy
                        nodes [nodeIndex].Parent = parent;
                        nodes [nodeIndex].FirstAttribute = firstAttribute;
                        nodes [nodeIndex].PreviousSibling = previousSibling;
                        nodes [nodeIndex].NextSibling = 0;      // dummy
                        nodes [nodeIndex].Depth = depth;
                        nodes [nodeIndex].NodeType = nodeType;
                        nodes [nodeIndex].BaseURI = baseUri;
                        nodes [nodeIndex].IsEmptyElement = isEmptyElement;
                        nodes [nodeIndex].LocalName = localName;
                        nodes [nodeIndex].NamespaceURI = ns;
                        nodes [nodeIndex].Prefix = prefix;
                        nodes [nodeIndex].Value = value;
                        nodes [nodeIndex].XmlLang = xmlLang;
                        nodes [nodeIndex].FirstNamespace = namespaceNode;
                        nodes [nodeIndex].LineNumber = lineNumber;
                        nodes [nodeIndex].LinePosition = linePosition;
                }

                // Writes an attribute entry at the current attributeIndex (the
                // caller advances attributeIndex beforehand), growing the table
                // when needed.
                // Followings are skipped: nextAttribute,
                public void AddAttribute (int ownerElement, string localName, string ns, string prefix, string value, int lineNumber, int linePosition)
                {
                        int required = attributeIndex + 1;
                        if (attributes.Length < required) {
                                attributeCapacity *= 4;
                                // A capacity of 0 never grows by multiplication.
                                if (attributeCapacity < required)
                                        attributeCapacity = required;
                                SetAttributeArrayLength (attributeCapacity);
                        }

#if DTM_CLASS
                        attributes [attributeIndex] = new DTMXPathAttributeNode ();
#endif
                        attributes [attributeIndex].OwnerElement = ownerElement;
                        attributes [attributeIndex].LocalName = localName;
                        attributes [attributeIndex].NamespaceURI = ns;
                        attributes [attributeIndex].Prefix = prefix;
                        attributes [attributeIndex].Value = value;
                        attributes [attributeIndex].LineNumber = lineNumber;
                        attributes [attributeIndex].LinePosition = linePosition;
                }

                // Writes a namespace entry at the current nsIndex (the caller
                // advances nsIndex beforehand), growing the table when needed.
                // Followings are skipped: nextNsNode (may be next attribute in the same element, or ancestors' nsNode)
                public void AddNsNode (int declaredElement, string name, string ns, int nextNs)
                {
                        int required = nsIndex + 1;
                        if (namespaces.Length < required) {
                                nsCapacity *= 4;
                                if (nsCapacity < required)
                                        nsCapacity = required;
                                SetNsArrayLength (nsCapacity);
                        }

#if DTM_CLASS
                        namespaces [nsIndex] = new DTMXPathNamespaceNode ();
#endif
                        namespaces [nsIndex].DeclaredElement = declaredElement;
                        namespaces [nsIndex].Name = name;
                        namespaces [nsIndex].Namespace = ns;
                        namespaces [nsIndex].NextNamespace = nextNs;
                }
        }
472 }
473