2005-01-31 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mcs / class / Microsoft.Web.Services / Mono.Xml / XmlCanonicalizer.cs
1 //
2 // XmlCanonicalizer.cs - C14N implementation for XML Signature
3 // http://www.w3.org/TR/xml-c14n
4 //
5 // Author:
6 //      Aleksey Sanin (aleksey@aleksey.com)
7 //
8 // (C) 2003 Aleksey Sanin (aleksey@aleksey.com)
9 //
10 using System;
11 using System.Collections;
12 using System.IO;
13 using System.Text;
14 using System.Xml;
15
16 namespace Mono.Xml { 
17
18         internal class XmlCanonicalizer {
19
20                 private enum XmlCanonicalizerState
21                 {
22                         BeforeDocElement,
23                         InsideDocElement,
24                         AfterDocElement
25                 }
26                 
27                 // c14n parameters
28                 private bool comments;
29                 private bool exclusive;
30
31                 // input/output
32                 private XmlNodeList xnl;
33                 private StringBuilder res;
34                 
35                 // namespaces rendering stack
36                 private XmlCanonicalizerState state;
37                 private ArrayList visibleNamespaces;
38                 private int prevVisibleNamespacesStart;
39                 private int prevVisibleNamespacesEnd;
40
41                 public XmlCanonicalizer (bool withComments, bool excC14N)
42                 {           
43                         res = new StringBuilder ();
44                         comments = withComments;
45                         exclusive = excC14N;
46                         state = XmlCanonicalizerState.BeforeDocElement;
47                         visibleNamespaces = new ArrayList ();
48                         prevVisibleNamespacesStart = 0;
49                         prevVisibleNamespacesEnd = 0;
50                 }
51                 
52                 public Stream Canonicalize (XmlDocument doc)
53                 {
54                         WriteDocumentNode (doc);
55                         
56                         UTF8Encoding utf8 = new UTF8Encoding ();
57                         byte[] data = utf8.GetBytes (res.ToString ());
58                         return new MemoryStream (data);
59                 }
60                 
61                 public Stream Canonicalize (XmlNodeList nodes)
62                 {
63                         xnl = nodes;
64                         if (nodes == null || nodes.Count < 1)
65                             return null;
66                         return Canonicalize (nodes[0].OwnerDocument);
67                 }               
68
69                 private void WriteNode (XmlNode node)
70                 {
71                         // Console.WriteLine ("C14N Debug: node=" + node.Name);
72
73                         bool visible = IsNodeVisible (node);
74                         switch (node.NodeType) {
75                         case XmlNodeType.Document:
76                         case XmlNodeType.DocumentFragment:
77                                 WriteDocumentNode (node);
78                                 break;
79                         case XmlNodeType.Element:
80                                 WriteElementNode (node, visible);
81                                 break;
82                         case XmlNodeType.CDATA:
83                         case XmlNodeType.SignificantWhitespace:
84                         case XmlNodeType.Text:
85                                 // CDATA sections are processed as text nodes
86                                 WriteTextNode (node, visible);
87                                 break;
88                         case XmlNodeType.Whitespace:
89                                 if (state == XmlCanonicalizerState.InsideDocElement)
90                                         WriteTextNode (node, visible);
91                                 break;
92                         case XmlNodeType.Comment:
93                                 WriteCommentNode (node, visible);
94                                 break;
95                         case XmlNodeType.ProcessingInstruction:
96                                 WriteProcessingInstructionNode (node, visible);
97                                 break;
98                         case XmlNodeType.EntityReference:
99                                 for (int i = 0; i < node.ChildNodes.Count; i++)
100                                         WriteNode (node.ChildNodes [i]);
101                                 break;
102                         case XmlNodeType.Attribute:
103                                 throw new XmlException ("Attribute node is impossible here", null);
104                         case XmlNodeType.EndElement:
105                                 throw new XmlException ("EndElement node is impossible here", null);
106                         case XmlNodeType.EndEntity:
107                                 throw new XmlException ("EndEntity node is impossible here", null);
108                         case XmlNodeType.DocumentType:
109                         case XmlNodeType.Entity:
110                         case XmlNodeType.Notation:
111                         case XmlNodeType.XmlDeclaration:
112                                 // just do nothing
113                                 break;
114                         }
115                 }
116
117                 private void WriteDocumentNode (XmlNode node)
118                 {
119                         state = XmlCanonicalizerState.BeforeDocElement;
120                         for (XmlNode child = node.FirstChild; child != null; child = child.NextSibling)
121                                 WriteNode (child);
122                 }
123                 
124                 // Element Nodes
125                 // If the element is not in the node-set, then the result is obtained 
126                 // by processing the namespace axis, then the attribute axis, then 
127                 // processing the child nodes of the element that are in the node-set 
128                 // (in document order). If the element is inthe node-set, then the result 
129                 // is an open angle bracket (<), the element QName, the result of 
130                 // processing the namespace axis, the result of processing the attribute 
131                 // axis, a close angle bracket (>), the result of processing the child 
132                 // nodes of the element that are in the node-set (in document order), an 
133                 // open angle bracket, a forward slash (/), the element QName, and a close 
134                 // angle bracket.
135                 private void WriteElementNode (XmlNode node, bool visible)
136                 {
137                         // Console.WriteLine ("Debug: element node");
138                     
139                         // remember current state 
140                         int savedPrevVisibleNamespacesStart = prevVisibleNamespacesStart;
141                         int savedPrevVisibleNamespacesEnd = prevVisibleNamespacesEnd;
142                         int savedVisibleNamespacesSize = visibleNamespaces.Count;
143                         XmlCanonicalizerState s = state;
144                         if (visible && state == XmlCanonicalizerState.BeforeDocElement)
145                                 state = XmlCanonicalizerState.InsideDocElement;
146                     
147                         // write start tag
148                         if (visible) {
149                                 res.Append ("<");
150                                 res.Append (node.Name);
151                         }
152                     
153                         // this is odd but you can select namespaces
154                         // and attributes even if node itself is not visible
155                         WriteNamespacesAxis (node, visible);
156                         WriteAttributesAxis (node);                     
157         
158                         if (visible)
159                                 res.Append (">");
160
161                         // write children
162                         for (XmlNode child = node.FirstChild; child != null; child = child.NextSibling)
163                                 WriteNode (child);
164                                     
165                         // write end tag            
166                         if (visible) {
167                                 res.Append ("</");
168                                 res.Append (node.Name);
169                                 res.Append (">");
170                         }
171                     
172                         // restore state
173                         if (visible && s == XmlCanonicalizerState.BeforeDocElement)
174                                 state = XmlCanonicalizerState.AfterDocElement;
175                         prevVisibleNamespacesStart = savedPrevVisibleNamespacesStart;
176                         prevVisibleNamespacesEnd = savedPrevVisibleNamespacesEnd;
177                         if (visibleNamespaces.Count > savedVisibleNamespacesSize) {
178                                 visibleNamespaces.RemoveRange (savedVisibleNamespacesSize, 
179                                         visibleNamespaces.Count - savedVisibleNamespacesSize);
180                         }
181                 }
182
183                 // Namespace Axis
184                 // Consider a list L containing only namespace nodes in the 
185                 // axis and in the node-set in lexicographic order (ascending). To begin 
186                 // processing L, if the first node is not the default namespace node (a node 
187                 // with no namespace URI and no local name), then generate a space followed 
188                 // by xmlns="" if and only if the following conditions are met:
189                 //    - the element E that owns the axis is in the node-set
190                 //    - The nearest ancestor element of E in the node-set has a default 
191                 //          namespace node in the node-set (default namespace nodes always 
192                 //      have non-empty values in XPath)
193                 // The latter condition eliminates unnecessary occurrences of xmlns="" in 
194                 // the canonical form since an element only receives an xmlns="" if its 
195                 // default namespace is empty and if it has an immediate parent in the 
196                 // canonical form that has a non-empty default namespace. To finish 
197                 // processing  L, simply process every namespace node in L, except omit 
198                 // namespace node with local name xml, which defines the xml prefix, 
199                 // if its string value is http://www.w3.org/XML/1998/namespace.
200                 private void WriteNamespacesAxis (XmlNode node, bool visible)
201                 {
202                         // Console.WriteLine ("Debug: namespaces");
203
204                         XmlDocument doc = node.OwnerDocument;    
205                         bool has_empty_namespace = false;
206                         ArrayList list = new ArrayList ();
207                         for (XmlNode cur = node; cur != null && cur != doc; cur = cur.ParentNode) {
208                                 foreach (XmlNode attribute in cur.Attributes) {         
209                                         if (!IsNamespaceNode (attribute)) 
210                                                 continue;
211                                 
212                                         // get namespace prefix
213                                         string prefix = string.Empty;
214                                         if (attribute.Prefix == "xmlns") 
215                                                 prefix = attribute.LocalName;
216                             
217                                         // check if it is "xml" namespace                           
218                                         if (prefix == "xml" && attribute.Value == "http://www.w3.org/XML/1998/namespace")
219                                                 continue;
220                             
221                                         // make sure that this is an active namespace
222                                         // for our node
223                                         string ns = node.GetNamespaceOfPrefix (prefix);
224                                         if (ns != attribute.Value) 
225                                                 continue;
226                             
227                                         // check that it is selected with XPath
228                                         if (!IsNodeVisible (attribute)) 
229                                                 continue;
230
231                                         // check that we have not rendered it yet
232                                         bool rendered = IsNamespaceRendered (prefix, attribute.Value);
233
234                                         // add to the visible namespaces stack
235                                         if (visible)
236                                                 visibleNamespaces.Add (attribute);                            
237                             
238                                         if (!rendered)
239                                                 list.Add (attribute);
240                                     
241                                         if (prefix == string.Empty)
242                                                 has_empty_namespace = true;
243                                 }
244                         }
245
246                         // add empty namespace if needed                    
247                         if (visible && !has_empty_namespace && !IsNamespaceRendered (string.Empty, string.Empty)) 
248                                 res.Append (" xmlns=\"\"");
249                     
250                         list.Sort (new XmlDsigC14NTransformNamespacesComparer ());
251                         foreach (object obj in list) {
252                                 XmlNode attribute = (obj as XmlNode);
253                                 if (attribute != null) {
254                                         res.Append (" ");
255                                         res.Append (attribute.Name);
256                                         res.Append ("=\"");
257                                         res.Append (attribute.Value);
258                                         res.Append ("\"");
259                                 }
260                         }
261                     
262                         // move the rendered namespaces stack
263                         if (visible) {
264                                 prevVisibleNamespacesStart = prevVisibleNamespacesEnd;
265                                 prevVisibleNamespacesEnd = visibleNamespaces.Count;     
266                         }
267                 }
268                 
269                 // Attribute Axis 
270                 // In lexicographic order (ascending), process each node that 
271                 // is in the element's attribute axis and in the node-set.
272                 // 
273                 // The processing of an element node E MUST be modified slightly 
274                 // when an XPath node-set is given as input and the element's 
275                 // parent is omitted from the node-set.
276                 private void WriteAttributesAxis (XmlNode node)
277                 {
278                         // Console.WriteLine ("Debug: attributes");
279                 
280                         ArrayList list = new ArrayList ();
281                         foreach (XmlNode attribute in node.Attributes) {        
282                                 if (!IsNamespaceNode (attribute) && IsNodeVisible (attribute))
283                                         list.Add (attribute);
284                         }
285                      
286                         // Add attributes from "xml" namespace for "inclusive" c14n only:
287                         //
288                         // The method for processing the attribute axis of an element E 
289                         // in the node-set is enhanced. All element nodes along E's 
290                         // ancestor axis are examined for nearest occurrences of 
291                         // attributes in the xml namespace, such as xml:lang and 
292                         // xml:space (whether or not they are in the node-set). 
293                         // From this list of attributes, remove any that are in E's 
294                         // attribute axis (whether or not they are in the node-set). 
295                         // Then, lexicographically merge this attribute list with the 
296                         // nodes of E's attribute axis that are in the node-set. The 
297                         // result of visiting the attribute axis is computed by 
298                         // processing the attribute nodes in this merged attribute list.
299                         if (!exclusive && node.ParentNode != null && node.ParentNode.ParentNode != null && !IsNodeVisible (node.ParentNode.ParentNode)) {
300                                 // if we have whole document then the node.ParentNode.ParentNode
301                                 // is always visible
302                                 for (XmlNode cur = node.ParentNode; cur != null; cur = cur.ParentNode) {
303                                         if (cur.Attributes == null)
304                                                 continue;
305                                         foreach (XmlNode attribute in cur.Attributes) {
306                                                 // we are looking for "xml:*" attributes
307                                                 if (attribute.Prefix != "xml")
308                                                         continue;
309                                 
310                                                 // exclude ones that are in the node's attributes axis
311                                                 if (node.Attributes.GetNamedItem (attribute.LocalName, attribute.NamespaceURI) != null)
312                                                         continue;
313                                 
314                                                 // finally check that we don't have the same attribute in our list
315                                                 bool found = false;
316                                                 foreach (object obj in list) {
317                                                         XmlNode n = (obj as XmlNode);
318                                                         if (n.Prefix == "xml" && n.LocalName == attribute.LocalName) {
319                                                                 found = true;
320                                                                 break;
321                                                         }
322                                                 }
323                                 
324                                                 if (found) 
325                                                         continue;
326                                 
327                                                 // now we can add this attribute to our list
328                                                 list.Add (attribute);
329                                         }
330                                 }               
331                         }
332                         
333                         // sort namespaces and write results        
334                         list.Sort (new XmlDsigC14NTransformAttributesComparer ());
335                         foreach (object obj in list) {
336                                 XmlNode attribute = (obj as XmlNode);
337                                 if (attribute != null) {
338                                         res.Append (" ");
339                                         res.Append (attribute.Name);
340                                         res.Append ("=\"");
341                                         res.Append (NormalizeString (attribute.Value, XmlNodeType.Attribute));
342                                         res.Append ("\"");
343                                 }
344                         }
345                 }
346
347                 // Text Nodes
348                 // the string value, except all ampersands are replaced 
349                 // by &amp;, all open angle brackets (<) are replaced by &lt;, all closing 
350                 // angle brackets (>) are replaced by &gt;, and all #xD characters are 
351                 // replaced by &#xD;.
352                 private void WriteTextNode (XmlNode node, bool visible)
353                 {
354                         // Console.WriteLine ("Debug: text node");
355                         if (visible)
356                                 res.Append (NormalizeString (node.Value, node.NodeType));
357 //                              res.Append (NormalizeString (node.Value, XmlNodeType.Text));
358                 }               
359
360                 // Comment Nodes
361                 // Nothing if generating canonical XML without comments. For 
362                 // canonical XML with comments, generate the opening comment 
363                 // symbol (<!--), the string value of the node, and the 
364                 // closing comment symbol (-->). Also, a trailing #xA is rendered 
365                 // after the closing comment symbol for comment children of the 
366                 // root node with a lesser document order than the document 
367                 // element, and a leading #xA is rendered before the opening 
368                 // comment symbol of comment children of the root node with a 
369                 // greater document order than the document element. (Comment 
370                 // children of the root node represent comments outside of the 
371                 // top-level document element and outside of the document type 
372                 // declaration).
373                 private void WriteCommentNode (XmlNode node, bool visible)
374                 {
375                         // Console.WriteLine ("Debug: comment node");
376                         if (visible && comments) {
377                             if (state == XmlCanonicalizerState.AfterDocElement)
378                                     res.Append ("\x0A<!--");
379                             else
380                                     res.Append ("<!--");
381                         
382                             res.Append (NormalizeString (node.Value, XmlNodeType.Comment));
383                             
384                             if (state == XmlCanonicalizerState.BeforeDocElement)
385                                     res.Append ("-->\x0A");
386                             else
387                                     res.Append ("-->");
388                         }
389                 }
390                 
391                 // Processing Instruction (PI) Nodes- 
392                 // The opening PI symbol (<?), the PI target name of the node, 
393                 // a leading space and the string value if it is not empty, and 
394                 // the closing PI symbol (?>). If the string value is empty, 
395                 // then the leading space is not added. Also, a trailing #xA is 
396                 // rendered after the closing PI symbol for PI children of the 
397                 // root node with a lesser document order than the document 
398                 // element, and a leading #xA is rendered before the opening PI 
399                 // symbol of PI children of the root node with a greater document 
400                 // order than the document element.
401                 private void WriteProcessingInstructionNode (XmlNode node, bool visible)
402                 {
403                         // Console.WriteLine ("Debug: PI node");
404
405                         if (visible) {
406                                 if (state == XmlCanonicalizerState.AfterDocElement)
407                                         res.Append ("\x0A<?");
408                                 else
409                                         res.Append ("<?");
410                         
411                                 res.Append (node.Name);
412                                 if (node.Value.Length > 0) {
413                                         res.Append (" ");
414                                         res.Append (NormalizeString (node.Value, XmlNodeType.ProcessingInstruction));
415                                 }
416                         
417                                 if (state == XmlCanonicalizerState.BeforeDocElement)
418                                         res.Append ("?>\x0A");
419                                 else
420                                         res.Append ("?>");
421                         }
422                 }
423                 
424                 private bool IsNodeVisible (XmlNode node)
425                 {
426                         // if node list is empty then we process whole document
427                         if (xnl == null) 
428                                 return true;
429                     
430                         // walk thru the list
431                         foreach (XmlNode xn in xnl) {
432                                 if (node.Equals (xn)) 
433                                         return true;
434                         }
435                     
436                         return false;
437                 }
438
439                 private bool IsNamespaceRendered (string prefix, string uri)
440                 {
441                         // if the default namespace xmlns="" is not re-defined yet
442                         // then we do not want to print it out
443                         bool IsEmptyNs = prefix == string.Empty && uri == string.Empty;
444                         int start = (IsEmptyNs) ? 0 : prevVisibleNamespacesStart;
445                         for (int i = visibleNamespaces.Count - 1; i >= start; i--) {
446                                 XmlNode node = (visibleNamespaces[i] as XmlNode);
447                                 if (node != null) {
448                                         // get namespace prefix
449                                         string p = string.Empty;
450                                         if (node.Prefix == "xmlns") 
451                                                 p = node.LocalName;
452                                         if (p == prefix)
453                                                 return node.Value == uri;
454                                 }
455                         }
456                     
457                         return IsEmptyNs;
458                 }
459                 
460                 private bool IsNamespaceNode (XmlNode node)
461                 {
462                         if (node == null || node.NodeType != XmlNodeType.Attribute) 
463                                 return false;
464                         return node.NamespaceURI == "http://www.w3.org/2000/xmlns/";
465                 }
466     
467                 private bool IsTextNode (XmlNodeType type)
468                 {
469                         switch (type) {
470                         case XmlNodeType.Text:
471                         case XmlNodeType.CDATA:
472                         case XmlNodeType.SignificantWhitespace:
473                         case XmlNodeType.Whitespace:
474                                 return true;
475                         }
476                         return false;
477                 }
478
479                 private string NormalizeString (string input, XmlNodeType type)
480                 {
481                         StringBuilder sb = new StringBuilder ();
482                         for (int i = 0; i < input.Length; i++) {
483                                 char ch = input[i];
484                                 if (ch == '<' && (type == XmlNodeType.Attribute || IsTextNode (type)))
485                                         sb.Append ("&lt;");
486                                 else if (ch == '>' && IsTextNode (type))
487                                         sb.Append ("&gt;");
488                                 else if (ch == '&' && (type == XmlNodeType.Attribute || IsTextNode (type)))
489                                         sb.Append ("&amp;");
490                                 else if (ch == '\"' && type == XmlNodeType.Attribute)
491                                         sb.Append ("&quot;");
492                                 else if (ch == '\x09' && type == XmlNodeType.Attribute)
493                                         sb.Append ("&#x9;");
494                                 else if (ch == '\x0A' && type == XmlNodeType.Attribute)
495                                         sb.Append ("&#xA;");
496                                 else if (ch == '\x0D' && (type == XmlNodeType.Attribute ||
497                                                           IsTextNode (type) && type != XmlNodeType.Whitespace ||
498                                                           type == XmlNodeType.Comment ||
499                                                           type == XmlNodeType.ProcessingInstruction))
500                                         sb.Append ("&#xD;");
501                                 else if (ch == '\x0D')
502                                         continue;
503                                 else
504                                         sb.Append (ch);
505                         }
506                     
507                         return sb.ToString ();
508                 }
509         }
510     
511         internal class XmlDsigC14NTransformAttributesComparer : IComparer
512         {
513                 public int Compare (object x, object y)
514                 {
515                         XmlNode n1 = (x as XmlNode);
516                         XmlNode n2 = (y as XmlNode);
517                 
518                         // simple cases
519                         if (n1 == n2) 
520                                 return 0;
521                         else if (n1 == null) 
522                                 return -1;
523                         else if (n2 == null) 
524                                 return 1;
525                         else if (n1.Prefix == n2.Prefix) 
526                                 return string.Compare (n1.LocalName, n2.LocalName);
527         
528                         // Attributes in the default namespace are first
529                         // because the default namespace is not applied to
530                         // unqualified attributes
531                         if (n1.Prefix == string.Empty) 
532                                 return -1;
533                         else if (n2.Prefix == string.Empty) 
534                                 return 1;
535                     
536                         int ret = string.Compare (n1.NamespaceURI, n2.NamespaceURI);
537                         if (ret == 0)
538                                 ret = string.Compare (n1.LocalName, n2.LocalName);
539                         return ret;
540                 }
541         }
542
543         internal class XmlDsigC14NTransformNamespacesComparer : IComparer
544         {
545                 public int Compare (object x, object y)
546                 {
547                         XmlNode n1 = (x as XmlNode);
548                         XmlNode n2 = (y as XmlNode);
549                 
550                         // simple cases
551                         if (n1 == n2) 
552                                 return 0;
553                         else if (n1 == null) 
554                                 return -1;
555                         else if (n2 == null) 
556                                 return 1;
557                         else if (n1.Prefix == string.Empty) 
558                                 return -1;
559                         else if (n2.Prefix == string.Empty) 
560                                 return 1;
561                     
562                         return string.Compare (n1.LocalName, n2.LocalName);
563                 }
564         }
565 }
566