merge -r 58060:58217
[mono.git] / mcs / class / Managed.Windows.Forms / System.Windows.Forms / Mime.cs
1 // Permission is hereby granted, free of charge, to any person obtaining
2 // a copy of this software and associated documentation files (the
3 // "Software"), to deal in the Software without restriction, including
4 // without limitation the rights to use, copy, modify, merge, publish,
5 // distribute, sublicense, and/or sell copies of the Software, and to
6 // permit persons to whom the Software is furnished to do so, subject to
7 // the following conditions:
8 //
9 // The above copyright notice and this permission notice shall be
10 // included in all copies or substantial portions of the Software.
11 //
12 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
17 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
18 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 //
20 // Copyright (c) 2005 Novell, Inc. (http://www.novell.com)
21 //
22 // Authors:
23 //
24 //  Alexander Olk       xenomorph2@onlinehome.de
25 //
26
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Collections.Specialized;
31 using System.Text.RegularExpressions;
32 using System.Text;
33
34 // Usage:
35 // - for files:
36 //   string mimeType = Mime.GetMimeTypeForFile( string filename );
37 // - for byte array:
38 //   string mimeType = Mime.GetMimeTypeForData( byte[] data );
39 // - for string (maybe an email):
40 //   string mimeType = Mime.GetMimeTypeForString( string input );
41
42 // - get alias for mime type:
43 //   string alias = Mime.GetMimeAlias( string mimeType );
44 // - get subclass for mime type:
45 //   string subtype = Mime.GetMimeSubClass( string mimeType );
46 // - get all available mime types:
47 //   string[] available = Mime.AvailableMimeTypes;
48
49 // TODO:
50 // - optimize
51 // - little/big endian stuff for TypeHostXX
52 // - async callback ?!?
53 // - freedesktop org file extensions can have regular expressions also, resolve them too
54 // - sort match collections by magic priority ( higher = first )
55 // - MimeGenerated: use indexes to point to mime type name strings instead of repeating the name string each time (in match, subclass, etc.) !?!
56 // - buffer is currently hard coded to size 8192, value should be determined by MimeGenerated
57
58 namespace System.Windows.Forms
59 {
60         internal class Mime
61         {
62                 public static Mime Instance = new Mime();
63                 
64                 private string current_file_name;
65                 private string global_result = octet_stream;
66                 
67                 private FileStream file_stream;
68                 
69                 private byte[] buffer = new byte[ 8192 ];
70                 
71                 private const string octet_stream = "application/octet-stream";
72                 private const string text_plain = "text/plain";
73                 private const string zero_file = "application/x-zerosize";
74                 
75                 private StringDictionary mime_file_cache = new StringDictionary();
76                 
77                 private const int mime_file_cache_max_size = 5000;
78                 
79                 private string search_string;
80                 
81                 private static object lock_object = new Object();
82                 
83                 private int platform = (int) Environment.OSVersion.Platform;
84                 
85                 private bool is_zero_file = false;
86                 
87                 public Mime( )
88                 {
89                         MimeGenerated.Init( );
90                         
91 //                      Console.WriteLine( "Mime Instance created..." );
92                 }
93                 
94                 public static string GetMimeTypeForFile( string filename )
95                 {
96                         lock ( lock_object )
97                         {
98                                 Instance.StartByFileName( filename );
99                         }
100                         
101                         return Instance.global_result;
102                 }
103                 
104                 // not tested
105                 public static string GetMimeTypeForData( byte[] data )
106                 {
107                         lock ( lock_object )
108                         {
109                                 Instance.StartDataLookup( data );
110                         }
111                         
112                         return Instance.global_result;
113                 }
114                 
115                 public static string GetMimeTypeForString( string input )
116                 {
117                         lock ( lock_object )
118                         {
119                                 Instance.StartStringLookup( input );
120                         }
121                         
122                         return Instance.global_result;
123                 }
124                 
125                 public static string GetMimeAlias( string mimetype )
126                 {
127                         return MimeGenerated.Aliases[ mimetype ];
128                 }
129                 
130                 public static string GetMimeSubClass( string mimetype )
131                 {
132                         return MimeGenerated.SubClasses[ mimetype ];
133                 }
134                 
135                 public static string[] AvailableMimeTypes
136                 {
137                         get {
138                                 string[] result = new string[ MimeGenerated.MimeTypes.Count ];
139                                 
140                                 MimeGenerated.MimeTypes.Keys.CopyTo( result, 0 );
141                                 
142                                 return result;
143                         }
144                 }
145                 
146                 private void StartByFileName( string filename )
147                 {
148                         if ( mime_file_cache.ContainsKey( filename ) )
149                         {
150                                 global_result = mime_file_cache[ filename ];
151                                 return;
152                         }
153                         
154                         current_file_name = filename;
155                         is_zero_file = false;
156                         
157                         if ( !CheckForInode( ) )
158                         {
159                                 global_result = octet_stream;
160                                 
161                                 GoByFileName( );
162                         }
163                         
164                         if ( !mime_file_cache.ContainsKey( current_file_name ) )
165                                 mime_file_cache.Add( current_file_name, global_result );
166                         
167                         // not tested
168                         if ( mime_file_cache.Count > mime_file_cache_max_size )
169                         {
170                                 IEnumerator enumerator = mime_file_cache.GetEnumerator( );
171                                 
172                                 for ( int i = 0; i < mime_file_cache_max_size - 1000; i++ )
173                                 {
174                                         mime_file_cache.Remove( enumerator.Current.ToString( ) );
175                                 }
176                         }
177                 }
178                 
179                 private void StartDataLookup( byte[] data )
180                 {
181                         global_result = octet_stream;
182                         
183                         System.Array.Clear( buffer, 0, buffer.Length );
184                         
185                         if ( data.Length > buffer.Length )
186                         {
187                                 System.Array.Copy( data, buffer, buffer.Length );
188                         }
189                         else
190                         {
191                                 System.Array.Copy( data, buffer, data.Length );
192                         }
193                         
194                         if ( CheckMatch80Plus( ) )
195                                 return;
196                         
197                         if ( CheckMatchBelow80( ) )
198                                 return;
199                         
200                         CheckForBinaryOrText( );
201                 }
202                 
203                 private void StartStringLookup( string input )
204                 {
205                         global_result = text_plain;
206                         
207                         search_string = input;
208                         
209                         if ( CheckForContentTypeString( ) )
210                                 return;
211                 }
212                 
213                 private bool CheckForInode( )
214                 {
215                         if ( ( platform == 4 ) || ( platform == 128 ) )
216                         {
217 #if __MonoCS__
218                                 // *nix platform
219                                 Mono.Unix.UnixFileInfo ufi = new Mono.Unix.UnixFileInfo( current_file_name );
220                                 
221                                 if ( ufi.IsFile )
222                                 {
223                                         return false;
224                                 }
225                                 else
226                                 if ( ufi.IsDirectory )
227                                 {
228                                         global_result = "inode/directory";
229                                         return true;
230                                 }
231                                 else
232                                 if ( ufi.IsBlockDevice )
233                                 {
234                                         global_result = "inode/blockdevice";
235                                         return true;
236                                 }
237                                 else
238                                 if ( ufi.IsSocket )
239                                 {
240                                         global_result = "inode/socket";
241                                         return true;
242                                 }
243                                 else
244                                 if ( ufi.IsSymbolicLink )
245                                 {
246                                         global_result = "inode/symlink";
247                                         return true;
248                                 }
249                                 else
250                                 if ( ufi.IsCharacterDevice )
251                                 {
252                                         global_result = "inode/chardevice";
253                                         return true;
254                                 }
255                                 else
256                                 if ( ufi.IsFIFO )
257                                 {
258                                         global_result = "inode/fifo";
259                                         return true;
260                                 }
261 #endif
262                         }
263                         else
264                         {
265                                 // TODO!!!!
266                                 // windows platform
267                         }
268                         
269                         return false;
270                 }
271                 
272                 private void GoByFileName( )
273                 {
274                         // check if we can open the file
275                         if ( !OpenFile( ) )
276                         {
277                                 // couldn't open the file, check globals only
278                                 
279                                 CheckGlobalPatterns( );
280                                 
281                                 return;
282                         }
283                         
284                         if ( !is_zero_file )
285                         {
286                                 // check for matches with a priority >= 80
287                                 if ( CheckMatch80Plus( ) )
288                                         return;
289                         }
290                         
291                         // check global patterns, aka file extensions...
292                         // this should be done for zero size files also,
293                         // for example zero size file trash.ccc~ should return
294                         // application/x-trash instead of application/x-zerosize
295                         if ( CheckGlobalPatterns( ) )
296                                 return;
297                         
298                         // if file size is zero, no other checks are needed
299                         if ( is_zero_file )
300                                 return;
301                         
302                         // ok, still nothing matches then try matches with a priority < 80
303                         if ( CheckMatchBelow80( ) )
304                                 return;
305                         
306                         // wow, still nothing... return application/octet-stream for binary data, or text/plain for textual data
307                         CheckForBinaryOrText( );
308                 }
309                 
310                 private bool CheckMatch80Plus( )
311                 {
312                         foreach ( Match match in MimeGenerated.Matches80Plus )
313                         {
314                                 if ( TestMatch( match ) )
315                                 {
316                                         global_result = match.MimeType;
317                                         
318                                         return true;
319                                 }
320                         }
321                         
322                         return false;
323                 }
324                 
325                 private void CheckGlobalResult( )
326                 {
327                         int comma_index = global_result.IndexOf( "," );
328                         
329                         if ( comma_index != -1 )
330                         {
331                                 global_result = global_result.Substring( 0, comma_index );
332                         }
333                 }
334                 
335                 private bool CheckGlobalPatterns( )
336                 {
337                         string filename = Path.GetFileName( current_file_name );
338                         string filename_lower = filename.ToLower( );
339                         
340                         // first check for literals
341                         
342                         for ( int i = 0; i < MimeGenerated.GlobalLiterals.Count; i++ )
343                         {
344                                 string key = MimeGenerated.GlobalLiterals.GetKey( i );
345                                 
346                                 // no regex char
347                                 if ( key.IndexOf( '[' ) == -1 )
348                                 {
349                                         if ( key.Equals( filename ) )
350                                         {
351                                                 global_result = MimeGenerated.GlobalLiterals[ i ];
352                                                 CheckGlobalResult( );
353                                                 return true;
354                                         }
355                                 }
356                                 else // regex it ;)
357                                 {
358                                         if ( Regex.IsMatch( filename, key ) )
359                                         {
360                                                 global_result = MimeGenerated.GlobalLiterals[ i ];
361                                                 CheckGlobalResult( );
362                                                 return true;
363                                         }
364                                 }
365                         }
366                         
367                         if ( filename.IndexOf( '.' ) != -1 )
368                         {
369                                 // check for double extension like .tar.gz
370                                 
371                                 for ( int i = 0; i < MimeGenerated.GlobalPatternsLong.Count; i++ )
372                                 {
373                                         string key = MimeGenerated.GlobalPatternsLong.GetKey( i );
374                                         
375                                         if ( filename.EndsWith( key ) )
376                                         {
377                                                 global_result = MimeGenerated.GlobalPatternsLong[ i ];
378                                                 CheckGlobalResult( );
379                                                 return true;
380                                         }
381                                         else
382                                         {
383                                                 if ( filename_lower.EndsWith( key ) )
384                                                 {
385                                                         global_result = MimeGenerated.GlobalPatternsLong[ i ];
386                                                         CheckGlobalResult( );
387                                                         return true;
388                                                 }
389                                         }
390                                 }
391                                 
392                                 // check normal extensions...
393                                 
394                                 string extension = Path.GetExtension( current_file_name );
395                                 
396                                 if ( extension.Length != 0 )
397                                 {
398                                         global_result = MimeGenerated.GlobalPatternsShort[ extension ];
399                                         
400                                         if ( global_result != null )
401                                         {
402                                                 CheckGlobalResult( );
403                                                 return true;
404                                         }
405                                         
406                                         string extension_lower = extension.ToLower( );
407                                         
408                                         global_result = MimeGenerated.GlobalPatternsShort[ extension_lower ];
409                                         
410                                         if ( global_result != null )
411                                         {
412                                                 CheckGlobalResult( );
413                                                 return true;
414                                         }
415                                 }
416                         }
417                         
418                         // finally check if a prefix or suffix matches
419                         
420                         for ( int i = 0; i < MimeGenerated.GlobalSufPref.Count; i++ )
421                         {
422                                 string key = MimeGenerated.GlobalSufPref.GetKey( i );
423                                 
424                                 if ( key.StartsWith( "*" ) )
425                                 {
426                                         if ( filename.EndsWith( key.Replace( "*", "" ) ) )
427                                         {
428                                                 global_result = MimeGenerated.GlobalSufPref[ i ];
429                                                 CheckGlobalResult( );
430                                                 return true;
431                                         }
432                                 }
433                                 else
434                                 {
435                                         if ( filename.StartsWith( key.Replace( "*", "" ) ) )
436                                         {
437                                                 global_result = MimeGenerated.GlobalSufPref[ i ];
438                                                 CheckGlobalResult( );
439                                                 return true;
440                                         }
441                                 }
442                         }
443                         
444                         return false;
445                 }
446                 
447                 private bool CheckMatchBelow80( )
448                 {
449                         foreach ( Match match in MimeGenerated.MatchesBelow80 )
450                         {
451                                 if ( TestMatch( match ) )
452                                 {
453                                         global_result = match.MimeType;
454                                         
455                                         return true;
456                                 }
457                         }
458                         
459                         return false;
460                 }
461                 
462                 private void CheckForBinaryOrText( )
463                 {
464                         // check the first 32 bytes
465                         
466                         for ( int i = 0; i < 32; i++ )
467                         {
468                                 char c = System.Convert.ToChar( buffer[ i ] );
469                                 
470                                 if ( c != '\t' &&  c != '\n' && c != '\r' && c != 12 && c < 32 )
471                                 {
472                                         global_result = octet_stream;
473                                         return;
474                                 }
475                         }
476                         
477                         global_result = text_plain;
478                 }
479                 
480                 private bool TestMatch( Match match )
481                 {
482                         bool found = false;
483                         
484                         //  using a simple brute force search algorithm
485                         // compare each (masked) value from the buffer with the (masked) value from the match
486                         // TODO:
487                         // - to find some more speed, maybe we should use unsafe code
488                         // - check if buffer[0] and buffer[lastmatchbyte] match ByteValue[0] and ByteValue[lastmatchbyte] in a match
489                         
490                         for ( int offset_counter = 0; offset_counter < match.OffsetLength; offset_counter++ )
491                         {
492                                 if ( match.Mask == null )
493                                 {
494                                         if ( buffer[ match.Offset + offset_counter ] == match.ByteValue[ 0 ] )
495                                         {
496                                                 if ( match.ByteValue.Length == 1 )
497                                                 {
498                                                         if ( match.Matches.Count > 0 )
499                                                         {
500                                                                 foreach ( Match sub_match in match.Matches )
501                                                                 {
502                                                                         if ( TestMatch( sub_match ) )
503                                                                                 return true;
504                                                                 }
505                                                         }
506                                                         else
507                                                                 return true;
508                                                 }
509                                                 
510                                                 for ( int i = 1; i < match.ByteValue.Length; i++ )
511                                                 {
512                                                         if ( buffer[ match.Offset + offset_counter + i ] != match.ByteValue[ i ] )
513                                                         {
514                                                                 found = false;
515                                                                 break;
516                                                         }
517                                                         
518                                                         found = true;
519                                                 }
520                                                 
521                                                 if ( found )
522                                                 {
523                                                         found = false;
524                                                         
525                                                         if ( match.Matches.Count > 0 )
526                                                         {
527                                                                 foreach ( Match sub_match in match.Matches )
528                                                                 {
529                                                                         if ( TestMatch( sub_match ) )
530                                                                                 return true;
531                                                                 }
532                                                         }
533                                                         else
534                                                                 return true;
535                                                 }
536                                         }
537                                 }
538                                 else // with mask ( it's the same as above, only AND the byte with the corresponding mask byte
539                                 {
540                                         if ( ( buffer[ match.Offset + offset_counter ] & match.Mask[ 0 ] )  ==
541                                             ( match.ByteValue[ 0 ] & match.Mask[ 0 ] ) )
542                                         {
543                                                 if ( match.ByteValue.Length == 1 )
544                                                 {
545                                                         if ( match.Matches.Count > 0 )
546                                                         {
547                                                                 foreach ( Match sub_match in match.Matches )
548                                                                 {
549                                                                         if ( TestMatch( sub_match ) )
550                                                                                 return true;
551                                                                 }
552                                                         }
553                                                         else
554                                                                 return true;
555                                                 }
556                                                 
557                                                 for ( int i = 1; i < match.ByteValue.Length; i++ )
558                                                 {
559                                                         if ( ( buffer[ match.Offset + offset_counter + i ]  & match.Mask[ i ] ) !=
560                                                             ( match.ByteValue[ i ] & match.Mask[ i ] ) )
561                                                         {
562                                                                 found = false;
563                                                                 break;
564                                                         }
565                                                         
566                                                         found = true;
567                                                 }
568                                                 
569                                                 if ( found )
570                                                 {
571                                                         found = false;
572                                                         
573                                                         if ( match.Matches.Count > 0 )
574                                                         {
575                                                                 foreach ( Match sub_match in match.Matches )
576                                                                 {
577                                                                         if ( TestMatch( sub_match ) )
578                                                                                 return true;
579                                                                 }
580                                                         }
581                                                         else
582                                                                 return true;
583                                                 }
584                                         }
585                                 }
586                         }
587                         
588                         return found;
589                 }
590                 
// Loads the beginning of the file named by current_file_name into buffer.
//
// The buffer is zeroed first, so any bytes beyond what the file provides
// stay 0. An empty file is not read at all; instead global_result is set to
// zero_file and is_zero_file is set to true.
//
// Returns true when the file could be opened and inspected, false when any
// exception was raised along the way (the failure is reported only through
// the return value, callers get no further detail).
private bool OpenFile( )
{
        try
        {
                System.Array.Clear( buffer, 0, buffer.Length );
                
                // This constructor overload opens the file with FileShare.Read,
                // so other readers are not locked out while we look at it.
                file_stream = new FileStream( current_file_name, FileMode.Open, FileAccess.Read );
                
                try
                {
                        if ( file_stream.Length == 0 )
                        {
                                global_result = zero_file;
                                is_zero_file = true;
                        }
                        else
                        {
                                // Stream.Read may hand back fewer bytes than requested,
                                // so keep reading until the buffer is full or the end
                                // of the file is reached.
                                int total_read = 0;
                                
                                while ( total_read < buffer.Length )
                                {
                                        int bytes_read = file_stream.Read( buffer, total_read, buffer.Length - total_read );
                                        
                                        if ( bytes_read <= 0 )
                                                break;
                                        
                                        total_read += bytes_read;
                                }
                        }
                }
                finally
                {
                        // Always release the file handle, even when Length or Read throws.
                        file_stream.Close( );
                }
        }
        catch ( Exception )
        {
                // Deliberate best effort: an unreadable file simply yields false.
                return false;
        }
        
        return true;
}
618                 
// Tries to determine a mime type for the text held in search_string.
//
// If the text contains a "Content-type:" header, the value following it
// (up to the first ';', the end of the line or the end of the text) is
// trimmed, stored in global_result and true is returned.
//
// Otherwise the text is converted to ASCII bytes, copied into the zeroed
// buffer (truncated to the buffer size) and the result of the
// CheckMatch80Plus / CheckMatchBelow80 helpers is returned.
//
// NOTE(review): the header lookup is case sensitive, so "Content-Type:"
// is not recognised here - confirm whether that is intended.
private bool CheckForContentTypeString( )
{
        const string content_type_header = "Content-type:";
        
        int index = search_string.IndexOf( content_type_header );
        
        if ( index != -1 )
        {
                int value_start = index + content_type_header.Length;
                
                // The value ends at the parameter separator or at the line break;
                // without either it runs to the end of the text. Searching with
                // IndexOfAny keeps us from indexing past the end of the string.
                int value_end = search_string.IndexOfAny( new char[] { ';', '\r', '\n' }, value_start );
                
                if ( value_end == -1 )
                        value_end = search_string.Length;
                
                // Strings are immutable: the trimmed copy has to be assigned.
                global_result = search_string.Substring( value_start, value_end - value_start ).Trim( );
                
                return true;
        }
        
        // No header found: convert the string to a byte array and let the
        // buffer based checks have a look at it.
        byte[] string_byte = Encoding.ASCII.GetBytes( search_string );
        
        System.Array.Clear( buffer, 0, buffer.Length );
        
        // Copy at most as many bytes as the buffer can hold.
        System.Array.Copy( string_byte, buffer, Math.Min( string_byte.Length, buffer.Length ) );
        
        if ( CheckMatch80Plus( ) )
                return true;
        
        if ( CheckMatchBelow80( ) )
                return true;
        
        return false;
}
661         }
662         
// Stores the comment of a mime type plus a table of additional comments
// that are looked up by a language string.
internal class MimeType
{
        private string comment;
        private Hashtable commentsLanguage = new Hashtable();
        
        // The comment that is not tied to a language key.
        public string Comment
        {
                get {
                        return comment;
                }
                set {
                        comment = value;
                }
        }
        
        // Table of comments keyed by the value passed to GetCommentForLanguage
        // (presumably a language/locale identifier - confirm against the code
        // that fills it). Starts out as an empty table.
        public Hashtable CommentsLanguage
        {
                get {
                        return commentsLanguage;
                }
                set {
                        commentsLanguage = value;
                }
        }
        
        // Returns the comment stored for the given language, or null when
        // there is none, when the stored entry is not a string, when language
        // is null or when the table itself has been set to null.
        public string GetCommentForLanguage( string language )
        {
                // Hashtable's indexer throws for a null key, and the table can be
                // replaced by null through the setter; both cases simply mean
                // "no comment available".
                if ( language == null || commentsLanguage == null )
                        return null;
                
                return commentsLanguage[ language ] as string;
        }
}
692         
// Kinds of value a Match can carry (see Match.MatchType).
//
// The member names pair a kind (String, Host, Big, Little, Byte) with a
// width (16/32) - presumably the byte order and bit width used when the
// value is compared; confirm against the code that consumes the enum.
// The numeric values are the ones the compiler assigns implicitly and are
// only spelled out here.
internal enum MatchTypes
{
        TypeString   = 0,
        TypeHost16   = 1,
        TypeHost32   = 2,
        TypeBig16    = 3,
        TypeBig32    = 4,
        TypeLittle16 = 5,
        TypeLittle32 = 6,
        TypeByte     = 7
}
704         
// Plain data holder describing a single match entry.
//
// The class contains no logic of its own; it only stores the values that
// other code assigns and reads back through the properties below.
internal class Match
{
        // --- what the entry stands for ---------------------------------
        private string mimeType;
        private int priority;
        
        // --- what is compared ------------------------------------------
        private MatchTypes matchType;
        private byte[] byteValue;
        private byte[] mask;                    // null until a mask is assigned
        
        // --- where it is compared --------------------------------------
        private int offset;
        private int offsetLength;
        private int wordSize = 1;               // default word size is 1
        
        // --- nested entries --------------------------------------------
        private ArrayList matches = new ArrayList();
        
        // Mime type string associated with this entry.
        public string MimeType
        {
                get { return mimeType; }
                set { mimeType = value; }
        }
        
        // Raw bytes of the value carried by this entry.
        public byte[] ByteValue
        {
                get { return byteValue; }
                set { byteValue = value; }
        }
        
        // Optional mask bytes; null when none has been set.
        public byte[] Mask
        {
                get { return mask; }
                set { mask = value; }
        }
        
        // Priority value of this entry.
        public int Priority
        {
                get { return priority; }
                set { priority = value; }
        }
        
        // Nested entries belonging to this one; an empty list by default.
        public ArrayList Matches
        {
                get { return matches; }
                set { matches = value; }
        }
        
        // Offset value of this entry (presumably a position in the examined
        // data - confirm against the matching code).
        public int Offset
        {
                get { return offset; }
                set { offset = value; }
        }
        
        // Length value that accompanies Offset.
        public int OffsetLength
        {
                get { return offsetLength; }
                set { offsetLength = value; }
        }
        
        // Word size of this entry; 1 unless changed.
        public int WordSize
        {
                get { return wordSize; }
                set { wordSize = value; }
        }
        
        // Kind of value stored in ByteValue.
        public MatchTypes MatchType
        {
                get { return matchType; }
                set { matchType = value; }
        }
}
816 }
817