1 // Permission is hereby granted, free of charge, to any person obtaining
2 // a copy of this software and associated documentation files (the
3 // "Software"), to deal in the Software without restriction, including
4 // without limitation the rights to use, copy, modify, merge, publish,
5 // distribute, sublicense, and/or sell copies of the Software, and to
6 // permit persons to whom the Software is furnished to do so, subject to
7 // the following conditions:
9 // The above copyright notice and this permission notice shall be
10 // included in all copies or substantial portions of the Software.
12 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
17 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
18 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 // Copyright (c) 2005 Novell, Inc. (http://www.novell.com)
24 // Alexander Olk xenomorph2@onlinehome.de
29 using System.Collections;
30 using System.Collections.Specialized;
31 using System.Text.RegularExpressions;
36 // string mimeType = Mime.GetMimeTypeForFile( string filename );
38 // string mimeType = Mime.GetMimeTypeForData( byte[] data );
39 // - for string (maybe an email):
40 // string mimeType = Mime.GetMimeTypeForString( string input );
42 // - get alias for mime type:
43 // string alias = Mime.GetMimeAlias( string mimeType );
44 // - get subclass for mime type:
45 // string subtype = Mime.GetMimeSubClass( string mimeType );
46 // - get all available mime types:
47 // string[] available = Mime.AvailableMimeTypes;
51 // - little/big endian stuff for TypeHostXX
52 // - async callback ?!?
53 // - freedesktop.org file-extension patterns can also contain regular expressions; resolve those too
54 // - sort match collections by magic priority ( higher = first )
55 // - MimeGenerated: use indexes to point to mime type name strings instead of repeating the name string each time (in match, subclass, etc.) !?!
56 // - buffer is currently hard coded to size 8192, value should be determined by MimeGenerated
58 namespace System.Windows.Forms
// Shared instance used by the static lookup methods of this class.
62 public static Mime Instance = new Mime();
// Name of the file currently being examined by a file-name lookup.
64 private string current_file_name;
// Mime type produced by the most recent lookup; starts as application/octet-stream.
65 private string global_result = octet_stream;
// Stream used to read the beginning of the file into 'buffer'.
67 private FileStream file_stream;
// Scratch buffer holding the bytes that the magic matches are tested against.
69 private byte[] buffer = new byte[ 8192 ];
// Mime type strings assigned as default / fallback results.
71 private const string octet_stream = "application/octet-stream";
72 private const string text_plain = "text/plain";
73 private const string zero_file = "application/x-zerosize";
// Cache of already resolved file names -> mime type.
// NOTE(review): StringDictionary treats keys case-insensitively, so file names
// that differ only in case share one cache entry - confirm this is intended.
75 private StringDictionary mime_file_cache = new StringDictionary();
// Number of cache entries above which the cache gets trimmed.
77 private const int mime_file_cache_max_size = 5000;
// Input of the current string lookup.
79 private string search_string;
// Lock object for the static entry points.
81 private static object lock_object = new Object();
// Numeric value of the current platform id; compared against 4 and 128 in CheckForInode.
83 private int platform = (int) Environment.OSVersion.Platform;
// Set to false at the start of every file-name lookup.
85 private bool is_zero_file = false;
// Constructor body (the constructor header is not visible in this excerpt).
89 MimeGenerated.Init( );
91 // Console.WriteLine( "Mime Instance created..." );
// Determines the mime type of a file.
//
// filename: path of the file to examine.
// returns:  the mime type string the lookup stored in the shared instance.
public static string GetMimeTypeForFile( string filename )
{
	// All lookups share the state of 'Instance'. The result must therefore be
	// read while the lock is still held, otherwise a lookup running on another
	// thread could overwrite global_result before it is returned here.
	lock ( lock_object )
	{
		Instance.StartByFileName( filename );

		return Instance.global_result;
	}
}
// Determines the mime type of a block of raw data.
//
// data:    the bytes to examine.
// returns: the mime type string the lookup stored in the shared instance.
public static string GetMimeTypeForData( byte[] data )
{
	// The result is read inside the lock because the lookup state is shared by
	// all callers; reading it after leaving the lock would race with other threads.
	lock ( lock_object )
	{
		Instance.StartDataLookup( data );

		return Instance.global_result;
	}
}
// Determines the mime type of a string (for example the text of an email).
//
// input:   the text to examine.
// returns: the mime type string the lookup stored in the shared instance.
public static string GetMimeTypeForString( string input )
{
	// The result is read inside the lock because the lookup state is shared by
	// all callers; reading it after leaving the lock would race with other threads.
	lock ( lock_object )
	{
		Instance.StartStringLookup( input );

		return Instance.global_result;
	}
}
// Returns the entry stored in MimeGenerated.Aliases for the given mime type.
public static string GetMimeAlias( string mimetype )
{
	string alias = MimeGenerated.Aliases[ mimetype ];

	return alias;
}
// Returns the entry stored in MimeGenerated.SubClasses for the given mime type.
public static string GetMimeSubClass( string mimetype )
{
	string sub_class = MimeGenerated.SubClasses[ mimetype ];

	return sub_class;
}
// Returns a freshly allocated array containing the keys of MimeGenerated.MimeTypes.
public static string[] AvailableMimeTypes
{
	get
	{
		int count = MimeGenerated.MimeTypes.Count;
		string[] names = new string[ count ];

		MimeGenerated.MimeTypes.Keys.CopyTo( names, 0 );

		return names;
	}
}
// Resolves the mime type for 'filename' and stores it in global_result.
// Results are cached per file name; once the cache grows beyond
// mime_file_cache_max_size it is trimmed.
private void StartByFileName( string filename )
{
	// already resolved earlier: reuse the cached result
	if ( mime_file_cache.ContainsKey( filename ) )
	{
		global_result = mime_file_cache[ filename ];
		return;
	}

	current_file_name = filename;
	is_zero_file = false;

	// directories, devices etc. are handled by CheckForInode,
	// everything else goes through the regular lookup
	if ( !CheckForInode( ) )
	{
		global_result = octet_stream;

		GoByFileName( );
	}

	if ( !mime_file_cache.ContainsKey( current_file_name ) )
		mime_file_cache.Add( current_file_name, global_result );

	if ( mime_file_cache.Count > mime_file_cache_max_size )
	{
		// Take a snapshot of the keys first: entries must not be removed while
		// the dictionary is being enumerated, and the enumerator yields
		// DictionaryEntry values rather than key strings.
		string[] cached_names = new string[ mime_file_cache.Count ];
		mime_file_cache.Keys.CopyTo( cached_names, 0 );

		int remove_count = mime_file_cache_max_size - 1000;

		for ( int i = 0; i < remove_count && i < cached_names.Length; i++ )
		{
			mime_file_cache.Remove( cached_names[ i ] );
		}
	}
}
// Resolves the mime type for a block of raw data and stores it in global_result.
private void StartDataLookup( byte[] data )
{
	global_result = octet_stream;

	// start from a zeroed buffer and copy in at most buffer.Length bytes
	System.Array.Clear( buffer, 0, buffer.Length );

	int copy_length = System.Math.Min( data.Length, buffer.Length );
	System.Array.Copy( data, buffer, copy_length );

	// high priority matches first, then the low priority ones;
	// only if neither hits fall back to the binary/text heuristic
	if ( CheckMatch80Plus( ) || CheckMatchBelow80( ) )
		return;

	CheckForBinaryOrText( );
}
// Resolves the mime type for a string and stores it in global_result.
// text/plain is the result whenever CheckForContentTypeString finds nothing better.
private void StartStringLookup( string input )
{
	search_string = input;
	global_result = text_plain;

	// the boolean outcome does not matter here: nothing else is tried afterwards
	CheckForContentTypeString( );
}
// Checks whether current_file_name refers to a special file system object
// (directory, block device, socket, symbolic link, character device, fifo)
// and, if so, stores the matching "inode/..." type in global_result.
// NOTE(review): the return statements and some of the conditions are not
// visible in this excerpt; presumably true is returned when an inode type was
// assigned - confirm against the full source.
213 private bool CheckForInode( )
// Only runs when the platform id is 4 or 128.
// NOTE(review): 4 is PlatformID.Unix; 128 is presumably the value older Mono
// versions reported for Unix - confirm.
215 if ( ( platform == 4 ) || ( platform == 128 ) )
219 Mono.Unix.UnixFileInfo ufi = new Mono.Unix.UnixFileInfo( current_file_name );
226 if ( ufi.IsDirectory )
228 global_result = "inode/directory";
232 if ( ufi.IsBlockDevice )
234 global_result = "inode/blockdevice";
240 global_result = "inode/socket";
244 if ( ufi.IsSymbolicLink )
246 global_result = "inode/symlink";
250 if ( ufi.IsCharacterDevice )
252 global_result = "inode/chardevice";
258 global_result = "inode/fifo";
// Runs the lookup steps for current_file_name in order: magic matches with
// priority >= 80, global file name patterns, magic matches with priority < 80
// and finally the binary/text heuristic. The steps store their result in
// global_result.
// NOTE(review): the conditions and returns between the steps are not visible
// in this excerpt; the existing comments below describe the intended flow.
272 private void GoByFileName( )
274 // check if we can open the file
277 // couldn't open the file, check globals only
279 CheckGlobalPatterns( );
286 // check for matches with a priority >= 80
287 if ( CheckMatch80Plus( ) )
291 // check global patterns, aka file extensions...
292 // this should be done for zero size files also,
293 // for example zero size file trash.ccc~ should return
294 // application/x-trash instead of application/x-zerosize
295 if ( CheckGlobalPatterns( ) )
298 // if file size is zero, no other checks are needed
302 // ok, still nothing matches then try matches with a priority < 80
303 if ( CheckMatchBelow80( ) )
306 // wow, still nothing... return application/octet-stream for binary data, or text/plain for textual data
307 CheckForBinaryOrText( );
// Tests the buffer against every entry of MimeGenerated.Matches80Plus.
// The first entry that matches determines global_result; returns true on a hit.
private bool CheckMatch80Plus( )
{
	foreach ( Match candidate in MimeGenerated.Matches80Plus )
	{
		if ( !TestMatch( candidate ) )
			continue;

		global_result = candidate.MimeType;
		return true;
	}

	return false;
}
// Cuts global_result off at the first comma, keeping only the part before it.
private void CheckGlobalResult( )
{
	int cut_position = global_result.IndexOf( "," );

	// no comma: nothing to strip
	if ( cut_position == -1 )
		return;

	global_result = global_result.Substring( 0, cut_position );
}
// Tries to determine the mime type from the name of the current file alone.
// The checks run in this order: literal names (plain or regular expression),
// long patterns such as double extensions, the plain extension and finally
// prefix/suffix patterns.
//
// returns: true if a pattern matched; global_result then holds the mime type.
//          false if nothing matched; global_result is left untouched so a
//          result assigned earlier (for example for zero size files) survives.
private bool CheckGlobalPatterns( )
{
	string filename = Path.GetFileName( current_file_name );
	string filename_lower = filename.ToLower( );

	// first check for literals
	for ( int i = 0; i < MimeGenerated.GlobalLiterals.Count; i++ )
	{
		string key = MimeGenerated.GlobalLiterals.GetKey( i );

		bool is_match;

		// keys without '[' are compared literally, all others are treated as regular expressions
		if ( key.IndexOf( '[' ) == -1 )
			is_match = key.Equals( filename );
		else
			is_match = Regex.IsMatch( filename, key );

		if ( is_match )
		{
			global_result = MimeGenerated.GlobalLiterals[ i ];
			CheckGlobalResult( );
			return true;
		}
	}

	if ( filename.IndexOf( '.' ) != -1 )
	{
		// check for double extension like .tar.gz
		for ( int i = 0; i < MimeGenerated.GlobalPatternsLong.Count; i++ )
		{
			string key = MimeGenerated.GlobalPatternsLong.GetKey( i );

			// try the name as given first, then its lower case form
			if ( filename.EndsWith( key ) || filename_lower.EndsWith( key ) )
			{
				global_result = MimeGenerated.GlobalPatternsLong[ i ];
				CheckGlobalResult( );
				return true;
			}
		}

		// check normal extensions...
		string extension = Path.GetExtension( current_file_name );

		if ( extension.Length != 0 )
		{
			// Look the extension up into a local first: writing the lookup
			// result straight into global_result would replace it with null
			// whenever the extension is unknown.
			string mime_type = MimeGenerated.GlobalPatternsShort[ extension ];

			if ( mime_type == null )
				mime_type = MimeGenerated.GlobalPatternsShort[ extension.ToLower( ) ];

			if ( mime_type != null )
			{
				global_result = mime_type;
				CheckGlobalResult( );
				return true;
			}
		}
	}

	// finally check if a prefix or suffix matches
	for ( int i = 0; i < MimeGenerated.GlobalSufPref.Count; i++ )
	{
		string key = MimeGenerated.GlobalSufPref.GetKey( i );
		string literal_part = key.Replace( "*", "" );

		// a leading '*' marks a suffix pattern, everything else is a prefix pattern
		bool is_match = key.StartsWith( "*" )
			? filename.EndsWith( literal_part )
			: filename.StartsWith( literal_part );

		if ( is_match )
		{
			global_result = MimeGenerated.GlobalSufPref[ i ];
			CheckGlobalResult( );
			return true;
		}
	}

	return false;
}
// Tests the buffer against every entry of MimeGenerated.MatchesBelow80.
// The first entry that matches determines global_result; returns true on a hit.
private bool CheckMatchBelow80( )
{
	foreach ( Match candidate in MimeGenerated.MatchesBelow80 )
	{
		if ( !TestMatch( candidate ) )
			continue;

		global_result = candidate.MimeType;
		return true;
	}

	return false;
}
// Last resort classification: inspects the first 32 bytes of the buffer.
// Any control byte (value below 32) other than tab, line feed, form feed or
// carriage return marks the data as application/octet-stream; otherwise the
// result is text/plain.
private void CheckForBinaryOrText( )
{
	const int bytes_to_check = 32;

	for ( int position = 0; position < bytes_to_check; position++ )
	{
		byte current = buffer[ position ];

		bool is_allowed_control =
			current == 9  ||	// '\t'
			current == 10 ||	// '\n'
			current == 12 ||	// form feed
			current == 13;		// '\r'

		if ( current < 32 && !is_allowed_control )
		{
			global_result = octet_stream;
			return;
		}
	}

	global_result = text_plain;
}
// Tests a single magic match (and, recursively, its sub matches) against the
// bytes in 'buffer'. For every start position from match.Offset up to
// match.Offset + match.OffsetLength - 1 the bytes of match.ByteValue are
// compared with the buffer, optionally ANDing both sides with match.Mask.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true is returned when the value (and, if present, one of the sub
// matches) matched - confirm against the full source.
// NOTE(review): no bounds check against buffer.Length is visible here; verify
// that match.Offset + match.OffsetLength + match.ByteValue.Length can never
// exceed the buffer size.
480 private bool TestMatch( Match match )
484 // using a simple brute force search algorithm
485 // compare each (masked) value from the buffer with the (masked) value from the match
487 // - to find some more speed, maybe we should use unsafe code
488 // - check if buffer[0] and buffer[lastmatchbyte] match ByteValue[0] and ByteValue[lastmatchbyte] in a match
// try every start position inside the offset range
490 for ( int offset_counter = 0; offset_counter < match.OffsetLength; offset_counter++ )
// without mask: compare the raw bytes
492 if ( match.Mask == null )
// cheap pre-check on the first byte before comparing the rest
494 if ( buffer[ match.Offset + offset_counter ] == match.ByteValue[ 0 ] )
// single byte value: the first byte comparison already covers the whole value
496 if ( match.ByteValue.Length == 1 )
498 if ( match.Matches.Count > 0 )
500 foreach ( Match sub_match in match.Matches )
502 if ( TestMatch( sub_match ) )
// compare the remaining bytes of the value
510 for ( int i = 1; i < match.ByteValue.Length; i++ )
512 if ( buffer[ match.Offset + offset_counter + i ] != match.ByteValue[ i ] )
// sub matches are tested recursively against the same buffer
525 if ( match.Matches.Count > 0 )
527 foreach ( Match sub_match in match.Matches )
529 if ( TestMatch( sub_match ) )
538 else // with mask ( it's the same as above, only AND the byte with the corresponding mask byte
540 if ( ( buffer[ match.Offset + offset_counter ] & match.Mask[ 0 ] ) ==
541 ( match.ByteValue[ 0 ] & match.Mask[ 0 ] ) )
543 if ( match.ByteValue.Length == 1 )
545 if ( match.Matches.Count > 0 )
547 foreach ( Match sub_match in match.Matches )
549 if ( TestMatch( sub_match ) )
// compare the remaining masked bytes of the value
557 for ( int i = 1; i < match.ByteValue.Length; i++ )
559 if ( ( buffer[ match.Offset + offset_counter + i ] & match.Mask[ i ] ) !=
560 ( match.ByteValue[ i ] & match.Mask[ i ] ) )
573 if ( match.Matches.Count > 0 )
575 foreach ( Match sub_match in match.Matches )
577 if ( TestMatch( sub_match ) )
// Opens the current file and reads its first bytes into 'buffer'.
// An empty file is flagged via is_zero_file and global_result is set to
// application/x-zerosize.
//
// returns: true if the file could be opened and read, false otherwise.
private bool OpenFile( )
{
	try
	{
		System.Array.Clear( buffer, 0, buffer.Length );

		file_stream = new FileStream( current_file_name, FileMode.Open, FileAccess.Read ); // FileShare ??? use BinaryReader ???

		try
		{
			if ( file_stream.Length == 0 )
			{
				global_result = zero_file;
				is_zero_file = true;
			}
			else
			{
				// a short read is fine: the rest of the buffer stays zeroed
				file_stream.Read( buffer, 0, buffer.Length );
			}
		}
		finally
		{
			// always release the file handle, even if Length or Read throws
			file_stream.Close( );
		}
	}
	catch ( System.Exception )
	{
		// the file cannot be opened or read; the caller falls back to name based checks
		return false;
	}

	return true;
}
// Determines the mime type of search_string.
// If the text contains a "Content-type:" header, its value (up to the first
// ';' or the end of the line) becomes the result. Otherwise the text is
// converted to bytes and run through the magic matches.
//
// returns: true if a mime type was found and stored in global_result,
//          false if global_result was left unchanged.
private bool CheckForContentTypeString( )
{
	if ( search_string == null )
		return false;

	const string header_name = "Content-type:";

	// header field names are case-insensitive ("Content-Type:" is the usual spelling)
	int index = System.Globalization.CultureInfo.InvariantCulture.CompareInfo.IndexOf(
		search_string, header_name, System.Globalization.CompareOptions.IgnoreCase );

	if ( index != -1 )
	{
		index += header_name.Length;

		// The value ends at the first parameter separator or at the end of the
		// header line; without any of them it runs to the end of the string.
		int end = search_string.IndexOfAny( new char[] { ';', '\r', '\n' }, index );

		if ( end == -1 )
			end = search_string.Length;

		// Trim returns a new string, so its result has to be used
		string content_type = search_string.Substring( index, end - index ).Trim( );

		// an empty header value is useless; fall through to the content checks
		if ( content_type.Length != 0 )
		{
			global_result = content_type;
			return true;
		}
	}

	// convert string to byte array
	byte[] string_byte = ( new ASCIIEncoding( ) ).GetBytes( search_string );

	System.Array.Clear( buffer, 0, buffer.Length );

	// copy at most buffer.Length bytes
	System.Array.Copy( string_byte, buffer, System.Math.Min( string_byte.Length, buffer.Length ) );

	if ( CheckMatch80Plus( ) )
		return true;

	if ( CheckMatchBelow80( ) )
		return true;

	return false;
}
// Holds the comment of a mime type together with a table of comments that is
// looked up by language.
663 internal class MimeType
665 private string comment;
// table of comments, indexed by language in GetCommentForLanguage
666 private Hashtable commentsLanguage = new Hashtable();
// NOTE(review): the accessor bodies are not visible in this excerpt;
// presumably this property wraps the 'comment' field - confirm.
668 public string Comment
// Gets or sets the table of comments.
678 public Hashtable CommentsLanguage
681 return commentsLanguage;
684 commentsLanguage = value;
// Returns the comment stored for 'language', or null if there is none or the
// stored value is not a string.
687 public string GetCommentForLanguage( string language )
689 return commentsLanguage[ language ] as string;
693 internal enum MatchTypes
714 MatchTypes matchType;
715 ArrayList matches = new ArrayList();
717 public string MimeType
728 public byte[] ByteValue
761 public ArrayList Matches
783 public int OffsetLength
786 offsetLength = value;
805 public MatchTypes MatchType