1 // Permission is hereby granted, free of charge, to any person obtaining
2 // a copy of this software and associated documentation files (the
3 // "Software"), to deal in the Software without restriction, including
4 // without limitation the rights to use, copy, modify, merge, publish,
5 // distribute, sublicense, and/or sell copies of the Software, and to
6 // permit persons to whom the Software is furnished to do so, subject to
7 // the following conditions:
9 // The above copyright notice and this permission notice shall be
10 // included in all copies or substantial portions of the Software.
12 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
17 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
18 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 // Copyright (c) 2005 Novell, Inc. (http://www.novell.com)
24 // Alexander Olk xenomorph2@onlinehome.de
29 using System.Collections;
30 using System.Collections.Specialized;
31 using System.Text.RegularExpressions;
36 // string mimeType = Mime.GetMimeTypeForFile( string filename );
38 // string mimeType = Mime.GetMimeTypeForData( byte[] data );
39 // - for string (maybe an email):
40 // string mimeType = Mime.GetMimeTypeForString( string input );
42 // - get alias for mime type:
43 // string alias = Mime.GetMimeAlias( string mimeType );
44 // - get subclass for mime type:
45 // string subtype = Mime.GetMimeSubClass( string mimeType );
46 // - get all available mime types:
47 // string[] available = Mime.AvailableMimeTypes;
50 // - optimize even more :)
51 // - async callback ?!?
52 // - freedesktop org file extensions can have regular expressions also, resolve them too
53 // - sort match collections by magic priority ( higher = first ) ?
56 // looking up the mime types 20 times for 2757 files in /usr/lib without caching (mime_file_cache)
57 // old version: Time: 00:00:32.3791220
58 // new version: Time: 00:00:16.9991810
60 namespace System.Windows.Forms
// Shared instance used by the static Get* entry points; all lookup state below lives on it.
64 public static Mime Instance = new Mime();
// Name of the file currently being examined; assigned in StartByFileName.
66 private string current_file_name;
// Outcome of the most recent lookup; the static Get* methods return this field.
67 private string global_result = octet_stream;
// Stream used by OpenFile to read the leading bytes of the current file.
69 private FileStream file_stream;
// Working buffer for the content being tested; allocated in the constructor
// with the length returned by FDOMimeConfigReader.Init (stays null when Init returns -1).
71 private byte[] buffer = null;
// Well-known result values.
73 private const string octet_stream = "application/octet-stream";
74 private const string text_plain = "text/plain";
75 private const string zero_file = "application/x-zerosize";
// File name -> mime type cache filled by StartByFileName.
// NOTE(review): StringDictionary handles keys case-insensitively; confirm that is acceptable for file names.
77 private StringDictionary mime_file_cache = new StringDictionary();
// Entry count above which StartByFileName starts evicting cache entries.
79 private const int mime_file_cache_max_size = 3000;
// Input text of the current string lookup; assigned in StartStringLookup.
81 private string search_string;
// NOTE(review): presumably guards the static entry points; no use of it is visible in this listing.
83 private static object lock_object = new Object();
85 // private int platform = (int) Environment.OSVersion.Platform;
// Set when the current file has length 0 (checked in GoByFileName).
87 private bool is_zero_file = false;
// Number of bytes OpenFile placed into buffer; TestMatchlet uses it as the upper bound for matching.
89 private int bytes_read = 0;
// Lookup tables populated by FDOMimeConfigReader (ReadAliases / ReadSubclasses).
91 public static NameValueCollection Aliases;
92 public static NameValueCollection SubClasses;
// Glob tables populated by FDOMimeConfigReader.ReadGlobsData:
// single-extension patterns, multi-dot patterns, patterns without '*', and '*' patterns without a dot.
94 public static NameValueCollection GlobalPatternsShort;
95 public static NameValueCollection GlobalPatternsLong;
96 public static NameValueCollection GlobalLiterals;
97 public static NameValueCollection GlobalSufPref;
// Magic matches collected by FDOMimeConfigReader.MakeMatches, split into two lists by priority.
99 public static ArrayList Matches80Plus;
100 public static ArrayList MatchesBelow80;
// Constructor body: creates the static lookup tables, lets FDOMimeConfigReader fill them,
// and sizes the working buffer from the value Init returns.
// NOTE(review): the tables combine a case-insensitive hash provider with a plain culture Comparer;
// confirm whether key lookups are meant to be case-sensitive (CheckGlobalPatterns retries with ToLower).
104 Aliases = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
105 SubClasses = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
106 GlobalPatternsShort = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
107 GlobalPatternsLong = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
108 GlobalLiterals = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
109 GlobalSufPref = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
110 Matches80Plus = new ArrayList ();
111 MatchesBelow80 = new ArrayList ();
// Read the freedesktop.org mime database; Init returns the buffer size needed for magic matching.
113 FDOMimeConfigReader fmcr = new FDOMimeConfigReader ();
114 int buffer_length = fmcr.Init ();
// Only allocate the buffer when Init did not report -1; otherwise buffer stays null.
116 if (buffer_length != -1) {
117 buffer = new byte[ buffer_length ];
// Returns the mime type for the file with the given name.
//   filename: path of the file to examine.
//   returns:  the detected mime type string.
// The lookup state lives in the shared Instance, so the result is read while
// the lock is still held; reading it after releasing the lock would allow a
// concurrent lookup to overwrite global_result before it is returned.
public static string GetMimeTypeForFile( string filename )
{
    lock ( lock_object )
    {
        Instance.StartByFileName( filename );

        return Instance.global_result;
    }
}
// Returns the mime type for a block of raw data.
//   data:    the bytes to examine.
//   returns: the detected mime type string.
// The result is read inside the lock because it is stored on the shared
// Instance and could otherwise be overwritten by a concurrent lookup.
public static string GetMimeTypeForData( byte[] data )
{
    lock ( lock_object )
    {
        Instance.StartDataLookup( data );

        return Instance.global_result;
    }
}
// Returns the mime type for a piece of text (for example an e-mail).
//   input:   the text to examine.
//   returns: the detected mime type string.
// The result is read inside the lock because it is stored on the shared
// Instance and could otherwise be overwritten by a concurrent lookup.
public static string GetMimeTypeForString( string input )
{
    lock ( lock_object )
    {
        Instance.StartStringLookup( input );

        return Instance.global_result;
    }
}
// Looks up the alias registered for the given mime type in the Aliases table.
// The value comes straight from the table indexer, whatever it yields for an unknown key.
public static string GetMimeAlias( string mimetype )
{
    string alias = Aliases[ mimetype ];

    return alias;
}
// Looks up the parent class registered for the given mime type in the SubClasses table.
// The value comes straight from the table indexer, whatever it yields for an unknown key.
public static string GetMimeSubClass( string mimetype )
{
    string parent_type = SubClasses[ mimetype ];

    return parent_type;
}
// Determines the mime type for a file name and leaves it in global_result.
// Results are cached per file name; once the cache grows beyond
// mime_file_cache_max_size a batch of entries is evicted.
//   filename: path of the file to examine.
private void StartByFileName( string filename )
{
    if ( mime_file_cache.ContainsKey( filename ) )
    {
        global_result = mime_file_cache[ filename ];
        return;
    }

    current_file_name = filename;
    is_zero_file = false;

    global_result = octet_stream;

    GoByFileName( );

    // CheckGlobalPatterns assigns table lookups directly to global_result, so a
    // failed extension lookup can leave it null. Fall back to a defined value
    // instead of caching and returning null.
    if ( global_result == null )
        global_result = is_zero_file ? zero_file : octet_stream;

    mime_file_cache.Add( current_file_name, global_result );

    if ( mime_file_cache.Count > mime_file_cache_max_size )
    {
        // Snapshot the keys first: entries cannot be removed while the
        // dictionary is being enumerated, and the enumerator yields
        // DictionaryEntry objects rather than key strings.
        ArrayList cached_names = new ArrayList( mime_file_cache.Keys );
        int remove_count = mime_file_cache_max_size - 1000;

        for ( int i = 0; i < remove_count && i < cached_names.Count; i++ )
        {
            mime_file_cache.Remove( (string) cached_names[ i ] );
        }
    }
}
// Determines the mime type for a block of raw data and leaves it in global_result.
//   data: the bytes to examine; only as many bytes as fit into the working buffer are used.
private void StartDataLookup( byte[] data )
{
    global_result = octet_stream;

    // No buffer means the constructor found no mime database to size it from;
    // there is nothing to match against, so keep the default result.
    if ( buffer == null )
        return;

    System.Array.Clear( buffer, 0, buffer.Length );

    // TestMatchlet bounds every comparison by bytes_read, so it has to reflect
    // the data copied here and not the size of the last file that was read.
    bytes_read = System.Math.Min( data.Length, buffer.Length );

    System.Array.Copy( data, buffer, bytes_read );

    if ( CheckMatch80Plus( ) )
        return;

    if ( CheckMatchBelow80( ) )
        return;

    CheckForBinaryOrText( );
}
// Determines the mime type for a piece of text and leaves it in global_result.
// The result defaults to text/plain and is replaced when
// CheckForContentTypeString finds something more specific.
private void StartStringLookup( string input )
{
    search_string = input;
    global_result = text_plain;

    // The return value only says whether the default was replaced;
    // nothing further happens either way.
    CheckForContentTypeString( );
}
229 // private bool CheckForInode( )
231 // if ( ( platform == 4 ) || ( platform == 128 ) )
237 // Mono.Unix.UnixFileInfo ufi = new Mono.Unix.UnixFileInfo( current_file_name );
244 // if ( ufi.IsDirectory )
246 // global_result = "inode/directory";
250 // if ( ufi.IsBlockDevice )
252 // global_result = "inode/blockdevice";
256 // if ( ufi.IsSocket )
258 // global_result = "inode/socket";
262 // if ( ufi.IsSymbolicLink )
264 // global_result = "inode/symlink";
268 // if ( ufi.IsCharacterDevice )
270 // global_result = "inode/chardevice";
276 // global_result = "inode/fifo";
279 // } catch( Exception e )
288 // // windows platform
// Runs the detection steps for current_file_name in order of precedence and
// leaves the outcome in global_result.
private void GoByFileName( )
{
    // When the file cannot be opened only its name can be inspected.
    if ( !OpenFile( ) )
    {
        CheckGlobalPatterns( );
        return;
    }

    bool has_content = !is_zero_file;

    // Magic matches with a priority >= 80 take precedence over the file name.
    if ( has_content && CheckMatch80Plus( ) )
        return;

    // File name patterns are checked for zero size files as well: an empty
    // trash.ccc~ should yield application/x-trash, not application/x-zerosize.
    if ( CheckGlobalPatterns( ) )
        return;

    // Nothing else can be learned from an empty file.
    if ( !has_content )
        return;

    // Lower priority magic matches come next; the binary/text guess is the last resort.
    if ( !CheckMatchBelow80( ) )
        CheckForBinaryOrText( );
}
// Tests the buffer against the entries of Matches80Plus in list order.
// Stores the mime type of the first entry that matches in global_result and
// returns true; returns false when no entry matches.
private bool CheckMatch80Plus( )
{
    for ( int i = 0; i < Matches80Plus.Count; i++ )
    {
        Match candidate = (Match) Matches80Plus[ i ];

        if ( !TestMatch( candidate ) )
            continue;

        global_result = candidate.MimeType;
        return true;
    }

    return false;
}
// Ordinal, allocation-free replacement for String.EndsWith.
// Returns true when input ends with value (an empty value always matches).
private bool FastEndsWidth(string input, string value)
{
    // Distance between the start of input and the start of the candidate suffix.
    int shift = input.Length - value.Length;

    if (shift < 0)
        return false;

    for (int pos = 0; pos < value.Length; pos++) {
        if (input[shift + pos] != value[pos])
            return false;
    }

    return true;
}
// Ordinal, allocation-free replacement for String.StartsWith.
// Returns true when input begins with value (an empty value always matches).
private bool FastStartsWith(string input, string value)
{
    int needed = value.Length;

    if (needed > input.Length)
        return false;

    int pos = 0;

    while (pos < needed && value[pos] == input[pos])
        pos++;

    // Every character matched only if the scan reached the end of value.
    return pos == needed;
}
// Returns the index of the first occurrence of value in input, or -1 when
// it does not occur. The search always starts at index 0.
private int FastIndexOf(string input, char value)
{
    int pos = 0;

    foreach (char current in input) {
        if (current == value)
            return pos;

        pos++;
    }

    return -1;
}
// Returns the index of the first ordinal occurrence of value in input, or -1
// when it does not occur. The search always starts at index 0.
// An empty input or an empty value yields -1.
private int FastIndexOf(string input, string value)
{
    if (input.Length == 0 || value.Length == 0)
        return -1;

    // Last index at which value still fits completely into input. The bound is
    // inclusive: an exclusive bound misses a match at the very end of input,
    // including the case where input and value are equal.
    int last_start = input.Length - value.Length;

    for (int i = 0; i <= last_start; i++) {
        if (string.CompareOrdinal (input, i, value, 0, value.Length) == 0)
            return i;
    }

    return -1;
}
// Truncates global_result at its first comma, if it contains one,
// so that only the part before the comma is kept.
private void CheckGlobalResult( )
{
    int cut = FastIndexOf(global_result, ',');

    if ( cut == -1 )
        return;

    global_result = global_result.Substring( 0, cut );
}
// Tries to derive the mime type from the name of the current file using the glob tables:
// literals first, then multi-dot patterns, then the plain extension, then '*' prefix/suffix patterns.
// On a hit the table value is stored in global_result and passed through CheckGlobalResult.
// NOTE(review): the extension lookups below assign the table result directly to global_result,
// so a miss leaves global_result null; confirm callers cope with that.
422 private bool CheckGlobalPatterns( )
// Only the file name part takes part in literal, multi-dot and prefix/suffix matching.
424 string filename = Path.GetFileName( current_file_name );
426 // first check for literals
427 for ( int i = 0; i < GlobalLiterals.Count; i++ )
429 string key = GlobalLiterals.GetKey(i);
// Keys without '[' are compared as plain text, all other keys are handed to Regex.
432 if ( FastIndexOf(key, '[' ) == -1 )
// NOTE(review): this is a substring test, not an equality test; confirm that is intended for literals.
434 if (FastIndexOf(filename, key) != -1)
436 global_result = GlobalLiterals[i];
437 CheckGlobalResult( );
443 if ( Regex.IsMatch( filename, key ) )
445 global_result = GlobalLiterals[ i ];
446 CheckGlobalResult( );
// Extension based checks only make sense when the name contains a dot.
452 if ( FastIndexOf(filename, '.' ) != -1 )
454 // check for double extension like .tar.gz
455 for ( int i = 0; i < GlobalPatternsLong.Count; i++ )
457 string key = GlobalPatternsLong.GetKey( i );
// Try the name as given first, then its lower-cased form.
459 if (FastEndsWidth (filename, key))
461 global_result = GlobalPatternsLong[ i ];
462 CheckGlobalResult( );
467 if ( FastEndsWidth (filename.ToLower( ), key ) )
469 global_result = GlobalPatternsLong[ i ];
470 CheckGlobalResult( );
476 // check normal extensions...
477 string extension = Path.GetExtension( current_file_name );
479 if ( extension.Length != 0 )
// Look the extension up as given first, then lower-cased.
481 global_result = GlobalPatternsShort[ extension ];
483 if ( global_result != null )
485 CheckGlobalResult( );
489 global_result = GlobalPatternsShort[ extension.ToLower( ) ];
491 if ( global_result != null )
493 CheckGlobalResult( );
499 // finally check if a prefix or suffix matches
500 for ( int i = 0; i < GlobalSufPref.Count; i++ )
502 string key = GlobalSufPref.GetKey( i );
// The '*' is stripped from the key; the remainder is compared against the end or the start of the name.
506 if (FastEndsWidth(filename, key.Replace( "*", "" )))
508 global_result = GlobalSufPref[ i ];
509 CheckGlobalResult( );
515 if ( FastStartsWith(filename, key.Replace( "*", "" ) ) )
517 global_result = GlobalSufPref[ i ];
518 CheckGlobalResult( );
// Tests the buffer against the entries of MatchesBelow80 in list order.
// Stores the mime type of the first entry that matches in global_result and
// returns true; returns false when no entry matches.
private bool CheckMatchBelow80( )
{
    for ( int i = 0; i < MatchesBelow80.Count; i++ )
    {
        Match candidate = (Match) MatchesBelow80[ i ];

        if ( !TestMatch( candidate ) )
            continue;

        global_result = candidate.MimeType;
        return true;
    }

    return false;
}
// Last resort classification: sets global_result to application/octet-stream
// when a control character other than tab, line feed, carriage return or form
// feed shows up in the first 32 buffer bytes, and to text/plain otherwise.
// NOTE(review): OpenFile zero-fills the buffer behind the bytes it read, so
// content shorter than 32 bytes is reported as binary; confirm whether the
// scan should stop at bytes_read instead.
private void CheckForBinaryOrText( )
{
    // Without any buffer content there is nothing to classify as text.
    if ( buffer == null || buffer.Length == 0 )
    {
        global_result = octet_stream;
        return;
    }

    // The buffer is sized from the magic database and may be shorter than 32 bytes.
    int limit = System.Math.Min( 32, buffer.Length );

    for ( int i = 0; i < limit; i++ )
    {
        char c = System.Convert.ToChar( buffer[ i ] );

        if ( c != '\t' && c != '\n' && c != '\r' && c != 12 && c < 32 )
        {
            global_result = octet_stream;
            return;
        }
    }

    global_result = text_plain;
}
// A match applies as soon as one of its top-level matchlets applies.
private bool TestMatch (Match match)
{
    bool found = false;

    foreach (Matchlet candidate in match.Matchlets) {
        if (TestMatchlet (candidate)) {
            found = true;
            break;
        }
    }

    return found;
}
// Tests one matchlet against the buffer: for each position in the matchlet's offset range the
// (optionally masked) byte value is compared with the buffer, and when it fits and the matchlet
// has sub matchlets, those are tested recursively.
// NOTE(review): the loop runs OffsetLength times, so a matchlet with OffsetLength 0 is never compared; confirm.
569 private bool TestMatchlet( Matchlet matchlet )
571 // using a simple brute force search algorithm
572 // compare each (masked) value from the buffer with the (masked) value from the matchlet
574 // nothing can match when offset + byte value length exceeds the number of bytes read
575 if (matchlet.Offset + matchlet.ByteValue.Length > bytes_read)
578 for ( int offset_counter = 0; offset_counter < matchlet.OffsetLength; offset_counter++ )
// Same bound as above, applied to the current position inside the range.
580 if (matchlet.Offset + offset_counter + matchlet.ByteValue.Length > bytes_read)
// Unmasked comparison.
583 if ( matchlet.Mask == null )
// Cheap first-byte test before comparing the rest.
585 if ( buffer[ matchlet.Offset + offset_counter ] == matchlet.ByteValue[ 0 ] )
// A one byte value is fully compared at this point.
587 if ( matchlet.ByteValue.Length == 1 )
589 if ( matchlet.Matchlets.Count > 0 )
591 foreach ( Matchlet sub_matchlet in matchlet.Matchlets )
593 if ( TestMatchlet( sub_matchlet ) )
602 // check if the last matchlet byte value is the same as the byte value in the buffer...
603 if (matchlet.ByteValue.Length > 2) {
604 if (buffer[ matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1 ] != matchlet.ByteValue[ matchlet.ByteValue.Length - 1 ])
// Compare the remaining bytes; NOTE(review): 'minus' is assigned on lines not shown here,
// presumably to skip the last byte already checked above; confirm.
610 for ( int i = 1; i < matchlet.ByteValue.Length - minus; i++ )
612 if ( buffer[ matchlet.Offset + offset_counter + i ] != matchlet.ByteValue[ i ] )
616 if ( matchlet.Matchlets.Count > 0 )
618 foreach ( Matchlet sub_matchlets in matchlet.Matchlets )
620 if ( TestMatchlet( sub_matchlets ) )
628 else // with mask ( it's the same as above, only AND the byte with the corresponding mask byte
630 if ( ( buffer[ matchlet.Offset + offset_counter ] & matchlet.Mask[ 0 ] ) ==
631 ( matchlet.ByteValue[ 0 ] & matchlet.Mask[ 0 ] ) )
633 if ( matchlet.ByteValue.Length == 1 )
635 if ( matchlet.Matchlets.Count > 0 )
637 foreach ( Matchlet sub_matchlets in matchlet.Matchlets )
639 if ( TestMatchlet( sub_matchlets ) )
648 // check if the last matchlet byte value is the same as the byte value in the buffer...
649 if (matchlet.ByteValue.Length > 2) {
651 if ((buffer[ matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1 ] & matchlet.Mask[ matchlet.ByteValue.Length - 1 ])
652 != (matchlet.ByteValue[ matchlet.ByteValue.Length - 1 ] & matchlet.Mask[ matchlet.ByteValue.Length - 1 ]))
658 for ( int i = 1; i < matchlet.ByteValue.Length - minus; i++ )
660 if ( ( buffer[ matchlet.Offset + offset_counter + i ] & matchlet.Mask[ i ] ) !=
661 ( matchlet.ByteValue[ i ] & matchlet.Mask[ i ] ) )
665 if ( matchlet.Matchlets.Count > 0 )
667 foreach ( Matchlet sub_matchlets in matchlet.Matchlets )
669 if ( TestMatchlet( sub_matchlets ) )
// Opens current_file_name and loads its leading bytes into the working buffer.
// For an empty file global_result is set to application/x-zerosize and
// is_zero_file is raised instead.
//   returns: true when the file could be opened and read, false on any error.
private bool OpenFile( )
{
    try
    {
        file_stream = new FileStream( current_file_name, FileMode.Open, FileAccess.Read ); // FileShare ??? use BinaryReader ???

        try
        {
            if ( file_stream.Length == 0 )
            {
                global_result = zero_file;
                is_zero_file = true;
            }
            else
            {
                bytes_read = file_stream.Read( buffer, 0, buffer.Length );

                // do not clear the whole buffer every time; clear only the
                // part that still holds bytes from a previous lookup
                if (bytes_read < buffer.Length) {
                    System.Array.Clear( buffer, bytes_read, buffer.Length - bytes_read );
                }
            }
        }
        finally
        {
            // Close the stream even when Length or Read throws, so that the
            // file handle is not leaked.
            file_stream.Close( );
        }
    }
    catch ( Exception )
    {
        // Any failure (missing file, access denied, no buffer) means the
        // content cannot be inspected; the caller falls back to the file name.
        return false;
    }

    return true;
}
// Determines the mime type of search_string.
// When the text contains a "Content-type:" header, its value becomes
// global_result; otherwise the text is converted to bytes and run through the
// magic matches.
//   returns: true when global_result was set from the header or a magic match,
//            false when nothing applied.
private bool CheckForContentTypeString( )
{
    const string header = "Content-type:";

    int index = search_string.IndexOf( header );

    if ( index != -1 )
    {
        int start = index + header.Length;

        // The value ends at the first ';' or at the end of the line; when
        // neither is present it runs to the end of the text. Scanning for ';'
        // alone would run past the end of the string when it is missing.
        int end = search_string.IndexOfAny( new char[] { ';', '\r', '\n' }, start );

        if ( end == -1 )
            end = search_string.Length;

        // Trim returns a new string, so its result has to be assigned.
        global_result = search_string.Substring( start, end - start ).Trim( );

        return true;
    }

    // Without a buffer there is nothing to run the magic matches against.
    if ( buffer == null )
        return false;

    // convert string to byte array
    byte[] string_byte = ( new ASCIIEncoding( ) ).GetBytes( search_string );

    System.Array.Clear( buffer, 0, buffer.Length );

    // TestMatchlet bounds every comparison by bytes_read, so it has to reflect
    // the bytes copied here.
    bytes_read = System.Math.Min( string_byte.Length, buffer.Length );

    System.Array.Copy( string_byte, buffer, bytes_read );

    if ( CheckMatch80Plus( ) )
        return true;

    if ( CheckMatchBelow80( ) )
        return true;

    return false;
}
// Reads the freedesktop.org shared mime database files (magic, globs, subclasses, aliases)
// and fills the static tables of the Mime class.
757 internal class FDOMimeConfigReader {
// Set by CheckFDOMimePaths once at least one mime directory was found.
758 bool fdo_mime_available = false;
// Mime database directories found by CheckFDOMimePaths.
759 StringCollection shared_mime_paths = new StringCollection ();
// Largest offset + range + value length (+ 1) over all matchlets; maintained by MakeMatches
// and used by Mime as the size of its working buffer.
762 int max_offset_and_range = 0;
// Body of Init (its signature line is not part of this listing): locate the mime directories first.
766 CheckFDOMimePaths ();
// NOTE(review): presumably returns -1 here; Mime's constructor treats -1 as "no buffer"; confirm.
768 if (!fdo_mime_available)
// The directory list is no longer needed once the files were read.
779 shared_mime_paths = null;
782 return max_offset_and_range;
// Collects the mime database directories that exist on this machine and sets
// fdo_mime_available when at least one was found.
785 private void CheckFDOMimePaths ()
// System wide database.
787 if (Directory.Exists ("/usr/share/mime"))
788 shared_mime_paths.Add ("/usr/share/mime/");
// Locally installed database.
// NOTE(review): a line is missing before this check in the listing; it may be an 'else' - confirm.
790 if (Directory.Exists ("/usr/local/share/mime"))
791 shared_mime_paths.Add ("/usr/local/share/mime/");
// Per-user database below the Personal special folder.
793 if (Directory.Exists (System.Environment.GetFolderPath (Environment.SpecialFolder.Personal) + "/.local/share/mime"))
794 shared_mime_paths.Add (System.Environment.GetFolderPath (Environment.SpecialFolder.Personal) + "/.local/share/mime/");
// No directory found: fdo_mime_available keeps its initial value.
796 if (shared_mime_paths.Count == 0)
799 fdo_mime_available = true;
// Reads the "magic" file of every known mime directory and turns its content
// into Match objects (see MakeMatches). Directories without a magic file are
// skipped; a file that cannot be read or parsed is ignored.
private void ReadMagicData ()
{
    foreach (string path in shared_mime_paths) {
        string magic_file = path + "/magic";

        if (!File.Exists (magic_file))
            continue;

        FileStream fs = null;

        try {
            fs = File.OpenRead (magic_file);
            br = new BinaryReader (fs);

            if (CheckMagicHeader ()) {
                MakeMatches ();
            }
        } catch (Exception ) {
            // Best effort: a broken magic file must not prevent the
            // remaining directories from being read.
        } finally {
            // Closing the stream in finally releases the file handle even when
            // parsing throws; the reader holds no other resources.
            if (fs != null)
                fs.Close ();
        }
    }
}
// Parses the body of a magic file from br: each section yields one Match whose matchlets are
// nested according to their indent, and the finished Match is added to one of Mime's two match lists.
// Also keeps max_offset_and_range up to date.
823 private void MakeMatches ()
// Most recent matchlet per indent level; used to attach a matchlet to its parent.
// NOTE(review): fixed capacity of 30 levels, no bounds check is visible here.
825 Matchlet[] matchlets = new Matchlet [30];
827 while (br.PeekChar () != -1) {
829 string mime_type = ReadPriorityAndMimeType (ref priority);
831 if (mime_type != null) {
832 Match match = new Match ();
833 match.Priority = priority;
834 match.MimeType = mime_type;
// Optional indent: the characters before '>' form a decimal number.
840 if (br.PeekChar () != '>') {
841 string indent_string = "";
843 if (br.PeekChar () == '>')
849 indent = Convert.ToInt32 (indent_string);
// Offset of the value inside the examined content.
855 if (br.PeekChar () == '>') {
857 offset = ReadValue ();
860 int value_length = 0;
862 // value length and value
863 if (br.PeekChar () == '=') {
866 // read 2 bytes value length (always big endian)
867 byte first = br.ReadByte ();
868 byte second = br.ReadByte ();
870 value_length = first * 256 + second;
872 value = br.ReadBytes (value_length);
// Optional mask, same length as the value.
878 if (br.PeekChar () == '&') {
881 mask = br.ReadBytes (value_length);
// Optional word size, given as a single digit character.
886 if (br.PeekChar () == '~') {
891 word_size = Convert.ToInt32 (c - 0x30);
893 // data is stored in big endian format.
894 if (word_size > 1 && System.BitConverter.IsLittleEndian) {
895 //convert the value and, if available, the mask data to little endian
896 if (word_size == 2) {
// NOTE(review): assumes the value length is a multiple of the word size; [i + 1] would otherwise run past the end.
898 for (int i = 0; i < value.Length; i += 2) {
899 byte one = value [i];
900 byte two = value [i + 1];
906 for (int i = 0; i < mask.Length; i += 2) {
908 byte two = mask [i + 1];
913 } else if (word_size == 4) {
915 for (int i = 0; i < value.Length; i += 4) {
916 byte one = value [i];
917 byte two = value [i + 1];
918 byte three = value [i + 2];
919 byte four = value [i + 3];
921 value [i + 1] = three;
927 for (int i = 0; i < mask.Length; i += 4) {
929 byte two = mask [i + 1];
930 byte three = mask [i + 2];
931 byte four = mask [i + 3];
933 mask [i + 1] = three;
// Optional range length.
// NOTE(review): the default here is 0, while Mime.TestMatchlet loops OffsetLength times,
// so a matchlet without '+' would never be compared; the freedesktop spec default is 1 - confirm.
944 int range_length = 0;
945 if (br.PeekChar () == '+') {
947 range_length = ReadValue ();
953 // create the matchlet
954 matchlets [indent] = new Matchlet ();
955 matchlets [indent].Offset = offset;
956 matchlets [indent].OffsetLength = range_length;
957 matchlets [indent].ByteValue = value;
959 matchlets [indent].Mask = mask;
// Attach the matchlet either to the match itself or to the matchlet one indent level up.
962 match.Matchlets.Add (matchlets [indent]);
964 matchlets [indent - 1].Matchlets.Add (matchlets [indent]);
// Track the largest number of bytes any matchlet may need to look at.
967 if (max_offset_and_range < matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1)
968 max_offset_and_range = matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1;
970 // if '[' move to next mime type
971 if (br.PeekChar () == '[')
// The finished match goes into one of the two lists (presumably chosen by priority - confirm).
976 Mime.MatchesBelow80.Add (match);
978 Mime.Matches80Plus.Add (match);
// Reads the "globs" file of every known mime directory. Each data line has
// the form "mime/type:pattern" and is sorted into one of Mime's glob tables:
//   pattern with '*' but without '.'  -> GlobalSufPref
//   pattern without '*'               -> GlobalLiterals
//   "*.a.b" (more than one dot)       -> GlobalPatternsLong  (stored without the '*')
//   "*.a"                             -> GlobalPatternsShort (stored without the '*')
// Comment lines, blank lines and lines without a ':' are skipped.
private void ReadGlobsData ()
{
    foreach (string path in shared_mime_paths) {
        string globs_file = path + "/globs";

        if (!File.Exists (globs_file))
            continue;

        StreamReader sr = null;

        try {
            sr = new StreamReader (globs_file);

            while (sr.Peek () != -1) {
                string line = sr.ReadLine ().Trim ();

                if (line.Length == 0 || line.StartsWith ("#"))
                    continue;

                string[] split = line.Split (new char [] {':'});

                // A malformed line would otherwise throw and, through the
                // catch below, silently drop the rest of the file.
                if (split.Length < 2 || split [1].Length == 0)
                    continue;

                if (split [1].IndexOf ('*') > -1 && split [1].IndexOf ('.') == -1) {
                    Mime.GlobalSufPref.Add (split [1], split [0]);
                } else if (split [1]. IndexOf ('*') == -1) {
                    Mime.GlobalLiterals.Add (split [1], split [0]);
                } else {
                    string[] split2 = split [1].Split (new char [] {'.'});

                    if (split2.Length > 2) {
                        // more than one dot
                        Mime.GlobalPatternsLong.Add (split [1].Remove(0, 1), split [0]);
                    } else {
                        // normal
                        Mime.GlobalPatternsShort.Add (split [1].Remove(0, 1), split [0]);
                    }
                }
            }
        } catch (Exception ) {
            // Best effort: an unreadable globs file must not prevent the
            // remaining directories from being read.
        } finally {
            // Release the file handle even when reading throws.
            if (sr != null)
                sr.Close ();
        }
    }
}
// Reads the "subclasses" file of every known mime directory. Each data line
// holds two space separated mime types; the first becomes the key and the
// second the value in Mime.SubClasses.
// Comment lines, blank lines and lines with fewer than two fields are skipped.
private void ReadSubclasses ()
{
    foreach (string path in shared_mime_paths) {
        string subclasses_file = path + "/subclasses";

        if (!File.Exists (subclasses_file))
            continue;

        StreamReader sr = null;

        try {
            sr = new StreamReader (subclasses_file);

            while (sr.Peek () != -1) {
                string line = sr.ReadLine ().Trim ();

                if (line.Length == 0 || line.StartsWith ("#"))
                    continue;

                string[] split = line.Split (new char [] {' '});

                // A malformed line would otherwise throw and, through the
                // catch below, silently drop the rest of the file.
                if (split.Length < 2)
                    continue;

                Mime.SubClasses.Add (split [0], split [1]);
            }
        } catch (Exception ) {
            // Best effort: an unreadable file must not prevent the
            // remaining directories from being read.
        } finally {
            // Release the file handle even when reading throws.
            if (sr != null)
                sr.Close ();
        }
    }
}
// Reads the "aliases" file of every known mime directory. Each data line
// holds two space separated mime types; the first becomes the key and the
// second the value in Mime.Aliases.
// Comment lines, blank lines and lines with fewer than two fields are skipped.
private void ReadAliases ()
{
    foreach (string path in shared_mime_paths) {
        string aliases_file = path + "/aliases";

        if (!File.Exists (aliases_file))
            continue;

        StreamReader sr = null;

        try {
            sr = new StreamReader (aliases_file);

            while (sr.Peek () != -1) {
                string line = sr.ReadLine ().Trim ();

                if (line.Length == 0 || line.StartsWith ("#"))
                    continue;

                string[] split = line.Split (new char [] {' '});

                // A malformed line would otherwise throw and, through the
                // catch below, silently drop the rest of the file.
                if (split.Length < 2)
                    continue;

                Mime.Aliases.Add (split [0], split [1]);
            }
        } catch (Exception ) {
            // Best effort: an unreadable file must not prevent the
            // remaining directories from being read.
        } finally {
            // Release the file handle even when reading throws.
            if (sr != null)
                sr.Close ();
        }
    }
}
// Reads a decimal number from br and returns it as an int.
// NOTE(review): the lines that append characters to result_string are not part of this listing.
1075 private int ReadValue ()
1077 string result_string = "";
// The number ends in front of '=' or a line feed; the terminator itself is only peeked at.
1082 if (br.PeekChar () == '=' || br.PeekChar () == '\n')
// Convert.ToInt32 throws when the collected text is not a valid number.
1089 result = Convert.ToInt32 (result_string);
// Reads a section header from br: a '[' followed by the priority and the mime type, ended by a line feed.
//   priority: receives the parsed priority.
//   returns:  the mime type text when the header ends with a line feed.
// NOTE(review): the delimiter checks between the two parts and the fallback return are not part of this listing.
1094 private string ReadPriorityAndMimeType (ref int priority)
1096 if (br.ReadChar () == '[') {
// Collect the priority characters.
1097 string priority_string = "";
1099 char c = br.ReadChar ();
1102 priority_string += c;
1105 priority = System.Convert.ToInt32 (priority_string);
// Collect the mime type characters.
1107 string mime_type_result = "";
1109 char c = br.ReadChar ();
1113 mime_type_result += c;
// The header line has to end with a line feed.
1116 if (br.ReadChar () == '\n')
1117 return mime_type_result;
// Verifies that the stream behind br starts with the magic file signature:
// the text "MIME-Magic", a zero byte and a line feed.
// Reading stops at the first part that does not fit.
private bool CheckMagicHeader ()
{
    string signature = new String (br.ReadChars (10));

    // The && chain evaluates left to right, so nothing further is read
    // once a part of the signature is wrong.
    if (signature == "MIME-Magic" && br.ReadByte () == 0 && br.ReadChar () == '\n')
        return true;

    return false;
}
// One entry of the magic database: a mime type, its priority and the top-level matchlets to test for it.
// NOTE(review): the property bodies are not part of this listing.
1139 internal class Match {
// Top-level matchlets of this match; FDOMimeConfigReader.MakeMatches adds to the list via the Matchlets property.
1142 ArrayList matchlets = new ArrayList();
// Mime type reported when this match applies.
1144 public string MimeType {
// Priority read from the magic file.
1154 public int Priority {
// Exposes the matchlet list.
1164 public ArrayList Matchlets {
1171 internal class Matchlet {
1179 ArrayList matchlets = new ArrayList ();
1181 public byte[] ByteValue {
1191 public byte[] Mask {
1211 public int OffsetLength {
1213 offsetLength = value;
1217 return offsetLength;
1221 public int WordSize {
1231 public ArrayList Matchlets {