1 // Permission is hereby granted, free of charge, to any person obtaining
2 // a copy of this software and associated documentation files (the
3 // "Software"), to deal in the Software without restriction, including
4 // without limitation the rights to use, copy, modify, merge, publish,
5 // distribute, sublicense, and/or sell copies of the Software, and to
6 // permit persons to whom the Software is furnished to do so, subject to
7 // the following conditions:
9 // The above copyright notice and this permission notice shall be
10 // included in all copies or substantial portions of the Software.
12 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
17 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
18 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 // Copyright (c) 2005 Novell, Inc. (http://www.novell.com)
24 // Alexander Olk xenomorph2@onlinehome.de
29 using System.Collections;
30 using System.Collections.Specialized;
31 using System.Text.RegularExpressions;
36 // string mimeType = Mime.GetMimeTypeForFile( string filename );
38 // string mimeType = Mime.GetMimeTypeForData( byte[] data );
39 // - for string (maybe an email):
40 // string mimeType = Mime.GetMimeTypeForString( string input );
42 // - get alias for mime type:
43 // string alias = Mime.GetMimeAlias( string mimeType );
44 // - get subclass for mime type:
45 // string subtype = Mime.GetMimeSubClass( string mimeType );
46 // - get all available mime types:
47 // string[] available = Mime.AvailableMimeTypes;
50 // - optimize even more :)
51 // - async callback ?!?
52 // - freedesktop org file extensions can have regular expressions also, resolve them too
53 // - sort match collections by magic priority ( higher = first ) ?
56 // looking up the mime types 20 times for 2757 files in /usr/lib without caching (mime_file_cache)
57 // old version: Time: 00:00:32.3791220
58 // new version: Time: 00:00:16.9991810
60 namespace System.Windows.Forms
// Shared instance; the static GetMimeTypeFor* methods run their lookups on it.
64 public static Mime Instance = new Mime();
// Name of the file currently being examined (set by StartByFileName).
66 private string current_file_name;
// MIME type produced by the most recent lookup; this is what the static entry points return.
67 private string global_result = octet_stream;
69 private FileStream file_stream;
// Scratch buffer the magic matchlets are tested against; allocated in the constructor
// only when FDOMimeConfigReader.Init() does not return -1, otherwise it stays null.
71 private byte[] buffer = null;
73 private const string octet_stream = "application/octet-stream";
74 private const string text_plain = "text/plain";
75 private const string zero_file = "application/x-zerosize";
// File name -> MIME type results remembered by StartByFileName.
77 private StringDictionary mime_file_cache = new StringDictionary();
// StartByFileName starts evicting entries once the cache holds more than this many.
79 private const int mime_file_cache_max_size = 3000;
// Input of the current string lookup (set by StartStringLookup).
81 private string search_string;
// NOTE(review): no use of lock_object survives in this listing; presumably the static
// entry points lock on it — confirm against the full file.
83 private static object lock_object = new Object();
85 // private int platform = (int) Environment.OSVersion.Platform;
87 private bool is_zero_file = false;
// Number of bytes OpenFile read into buffer; TestMatchlet bounds-checks against it.
89 private int bytes_read = 0;
// Lookup tables created in the constructor and filled by FDOMimeConfigReader.
91 public static NameValueCollection Aliases;
92 public static NameValueCollection SubClasses;
94 public static NameValueCollection GlobalPatternsShort;
95 public static NameValueCollection GlobalPatternsLong;
96 public static NameValueCollection GlobalLiterals;
97 public static NameValueCollection GlobalSufPref;
// Magic matches; FDOMimeConfigReader.MakeMatches adds each Match to one of the two lists.
99 public static ArrayList Matches80Plus;
100 public static ArrayList MatchesBelow80;
// Constructor body (the constructor header is not part of this listing).
// Creates the static lookup tables, lets FDOMimeConfigReader fill them and sizes the
// match buffer from the value Init() returns.
// NOTE(review): two alternative sets of assignments follow back to back; presumably they
// were the two branches of a conditional-compilation block whose directives are missing — confirm.
105 Aliases = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
106 SubClasses = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
107 GlobalPatternsShort = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
108 GlobalPatternsLong = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
109 GlobalLiterals = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
110 GlobalSufPref = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
112 Aliases = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
113 SubClasses = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
114 GlobalPatternsShort = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
115 GlobalPatternsLong = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
116 GlobalLiterals = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
117 GlobalSufPref = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
120 Matches80Plus = new ArrayList ();
121 MatchesBelow80 = new ArrayList ();
// Init() fills the tables above and returns the buffer size needed for magic matching.
123 FDOMimeConfigReader fmcr = new FDOMimeConfigReader ();
124 int buffer_length = fmcr.Init ();
// -1 leaves buffer unallocated (null).
126 if (buffer_length != -1) {
127 buffer = new byte[ buffer_length ];
// Looks up the MIME type of the file named `filename` on the shared Instance and
// returns the result that lookup left in Instance.global_result.
// NOTE(review): lines between the signature and the call are missing from this listing;
// presumably the lookup is wrapped in a lock on lock_object — confirm.
131 public static string GetMimeTypeForFile( string filename )
135 Instance.StartByFileName( filename );
138 return Instance.global_result;
// Looks up the MIME type of the raw bytes in `data` on the shared Instance and
// returns the result that lookup left in Instance.global_result.
// NOTE(review): lines between the signature and the call are missing from this listing;
// presumably the lookup is wrapped in a lock on lock_object — confirm.
142 public static string GetMimeTypeForData( byte[] data )
146 Instance.StartDataLookup( data );
149 return Instance.global_result;
// Looks up the MIME type of the text in `input` on the shared Instance and
// returns the result that lookup left in Instance.global_result.
// NOTE(review): lines between the signature and the call are missing from this listing;
// presumably the lookup is wrapped in a lock on lock_object — confirm.
152 public static string GetMimeTypeForString( string input )
156 Instance.StartStringLookup( input );
159 return Instance.global_result;
// Returns the entry stored in the Aliases table for `mimetype`
// (null when the table has no entry for that key).
public static string GetMimeAlias( string mimetype )
{
	string alias = Aliases[ mimetype ];

	return alias;
}
// Returns the entry stored in the SubClasses table for `mimetype`
// (null when the table has no entry for that key).
public static string GetMimeSubClass( string mimetype )
{
	string sub_class = SubClasses[ mimetype ];

	return sub_class;
}
// Resolves the MIME type of `filename` into global_result.
// A cached answer is returned straight away; otherwise the file is examined by
// GoByFileName and the answer is stored in mime_file_cache. Once the cache holds more
// than mime_file_cache_max_size entries, (mime_file_cache_max_size - 1000) of them are evicted.
private void StartByFileName( string filename )
{
	if ( mime_file_cache.ContainsKey( filename ) )
	{
		global_result = mime_file_cache[ filename ];
		return;
	}

	current_file_name = filename;
	is_zero_file = false;

	// if ( !CheckForInode( ) )
	// {
	global_result = octet_stream;

	GoByFileName( );
	// }

	mime_file_cache.Add( current_file_name, global_result );

	if (mime_file_cache.Count > mime_file_cache_max_size) {
		int evict_count = mime_file_cache_max_size - 1000;
		ArrayList evict_keys = new ArrayList (evict_count);

		// Collect the keys first: removing entries while enumerating invalidates the
		// enumerator. The enumerator yields DictionaryEntry values, so the key has to be
		// taken from entry.Key (ToString() on the entry is not the key).
		foreach (DictionaryEntry entry in mime_file_cache) {
			if (evict_keys.Count == evict_count)
				break;

			evict_keys.Add (entry.Key);
		}

		foreach (string key in evict_keys)
			mime_file_cache.Remove (key);
	}
}
// Resolves the MIME type of the bytes in `data` into global_result.
// The data (truncated to the buffer size) is copied into the match buffer, then the
// high-priority matches, the low-priority matches and finally the binary/text heuristic
// are tried in that order. The result stays application/octet-stream when there is
// nothing to inspect.
private void StartDataLookup( byte[] data )
{
	global_result = octet_stream;

	// buffer is only allocated when the magic database could be loaded
	if ( buffer == null || data == null )
		return;

	System.Array.Clear( buffer, 0, buffer.Length );

	int copy_length = data.Length > buffer.Length ? buffer.Length : data.Length;

	System.Array.Copy( data, buffer, copy_length );

	// TestMatchlet bounds-checks against bytes_read, so it must describe this data
	// and not whatever file was read last.
	bytes_read = copy_length;

	if ( CheckMatch80Plus( ) )
		return;

	if ( CheckMatchBelow80( ) )
		return;

	CheckForBinaryOrText( );
}
// Resolves the MIME type of the text in `input` into global_result.
// The result defaults to text/plain; CheckForContentTypeString overwrites it when it
// finds something more specific.
private void StartStringLookup( string input )
{
	search_string = input;
	global_result = text_plain;

	// the return value only signals whether a match was found; nothing else happens either way
	CheckForContentTypeString( );
}
241 // private bool CheckForInode( )
243 // if ( ( platform == 4 ) || ( platform == 128 ) )
249 // Mono.Unix.UnixFileInfo ufi = new Mono.Unix.UnixFileInfo( current_file_name );
256 // if ( ufi.IsDirectory )
258 // global_result = "inode/directory";
262 // if ( ufi.IsBlockDevice )
264 // global_result = "inode/blockdevice";
268 // if ( ufi.IsSocket )
270 // global_result = "inode/socket";
274 // if ( ufi.IsSymbolicLink )
276 // global_result = "inode/symlink";
280 // if ( ufi.IsCharacterDevice )
282 // global_result = "inode/chardevice";
288 // global_result = "inode/fifo";
291 // } catch( Exception e )
300 // // windows platform
// Runs the lookup steps for current_file_name in order: magic matches with priority >= 80,
// global (file name) patterns, magic matches with priority < 80, and finally the
// binary/text heuristic. Each step writes its answer to global_result.
// NOTE(review): the conditions and returns between the steps are missing from this listing.
306 private void GoByFileName( )
308 // check if we can open the file
311 // couldn't open the file, check globals only
312 CheckGlobalPatterns( );
319 // check for matches with a priority >= 80
320 if ( CheckMatch80Plus( ) )
324 // check global patterns, aka file extensions...
325 // this should be done for zero size files also,
326 // for example zero size file trash.ccc~ should return
327 // application/x-trash instead of application/x-zerosize
328 if ( CheckGlobalPatterns( ) )
331 // if file size is zero, no other checks are needed
335 // ok, still nothing matches then try matches with a priority < 80
336 if ( CheckMatchBelow80( ) )
339 // wow, still nothing... return application/octet-stream for binary data, or text/plain for textual data
340 CheckForBinaryOrText( );
// Tests the buffer against every entry of Matches80Plus, in list order.
// The first entry that matches sets global_result to its MIME type and yields true;
// false means no entry matched.
private bool CheckMatch80Plus( )
{
	for ( int i = 0; i < Matches80Plus.Count; i++ )
	{
		Match candidate = (Match) Matches80Plus[ i ];

		if ( !TestMatch( candidate ) )
			continue;

		global_result = candidate.MimeType;

		return true;
	}

	return false;
}
// Ordinal, allocation-free "ends with" test: true when the last value.Length
// characters of `input` equal `value` (an empty `value` always matches).
// Used instead of String.EndsWith for speed.
private bool FastEndsWidth(string input, string value)
{
	int value_pos = value.Length;
	int input_pos = input.Length;

	if (value_pos > input_pos)
		return false;

	// walk both strings backwards from their last character
	while (value_pos > 0) {
		value_pos--;
		input_pos--;

		if (input[input_pos] != value[value_pos])
			return false;
	}

	return true;
}
// Ordinal, allocation-free "starts with" test: true when the first value.Length
// characters of `input` equal `value` (an empty `value` always matches).
private bool FastStartsWith(string input, string value)
{
	int length = value.Length;

	if (length > input.Length)
		return false;

	int pos = 0;

	while (pos < length) {
		if (input[pos] != value[pos])
			return false;

		pos++;
	}

	return true;
}
// Returns the index of the first occurrence of `value` in `input`, or -1 when it
// does not occur. The search always starts at index 0.
private int FastIndexOf(string input, char value)
{
	int length = input.Length;
	int pos = 0;

	while (pos < length) {
		if (input[pos] == value)
			return pos;

		pos++;
	}

	return -1;
}
// Returns the index of the first ordinal occurrence of `value` in `input`, or -1
// when it does not occur.
// An empty `input` or an empty `value` yields -1, so an empty key can never be
// reported as "found" in a file name.
private int FastIndexOf(string input, string value)
{
	if (input.Length == 0 || value.Length == 0)
		return -1;

	// "<=" so the last possible start position is tested too; with "<" a match at
	// the very end of input (including input == value) was never found.
	int last_start = input.Length - value.Length;

	for (int i = 0; i <= last_start; i++) {
		if (input[i] != value[0])
			continue;

		int z = 1;

		while (z < value.Length && input[i + z] == value[z])
			z++;

		if (z == value.Length)
			return i;
	}

	return -1;
}
// Cuts global_result at its first comma, keeping only the text in front of it.
// A result without a comma is left untouched.
private void CheckGlobalResult( )
{
	int cut = FastIndexOf(global_result, ',');

	if ( cut == -1 )
		return;

	global_result = global_result.Substring( 0, cut );
}
// Tries to derive the MIME type from the file name alone, writing a hit to global_result
// (trimmed by CheckGlobalResult). The tables are consulted in this order: GlobalLiterals,
// GlobalPatternsLong, GlobalPatternsShort, GlobalSufPref.
// NOTE(review): the returns, else branches and braces are missing from this listing.
434 private bool CheckGlobalPatterns( )
436 string filename = Path.GetFileName( current_file_name );
438 // first check for literals
439 for ( int i = 0; i < GlobalLiterals.Count; i++ )
441 string key = GlobalLiterals.GetKey(i);
// keys without '[' are searched for with FastIndexOf; the Regex.IsMatch call below
// is presumably the branch for keys containing '[' — confirm
444 if ( FastIndexOf(key, '[' ) == -1 )
// NOTE(review): this is a substring search of the key inside the file name, not an equality test
446 if (FastIndexOf(filename, key) != -1)
448 global_result = GlobalLiterals[i];
449 CheckGlobalResult( );
455 if ( Regex.IsMatch( filename, key ) )
457 global_result = GlobalLiterals[ i ];
458 CheckGlobalResult( );
464 if ( FastIndexOf(filename, '.' ) != -1 )
466 // check for double extension like .tar.gz
467 for ( int i = 0; i < GlobalPatternsLong.Count; i++ )
469 string key = GlobalPatternsLong.GetKey( i );
// first the file name as given ...
471 if (FastEndsWidth (filename, key))
473 global_result = GlobalPatternsLong[ i ];
474 CheckGlobalResult( );
// ... then its lower-cased form
479 if ( FastEndsWidth (filename.ToLower( ), key ) )
481 global_result = GlobalPatternsLong[ i ];
482 CheckGlobalResult( );
488 // check normal extensions...
489 string extension = Path.GetExtension( current_file_name );
491 if ( extension.Length != 0 )
493 string global_result_tmp = GlobalPatternsShort[ extension ];
495 if ( global_result_tmp != null )
497 global_result = global_result_tmp;
498 CheckGlobalResult( );
// second attempt with the lower-cased extension
502 global_result_tmp = GlobalPatternsShort[ extension.ToLower( ) ];
504 if ( global_result_tmp != null )
506 global_result = global_result_tmp;
507 CheckGlobalResult( );
513 // finally check if a prefix or suffix matches
514 for ( int i = 0; i < GlobalSufPref.Count; i++ )
516 string key = GlobalSufPref.GetKey( i );
// the '*' wildcard is stripped from the key before comparing
520 if (FastEndsWidth(filename, key.Replace( "*", "" )))
522 global_result = GlobalSufPref[ i ];
523 CheckGlobalResult( );
529 if ( FastStartsWith(filename, key.Replace( "*", "" ) ) )
531 global_result = GlobalSufPref[ i ];
532 CheckGlobalResult( );
// Tests the buffer against every entry of MatchesBelow80, in list order.
// The first entry that matches sets global_result to its MIME type and yields true;
// false means no entry matched.
private bool CheckMatchBelow80( )
{
	for ( int i = 0; i < MatchesBelow80.Count; i++ )
	{
		Match candidate = (Match) MatchesBelow80[ i ];

		if ( !TestMatch( candidate ) )
			continue;

		global_result = candidate.MimeType;

		return true;
	}

	return false;
}
// Last-resort classification: looks at the first 32 bytes of the buffer and sets
// global_result to application/octet-stream as soon as one of them is a control
// character other than tab, line feed, carriage return or form feed; otherwise the
// result is text/plain.
private void CheckForBinaryOrText( )
{
	// The buffer size is derived from the magic database and may be below 32 bytes,
	// so never index past its end.
	int limit = buffer.Length < 32 ? buffer.Length : 32;

	for ( int i = 0; i < limit; i++ )
	{
		char c = System.Convert.ToChar( buffer[ i ] );

		// 12 is form feed
		if ( c != '\t' && c != '\n' && c != '\r' && c != 12 && c < 32 )
		{
			global_result = octet_stream;
			return;
		}
	}

	global_result = text_plain;
}
// A Match applies when at least one of its top-level matchlets matches the buffer.
// Matchlets are tried in list order and testing stops at the first hit.
private bool TestMatch (Match match)
{
	ArrayList candidates = match.Matchlets;

	for (int i = 0; i < candidates.Count; i++) {
		if (TestMatchlet ((Matchlet) candidates [i]))
			return true;
	}

	return false;
}
// Tests one matchlet against the buffer. For every start position from matchlet.Offset
// up to matchlet.Offset + matchlet.OffsetLength - 1 the bytes of matchlet.ByteValue are
// compared with the buffer, optionally ANDing both sides with matchlet.Mask. Where the
// value matches and the matchlet has sub-matchlets, those are tested recursively.
// NOTE(review): the returns, the declaration of `minus` and the braces are missing from this listing.
583 private bool TestMatchlet( Matchlet matchlet )
585 // using a simple brute force search algorithm
586 // compare each (masked) value from the buffer with the (masked) value from the matchlet
588 // nothing to test if the offset + the byte value length exceeds the number of bytes read
589 if (matchlet.Offset + matchlet.ByteValue.Length > bytes_read)
592 for ( int offset_counter = 0; offset_counter < matchlet.OffsetLength; offset_counter++ )
// same bounds check for the current start position
594 if (matchlet.Offset + offset_counter + matchlet.ByteValue.Length > bytes_read)
// without mask: plain byte comparison
597 if ( matchlet.Mask == null )
// the first byte is compared before anything else
599 if ( buffer[ matchlet.Offset + offset_counter ] == matchlet.ByteValue[ 0 ] )
601 if ( matchlet.ByteValue.Length == 1 )
603 if ( matchlet.Matchlets.Count > 0 )
605 foreach ( Matchlet sub_matchlet in matchlet.Matchlets )
607 if ( TestMatchlet( sub_matchlet ) )
616 // check if the last matchlet byte value is the same as the byte value in the buffer...
617 if (matchlet.ByteValue.Length > 2) {
618 if (buffer[ matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1 ] != matchlet.ByteValue[ matchlet.ByteValue.Length - 1 ])
// remaining bytes; `minus` shortens the range (presumably by the last byte already compared — confirm)
624 for ( int i = 1; i < matchlet.ByteValue.Length - minus; i++ )
626 if ( buffer[ matchlet.Offset + offset_counter + i ] != matchlet.ByteValue[ i ] )
630 if ( matchlet.Matchlets.Count > 0 )
632 foreach ( Matchlet sub_matchlets in matchlet.Matchlets )
634 if ( TestMatchlet( sub_matchlets ) )
642 else // with mask ( it's the same as above, only AND the byte with the corresponding mask byte )
644 if ( ( buffer[ matchlet.Offset + offset_counter ] & matchlet.Mask[ 0 ] ) ==
645 ( matchlet.ByteValue[ 0 ] & matchlet.Mask[ 0 ] ) )
647 if ( matchlet.ByteValue.Length == 1 )
649 if ( matchlet.Matchlets.Count > 0 )
651 foreach ( Matchlet sub_matchlets in matchlet.Matchlets )
653 if ( TestMatchlet( sub_matchlets ) )
662 // check if the last matchlet byte value is the same as the byte value in the buffer...
663 if (matchlet.ByteValue.Length > 2) {
665 if ((buffer[ matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1 ] & matchlet.Mask[ matchlet.ByteValue.Length - 1 ])
666 != (matchlet.ByteValue[ matchlet.ByteValue.Length - 1 ] & matchlet.Mask[ matchlet.ByteValue.Length - 1 ]))
672 for ( int i = 1; i < matchlet.ByteValue.Length - minus; i++ )
674 if ( ( buffer[ matchlet.Offset + offset_counter + i ] & matchlet.Mask[ i ] ) !=
675 ( matchlet.ByteValue[ i ] & matchlet.Mask[ i ] ) )
679 if ( matchlet.Matchlets.Count > 0 )
681 foreach ( Matchlet sub_matchlets in matchlet.Matchlets )
683 if ( TestMatchlet( sub_matchlets ) )
// Opens current_file_name for reading and loads its first bytes into buffer.
// An empty file sets global_result to application/x-zerosize; otherwise bytes_read
// records how many bytes were read and the unused tail of buffer is zeroed.
// NOTE(review): the return statements and any exception handling are missing from this listing.
696 private bool OpenFile( )
700 file_stream = new FileStream( current_file_name, FileMode.Open, FileAccess.Read ); // FileShare ??? use BinaryReader ???
702 if ( file_stream.Length == 0 )
704 global_result = zero_file;
709 bytes_read = file_stream.Read( buffer, 0, buffer.Length );
711 // do not clear the whole buffer every time; clear only what's needed
712 if (bytes_read < buffer.Length) {
713 System.Array.Clear( buffer, bytes_read, buffer.Length - bytes_read );
717 file_stream.Close( );
// Resolves the MIME type of search_string into global_result.
// If the text contains a "Content-type:" header, the value following it (up to the
// next ';', the end of the line or the end of the text, with surrounding whitespace
// removed) becomes the result. Otherwise the text is converted to ASCII bytes and run
// through the magic matches.
// Returns true when either step produced a result, false otherwise.
private bool CheckForContentTypeString( )
{
	const string header = "Content-type:";

	int index = search_string.IndexOf( header );

	if ( index != -1 )
	{
		int start = index + header.Length;
		int end = start;

		// Stop at the parameter separator, at the end of the header line or at the end
		// of the input; scanning for ';' alone ran past the end of the string when the
		// header had no parameters.
		while ( end < search_string.Length )
		{
			char c = search_string[ end ];

			if ( c == ';' || c == '\r' || c == '\n' )
				break;

			end++;
		}

		// Trim() returns a new string, so its result has to be stored
		global_result = search_string.Substring( start, end - start ).Trim( );

		return true;
	}

	// buffer is only allocated when the magic database could be loaded
	if ( buffer == null )
		return false;

	// convert string to byte array
	byte[] string_byte = ( new ASCIIEncoding( ) ).GetBytes( search_string );

	System.Array.Clear( buffer, 0, buffer.Length );

	int copy_length = string_byte.Length > buffer.Length ? buffer.Length : string_byte.Length;

	System.Array.Copy( string_byte, buffer, copy_length );

	// TestMatchlet bounds-checks against bytes_read, so it must describe this input
	// and not whatever file was read last.
	bytes_read = copy_length;

	if ( CheckMatch80Plus( ) )
		return true;

	if ( CheckMatchBelow80( ) )
		return true;

	return false;
}
// Reads the freedesktop.org shared MIME database files (magic, globs, subclasses, aliases)
// found under the directories in shared_mime_paths and fills the static tables of Mime.
771 internal class FDOMimeConfigReader {
// set by CheckFDOMimePaths once at least one database directory exists
772 bool fdo_mime_available = false;
773 StringCollection shared_mime_paths = new StringCollection ();
// largest (offset + range + value length + 1) over all matchlets; Init returns it and
// the Mime constructor uses that value as the buffer size
776 int max_offset_and_range = 0;
// Body of Init (its header and the calls to the Read* methods are missing from this listing).
780 CheckFDOMimePaths ();
// NOTE(review): the statement guarded by this test is missing; the Mime constructor
// treats a return value of -1 as "no buffer", so presumably -1 is returned here — confirm
782 if (!fdo_mime_available)
793 shared_mime_paths = null;
796 return max_offset_and_range;
// Collects the shared MIME database directories that exist on this machine into
// shared_mime_paths (system wide, local and per user, in that order, each stored with a
// trailing slash) and flags the database as available when at least one was found.
private void CheckFDOMimePaths ()
{
	string personal_folder = System.Environment.GetFolderPath (Environment.SpecialFolder.Personal);

	string[] candidates = new string [] {
		"/usr/share/mime",
		"/usr/local/share/mime",
		personal_folder + "/.local/share/mime"
	};

	foreach (string candidate in candidates) {
		if (Directory.Exists (candidate))
			shared_mime_paths.Add (candidate + "/");
	}

	if (shared_mime_paths.Count != 0)
		fdo_mime_available = true;
}
// Loads the "magic" file of every database directory: after the header has been
// verified, MakeMatches turns the content into Match objects.
// Reading is best effort: a file that cannot be read or parsed is skipped.
private void ReadMagicData ()
{
	foreach (string path in shared_mime_paths) {
		if (!File.Exists (path + "/magic"))
			continue;

		try {
			// using/finally make sure reader and stream are closed even when parsing throws
			using (FileStream fs = File.OpenRead (path + "/magic")) {
				br = new BinaryReader (fs);

				try {
					if (CheckMagicHeader ()) {
						MakeMatches ();
					}
				} finally {
					br.Close ();
				}
			}
		} catch (Exception ) {
			// deliberately ignored: a broken magic file must not prevent start-up
		}
	}
}
// Parses the sections of the magic file behind `br` into Match objects. Each section
// starts with a priority and MIME type; each following entry contributes a Matchlet
// built from its indent, offset, value, optional mask, optional word size and optional
// range. Finished matches are added to Mime.MatchesBelow80 or Mime.Matches80Plus.
// NOTE(review): many lines (declarations, ReadChar calls, loops, braces) are missing from this listing.
837 private void MakeMatches ()
// most recent matchlet per indent level, so a child can be attached to its parent
839 Matchlet[] matchlets = new Matchlet [30];
841 while (br.PeekChar () != -1) {
843 string mime_type = ReadPriorityAndMimeType (ref priority);
845 if (mime_type != null) {
846 Match match = new Match ();
847 match.Priority = priority;
848 match.MimeType = mime_type;
// optional indent: decimal digits in front of '>'
854 if (br.PeekChar () != '>') {
855 string indent_string = "";
857 if (br.PeekChar () == '>')
863 indent = Convert.ToInt32 (indent_string);
// '>' introduces the offset
869 if (br.PeekChar () == '>') {
871 offset = ReadValue ();
874 int value_length = 0;
876 // value length and value
877 if (br.PeekChar () == '=') {
880 // read 2 bytes value length (always big endian)
881 byte first = br.ReadByte ();
882 byte second = br.ReadByte ();
884 value_length = first * 256 + second;
886 value = br.ReadBytes (value_length);
// '&' introduces a mask with the same length as the value
892 if (br.PeekChar () == '&') {
895 mask = br.ReadBytes (value_length);
// '~' introduces the word size, a single ASCII digit
900 if (br.PeekChar () == '~') {
905 word_size = Convert.ToInt32 (c - 0x30);
907 // data is stored in big endian format.
908 if (word_size > 1 && System.BitConverter.IsLittleEndian) {
909 //convert the value and, if available, the mask data to little endian
910 if (word_size == 2) {
912 for (int i = 0; i < value.Length; i += 2) {
913 byte one = value [i];
914 byte two = value [i + 1];
920 for (int i = 0; i < mask.Length; i += 2) {
922 byte two = mask [i + 1];
927 } else if (word_size == 4) {
929 for (int i = 0; i < value.Length; i += 4) {
930 byte one = value [i];
931 byte two = value [i + 1];
932 byte three = value [i + 2];
933 byte four = value [i + 3];
935 value [i + 1] = three;
941 for (int i = 0; i < mask.Length; i += 4) {
943 byte two = mask [i + 1];
944 byte three = mask [i + 2];
945 byte four = mask [i + 3];
947 mask [i + 1] = three;
// '+' introduces the range length; it defaults to 1
958 int range_length = 1;
959 if (br.PeekChar () == '+') {
961 range_length = ReadValue ();
967 // create the matchlet
968 matchlets [indent] = new Matchlet ();
969 matchlets [indent].Offset = offset;
970 matchlets [indent].OffsetLength = range_length;
971 matchlets [indent].ByteValue = value;
973 matchlets [indent].Mask = mask;
// the matchlet is attached either to the match itself or to the matchlet one indent level up
976 match.Matchlets.Add (matchlets [indent]);
978 matchlets [indent - 1].Matchlets.Add (matchlets [indent]);
// track the largest extent any matchlet needs; Init returns it
981 if (max_offset_and_range < matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1)
982 max_offset_and_range = matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1;
984 // if '[' move to next mime type
985 if (br.PeekChar () == '[')
990 Mime.MatchesBelow80.Add (match);
992 Mime.Matches80Plus.Add (match);
// Loads the "globs" file of every database directory. Each non-comment line has the
// form "mimetype:glob" and is sorted into one of four Mime tables:
//   - glob with '*' but without '.'  -> GlobalSufPref (prefix/suffix patterns)
//   - glob without '*'               -> GlobalLiterals
//   - glob with more than one '.'    -> GlobalPatternsLong (leading '*' removed)
//   - anything else                  -> GlobalPatternsShort (leading '*' removed)
// Reading is best effort: a file that cannot be read is skipped.
private void ReadGlobsData ()
{
	foreach (string path in shared_mime_paths) {
		if (!File.Exists (path + "/globs"))
			continue;

		try {
			// using closes the reader even when a line fails to parse
			using (StreamReader sr = new StreamReader (path + "/globs")) {
				string line;

				while ((line = sr.ReadLine ()) != null) {
					line = line.Trim ();

					if (line.Length == 0 || line [0] == '#')
						continue;

					string[] split = line.Split (new char [] {':'});

					// skip malformed lines instead of giving up on the rest of the file
					if (split.Length < 2 || split [1].Length == 0)
						continue;

					string mime_type = split [0];
					string glob = split [1];

					if (glob.IndexOf ('*') > -1 && glob.IndexOf ('.') == -1) {
						Mime.GlobalSufPref.Add (glob, mime_type);
					} else if (glob.IndexOf ('*') == -1) {
						Mime.GlobalLiterals.Add (glob, mime_type);
					} else {
						string[] split2 = glob.Split (new char [] {'.'});

						if (split2.Length > 2) {
							// more than one dot
							Mime.GlobalPatternsLong.Add (glob.Remove(0, 1), mime_type);
						} else {
							// normal
							Mime.GlobalPatternsShort.Add (glob.Remove(0, 1), mime_type);
						}
					}
				}
			}
		} catch (Exception ) {
			// deliberately ignored: an unreadable globs file must not prevent start-up
		}
	}
}
// Loads the "subclasses" file of every database directory into Mime.SubClasses.
// Each non-comment line holds two space-separated MIME types; the first is the key,
// the second the value.
// Reading is best effort: a file that cannot be read is skipped.
private void ReadSubclasses ()
{
	foreach (string path in shared_mime_paths) {
		if (!File.Exists (path + "/subclasses"))
			continue;

		try {
			// using closes the reader even when a line fails to parse
			using (StreamReader sr = new StreamReader (path + "/subclasses")) {
				string line;

				while ((line = sr.ReadLine ()) != null) {
					line = line.Trim ();

					if (line.Length == 0 || line [0] == '#')
						continue;

					string[] split = line.Split (new char [] {' '});

					// skip malformed lines instead of giving up on the rest of the file
					if (split.Length < 2)
						continue;

					Mime.SubClasses.Add (split [0], split [1]);
				}
			}
		} catch (Exception ) {
			// deliberately ignored: an unreadable subclasses file must not prevent start-up
		}
	}
}
// Loads the "aliases" file of every database directory into Mime.Aliases.
// Each non-comment line holds two space-separated MIME types; the first is the key,
// the second the value.
// Reading is best effort: a file that cannot be read is skipped.
private void ReadAliases ()
{
	foreach (string path in shared_mime_paths) {
		if (!File.Exists (path + "/aliases"))
			continue;

		try {
			// using closes the reader even when a line fails to parse
			using (StreamReader sr = new StreamReader (path + "/aliases")) {
				string line;

				while ((line = sr.ReadLine ()) != null) {
					line = line.Trim ();

					if (line.Length == 0 || line [0] == '#')
						continue;

					string[] split = line.Split (new char [] {' '});

					// skip malformed lines instead of giving up on the rest of the file
					if (split.Length < 2)
						continue;

					Mime.Aliases.Add (split [0], split [1]);
				}
			}
		} catch (Exception ) {
			// deliberately ignored: an unreadable aliases file must not prevent start-up
		}
	}
}
// Reads a decimal number from `br` and returns it as an int. Characters are collected
// into result_string and converted with Convert.ToInt32.
// NOTE(review): the reading loop itself is missing from this listing.
1089 private int ReadValue ()
1091 string result_string = "";
// '=' or a line feed ends the number; PeekChar leaves that character unread
1096 if (br.PeekChar () == '=' || br.PeekChar () == '\n')
1103 result = Convert.ToInt32 (result_string);
// Reads a section header from `br`: after the opening '[' the priority is collected
// and stored in `priority`, then the MIME type is collected and returned once the
// header line ends with a line feed.
// NOTE(review): the loops, their terminating conditions and the fallback return are
// missing from this listing.
1108 private string ReadPriorityAndMimeType (ref int priority)
1110 if (br.ReadChar () == '[') {
1111 string priority_string = "";
1113 char c = br.ReadChar ();
1116 priority_string += c;
1119 priority = System.Convert.ToInt32 (priority_string);
1121 string mime_type_result = "";
1123 char c = br.ReadChar ();
1127 mime_type_result += c;
1130 if (br.ReadChar () == '\n')
1131 return mime_type_result;
// Verifies that the stream behind `br` starts with the magic file signature:
// the ten characters "MIME-Magic", a zero byte and a line feed.
// Reading stops at the first part that does not match.
private bool CheckMagicHeader ()
{
	string signature = new String (br.ReadChars (10));

	// && evaluates left to right and stops early, so nothing is read past a mismatch
	return signature == "MIME-Magic"
		&& br.ReadByte () == 0
		&& br.ReadChar () == '\n';
}
// One magic rule of the MIME database: a MIME type, its priority and the top-level
// matchlets that are tested against file content.
// NOTE(review): the backing fields and property bodies are missing from this listing.
1153 internal class Match {
1156 ArrayList matchlets = new ArrayList();
// MIME type assigned to content that satisfies this match
1158 public string MimeType {
1168 public int Priority {
// top-level matchlets; Mime.TestMatch iterates over them
1178 public ArrayList Matchlets {
1185 internal class Matchlet {
1193 ArrayList matchlets = new ArrayList ();
1195 public byte[] ByteValue {
1205 public byte[] Mask {
1225 public int OffsetLength {
1227 offsetLength = value;
1231 return offsetLength;
1235 public int WordSize {
1245 public ArrayList Matchlets {