1 // Permission is hereby granted, free of charge, to any person obtaining
2 // a copy of this software and associated documentation files (the
3 // "Software"), to deal in the Software without restriction, including
4 // without limitation the rights to use, copy, modify, merge, publish,
5 // distribute, sublicense, and/or sell copies of the Software, and to
6 // permit persons to whom the Software is furnished to do so, subject to
7 // the following conditions:
9 // The above copyright notice and this permission notice shall be
10 // included in all copies or substantial portions of the Software.
12 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
17 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
18 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 // Copyright (c) 2006 Alexander Olk
24 // Alexander Olk alex.olk@googlemail.com
29 using System.Collections;
30 using System.Collections.Specialized;
31 using System.Text.RegularExpressions;
36 // string mimeType = Mime.GetMimeTypeForFile( string filename );
38 // string mimeType = Mime.GetMimeTypeForData( byte[] data );
39 // - for string (maybe an email):
40 // string mimeType = Mime.GetMimeTypeForString( string input );
42 // - get alias for mime type:
43 // string alias = Mime.GetMimeAlias( string mimeType );
44 // - get subclass for mime type:
45 // string subtype = Mime.GetMimeSubClass( string mimeType );
46 // - get all available mime types:
47 // string[] available = Mime.AvailableMimeTypes;
50 // - optimize even more :)
51 // - async callback ?!?
52 // - freedesktop org file extensions can have regular expressions also, resolve them too
53 // - sort match collections by magic priority ( higher = first ) ?
56 // looking up the mime types 20 times for 2757 files in /usr/lib without caching (mime_file_cache)
57 // old version: Time: 00:00:32.3791220
58 // new version: Time: 00:00:16.9991810
60 namespace System.Windows.Forms
// Shared instance that all static lookup methods of this class operate on.
64 public static Mime Instance = new Mime();
// Name of the file currently being examined (assigned in StartByFileName).
66 private string current_file_name;
// Outcome of the most recent lookup; the public GetMimeTypeFor* methods return this value.
67 private string global_result = octet_stream;
// Stream OpenFile uses to read the leading bytes of the current file.
69 private FileStream file_stream;
// Scratch buffer the magic rules are tested against; allocated in the constructor
// only when the config reader reports a required length of at least 32.
71 private byte[] buffer = null;
// Well-known result strings.
73 private const string octet_stream = "application/octet-stream";
74 private const string text_plain = "text/plain";
75 private const string zero_file = "application/x-zerosize";
// Cache of earlier file lookups: file name -> mime type.
77 private StringDictionary mime_file_cache = new StringDictionary();
// Once the cache holds more entries than this, StartByFileName starts evicting.
79 private const int mime_file_cache_max_size = 3000;
// Input of the current StartStringLookup call.
81 private string search_string;
// NOTE(review): no use of this lock object is visible in this listing — confirm that the
// static entry points take it before touching Instance.
83 private static object lock_object = new Object();
// Reset at the start of every file lookup; presumably set when the file turns out to be
// empty (that assignment is not visible in this listing).
85 private bool is_zero_file = false;
// Number of bytes OpenFile read into buffer; TestMatchlet uses it as the upper bound for rule offsets.
87 private int bytes_read = 0;
// Set to true in the constructor once buffer has been allocated.
89 private bool mime_available = false;
// Lookup tables filled by FDOMimeConfigReader (ReadAliases, ReadSubclasses, ReadGlobsData).
91 public static NameValueCollection Aliases;
92 public static NameValueCollection SubClasses;
94 public static NameValueCollection GlobalPatternsShort;
95 public static NameValueCollection GlobalPatternsLong;
96 public static NameValueCollection GlobalLiterals;
97 public static NameValueCollection GlobalSufPref;
// Magic rules (Match objects) filled by FDOMimeConfigReader.MakeMatches; per the comments in
// GoByFileName the first list holds priorities >= 80 and the second priorities < 80.
99 public static ArrayList Matches80Plus;
100 public static ArrayList MatchesBelow80;
// Constructor body (the declaration line is not part of this listing): creates the static
// lookup tables, loads the mime data and allocates the scratch buffer.
// NOTE(review): the six collections are assigned twice below, once with a StringComparer and
// once with a hash-code-provider/comparer pair; presumably these are the two branches of a
// conditional-compilation block whose directives are not shown here — confirm.
105 Aliases = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
106 SubClasses = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
107 GlobalPatternsShort = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
108 GlobalPatternsLong = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
109 GlobalLiterals = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
110 GlobalSufPref = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
112 Aliases = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
113 SubClasses = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
114 GlobalPatternsShort = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
115 GlobalPatternsLong = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
116 GlobalLiterals = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
117 GlobalSufPref = new NameValueCollection (new CaseInsensitiveHashCodeProvider (), new Comparer (System.Globalization.CultureInfo.CurrentUICulture));
120 Matches80Plus = new ArrayList ();
121 MatchesBelow80 = new ArrayList ();
// Load the mime data; Init returns the buffer length the loaded magic rules require.
123 FDOMimeConfigReader fmcr = new FDOMimeConfigReader ();
124 int buffer_length = fmcr.Init ();
// Below 32 bytes no buffer is allocated and mime_available keeps its default (false).
126 if (buffer_length >= 32) {
127 buffer = new byte [buffer_length];
128 mime_available = true;
// Reports the mime_available flag of the shared Instance, which the constructor sets
// once the lookup buffer has been allocated.
132 public static bool MimeAvailable {
134 return Instance.mime_available;
// Determines the mime type of the file <filename>: runs StartByFileName on the shared
// Instance and returns the global_result it leaves behind.
// NOTE(review): the result travels through shared instance state; whether the call is
// guarded by lock_object is not visible in this listing.
138 public static string GetMimeTypeForFile (string filename)
141 Instance.StartByFileName (filename);
144 return Instance.global_result;
// Determines the mime type of an in-memory byte array: runs StartDataLookup on the shared
// Instance and returns the global_result it leaves behind.
// NOTE(review): whether the call is guarded by lock_object is not visible in this listing.
148 public static string GetMimeTypeForData (byte[] data)
151 Instance.StartDataLookup (data);
154 return Instance.global_result;
// Determines the mime type of a string (for example an email): runs StartStringLookup on
// the shared Instance and returns the global_result it leaves behind.
// NOTE(review): whether the call is guarded by lock_object is not visible in this listing.
157 public static string GetMimeTypeForString (string input)
160 Instance.StartStringLookup (input);
163 return Instance.global_result;
// Returns the entry stored for <mimetype> in the Aliases table.
// The NameValueCollection indexer yields null when the key has no entry.
166 public static string GetMimeAlias (string mimetype)
168 return Aliases [mimetype];
// Returns the entry stored for <mimetype> in the SubClasses table.
// The NameValueCollection indexer yields null when the key has no entry.
171 public static string GetMimeSubClass (string mimetype)
173 return SubClasses [mimetype];
// Drops every cached file-name lookup of the shared Instance.
176 public static void CleanFileCache ()
179 Instance.mime_file_cache.Clear ();
// Resolves the mime type of <filename> into global_result.
//
// A result cached by an earlier call is reused. Otherwise the lookup pipeline
// (GoByFileName) runs and its result is added to mime_file_cache. When the cache
// grows beyond mime_file_cache_max_size, up to (mime_file_cache_max_size - 500)
// entries are evicted.
private void StartByFileName (string filename)
{
	// cached result from an earlier lookup
	if (mime_file_cache.ContainsKey (filename)) {
		global_result = mime_file_cache [filename];
		return;
	}

	current_file_name = filename;
	is_zero_file = false;

	global_result = octet_stream;

	GoByFileName ();

	mime_file_cache.Add (current_file_name, global_result);

	if (mime_file_cache.Count > mime_file_cache_max_size) {
		// Snapshot the keys before removing anything: the dictionary enumerator
		// yields DictionaryEntry values (whose ToString () is the type name, not
		// the key) and is invalidated by any removal made while enumerating.
		string[] keys = new string [mime_file_cache.Count];
		mime_file_cache.Keys.CopyTo (keys, 0);

		int counter = mime_file_cache_max_size - 500;

		for (int i = 0; i < keys.Length && counter > 0; i++, counter--)
			mime_file_cache.Remove (keys [i]);
	}
}
// Data-based lookup: copies the leading bytes of <data> into the zeroed scratch buffer
// (at most buffer.Length bytes) and then tries the magic rules with priority >= 80, the
// rules below 80 and finally the binary/text heuristic.
// NOTE(review): buffer is only allocated in the constructor when the reader reports a
// length >= 32; no guard for the unallocated case (or for a null <data>) is visible here.
// NOTE(review): bytes_read is not updated in the lines shown, although TestMatchlet
// bounds-checks against it — confirm.
214 private void StartDataLookup (byte[] data)
216 global_result = octet_stream;
218 System.Array.Clear (buffer, 0, buffer.Length);
220 if (data.Length > buffer.Length) {
221 System.Array.Copy (data, buffer, buffer.Length);
223 System.Array.Copy (data, buffer, data.Length);
226 if (CheckMatch80Plus ())
229 if (CheckMatchBelow80 ())
232 CheckForBinaryOrText ();
// String-based lookup: starts from text/plain, stores <input> in search_string and lets
// CheckForContentTypeString refine global_result.
235 private void StartStringLookup (string input)
237 global_result = text_plain;
239 search_string = input;
241 if (CheckForContentTypeString ())
// File-based lookup pipeline for current_file_name. The stages visible here run in this
// order: magic rules with priority >= 80, file-name patterns, magic rules with priority
// < 80, and finally the binary/text heuristic. When mime data is unavailable or the file
// cannot be opened, only the file-name patterns are consulted.
// NOTE(review): the statements following each check (early exits, the zero-size test) are
// not part of this listing.
245 private void GoByFileName ()
247 // check if we can open the file
248 if (!MimeAvailable || !OpenFile ()) {
249 // couldn't open the file, check globals only
250 CheckGlobalPatterns ();
256 // check for matches with a priority >= 80
257 if (CheckMatch80Plus ())
261 // check global patterns, aka file extensions...
262 // this should be done for zero size files also,
263 // for example zero size file trash.ccc~ should return
264 // application/x-trash instead of application/x-zerosize
265 if (CheckGlobalPatterns ())
268 // if file size is zero, no other checks are needed
272 // ok, still nothing matches then try matches with a priority < 80
273 if (CheckMatchBelow80 ())
276 // wow, still nothing... return application/octet-stream for binary data, or text/plain for textual data
277 CheckForBinaryOrText ();
// Walks the Matches80Plus rules in list order; when TestMatch accepts a rule, that rule's
// MimeType is written to global_result.
280 private bool CheckMatch80Plus ()
282 foreach (Match match in Matches80Plus) {
283 if (TestMatch (match)) {
284 global_result = match.MimeType;
293 // this little helper method gives us a real speed improvement
// Character-by-character (ordinal, case-sensitive) test whether <input> ends with <value>.
// A <value> longer than <input> is handled by the length test up front; the comparison
// walks both strings from their last character backwards.
// NOTE(review): the return statements and the decrement of z are not part of this listing.
294 private bool FastEndsWidth (string input, string value)
296 if (value.Length > input.Length)
299 int z = input.Length - 1;
301 for (int i = value.Length - 1; i > -1; i--) {
302 if (value [i] != input [z])
// Character-by-character (ordinal, case-sensitive) test whether <input> starts with <value>.
// A <value> longer than <input> is handled by the length test up front.
// NOTE(review): the return statements are not part of this listing.
311 private bool FastStartsWith (string input, string value)
313 if (value.Length > input.Length)
316 for (int i = 0; i < value.Length; i++)
317 if (value [i] != input [i])
323 // the search always starts at index 0
// Linear scan of <input> for the character <value>; callers compare the result with -1
// to detect "not found".
// NOTE(review): the return statements are not part of this listing.
324 private int FastIndexOf (string input, char value)
326 if (input.Length == 0)
329 for (int i = 0; i < input.Length; i++)
330 if (input [i] == value)
// Returns the zero-based index of the first ordinal, case-sensitive occurrence of
// <value> in <input>, or -1 when <value> does not occur.
// An empty <input> or an empty <value> yields -1.
private int FastIndexOf (string input, string value)
{
	if (input.Length == 0 || value.Length == 0)
		return -1;

	// Last index at which <value> still fits into <input>. The bound is inclusive so
	// that a match ending at the final character (including input == value) is found.
	int last_start = input.Length - value.Length;

	for (int i = 0; i <= last_start; i++) {
		if (input [i] != value [0])
			continue;

		// first character matches, compare the remainder
		int z = 1;
		while (z < value.Length && input [i + z] == value [z])
			z++;

		if (z == value.Length)
			return i;
	}

	return -1;
}
// Cuts global_result at its first comma, keeping only the text in front of it.
// (A NameValueCollection lookup joins multiple values stored under one key with commas.)
359 private void CheckGlobalResult ()
361 int comma_index = FastIndexOf (global_result, ',');
363 if (comma_index != -1) {
364 global_result = global_result.Substring (0, comma_index);
// Tries to derive the mime type from the name of current_file_name alone. Any hit is
// written to global_result and trimmed by CheckGlobalResult. Stages, in order: literal
// names, long (multi-part) extensions, the plain extension, '*' prefix/suffix patterns.
// NOTE(review): the return statements and else lines are not part of this listing.
368 private bool CheckGlobalPatterns ()
370 string filename = Path.GetFileName (current_file_name);
372 // first check for literals
373 for (int i = 0; i < GlobalLiterals.Count; i++) {
374 string key = GlobalLiterals.GetKey (i);
// keys without '[' go through the plain substring search; the Regex.IsMatch test
// further down presumably handles the remaining keys, using the key as the pattern
// NOTE(review): FastIndexOf is a substring search, so a literal key is accepted
// anywhere inside the file name, not only as the complete name — confirm intent.
377 if (FastIndexOf (key, '[') == -1) {
378 if (FastIndexOf (filename, key) != -1) {
379 global_result = GlobalLiterals [i];
380 CheckGlobalResult ();
384 if (Regex.IsMatch (filename, key)) {
385 global_result = GlobalLiterals [i];
386 CheckGlobalResult ();
392 if (FastIndexOf (filename, '.') != -1) {
393 // check for double extension like .tar.gz
394 for (int i = 0; i < GlobalPatternsLong.Count; i++) {
395 string key = GlobalPatternsLong.GetKey (i);
397 if (FastEndsWidth (filename, key)) {
398 global_result = GlobalPatternsLong [i];
399 CheckGlobalResult ();
// second attempt with the lower-cased file name
402 if (FastEndsWidth (filename.ToLower (), key)) {
403 global_result = GlobalPatternsLong [i];
404 CheckGlobalResult ();
410 // check normal extensions...
411 string extension = Path.GetExtension (current_file_name);
413 if (extension.Length != 0) {
414 string global_result_tmp = GlobalPatternsShort [extension];
416 if (global_result_tmp != null) {
417 global_result = global_result_tmp;
418 CheckGlobalResult ();
// second attempt with the lower-cased extension
422 global_result_tmp = GlobalPatternsShort [extension.ToLower ()];
424 if (global_result_tmp != null) {
425 global_result = global_result_tmp;
426 CheckGlobalResult ();
432 // finally check if a prefix or suffix matches
433 for (int i = 0; i < GlobalSufPref.Count; i++) {
434 string key = GlobalSufPref.GetKey (i);
// a leading '*' marks a suffix pattern; the prefix test below presumably covers the
// other keys. In both cases every '*' is stripped from the key before comparing.
436 if (key [0] == '*') {
437 if (FastEndsWidth (filename, key.Replace ("*", String.Empty))) {
438 global_result = GlobalSufPref [i];
439 CheckGlobalResult ();
443 if (FastStartsWith (filename, key.Replace ("*", String.Empty))) {
444 global_result = GlobalSufPref [i];
445 CheckGlobalResult ();
// Walks the MatchesBelow80 rules in list order; when TestMatch accepts a rule, that rule's
// MimeType is written to global_result.
454 private bool CheckMatchBelow80 ()
456 foreach (Match match in MatchesBelow80) {
457 if (TestMatch (match)) {
458 global_result = match.MimeType;
// Last-resort heuristic: classifies the buffer content as application/octet-stream when
// one of its first 32 bytes is a control character (value below 32) other than tab, line
// feed, carriage return or form feed (12); otherwise the result is text/plain.
// NOTE(review): the scan always covers 32 bytes. The callers zero the unused tail of the
// buffer, and a zero byte satisfies the "binary" test, so input shorter than 32 bytes
// appears to be classified as binary unless a line not shown here prevents it — confirm.
467 private void CheckForBinaryOrText ()
469 // check the first 32 bytes
471 for (int i = 0; i < 32; i++) {
472 char c = System.Convert.ToChar (buffer [i]);
474 if (c != '\t' && c != '\n' && c != '\r' && c != 12 && c < 32) {
475 global_result = octet_stream;
480 global_result = text_plain;
// Tests the top-level matchlets of <match> one after another with TestMatchlet.
// NOTE(review): the return statements are not part of this listing; presumably the first
// accepted matchlet makes the whole match succeed.
483 private bool TestMatch (Match match)
485 foreach (Matchlet matchlet in match.Matchlets)
486 if (TestMatchlet (matchlet))
// Tests one magic rule against buffer. For every start position from Offset up to
// Offset + OffsetLength - 1 the rule's ByteValue is compared with the buffer (through
// Mask when one is set); when the bytes agree, the nested Matchlets are tested
// recursively. Start positions whose value would reach past bytes_read are not compared.
// NOTE(review): the return/break statements and the declaration of 'minus' are not part
// of this listing.
492 private bool TestMatchlet (Matchlet matchlet)
494 // using a simple brute force search algorithm
495 // compare each (masked) value from the buffer with the (masked) value from the matchlet
497 // nothing to compare if the offset + the byte value length exceeds the number of bytes read
498 if (matchlet.Offset + matchlet.ByteValue.Length > bytes_read)
501 for (int offset_counter = 0; offset_counter < matchlet.OffsetLength; offset_counter++) {
502 if (matchlet.Offset + offset_counter + matchlet.ByteValue.Length > bytes_read)
// unmasked comparison
505 if (matchlet.Mask == null) {
506 if (buffer [matchlet.Offset + offset_counter] == matchlet.ByteValue [0]) {
507 if (matchlet.ByteValue.Length == 1) {
508 if (matchlet.Matchlets.Count > 0) {
509 foreach (Matchlet sub_matchlet in matchlet.Matchlets) {
510 if (TestMatchlet (sub_matchlet))
518 // check if the last matchlet byte value is the same as the byte value in the buffer...
519 if (matchlet.ByteValue.Length > 2) {
520 if (buffer [matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1] != matchlet.ByteValue [matchlet.ByteValue.Length - 1])
// compare the bytes after the first one
526 for (int i = 1; i < matchlet.ByteValue.Length - minus; i++) {
527 if (buffer [matchlet.Offset + offset_counter + i] != matchlet.ByteValue [i])
531 if (matchlet.Matchlets.Count > 0) {
532 foreach (Matchlet sub_matchlets in matchlet.Matchlets) {
533 if (TestMatchlet (sub_matchlets))
// masked comparison: both sides are ANDed with the mask byte at the same index
540 if ((buffer [matchlet.Offset + offset_counter] & matchlet.Mask [0]) ==
541 (matchlet.ByteValue [0] & matchlet.Mask [0])) {
542 if (matchlet.ByteValue.Length == 1) {
543 if (matchlet.Matchlets.Count > 0) {
544 foreach (Matchlet sub_matchlets in matchlet.Matchlets) {
545 if (TestMatchlet (sub_matchlets))
553 // check if the last matchlet byte value is the same as the byte value in the buffer...
554 if (matchlet.ByteValue.Length > 2) {
556 if ((buffer [matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1] & matchlet.Mask [matchlet.ByteValue.Length - 1])
557 != (matchlet.ByteValue [matchlet.ByteValue.Length - 1] & matchlet.Mask [matchlet.ByteValue.Length - 1]))
563 for (int i = 1; i < matchlet.ByteValue.Length - minus; i++) {
564 if ((buffer [matchlet.Offset + offset_counter + i] & matchlet.Mask [i]) !=
565 (matchlet.ByteValue [i] & matchlet.Mask [i]))
569 if (matchlet.Matchlets.Count > 0) {
570 foreach (Matchlet sub_matchlets in matchlet.Matchlets) {
571 if (TestMatchlet (sub_matchlets))
// Opens current_file_name read-only and loads up to buffer.Length leading bytes into
// buffer, recording the count in bytes_read and zeroing the unused tail of the buffer.
// An empty file sets global_result to application/x-zerosize. Any exception is caught.
// NOTE(review): Close () sits inside the try block and no finally is visible, so an
// exception thrown after the stream was opened appears to leave it unclosed — confirm.
583 private bool OpenFile ()
586 file_stream = new FileStream (current_file_name, FileMode.Open, FileAccess.Read); // FileShare ??? use BinaryReader ???
588 if (file_stream.Length == 0) {
589 global_result = zero_file;
592 bytes_read = file_stream.Read (buffer, 0, buffer.Length);
594 // do not clear the whole buffer every time; clear only what's needed
595 if (bytes_read < buffer.Length) {
596 System.Array.Clear (buffer, bytes_read, buffer.Length - bytes_read);
600 file_stream.Close ();
601 } catch (Exception) {
// Determines the mime type of search_string.
//
// When the text contains a "Content-type:" header, global_result becomes the header
// value: the text after the colon up to the next ';' (or the end of the line / input
// when no ';' follows), with surrounding white space removed. Otherwise the text is
// converted to ASCII bytes, copied into the zeroed scratch buffer and tested against
// the magic rules.
//
// Returns true when a header was found or a magic rule matched, false otherwise.
private bool CheckForContentTypeString ()
{
	int index = search_string.IndexOf ("Content-type:");

	if (index != -1) {
		index += 13; // Length of string "Content-type:"

		// The value ends at the first ';'. Also stop at a line break or at the end of
		// the input, so a header without parameters cannot run past the string.
		int end = search_string.IndexOfAny (new char [] {';', '\r', '\n'}, index);

		if (end == -1)
			end = search_string.Length;

		// Trim () returns a new string; its result has to be assigned.
		global_result = search_string.Substring (index, end - index).Trim ();

		return true;
	}

	// convert string to byte array
	byte[] string_byte = (new ASCIIEncoding ()).GetBytes (search_string);

	System.Array.Clear (buffer, 0, buffer.Length);

	if (string_byte.Length > buffer.Length) {
		System.Array.Copy (string_byte, buffer, buffer.Length);
	} else {
		System.Array.Copy (string_byte, buffer, string_byte.Length);
	}

	if (CheckMatch80Plus ())
		return true;

	if (CheckMatchBelow80 ())
		return true;

	return false;
}
// Reads the shared-mime-info files (magic, globs, subclasses, aliases) found in the
// shared_mime_paths directories and fills the static tables of the Mime class.
647 internal class FDOMimeConfigReader
// Set by CheckFDOMimePaths once at least one mime directory has been found.
649 bool fdo_mime_available = false;
// Directories (with trailing '/') that are searched for the data files.
650 StringCollection shared_mime_paths = new StringCollection ();
// Largest offset + range + value length + 1 seen in any magic rule (see MakeMatches);
// returned by the initialisation code below as the required buffer length.
653 int max_offset_and_range = 0;
// Body of the initialisation method (its declaration line and the calls to the Read*
// methods are not part of this listing).
657 CheckFDOMimePaths ();
659 if (!fdo_mime_available)
670 shared_mime_paths = null;
673 return max_offset_and_range;
// Collects the mime directories that exist on this machine: /usr/share/mime,
// /usr/local/share/mime and .local/share/mime below the user's personal folder.
// Sets fdo_mime_available when at least one was found.
// Note: the stored paths end in '/', and the readers append names starting with '/'
// (e.g. "/magic"), so the resulting file paths contain a double slash.
676 private void CheckFDOMimePaths ()
678 if (Directory.Exists ("/usr/share/mime"))
679 shared_mime_paths.Add ("/usr/share/mime/");
681 if (Directory.Exists ("/usr/local/share/mime"))
682 shared_mime_paths.Add ("/usr/local/share/mime/");
684 if (Directory.Exists (System.Environment.GetFolderPath (Environment.SpecialFolder.Personal) + "/.local/share/mime"))
685 shared_mime_paths.Add (System.Environment.GetFolderPath (Environment.SpecialFolder.Personal) + "/.local/share/mime/");
687 if (shared_mime_paths.Count == 0)
690 fdo_mime_available = true;
// For every mime directory that contains a "magic" file: opens it with a BinaryReader
// (stored in br) and validates the file header with CheckMagicHeader.
// Exceptions raised while reading are caught.
// NOTE(review): the statements run after a valid header (presumably MakeMatches) and the
// closing of the reader/stream are not part of this listing.
693 private void ReadMagicData ()
695 foreach (string path in shared_mime_paths) {
696 if (!File.Exists (path + "/magic"))
700 FileStream fs = File.OpenRead (path + "/magic");
701 br = new BinaryReader (fs);
703 if (CheckMagicHeader ()) {
709 } catch (Exception ) {
// Parses the records of the magic file currently open in br into Match objects and adds
// them to Mime.MatchesBelow80 / Mime.Matches80Plus. Each record starts with a priority
// and mime type (ReadPriorityAndMimeType) followed by rule lines made of an optional
// indent, an offset, a length-prefixed value and optional mask, word size and range.
// While parsing, max_offset_and_range is raised to the largest
// offset + range + value length + 1 encountered.
// NOTE(review): several short statements (reads that consume the marker characters, loop
// headers, declarations such as priority/indent/offset/value/mask) are not part of this
// listing.
714 private void MakeMatches ()
// most recent matchlet per indent level, so a nested rule can be attached to the
// matchlet one level above it (indent - 1)
716 Matchlet[] matchlets = new Matchlet [30];
718 while (br.PeekChar () != -1) {
720 string mime_type = ReadPriorityAndMimeType (ref priority);
722 if (mime_type != null) {
723 Match match = new Match ();
724 match.Priority = priority;
725 match.MimeType = mime_type;
// optional indent level: decimal digits in front of '>'
731 if (br.PeekChar () != '>') {
732 StringBuilder indent_string = new StringBuilder ();
733 //string indent_string = String.Empty;
735 if (br.PeekChar () == '>')
739 //indent_string += c;
740 indent_string.Append (c);
742 indent = Convert.ToInt32 (indent_string.ToString ());
// offset of the value inside the examined data
748 if (br.PeekChar () == '>') {
750 offset = ReadValue ();
753 int value_length = 0;
755 // value length and value
756 if (br.PeekChar () == '=') {
759 // read 2 bytes value length (always big endian)
760 byte first = br.ReadByte ();
761 byte second = br.ReadByte ();
763 value_length = first * 256 + second;
765 value = br.ReadBytes (value_length);
// optional mask, same length as the value
771 if (br.PeekChar () == '&') {
774 mask = br.ReadBytes (value_length);
// optional word size, given as a single digit character
779 if (br.PeekChar () == '~') {
784 word_size = Convert.ToInt32 (c - 0x30);
786 // data is stored in big endian format.
787 if (word_size > 1 && System.BitConverter.IsLittleEndian) {
788 //convert the value and, if available, the mask data to little endian
789 if (word_size == 2) {
791 for (int i = 0; i < value.Length; i += 2) {
792 byte one = value [i];
793 byte two = value [i + 1];
799 for (int i = 0; i < mask.Length; i += 2) {
801 byte two = mask [i + 1];
806 } else if (word_size == 4) {
808 for (int i = 0; i < value.Length; i += 4) {
809 byte one = value [i];
810 byte two = value [i + 1];
811 byte three = value [i + 2];
812 byte four = value [i + 3];
814 value [i + 1] = three;
820 for (int i = 0; i < mask.Length; i += 4) {
822 byte two = mask [i + 1];
823 byte three = mask [i + 2];
824 byte four = mask [i + 3];
826 mask [i + 1] = three;
// optional range: number of start positions to try, 1 when absent
837 int range_length = 1;
838 if (br.PeekChar () == '+') {
840 range_length = ReadValue ();
846 // create the matchlet
847 matchlets [indent] = new Matchlet ();
848 matchlets [indent].Offset = offset;
849 matchlets [indent].OffsetLength = range_length;
850 matchlets [indent].ByteValue = value;
852 matchlets [indent].Mask = mask;
// attach the matchlet either to the match itself or to the matchlet one indent level up
// (the condition selecting between the two is not part of this listing)
855 match.Matchlets.Add (matchlets [indent]);
857 matchlets [indent - 1].Matchlets.Add (matchlets [indent]);
// keep track of how many leading bytes the rules need to see
860 if (max_offset_and_range < matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1)
861 max_offset_and_range = matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1;
863 // if '[' move to next mime type
864 if (br.PeekChar () == '[')
// NOTE(review): the priority test choosing between the two lists is not part of this listing
869 Mime.MatchesBelow80.Add (match);
871 Mime.Matches80Plus.Add (match);
// Reads the "globs" file of every mime directory. Each non-comment line is split at ':'
// into a mime type (first field) and a glob (second field); the glob becomes the key and
// the mime type the value in one of the Mime tables:
//   - glob contains '*' but no '.'        -> GlobalSufPref
//   - glob contains no '*'                -> GlobalLiterals
//   - otherwise, split at '.':
//       more than two parts              -> GlobalPatternsLong  (first character removed)
//       else                             -> GlobalPatternsShort (first character removed)
// Exceptions raised while reading are caught.
// NOTE(review): lines without a ':' make split [1] fail, which ends up in the catch block;
// the closing of the reader is not part of this listing — confirm.
876 private void ReadGlobsData ()
878 foreach (string path in shared_mime_paths) {
879 if (!File.Exists (path + "/globs"))
883 StreamReader sr = new StreamReader (path + "/globs");
885 while (sr.Peek () != -1) {
886 string line = sr.ReadLine ().Trim ();
// '#' starts a comment line
888 if (line.StartsWith ("#"))
891 string[] split = line.Split (new char [] {':'});
893 if (split [1].IndexOf ('*') > -1 && split [1].IndexOf ('.') == -1) {
894 Mime.GlobalSufPref.Add (split [1], split [0]);
895 } else if (split [1]. IndexOf ('*') == -1) {
896 Mime.GlobalLiterals.Add (split [1], split [0]);
898 string[] split2 = split [1].Split (new char [] {'.'});
900 if (split2.Length > 2) {
902 Mime.GlobalPatternsLong.Add (split [1].Remove (0, 1), split [0]);
905 Mime.GlobalPatternsShort.Add (split [1].Remove (0, 1), split [0]);
911 } catch (Exception ) {
// Reads the "subclasses" file of every mime directory. Each non-comment line is split at
// the space character; the first field becomes the key and the second the value of an
// entry in Mime.SubClasses. Exceptions raised while reading are caught.
// NOTE(review): the closing of the reader is not part of this listing — confirm.
916 private void ReadSubclasses ()
918 foreach (string path in shared_mime_paths) {
919 if (!File.Exists (path + "/subclasses"))
923 StreamReader sr = new StreamReader (path + "/subclasses");
925 while (sr.Peek () != -1) {
926 string line = sr.ReadLine ().Trim ();
// '#' starts a comment line
928 if (line.StartsWith ("#"))
931 string[] split = line.Split (new char [] {' '});
933 Mime.SubClasses.Add (split [0], split [1]);
937 } catch (Exception ) {
// Reads the "aliases" file of every mime directory. Each non-comment line is split at
// the space character; the first field becomes the key and the second the value of an
// entry in Mime.Aliases. Exceptions raised while reading are caught.
// NOTE(review): the closing of the reader is not part of this listing — confirm.
942 private void ReadAliases ()
944 foreach (string path in shared_mime_paths) {
945 if (!File.Exists (path + "/aliases"))
949 StreamReader sr = new StreamReader (path + "/aliases");
951 while (sr.Peek () != -1) {
952 string line = sr.ReadLine ().Trim ();
// '#' starts a comment line
954 if (line.StartsWith ("#"))
957 string[] split = line.Split (new char [] {' '});
959 Mime.Aliases.Add (split [0], split [1]);
963 } catch (Exception ) {
// Reads a decimal number from br: characters are collected until the next character is
// '=' or a line feed, and the collected text is converted with Convert.ToInt32.
// NOTE(review): the loop header, the character read and the return statement are not part
// of this listing.
968 private int ReadValue ()
970 StringBuilder result_string = new StringBuilder ();
975 if (br.PeekChar () == '=' || br.PeekChar () == '\n')
979 result_string.Append (c);
982 result = Convert.ToInt32 (result_string.ToString ());
// Reads the header of one magic record from br. The header starts with '['; the
// characters that follow are collected into the priority (converted with
// Convert.ToInt32 and passed back through <priority>) and then into the mime type,
// which is returned when the header is terminated by a line feed.
// NOTE(review): the loop headers, the delimiter tests between priority and mime type and
// the fallback return value are not part of this listing.
987 private string ReadPriorityAndMimeType (ref int priority)
989 if (br.ReadChar () == '[') {
990 StringBuilder priority_string = new StringBuilder ();
992 char c = br.ReadChar ();
995 priority_string.Append (c);
998 priority = System.Convert.ToInt32 (priority_string.ToString ());
1000 StringBuilder mime_type_result = new StringBuilder ();
1002 char c = br.ReadChar ();
1006 mime_type_result.Append (c);
1009 if (br.ReadChar () == '\n')
1010 return mime_type_result.ToString ();
// Validates the start of the magic file open in br: the ten characters "MIME-Magic",
// followed by a zero byte and a line feed.
// NOTE(review): the return statements are not part of this listing.
1015 private bool CheckMagicHeader ()
1017 char[] chars = br.ReadChars (10);
1018 string magic_header = new String (chars);
1020 if (magic_header != "MIME-Magic")
1023 if (br.ReadByte () != 0)
1025 if (br.ReadChar () != '\n')
// One magic record: a mime type, its priority and the top-level rules (matchlets) that
// identify it. Instances are created and filled by FDOMimeConfigReader.MakeMatches.
1032 internal class Match
// top-level Matchlet objects of this record
1036 ArrayList matchlets = new ArrayList();
// mime type reported when one of the rules matches
1038 public string MimeType {
// priority read from the record header
1048 public int Priority {
// the top-level rules; TestMatch iterates over them
1058 public ArrayList Matchlets {
// One rule of a magic record: a byte pattern expected at an offset range of the examined
// data, optionally masked, plus nested rules. Filled by FDOMimeConfigReader.MakeMatches
// and evaluated by Mime.TestMatchlet.
1065 internal class Matchlet
// nested rules
1074 ArrayList matchlets = new ArrayList ();
// bytes to compare with the examined data
1076 public byte[] ByteValue {
// optional mask applied to both sides of the comparison; null when the rule has none
1086 public byte[] Mask {
// number of consecutive start positions to try, beginning at the rule's offset
1106 public int OffsetLength {
1108 offsetLength = value;
1112 return offsetLength;
// word size read from the magic file
1116 public int WordSize {
// the nested rules, tested recursively by Mime.TestMatchlet
1126 public ArrayList Matchlets {