1 // Permission is hereby granted, free of charge, to any person obtaining
2 // a copy of this software and associated documentation files (the
3 // "Software"), to deal in the Software without restriction, including
4 // without limitation the rights to use, copy, modify, merge, publish,
5 // distribute, sublicense, and/or sell copies of the Software, and to
6 // permit persons to whom the Software is furnished to do so, subject to
7 // the following conditions:
9 // The above copyright notice and this permission notice shall be
10 // included in all copies or substantial portions of the Software.
12 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
14 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
16 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
17 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
18 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 // Copyright (c) 2006 Alexander Olk
24 // Alexander Olk alex.olk@googlemail.com
29 using System.Collections;
30 using System.Collections.Specialized;
31 using System.Text.RegularExpressions;
36 // string mimeType = Mime.GetMimeTypeForFile( string filename );
38 // string mimeType = Mime.GetMimeTypeForData( byte[] data );
39 // - for string (maybe an email):
40 // string mimeType = Mime.GetMimeTypeForString( string input );
42 // - get alias for mime type:
43 // string alias = Mime.GetMimeAlias( string mimeType );
44 // - get subclass for mime type:
45 // string subtype = Mime.GetMimeSubClass( string mimeType );
46 // - get all available mime types:
47 // string[] available = Mime.AvailableMimeTypes;
50 // - optimize even more :)
51 // - async callback ?!?
52 // - freedesktop.org file extensions can also contain regular expressions; resolve those too
53 // - sort match collections by magic priority ( higher = first ) ?
56 // looking up the mime types 20 times for 2757 files in /usr/lib without caching (mime_file_cache)
57 // old version: Time: 00:00:32.3791220
58 // new version: Time: 00:00:16.9991810
60 namespace System.Windows.Forms
// The single shared instance; the static members below operate on it.
64 public static Mime Instance = new Mime();
// File name handed to the most recent lookup by file name.
66 private string current_file_name;
// Outcome of the most recent lookup; the static GetMimeTypeFor* methods return it.
67 private string global_result = octet_stream;
69 private FileStream file_stream;
// Scan buffer holding the leading bytes of the file/data being examined.
71 private byte[] buffer = null;
// Well-known result values.
73 private const string octet_stream = "application/octet-stream";
74 private const string text_plain = "text/plain";
75 private const string zero_file = "application/x-zerosize";
// Lookup results keyed by file name, consulted before a file is examined again.
77 private StringDictionary mime_file_cache = new StringDictionary();
// Entry count above which the cache gets trimmed.
79 private const int mime_file_cache_max_size = 3000;
// Input of the most recent lookup by string.
81 private string search_string;
// NOTE(review): lock_object is not referenced in the visible lines - presumably it
// serializes the static entry points; confirm.
83 private static object lock_object = new Object();
85 private bool is_zero_file = false;
// Number of bytes placed in buffer by the last file read.
87 private int bytes_read = 0;
// Set once the scan buffer has been allocated.
89 private bool mime_available = false;
// Lookup tables; within the visible code they are filled by FDOMimeConfigReader.
91 public static NameValueCollection Aliases;
92 public static NameValueCollection SubClasses;
94 public static NameValueCollection GlobalPatternsShort;
95 public static NameValueCollection GlobalPatternsLong;
96 public static NameValueCollection GlobalLiterals;
97 public static NameValueCollection GlobalSufPref;
// Magic matches, kept in two lists so they can be tested in separate passes.
99 public static ArrayList Matches80Plus;
100 public static ArrayList MatchesBelow80;
// Initialization (presumably the constructor body; its header is not visible here):
// create the tables with case-insensitive keys, then let the config reader fill them.
104 Aliases = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
105 SubClasses = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
106 GlobalPatternsShort = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
107 GlobalPatternsLong = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
108 GlobalLiterals = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
109 GlobalSufPref = new NameValueCollection (StringComparer.CurrentCultureIgnoreCase);
111 Matches80Plus = new ArrayList ();
112 MatchesBelow80 = new ArrayList ();
114 FDOMimeConfigReader fmcr = new FDOMimeConfigReader ();
// The value returned by Init () is used as the size of the scan buffer.
115 int buffer_length = fmcr.Init ();
// Content-based lookups need at least 32 bytes: CheckForBinaryOrText reads buffer [0..31].
// NOTE(review): when this condition fails, buffer stays null, yet StartDataLookup and
// CheckForContentTypeString use buffer without a visible guard - confirm.
117 if (buffer_length >= 32) {
118 buffer = new byte [buffer_length];
119 mime_available = true;
// Reports the shared instance's mime_available flag, which is only set together
// with the allocation of the scan buffer.
123 public static bool MimeAvailable {
125 return Instance.mime_available;
// Returns the mime type determined for the file named 'filename'.
// The lookup runs on the shared instance, which leaves its answer in global_result.
129 public static string GetMimeTypeForFile (string filename)
132 Instance.StartByFileName (filename);
135 return Instance.global_result;
// Returns the mime type determined for the byte array 'data'.
// The lookup runs on the shared instance, which leaves its answer in global_result.
139 public static string GetMimeTypeForData (byte[] data)
142 Instance.StartDataLookup (data);
145 return Instance.global_result;
// Returns the mime type determined for the text 'input' (for example an e-mail).
// The lookup runs on the shared instance, which leaves its answer in global_result.
148 public static string GetMimeTypeForString (string input)
151 Instance.StartStringLookup (input);
154 return Instance.global_result;
// Looks 'mimetype' up in the Aliases table.
// The NameValueCollection indexer yields null when the key has no entry.
157 public static string GetMimeAlias (string mimetype)
159 return Aliases [mimetype];
// Looks 'mimetype' up in the SubClasses table.
// The NameValueCollection indexer yields null when the key has no entry.
162 public static string GetMimeSubClass (string mimetype)
164 return SubClasses [mimetype];
// Empties the file name -> mime type cache of the shared instance.
167 public static void CleanFileCache ()
170 Instance.mime_file_cache.Clear ();
// Determines the mime type for 'filename', consulting the cache first and
// storing the outcome in global_result.
// NOTE(review): StringDictionary lower-cases its keys, so file names differing only
// in case share one cache entry - confirm this is acceptable on case-sensitive file systems.
174 private void StartByFileName (string filename)
// Cache hit: reuse the stored answer.
176 if (mime_file_cache.ContainsKey (filename)) {
177 global_result = mime_file_cache [filename];
// Reset the per-lookup state before examining the file.
181 current_file_name = filename;
182 is_zero_file = false;
184 global_result = octet_stream;
// Remember the answer under the file name.
188 mime_file_cache.Add (current_file_name, global_result);
// Trim the cache once it has grown past mime_file_cache_max_size.
// NOTE(review): entries are removed from mime_file_cache while its own enumerator is
// still being advanced, and a StringDictionary enumerator yields DictionaryEntry values,
// so Current.ToString () is probably not the cached key - confirm this loop trims as intended.
190 if (mime_file_cache.Count > mime_file_cache_max_size) {
191 IEnumerator enumerator = mime_file_cache.GetEnumerator ();
193 int counter = mime_file_cache_max_size - 500;
195 while (enumerator.MoveNext ()) {
196 mime_file_cache.Remove (enumerator.Current.ToString ());
// Determines the mime type of an in-memory byte array and stores it in global_result.
205 private void StartDataLookup (byte[] data)
207 global_result = octet_stream;
// Wipe the scan buffer, then copy in at most buffer.Length bytes of 'data'.
209 System.Array.Clear (buffer, 0, buffer.Length);
211 if (data.Length > buffer.Length) {
212 System.Array.Copy (data, buffer, buffer.Length);
214 System.Array.Copy (data, buffer, data.Length);
// NOTE(review): bytes_read is not updated in the visible lines although TestMatchlet
// bounds its comparisons by it - confirm the value left by the last file read is intended.
// Order of checks: high-priority magic, low-priority magic, binary/text heuristic.
217 if (CheckMatch80Plus ())
220 if (CheckMatchBelow80 ())
223 CheckForBinaryOrText ();
// Determines the mime type of a text; text/plain is the starting answer and
// CheckForContentTypeString () gets the chance to replace it.
226 private void StartStringLookup (string input)
228 global_result = text_plain;
230 search_string = input;
232 if (CheckForContentTypeString ())
// Runs the lookup steps for current_file_name in order: high-priority magic,
// global patterns (file name based), low-priority magic, binary/text heuristic.
236 private void GoByFileName ()
238 // check if we can open the file
239 if (!MimeAvailable || !OpenFile ()) {
240 // couldn't open the file, check globals only
241 CheckGlobalPatterns ();
247 // check for matches with a priority >= 80
248 if (CheckMatch80Plus ())
252 // check global patterns, aka file extensions...
253 // this should be done for zero size files also,
254 // for example zero size file trash.ccc~ should return
255 // application/x-trash instead of application/x-zerosize
256 if (CheckGlobalPatterns ())
259 // if file size is zero, no other checks are needed
263 // ok, still nothing matches then try matches with a priority < 80
264 if (CheckMatchBelow80 ())
267 // wow, still nothing... return application/octet-stream for binary data, or text/plain for textual data
268 CheckForBinaryOrText ();
// Tests the scan buffer against the entries of Matches80Plus; the mime type of an
// entry for which TestMatch () succeeds is stored in global_result.
271 private bool CheckMatch80Plus ()
273 foreach (Match match in Matches80Plus) {
274 if (TestMatch (match)) {
275 global_result = match.MimeType;
284 // this little helper method gives us a real speed improvement
// Suffix test by plain character comparison (case-sensitive, no culture rules):
// 'value' is compared against the tail of 'input', starting from the last character.
285 private bool FastEndsWidth (string input, string value)
// A value longer than the input is handled up front.
287 if (value.Length > input.Length)
290 int z = input.Length - 1;
292 for (int i = value.Length - 1; i > -1; i--) {
293 if (value [i] != input [z])
// Prefix test by plain character comparison (case-sensitive, no culture rules).
302 private bool FastStartsWith (string input, string value)
// A value longer than the input is handled up front.
304 if (value.Length > input.Length)
307 for (int i = 0; i < value.Length; i++)
308 if (value [i] != input [i])
314 // start always with index = 0
// Scans 'input' from its first character for the character 'value'.
315 private int FastIndexOf (string input, char value)
317 if (input.Length == 0)
320 for (int i = 0; i < input.Length; i++)
321 if (input [i] == value)
// Searches 'input' for the string 'value' by plain character comparison:
// every position whose character equals value [0] is checked against the rest of 'value'.
// NOTE(review): the outer bound 'i < input.Length - value.Length' never examines the start
// position input.Length - value.Length, so a 'value' sitting at the very end of 'input'
// (including value equal to input) is not tested; value [0] is also read without an
// empty-string check - confirm whether callers rely on this.
327 private int FastIndexOf (string input, string value)
329 if (input.Length == 0)
332 for (int i = 0; i < input.Length - value.Length; i++) {
333 if (input [i] == value [0]) {
335 for (int z = 1; z < value.Length; z++) {
336 if (input [i + z] != value [z])
// All value.Length - 1 remaining characters matched.
341 if (counter == value.Length - 1) {
// Cuts global_result at its first comma, keeping only the leading entry.
// (A NameValueCollection returns several values stored under one key as a
// comma-separated list.)
350 private void CheckGlobalResult ()
352 int comma_index = FastIndexOf (global_result, ',');
354 if (comma_index != -1) {
355 global_result = global_result.Substring (0, comma_index);
// Derives the mime type from the file name alone, trying in order: literal names,
// long (multi-part) extensions, the plain extension, and prefix/suffix patterns.
// A hit is written to global_result and reduced to one entry by CheckGlobalResult ().
359 private bool CheckGlobalPatterns ()
361 string filename = Path.GetFileName (current_file_name);
363 // first check for literals
364 for (int i = 0; i < GlobalLiterals.Count; i++) {
365 string key = GlobalLiterals.GetKey (i);
// Keys without '[' are compared as plain text, the others are handed to Regex.
// NOTE(review): the plain comparison is a substring search (FastIndexOf != -1), not an
// equality test, so a literal also hits when it merely occurs inside the file name - confirm.
368 if (FastIndexOf (key, '[') == -1) {
369 if (FastIndexOf (filename, key) != -1) {
370 global_result = GlobalLiterals [i];
371 CheckGlobalResult ();
375 if (Regex.IsMatch (filename, key)) {
376 global_result = GlobalLiterals [i];
377 CheckGlobalResult ();
383 if (FastIndexOf (filename, '.') != -1) {
384 // check for double extension like .tar.gz
385 for (int i = 0; i < GlobalPatternsLong.Count; i++) {
386 string key = GlobalPatternsLong.GetKey (i);
// Exact-case comparison first, then a retry with the lower-cased file name.
388 if (FastEndsWidth (filename, key)) {
389 global_result = GlobalPatternsLong [i];
390 CheckGlobalResult ();
393 if (FastEndsWidth (filename.ToLower (), key)) {
394 global_result = GlobalPatternsLong [i];
395 CheckGlobalResult ();
401 // check normal extensions...
402 string extension = Path.GetExtension (current_file_name);
404 if (extension.Length != 0) {
405 string global_result_tmp = GlobalPatternsShort [extension];
407 if (global_result_tmp != null) {
408 global_result = global_result_tmp;
409 CheckGlobalResult ();
// Second attempt with the lower-cased extension.
413 global_result_tmp = GlobalPatternsShort [extension.ToLower ()];
415 if (global_result_tmp != null) {
416 global_result = global_result_tmp;
417 CheckGlobalResult ();
423 // finally check if a prefix or suffix matches
424 for (int i = 0; i < GlobalSufPref.Count; i++) {
425 string key = GlobalSufPref.GetKey (i);
// A key starting with '*' is a suffix pattern; other keys are treated as prefix
// patterns. The '*' characters are stripped before comparing.
427 if (key [0] == '*') {
428 if (FastEndsWidth (filename, key.Replace ("*", String.Empty))) {
429 global_result = GlobalSufPref [i];
430 CheckGlobalResult ();
434 if (FastStartsWith (filename, key.Replace ("*", String.Empty))) {
435 global_result = GlobalSufPref [i];
436 CheckGlobalResult ();
// Tests the scan buffer against the entries of MatchesBelow80; the mime type of an
// entry for which TestMatch () succeeds is stored in global_result.
445 private bool CheckMatchBelow80 ()
447 foreach (Match match in MatchesBelow80) {
448 if (TestMatch (match)) {
449 global_result = match.MimeType;
// Last-resort heuristic on the scan buffer: a control character other than
// tab, line feed, carriage return or form feed (12) yields application/octet-stream,
// otherwise the result becomes text/plain.
458 private void CheckForBinaryOrText ()
460 // check the first 32 bytes
462 for (int i = 0; i < 32; i++) {
463 char c = System.Convert.ToChar (buffer [i]);
465 if (c != '\t' && c != '\n' && c != '\r' && c != 12 && c < 32) {
466 global_result = octet_stream;
471 global_result = text_plain;
// Runs TestMatchlet () on the top-level matchlets of 'match'.
474 private bool TestMatch (Match match)
476 foreach (Matchlet matchlet in match.Matchlets)
477 if (TestMatchlet (matchlet))
// Compares the matchlet's ByteValue with the scan buffer at every start position from
// Offset up to Offset + OffsetLength - 1 and recurses into the nested matchlets.
// Two parallel code paths exist: one without a mask and one where both sides are
// ANDed with Mask before being compared.
483 private bool TestMatchlet (Matchlet matchlet)
485 // using a simple brute force search algorithm
486 // compare each (masked) value from the buffer with the (masked) value from the matchlet
488 // no need to check if the offset + the bytevalue length exceed the # bytes read
489 if (matchlet.Offset + matchlet.ByteValue.Length > bytes_read)
492 for (int offset_counter = 0; offset_counter < matchlet.OffsetLength; offset_counter++) {
// Same bound check for the current start position.
493 if (matchlet.Offset + offset_counter + matchlet.ByteValue.Length > bytes_read)
// --- comparison without mask ---
496 if (matchlet.Mask == null) {
// The first byte is compared before anything else.
497 if (buffer [matchlet.Offset + offset_counter] == matchlet.ByteValue [0]) {
498 if (matchlet.ByteValue.Length == 1) {
499 if (matchlet.Matchlets.Count > 0) {
500 foreach (Matchlet sub_matchlet in matchlet.Matchlets) {
501 if (TestMatchlet (sub_matchlet))
509 // check if the last matchlet byte value is the same as the byte value in the buffer...
510 if (matchlet.ByteValue.Length > 2) {
511 if (buffer [matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1] != matchlet.ByteValue [matchlet.ByteValue.Length - 1])
// Remaining bytes; 'minus' is set in lines not visible here (presumably it excludes
// the last byte once it has been compared above - confirm).
517 for (int i = 1; i < matchlet.ByteValue.Length - minus; i++) {
518 if (buffer [matchlet.Offset + offset_counter + i] != matchlet.ByteValue [i])
522 if (matchlet.Matchlets.Count > 0) {
523 foreach (Matchlet sub_matchlets in matchlet.Matchlets) {
524 if (TestMatchlet (sub_matchlets))
// --- comparison with mask: both operands are ANDed with the mask byte ---
531 if ((buffer [matchlet.Offset + offset_counter] & matchlet.Mask [0]) ==
532 (matchlet.ByteValue [0] & matchlet.Mask [0])) {
533 if (matchlet.ByteValue.Length == 1) {
534 if (matchlet.Matchlets.Count > 0) {
535 foreach (Matchlet sub_matchlets in matchlet.Matchlets) {
536 if (TestMatchlet (sub_matchlets))
544 // check if the last matchlet byte value is the same as the byte value in the buffer...
545 if (matchlet.ByteValue.Length > 2) {
547 if ((buffer [matchlet.Offset + offset_counter + matchlet.ByteValue.Length - 1] & matchlet.Mask [matchlet.ByteValue.Length - 1])
548 != (matchlet.ByteValue [matchlet.ByteValue.Length - 1] & matchlet.Mask [matchlet.ByteValue.Length - 1]))
554 for (int i = 1; i < matchlet.ByteValue.Length - minus; i++) {
555 if ((buffer [matchlet.Offset + offset_counter + i] & matchlet.Mask [i]) !=
556 (matchlet.ByteValue [i] & matchlet.Mask [i]))
560 if (matchlet.Matchlets.Count > 0) {
561 foreach (Matchlet sub_matchlets in matchlet.Matchlets) {
562 if (TestMatchlet (sub_matchlets))
// Opens current_file_name for reading and loads up to buffer.Length leading bytes
// into the scan buffer, recording the count in bytes_read. An empty file sets
// global_result to application/x-zerosize.
// NOTE(review): in the visible lines Close () is called inside the try; if Length or
// Read throws, the stream may stay open - confirm the catch path closes it.
574 private bool OpenFile ()
577 file_stream = new FileStream (current_file_name, FileMode.Open, FileAccess.Read); // FileShare ??? use BinaryReader ???
579 if (file_stream.Length == 0) {
580 global_result = zero_file;
583 bytes_read = file_stream.Read (buffer, 0, buffer.Length);
585 // do not clear the whole buffer every time; clear only what's needed
586 if (bytes_read < buffer.Length) {
587 System.Array.Clear (buffer, bytes_read, buffer.Length - bytes_read);
591 file_stream.Close ();
592 } catch (Exception) {
// Determines the mime type of search_string: first from a "Content-type:" header
// inside the text, otherwise by running the magic matches on its ASCII bytes.
599 private bool CheckForContentTypeString ()
// NOTE(review): the search is case-sensitive, so "Content-Type:" is not found - confirm intent.
601 int index = search_string.IndexOf ("Content-type:");
604 index += 13; // Length of string "Content-type:"
606 global_result = String.Empty;
// Collect everything up to the next ';'.
// NOTE(review): no end-of-string check is visible; a header without ';' would index
// past the end of search_string - confirm.
608 while (search_string [index] != ';') {
609 global_result += search_string [index++];
// NOTE(review): String.Trim () returns a new string; the result is discarded here,
// so global_result keeps its surrounding whitespace.
612 global_result.Trim ();
617 // convert string to byte array
618 byte[] string_byte = (new ASCIIEncoding ()).GetBytes (search_string);
// Wipe the scan buffer, then copy in at most buffer.Length bytes.
620 System.Array.Clear (buffer, 0, buffer.Length);
622 if (string_byte.Length > buffer.Length) {
623 System.Array.Copy (string_byte, buffer, buffer.Length);
625 System.Array.Copy (string_byte, buffer, string_byte.Length);
628 if (CheckMatch80Plus ())
631 if (CheckMatchBelow80 ())
// Reads the shared mime database files (magic, globs, subclasses, aliases) found
// below the directories collected by CheckFDOMimePaths () and fills the static
// tables of Mime with their content.
638 internal class FDOMimeConfigReader
// Set by CheckFDOMimePaths () when at least one database directory exists.
640 bool fdo_mime_available = false;
641 StringCollection shared_mime_paths = new StringCollection ();
// Largest Offset + OffsetLength + ByteValue.Length + 1 over all matchlets read so far.
644 int max_offset_and_range = 0;
// Platform check: 4 is PlatformID.Unix, 6 is PlatformID.MacOSX and 128 is the
// Unix value reported by older Mono runtimes.
648 int p = (int) Environment.OSVersion.Platform;
649 if ((p != 4) && (p != 6) && (p != 128))
650 // Not running on Unix.
653 CheckFDOMimePaths ();
655 if (!fdo_mime_available)
// The path list is dropped once it is no longer needed.
666 shared_mime_paths = null;
669 return max_offset_and_range;
// Collects the mime database directories that exist on this machine: the two
// system-wide locations and the per-user one below the Personal folder.
// Each path is stored with a trailing '/'.
672 private void CheckFDOMimePaths ()
674 if (Directory.Exists ("/usr/share/mime"))
675 shared_mime_paths.Add ("/usr/share/mime/");
677 if (Directory.Exists ("/usr/local/share/mime"))
678 shared_mime_paths.Add ("/usr/local/share/mime/");
680 if (Directory.Exists (System.Environment.GetFolderPath (Environment.SpecialFolder.Personal) + "/.local/share/mime"))
681 shared_mime_paths.Add (System.Environment.GetFolderPath (Environment.SpecialFolder.Personal) + "/.local/share/mime/");
// Availability is flagged after the empty-list check.
683 if (shared_mime_paths.Count == 0)
686 fdo_mime_available = true;
// Opens the "magic" file of every collected directory with a BinaryReader (br)
// and validates its header via CheckMagicHeader ().
689 private void ReadMagicData ()
691 foreach (string path in shared_mime_paths) {
// Directories without a magic file are passed over.
692 if (!File.Exists (path + "/magic"))
696 FileStream fs = File.OpenRead (path + "/magic");
697 br = new BinaryReader (fs);
699 if (CheckMagicHeader ()) {
// NOTE(review): the handler body is not visible here - confirm read errors are not
// silently discarded and that the reader is closed.
705 } catch (Exception ) {
// Parses the body of a magic file from br into Match / Matchlet objects and adds
// each Match to Mime.MatchesBelow80 or Mime.Matches80Plus.
// Per matchlet the fields are read in this order: optional indent, '>' offset,
// '=' value (2-byte length + bytes), optional '&' mask, optional '~' word size,
// optional '+' range length.
710 private void MakeMatches ()
// Most recent matchlet per indent level, so a nested matchlet can be attached to its parent.
// NOTE(review): fixed capacity of 30 levels; a larger indent value would index out of range.
712 Matchlet[] matchlets = new Matchlet [30];
714 while (br.PeekChar () != -1) {
716 string mime_type = ReadPriorityAndMimeType (ref priority);
718 if (mime_type != null) {
719 Match match = new Match ();
720 match.Priority = priority;
721 match.MimeType = mime_type;
// Indent: digits in front of '>' are collected and converted to an int.
727 if (br.PeekChar () != '>') {
728 StringBuilder indent_string = new StringBuilder ();
729 //string indent_string = String.Empty;
731 if (br.PeekChar () == '>')
735 //indent_string += c;
736 indent_string.Append (c);
738 indent = Convert.ToInt32 (indent_string.ToString ());
// Offset of the value within the examined data.
744 if (br.PeekChar () == '>') {
746 offset = ReadValue ();
749 int value_length = 0;
751 // value length and value
752 if (br.PeekChar () == '=') {
755 // read 2 bytes value length (always big endian)
756 byte first = br.ReadByte ();
757 byte second = br.ReadByte ();
759 value_length = first * 256 + second;
761 value = br.ReadBytes (value_length);
// Optional mask with the same length as the value.
767 if (br.PeekChar () == '&') {
770 mask = br.ReadBytes (value_length);
// Optional word size, stored as a single ASCII digit.
775 if (br.PeekChar () == '~') {
// Subtracting 0x30 ('0') turns the digit character into its numeric value.
780 word_size = Convert.ToInt32 (c - 0x30);
782 // data is stored in big endian format.
783 if (word_size > 1 && System.BitConverter.IsLittleEndian) {
784 //convert the value and, if available, the mask data to little endian
785 if (word_size == 2) {
// NOTE(review): the loops below read [i + 1] / [i + 3]; a value length that is not a
// multiple of the word size would index out of range - confirm the format guarantees it.
787 for (int i = 0; i < value.Length; i += 2) {
788 byte one = value [i];
789 byte two = value [i + 1];
795 for (int i = 0; i < mask.Length; i += 2) {
797 byte two = mask [i + 1];
802 } else if (word_size == 4) {
804 for (int i = 0; i < value.Length; i += 4) {
805 byte one = value [i];
806 byte two = value [i + 1];
807 byte three = value [i + 2];
808 byte four = value [i + 3];
810 value [i + 1] = three;
816 for (int i = 0; i < mask.Length; i += 4) {
818 byte two = mask [i + 1];
819 byte three = mask [i + 2];
820 byte four = mask [i + 3];
822 mask [i + 1] = three;
// Range length defaults to 1 when no '+' field is present.
833 int range_length = 1;
834 if (br.PeekChar () == '+') {
836 range_length = ReadValue ();
842 // create the matchlet
843 matchlets [indent] = new Matchlet ();
844 matchlets [indent].Offset = offset;
845 matchlets [indent].OffsetLength = range_length;
846 matchlets [indent].ByteValue = value;
848 matchlets [indent].Mask = mask;
// The matchlet is attached either to the match itself or to the matchlet one indent level up.
851 match.Matchlets.Add (matchlets [indent]);
853 matchlets [indent - 1].Matchlets.Add (matchlets [indent]);
// Track the largest number of leading bytes any matchlet needs.
856 if (max_offset_and_range < matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1)
857 max_offset_and_range = matchlets [indent].Offset + matchlets [indent].OffsetLength + matchlets [indent].ByteValue.Length + 1;
859 // if '[' move to next mime type
860 if (br.PeekChar () == '[')
865 Mime.MatchesBelow80.Add (match);
867 Mime.Matches80Plus.Add (match);
// Reads the "globs" file of every collected directory. Each non-comment line is split
// at ':' into mime type (split [0]) and pattern (split [1]) and stored, keyed by
// pattern, in one of the Mime.Global* tables.
872 private void ReadGlobsData ()
874 foreach (string path in shared_mime_paths) {
875 if (!File.Exists (path + "/globs"))
879 StreamReader sr = new StreamReader (path + "/globs");
881 while (sr.Peek () != -1) {
882 string line = sr.ReadLine ().Trim ();
// '#' starts a comment line.
884 if (line.StartsWith ("#"))
// NOTE(review): split [1] is used without a length check; a line without ':' raises an
// exception that ends up in the catch below - confirm that is intended.
887 string[] split = line.Split (new char [] {':'});
// Pattern with '*' but without '.': prefix/suffix pattern.
889 if (split [1].IndexOf ('*') > -1 && split [1].IndexOf ('.') == -1) {
890 Mime.GlobalSufPref.Add (split [1], split [0]);
// Pattern without '*': literal file name.
891 } else if (split [1]. IndexOf ('*') == -1) {
892 Mime.GlobalLiterals.Add (split [1], split [0]);
// Remaining patterns contain '*' and '.'; the first character is removed and
// the rest is stored as a long (more than one '.') or short extension.
894 string[] split2 = split [1].Split (new char [] {'.'});
896 if (split2.Length > 2) {
898 Mime.GlobalPatternsLong.Add (split [1].Remove (0, 1), split [0]);
901 Mime.GlobalPatternsShort.Add (split [1].Remove (0, 1), split [0]);
907 } catch (Exception ) {
// Reads the "subclasses" file of every collected directory; each non-comment line is
// split at ' ' and stored in Mime.SubClasses as split [0] -> split [1].
912 private void ReadSubclasses ()
914 foreach (string path in shared_mime_paths) {
915 if (!File.Exists (path + "/subclasses"))
919 StreamReader sr = new StreamReader (path + "/subclasses");
921 while (sr.Peek () != -1) {
922 string line = sr.ReadLine ().Trim ();
// '#' starts a comment line.
924 if (line.StartsWith ("#"))
// NOTE(review): split [1] is used without a length check - a line without a space
// raises an exception that ends up in the catch below.
927 string[] split = line.Split (new char [] {' '});
929 Mime.SubClasses.Add (split [0], split [1]);
933 } catch (Exception ) {
// Reads the "aliases" file of every collected directory; each non-comment line is
// split at ' ' and stored in Mime.Aliases as split [0] -> split [1].
938 private void ReadAliases ()
940 foreach (string path in shared_mime_paths) {
941 if (!File.Exists (path + "/aliases"))
945 StreamReader sr = new StreamReader (path + "/aliases");
947 while (sr.Peek () != -1) {
948 string line = sr.ReadLine ().Trim ();
// '#' starts a comment line.
950 if (line.StartsWith ("#"))
// NOTE(review): split [1] is used without a length check - a line without a space
// raises an exception that ends up in the catch below.
953 string[] split = line.Split (new char [] {' '});
955 Mime.Aliases.Add (split [0], split [1]);
959 } catch (Exception ) {
// Reads a number written as text from br: characters are collected until the next
// character is '=' or '\n', then the collected text is converted with Convert.ToInt32.
964 private int ReadValue ()
966 StringBuilder result_string = new StringBuilder ();
// The terminator is only peeked at, not consumed, by this test.
971 if (br.PeekChar () == '=' || br.PeekChar () == '\n')
975 result_string.Append (c);
978 result = Convert.ToInt32 (result_string.ToString ());
// Reads a section header that starts with '[' from br: the priority text is converted
// into 'priority', the mime type text is returned when the header ends with '\n'.
983 private string ReadPriorityAndMimeType (ref int priority)
985 if (br.ReadChar () == '[') {
// Priority: characters are collected and converted with Convert.ToInt32.
986 StringBuilder priority_string = new StringBuilder ();
988 char c = br.ReadChar ();
991 priority_string.Append (c);
994 priority = System.Convert.ToInt32 (priority_string.ToString ());
// Mime type: characters are collected into a second builder.
996 StringBuilder mime_type_result = new StringBuilder ();
998 char c = br.ReadChar ();
1002 mime_type_result.Append (c);
1005 if (br.ReadChar () == '\n')
1006 return mime_type_result.ToString ();
// Validates the start of a magic file read through br: the 10 characters
// "MIME-Magic", followed by a zero byte, followed by '\n'.
1011 private bool CheckMagicHeader ()
1013 char[] chars = br.ReadChars (10);
1014 string magic_header = new String (chars);
1016 if (magic_header != "MIME-Magic")
1019 if (br.ReadByte () != 0)
1021 if (br.ReadChar () != '\n')
// One entry of the magic database: a mime type with its priority and the
// top-level matchlets that are tested against the examined data.
1028 internal class Match
1032 ArrayList matchlets = new ArrayList();
// Mime type reported when this match succeeds.
1034 public string MimeType {
// Priority read from the section header of the magic file.
1044 public int Priority {
// Top-level Matchlet objects of this match.
1054 public ArrayList Matchlets {
1061 internal class Matchlet
1070 ArrayList matchlets = new ArrayList ();
1072 public byte[] ByteValue {
1082 public byte[] Mask {
1102 public int OffsetLength {
1104 offsetLength = value;
1108 return offsetLength;
1112 public int WordSize {
1122 public ArrayList Matchlets {