2002-09-27 Gonzalo Paniagua Javier <gonzalo@ximian.com>
[mono.git] / mcs / class / corlib / System.IO / StreamReader.cs
//
// System.IO.StreamReader.cs
//
// Author:
//   Dietmar Maurer (dietmar@ximian.com)
//   Miguel de Icaza (miguel@ximian.com)
//
// (C) Ximian, Inc.  http://www.ximian.com
//
10
11 using System;
12 using System.Text;
13
14
namespace System.IO {
        //
        // A TextReader that decodes the bytes of a Stream into characters
        // using an Encoding, optionally switching the encoding according to
        // the byte order mark found at the start of the stream.
        //
        [Serializable]
        public class StreamReader : TextReader {

                private const int DefaultBufferSize = 1024;
                private const int DefaultFileBufferSize = 4096;
                private const int MinimumBufferSize = 128;

                //
                // The input buffer: raw bytes read from base_stream
                //
                private byte [] input_buffer;

                //
                // The decoded buffer from the above input buffer
                //
                private char [] decoded_buffer;

                //
                // Number of decoded chars available in decoded_buffer.
                //
                private int decoded_count;

                //
                // Current position in the decoded_buffer
                //
                private int pos;

                //
                // The buffer size that we are using (length of input_buffer)
                //
                private int buffer_size;

                //
                // Index into `input_buffer' where we start decoding
                //
                private int parse_start;

                //
                // Pending start-of-stream checks, cleared after the first
                // non-empty read:
                //   bit 1: detect the encoding from byte order marks
                //   bit 2: the encoding has a preamble that must be skipped
                //
                int do_checks;

                private Encoding encoding;
                private Decoder decoder;

                private Stream base_stream;

                //
                // Reader with no backing store: every read reports end of
                // stream.  Exposed through StreamReader.Null.
                //
                private class NullStreamReader : StreamReader {
                        public override int Peek ()
                        {
                                return -1;
                        }

                        public override int Read ()
                        {
                                return -1;
                        }

                        public override int Read (char[] buffer, int index, int count)
                        {
                                return 0;
                        }

                        public override string ReadLine ()
                        {
                                return null;
                        }

                        public override string ReadToEnd ()
                        {
                                return String.Empty;
                        }

                        public override Stream BaseStream
                        {
                                get { return Stream.Null; }
                        }

                        public override Encoding CurrentEncoding
                        {
                                get { return Encoding.Unicode; }
                        }
                }

                public new static readonly StreamReader Null =  (StreamReader)(new NullStreamReader());

                //
                // Leaves every field unset; only usable by subclasses that
                // override all the reading members (see NullStreamReader).
                //
                internal StreamReader() {}

                public StreamReader(Stream stream)
                        : this (stream, Encoding.UTF8, true, DefaultBufferSize) { }

                public StreamReader(Stream stream, bool detect_encoding_from_bytemarks)
                        : this (stream, Encoding.UTF8, detect_encoding_from_bytemarks, DefaultBufferSize) { }

                public StreamReader(Stream stream, Encoding encoding)
                        : this (stream, encoding, true, DefaultBufferSize) { }

                public StreamReader(Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks)
                        : this (stream, encoding, detect_encoding_from_bytemarks, DefaultBufferSize) { }

                //
                // Creates a reader over `stream'.
                //
                // Throws ArgumentNullException if `stream' or `encoding' is
                // null and ArgumentException if the stream is not readable.
                // A `buffer_size' below MinimumBufferSize is raised to it.
                //
                public StreamReader(Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks, int buffer_size)
                {
                        Initialize (stream, encoding, detect_encoding_from_bytemarks, buffer_size);
                }

                public StreamReader(string path)
                        : this (path, Encoding.UTF8, true, DefaultFileBufferSize) { }

                public StreamReader(string path, bool detect_encoding_from_bytemarks)
                        : this (path, Encoding.UTF8, detect_encoding_from_bytemarks, DefaultFileBufferSize) { }

                public StreamReader(string path, Encoding encoding)
                        : this (path, encoding, true, DefaultFileBufferSize) { }

                public StreamReader(string path, Encoding encoding, bool detect_encoding_from_bytemarks)
                        : this (path, encoding, detect_encoding_from_bytemarks, DefaultFileBufferSize) { }

                //
                // Opens the file at `path' for reading and creates a reader
                // over it.
                //
                // Throws ArgumentNullException (null path or encoding),
                // ArgumentException (empty path or invalid characters),
                // DirectoryNotFoundException and FileNotFoundException.
                //
                public StreamReader(string path, Encoding encoding, bool detect_encoding_from_bytemarks, int buffer_size)
                {
                        if (null == path)
                                throw new ArgumentNullException("path");
                        if (String.Empty == path)
                                throw new ArgumentException("Empty path not allowed");
                        if (path.IndexOfAny (Path.InvalidPathChars) != -1)
                                throw new ArgumentException("path contains invalid characters");
                        // Reject a null encoding before touching the file system
                        // so that no file handle is opened just to be abandoned.
                        if (null == encoding)
                                throw new ArgumentNullException("encoding");

                        string DirName = Path.GetDirectoryName(path);
                        if (DirName != String.Empty && !Directory.Exists(DirName))
                                throw new DirectoryNotFoundException ("Directory '" + DirName + "' not found.");
                        if (!File.Exists(path))
                                throw new FileNotFoundException(path);

                        Stream stream = (Stream) File.OpenRead (path);
                        try {
                                Initialize (stream, encoding, detect_encoding_from_bytemarks, buffer_size);
                        } catch {
                                // We own the stream we just opened: do not leak
                                // it if the reader cannot be set up.
                                stream.Close ();
                                throw;
                        }
                }

                //
                // Shared constructor body: validates the arguments, allocates
                // the byte and char buffers and records which start-of-stream
                // checks (see do_checks) are pending.
                //
                protected void Initialize (Stream stream, Encoding encoding, bool detect_encoding_from_bytemarks, int buffer_size)
                {
                        if (null == stream)
                                throw new ArgumentNullException("stream");
                        if (null == encoding)
                                throw new ArgumentNullException("encoding");
                        if (!stream.CanRead)
                                throw new ArgumentException("Cannot read stream");

                        if (buffer_size < MinimumBufferSize)
                                buffer_size = MinimumBufferSize;

                        base_stream = stream;
                        input_buffer = new byte [buffer_size];
                        this.buffer_size = buffer_size;
                        this.encoding = encoding;
                        decoder = encoding.GetDecoder ();

                        byte [] preamble = encoding.GetPreamble ();
                        do_checks = detect_encoding_from_bytemarks ? 1 : 0;
                        do_checks += (preamble.Length == 0) ? 0 : 2;

                        decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
                        decoded_count = 0;
                        pos = 0;
                }

                public virtual Stream BaseStream
                {
                        get {
                                return base_stream;
                        }
                }

                //
                // The encoding in use.  It can change after the first read
                // when a byte order mark selects a different encoding.
                // Throws ObjectDisposedException once the reader is closed.
                //
                public virtual Encoding CurrentEncoding
                {
                        get {
                                if (encoding == null)
                                        throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
                                return encoding;
                        }
                }

                public override void Close ()
                {
                        Dispose (true);
                }

                //
                // Closes the underlying stream (when `disposing') and drops
                // every buffer.  Safe to call more than once.
                //
                protected override void Dispose (bool disposing)
                {
                        if (disposing && base_stream != null)
                                base_stream.Close ();

                        input_buffer = null;
                        decoded_buffer = null;
                        encoding = null;
                        decoder = null;
                        base_stream = null;
                        base.Dispose (disposing);
                }

                //
                // Throws ObjectDisposedException if the reader has no stream
                // to read from any more.
                //
                private void CheckDisposed ()
                {
                        if (base_stream == null)
                                throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
                }

                //
                // Provides auto-detection of the encoding, as well as skipping over
                // byte marks at the beginning of a stream.
                //
                // `count' is the number of valid bytes in input_buffer.  Returns
                // the number of leading bytes to skip; may replace `encoding'.
                //
                int DoChecks (int count)
                {
                        // First try the preamble of the encoding we were given.
                        if ((do_checks & 2) == 2){
                                byte [] preamble = encoding.GetPreamble ();
                                int c = preamble.Length;
                                if (count >= c){
                                        int i;

                                        for (i = 0; i < c; i++)
                                                if (input_buffer [i] != preamble [i])
                                                        break;

                                        if (i == c)
                                                return i;
                                }
                        }

                        // Then look for the well-known byte order marks.
                        if ((do_checks & 1) == 1){
                                if (count < 2)
                                        return 0;

                                if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff){
                                        this.encoding = Encoding.BigEndianUnicode;
                                        return 2;
                                }

                                if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe){
                                        this.encoding = Encoding.Unicode;
                                        return 2;
                                }

                                if (count < 3)
                                        return 0;

                                if (input_buffer [0] == 0xef && input_buffer [1] == 0xbb && input_buffer [2] == 0xbf){
                                        this.encoding = Encoding.UTF8;
                                        return 3;
                                }
                        }

                        return 0;
                }

                //
                // The buffer is empty, fill it again.  Returns the number of
                // chars now available in decoded_buffer, 0 at end of stream.
                //
                private int ReadBuffer ()
                {
                        pos = 0;
                        decoded_count = 0;
                        parse_start = 0;

                        // keep looping until the decoder gives us some chars
                        do {
                                int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);

                                if (cbEncoded == 0)
                                        return 0;

                                if (do_checks > 0){
                                        Encoding old = encoding;
                                        parse_start = DoChecks (cbEncoded);
                                        if (old != encoding){
                                                decoder = encoding.GetDecoder ();

                                                // decoded_buffer was sized for the old
                                                // encoding; the detected one may yield
                                                // more chars per byte.  Nothing has been
                                                // decoded yet, so it is safe to replace.
                                                int max_chars = encoding.GetMaxCharCount (buffer_size);
                                                if (decoded_buffer.Length < max_chars)
                                                        decoded_buffer = new char [max_chars];
                                        }
                                        do_checks = 0;
                                        cbEncoded -= parse_start;
                                }

                                decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
                                parse_start = 0;
                        } while (decoded_count == 0);

                        return decoded_count;
                }

                //
                // Returns the next char without consuming it, or -1 at end of
                // stream or when the underlying stream cannot seek.
                //
                public override int Peek ()
                {
                        CheckDisposed ();

                        if (!base_stream.CanSeek)
                                return -1;

                        if (pos >= decoded_count && ReadBuffer () == 0)
                                return -1;

                        return decoded_buffer [pos];
                }

                //
                // Returns the next char and advances, or -1 at end of stream.
                //
                public override int Read ()
                {
                        CheckDisposed ();

                        if (pos >= decoded_count && ReadBuffer () == 0)
                                return -1;

                        return decoded_buffer [pos++];
                }

                //
                // Copies up to `count' chars into `dest_buffer' starting at
                // `index'.  Returns the number of chars copied, which is less
                // than `count' only when the end of the stream is reached.
                //
                // Throws ArgumentNullException (null buffer),
                // ArgumentOutOfRangeException (negative index or count) and
                // ArgumentException (range does not fit in the buffer).
                //
                public override int Read (char[] dest_buffer, int index, int count)
                {
                        if (dest_buffer == null)
                                throw new ArgumentNullException ("dest_buffer");

                        if ((index < 0) || (count < 0))
                                throw new ArgumentOutOfRangeException ();

                        // Written as a subtraction so that index + count cannot
                        // overflow and slip past the check.
                        if (index > dest_buffer.Length - count)
                                throw new ArgumentException ();

                        CheckDisposed ();

                        int chars_read = 0;
                        while (count > 0)
                        {
                                if (pos >= decoded_count && ReadBuffer () == 0)
                                        return chars_read;

                                int cch = Math.Min (decoded_count - pos, count);
                                Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
                                pos += cch;
                                index += cch;
                                count -= cch;
                                chars_read += cch;
                        }
                        return chars_read;
                }

                //
                // Reads one line.  A line ends at "\n", "\r" or "\r\n"; the
                // terminator is not part of the returned string.  Returns null
                // when the end of the stream is reached with nothing read.
                //
                public override string ReadLine()
                {
                        StringBuilder text = new StringBuilder ();

                        while (true) {
                                int c = Read ();

                                if (c == -1) {                          // end of stream
                                        if (text.Length == 0)
                                                return null;

                                        break;
                                }

                                if (c == '\n')                          // newline
                                        break;

                                if (c == '\r') {                        // "\r" or "\r\n"
                                        // Swallow the '\n' of a "\r\n" pair.  We look
                                        // at the buffer directly because Peek () gives
                                        // up on streams that cannot seek.
                                        if ((pos < decoded_count || ReadBuffer () != 0) && decoded_buffer [pos] == '\n')
                                                pos++;
                                        break;
                                }

                                text.Append ((char) c);
                        }

                        return text.ToString ();
                }

                //
                // Reads everything from the current position to the end of the
                // stream; returns an empty string if nothing is left.
                //
                public override string ReadToEnd()
                {
                        CheckDisposed ();

                        StringBuilder text = new StringBuilder ();

                        int size = decoded_buffer.Length;
                        char [] buffer = new char [size];
                        int len;

                        while ((len = Read (buffer, 0, size)) != 0)
                                text.Append (buffer, 0, len);

                        return text.ToString ();
                }
        }
}