8e7f0e71e5b19f3ae633cacd9d3e173b491a9bec
[mono.git] / mcs / class / corlib / System.IO / StreamReader.cs
1 //
2 // System.IO.StreamReader.cs
3 //
4 // Author:
5 //   Dietmar Maurer (dietmar@ximian.com)
6 //   Miguel de Icaza (miguel@ximian.com) 
7 //
8 // (C) Ximian, Inc.  http://www.ximian.com
9 // Copyright (C) 2004 Novell (http://www.novell.com)
10 //
11
12 //
13 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
14 //
15 // Permission is hereby granted, free of charge, to any person obtaining
16 // a copy of this software and associated documentation files (the
17 // "Software"), to deal in the Software without restriction, including
18 // without limitation the rights to use, copy, modify, merge, publish,
19 // distribute, sublicense, and/or sell copies of the Software, and to
20 // permit persons to whom the Software is furnished to do so, subject to
21 // the following conditions:
22 // 
23 // The above copyright notice and this permission notice shall be
24 // included in all copies or substantial portions of the Software.
25 // 
26 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
30 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
31 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
32 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 //
34
35 using System;
36 using System.Text;
37 using System.Runtime.InteropServices;
38
39 namespace System.IO {
40         [Serializable]
41 #if NET_2_0
42         [ComVisible (true)]
43 #endif
44         public class StreamReader : TextReader {
45
46                 const int DefaultBufferSize = 1024;
47                 const int DefaultFileBufferSize = 4096;
48                 const int MinimumBufferSize = 128;
49
50                 //
51                 // The input buffer
52                 //
53                 byte [] input_buffer;
54
55                 //
56                 // The decoded buffer from the above input buffer
57                 //
58                 char [] decoded_buffer;
59
60                 //
61                 // Decoded bytes in decoded_buffer.
62                 //
63                 int decoded_count;
64
65                 //
66                 // Current position in the decoded_buffer
67                 //
68                 int pos;
69
70                 //
71                 // The buffer size that we are using
72                 //
73                 int buffer_size;
74
75                 int do_checks;
76                 
77                 Encoding encoding;
78                 Decoder decoder;
79
80                 Stream base_stream;
81                 bool mayBlock;
82                 StringBuilder line_builder;
83
84                 private class NullStreamReader : StreamReader {
85                         public override int Peek ()
86                         {
87                                 return -1;
88                         }
89
90                         public override int Read ()
91                         {
92                                 return -1;
93                         }
94
95                         public override int Read ([In, Out] char[] buffer, int index, int count)
96                         {
97                                 return 0;
98                         }
99
100                         public override string ReadLine ()
101                         {
102                                 return null;
103                         }
104
105                         public override string ReadToEnd ()
106                         {
107                                 return String.Empty;
108                         }
109
110                         public override Stream BaseStream
111                         {
112                                 get { return Stream.Null; }
113                         }
114
115                         public override Encoding CurrentEncoding
116                         {
117                                 get { return Encoding.Unicode; }
118                         }
119                 }
120
121                 public new static readonly StreamReader Null =  (StreamReader)(new NullStreamReader());
122                 
123                 internal StreamReader() {}
124
125                 public StreamReader(Stream stream)
126                         : this (stream, Encoding.UTF8Unmarked, true, DefaultBufferSize) { }
127
128                 public StreamReader(Stream stream, bool detectEncodingFromByteOrderMarks)
129                         : this (stream, Encoding.UTF8Unmarked, detectEncodingFromByteOrderMarks, DefaultBufferSize) { }
130
131                 public StreamReader(Stream stream, Encoding encoding)
132                         : this (stream, encoding, true, DefaultBufferSize) { }
133
134                 public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
135                         : this (stream, encoding, detectEncodingFromByteOrderMarks, DefaultBufferSize) { }
136                 
137                 public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
138                 {
139                         Initialize (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
140                 }
141
142                 public StreamReader(string path)
143                         : this (path, Encoding.UTF8Unmarked, true, DefaultFileBufferSize) { }
144
145                 public StreamReader(string path, bool detectEncodingFromByteOrderMarks)
146                         : this (path, Encoding.UTF8Unmarked, detectEncodingFromByteOrderMarks, DefaultFileBufferSize) { }
147
148                 public StreamReader(string path, Encoding encoding)
149                         : this (path, encoding, true, DefaultFileBufferSize) { }
150
151                 public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
152                         : this (path, encoding, detectEncodingFromByteOrderMarks, DefaultFileBufferSize) { }
153                 
154                 public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
155                 {
156                         if (null == path)
157                                 throw new ArgumentNullException("path");
158                         if (String.Empty == path)
159                                 throw new ArgumentException("Empty path not allowed");
160                         if (path.IndexOfAny (Path.InvalidPathChars) != -1)
161                                 throw new ArgumentException("path contains invalid characters");
162                         if (null == encoding)
163                                 throw new ArgumentNullException ("encoding");
164                         if (bufferSize <= 0)
165                                 throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");
166
167                         Stream stream = (Stream) File.OpenRead (path);
168                         Initialize (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
169                 }
170
171                 internal void Initialize (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
172                 {
173                         if (null == stream)
174                                 throw new ArgumentNullException ("stream");
175                         if (null == encoding)
176                                 throw new ArgumentNullException ("encoding");
177                         if (!stream.CanRead)
178                                 throw new ArgumentException ("Cannot read stream");
179                         if (bufferSize <= 0)
180                                 throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");
181
182                         if (bufferSize < MinimumBufferSize)
183                                 bufferSize = MinimumBufferSize;
184
185                         base_stream = stream;
186                         input_buffer = new byte [bufferSize];
187                         this.buffer_size = bufferSize;
188                         this.encoding = encoding;
189                         decoder = encoding.GetDecoder ();
190
191                         byte [] preamble = encoding.GetPreamble ();
192                         do_checks = detectEncodingFromByteOrderMarks ? 1 : 0;
193                         do_checks += (preamble.Length == 0) ? 0 : 2;
194                         
195                         // since GetChars() might add flushed character, it 
196                         // should have additional char buffer for extra 1 
197                         // (probably 1 is ok, but might be insufficient. I'm not sure)
198                         decoded_buffer = new char [encoding.GetMaxCharCount (bufferSize) + 1];
199                         decoded_count = 0;
200                         pos = 0;
201                 }
202
203                 public virtual Stream BaseStream
204                 {
205                         get {
206                                 return base_stream;
207                         }
208                 }
209
210                 public virtual Encoding CurrentEncoding
211                 {
212                         get {
213                                 if (encoding == null)
214                                         throw new Exception ();
215                                 return encoding;
216                         }
217                 }
218
219 #if NET_2_0
220                 public bool EndOfStream {
221                         get { return Peek () < 0; }
222                 }
223 #endif
224
225                 public override void Close ()
226                 {
227                         Dispose (true);
228                 }
229
230                 protected override void Dispose (bool disposing)
231                 {
232                         if (disposing && base_stream != null)
233                                 base_stream.Close ();
234                         
235                         input_buffer = null;
236                         decoded_buffer = null;
237                         encoding = null;
238                         decoder = null;
239                         base_stream = null;
240                         base.Dispose (disposing);
241                 }
242
243                 //
244                 // Provides auto-detection of the encoding, as well as skipping over
245                 // byte marks at the beginning of a stream.
246                 //
247                 int DoChecks (int count)
248                 {
249                         if ((do_checks & 2) == 2){
250                                 byte [] preamble = encoding.GetPreamble ();
251                                 int c = preamble.Length;
252                                 if (count >= c){
253                                         int i;
254                                         
255                                         for (i = 0; i < c; i++)
256                                                 if (input_buffer [i] != preamble [i])
257                                                         break;
258
259                                         if (i == c)
260                                                 return i;
261                                 }
262                         }
263
264                         if ((do_checks & 1) == 1){
265                                 if (count < 2)
266                                         return 0;
267
268 #if !NET_2_0
269                                 if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe){
270                                         this.encoding = Encoding.Unicode;
271                                         return 2;
272                                 }
273 #endif
274
275                                 if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff){
276                                         this.encoding = Encoding.BigEndianUnicode;
277                                         return 2;
278                                 }
279
280                                 if (count < 3)
281                                         return 0;
282
283                                 if (input_buffer [0] == 0xef && input_buffer [1] == 0xbb && input_buffer [2] == 0xbf){
284                                         this.encoding = Encoding.UTF8Unmarked;
285                                         return 3;
286                                 }
287
288 #if NET_2_0
289                                 if (count < 4) {
290                                         if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe && input_buffer [2] != 0) {
291                                                 this.encoding = Encoding.Unicode;
292                                                 return 2;
293                                         }
294                                         return 0;
295                                 }
296
297                                 if (input_buffer [0] == 0 && input_buffer [1] == 0
298                                         && input_buffer [2] == 0xfe && input_buffer [3] == 0xff)
299                                 {
300                                         this.encoding = Encoding.BigEndianUTF32;
301                                         return 4;
302                                 }
303
304                                 if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe) {
305                                         if (input_buffer [2] == 0 && input_buffer[3] == 0) {
306                                                 this.encoding = Encoding.UTF32;
307                                                 return 4;
308                                         }
309
310                                         this.encoding = Encoding.Unicode;
311                                         return 2;
312                                 }
313 #endif
314                         }
315
316                         return 0;
317                 }
318
319                 public void DiscardBufferedData ()
320                 {
321                         pos = decoded_count = 0;
322                         mayBlock = false;
323                         // Discard internal state of the decoder too.
324                         decoder = encoding.GetDecoder ();
325                 }
326                 
327                 // the buffer is empty, fill it again
328                 private int ReadBuffer ()
329                 {
330                         pos = 0;
331                         int cbEncoded = 0;
332
333                         // keep looping until the decoder gives us some chars
334                         decoded_count = 0;
335                         int parse_start = 0;
336                         do      
337                         {
338                                 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
339                                 
340                                 if (cbEncoded <= 0)
341                                         return 0;
342
343                                 mayBlock = (cbEncoded < buffer_size);
344                                 if (do_checks > 0){
345                                         Encoding old = encoding;
346                                         parse_start = DoChecks (cbEncoded);
347                                         if (old != encoding){
348                                                 decoder = encoding.GetDecoder ();
349                                         }
350                                         do_checks = 0;
351                                         cbEncoded -= parse_start;
352                                 }
353                                 
354                                 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
355                                 parse_start = 0;
356                         } while (decoded_count == 0);
357
358                         return decoded_count;
359                 }
360
361                 public override int Peek ()
362                 {
363                         if (base_stream == null)
364                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
365                         if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
366                                 return -1;
367
368                         return decoded_buffer [pos];
369                 }
370
371                 public override int Read ()
372                 {
373                         if (base_stream == null)
374                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
375                         if (pos >= decoded_count && ReadBuffer () == 0)
376                                 return -1;
377
378                         return decoded_buffer [pos++];
379                 }
380
381                 public override int Read ([In, Out] char[] buffer, int index, int count)
382                 {
383                         if (base_stream == null)
384                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
385                         if (buffer == null)
386                                 throw new ArgumentNullException ("buffer");
387                         if (index < 0)
388                                 throw new ArgumentOutOfRangeException ("index", "< 0");
389                         if (count < 0)
390                                 throw new ArgumentOutOfRangeException ("count", "< 0");
391                         // re-ordered to avoid possible integer overflow
392                         if (index > buffer.Length - count)
393                                 throw new ArgumentException ("index + count > buffer.Length");
394
395                         int chars_read = 0;
396                         while (count > 0)
397                         {
398                                 if (pos >= decoded_count && ReadBuffer () == 0)
399                                         return chars_read > 0 ? chars_read : 0;
400
401                                 int cch = Math.Min (decoded_count - pos, count);
402                                 Array.Copy (decoded_buffer, pos, buffer, index, cch);
403                                 pos += cch;
404                                 index += cch;
405                                 count -= cch;
406                                 chars_read += cch;
407                                 if (mayBlock)
408                                         break;
409                         }
410                         return chars_read;
411                 }
412
413                 bool foundCR;
414                 int FindNextEOL ()
415                 {
416                         char c = '\0';
417                         for (; pos < decoded_count; pos++) {
418                                 c = decoded_buffer [pos];
419                                 if (c == '\n') {
420                                         pos++;
421                                         int res = (foundCR) ? (pos - 2) : (pos - 1);
422                                         if (res < 0)
423                                                 res = 0; // if a new buffer starts with a \n and there was a \r at
424                                                         // the end of the previous one, we get here.
425                                         foundCR = false;
426                                         return res;
427                                 } else if (foundCR) {
428                                         foundCR = false;
429                                         if (pos == 0)
430                                                 return -2; // Need to flush the current buffered line.
431                                                            // This is a \r at the end of the previous decoded buffer that
432                                                            // is not followed by a \n in the current decoded buffer.
433                                         return pos - 1;
434                                 }
435
436                                 foundCR = (c == '\r');
437                         }
438
439                         return -1;
440                 }
441
442                 public override string ReadLine()
443                 {
444                         if (base_stream == null)
445                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
446
447                         if (pos >= decoded_count && ReadBuffer () == 0)
448                                 return null;
449
450                         int begin = pos;
451                         int end = FindNextEOL ();
452                         if (end < decoded_count && end >= begin)
453                                 return new string (decoded_buffer, begin, end - begin);
454                         else if (end == -2)
455                                 return line_builder.ToString (0, line_builder.Length);
456
457                         if (line_builder == null)
458                                 line_builder = new StringBuilder ();
459                         else
460                                 line_builder.Length = 0;
461
462                         while (true) {
463                                 if (foundCR) // don't include the trailing CR if present
464                                         decoded_count--;
465
466                                 line_builder.Append (decoded_buffer, begin, decoded_count - begin);
467                                 if (ReadBuffer () == 0) {
468                                         if (line_builder.Capacity > 32768) {
469                                                 StringBuilder sb = line_builder;
470                                                 line_builder = null;
471                                                 return sb.ToString (0, sb.Length);
472                                         }
473                                         return line_builder.ToString (0, line_builder.Length);
474                                 }
475
476                                 begin = pos;
477                                 end = FindNextEOL ();
478                                 if (end < decoded_count && end >= begin) {
479                                         line_builder.Append (decoded_buffer, begin, end - begin);
480                                         if (line_builder.Capacity > 32768) {
481                                                 StringBuilder sb = line_builder;
482                                                 line_builder = null;
483                                                 return sb.ToString (0, sb.Length);
484                                         }
485                                         return line_builder.ToString (0, line_builder.Length);
486                                 } else if (end == -2)
487                                         return line_builder.ToString (0, line_builder.Length);
488                         }
489                 }
490
491                 public override string ReadToEnd()
492                 {
493                         if (base_stream == null)
494                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
495
496                         StringBuilder text = new StringBuilder ();
497
498                         int size = decoded_buffer.Length;
499                         char [] buffer = new char [size];
500                         int len;
501                         
502                         while ((len = Read (buffer, 0, size)) > 0)
503                                 text.Append (buffer, 0, len);
504
505                         return text.ToString ();
506                 }
507         }
508 }