New test.
[mono.git] / mcs / class / corlib / System.IO / StreamReader.cs
//
// System.IO.StreamReader.cs
//
// Author:
//   Dietmar Maurer (dietmar@ximian.com)
//   Miguel de Icaza (miguel@ximian.com)
//   Marek Safar (marek.safar@gmail.com)
//
// (C) Ximian, Inc.  http://www.ximian.com
// Copyright (C) 2004 Novell (http://www.novell.com)
//
12
//
// Copyright (C) 2004 Novell, Inc (http://www.novell.com)
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
35
36 using System;
37 using System.Text;
38 using System.Runtime.InteropServices;
39
namespace System.IO {
	//
	// A TextReader that pulls bytes from a Stream, decodes them with an
	// Encoding and hands the resulting characters out one at a time, in
	// blocks, or line by line.  The encoding can be replaced on the first
	// read when a byte order mark is found at the start of the stream.
	//
	[Serializable]
	[ComVisible (true)]
	public class StreamReader : TextReader {

		const int DefaultBufferSize = 1024;
		const int DefaultFileBufferSize = 4096;
		const int MinimumBufferSize = 128;

		//
		// A cached line builder whose capacity grew beyond this value is
		// released once its line has been returned, instead of being kept
		// around for the next ReadLine call.
		//
		const int MaxCachedLineBuilderCapacity = 32768;

		//
		// The input buffer (raw bytes read from the base stream)
		//
		byte [] input_buffer;

		// Input buffer ready for recycling, shared by all readers
		static byte [] input_buffer_recycle;
		static readonly object input_buffer_recycle_lock = new object ();

		//
		// The decoded buffer from the above input buffer
		//
		char [] decoded_buffer;

		// Decoded buffer ready for recycling; guarded by the same lock
		// as input_buffer_recycle
		static char[] decoded_buffer_recycle;

		//
		// Number of decoded chars available in decoded_buffer.
		//
		int decoded_count;

		//
		// Current position in the decoded_buffer
		//
		int pos;

		//
		// The buffer size that we are using
		//
		int buffer_size;

		//
		// Checks still pending for the first block read from the stream:
		// bit 0 (1) - detect the encoding from a byte order mark
		// bit 1 (2) - the current encoding has a preamble to skip
		// Reset to 0 after the first non-empty read.
		//
		int do_checks;

		Encoding encoding;
		Decoder decoder;

		Stream base_stream;

		//
		// True when the last read from the base stream returned fewer
		// bytes than requested; Read (char[], int, int) then returns what
		// it has instead of asking the stream for more.
		//
		bool mayBlock;

		// Accumulates a line that spans more than one decoded buffer
		StringBuilder line_builder;

		//
		// Reader returned by StreamReader.Null: never yields any data.
		//
		private class NullStreamReader : StreamReader {
			public override int Peek ()
			{
				return -1;
			}

			public override int Read ()
			{
				return -1;
			}

			public override int Read ([In, Out] char[] buffer, int index, int count)
			{
				return 0;
			}

			public override string ReadLine ()
			{
				return null;
			}

			public override string ReadToEnd ()
			{
				return String.Empty;
			}

			public override Stream BaseStream
			{
				get { return Stream.Null; }
			}

			public override Encoding CurrentEncoding
			{
				get { return Encoding.Unicode; }
			}
		}

		public new static readonly StreamReader Null =  new NullStreamReader ();

		// Leaves every field unset; used by NullStreamReader above.
		internal StreamReader() {}

		public StreamReader(Stream stream)
			: this (stream, Encoding.UTF8Unmarked, true, DefaultBufferSize) { }

		public StreamReader(Stream stream, bool detectEncodingFromByteOrderMarks)
			: this (stream, Encoding.UTF8Unmarked, detectEncodingFromByteOrderMarks, DefaultBufferSize) { }

		public StreamReader(Stream stream, Encoding encoding)
			: this (stream, encoding, true, DefaultBufferSize) { }

		public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
			: this (stream, encoding, detectEncodingFromByteOrderMarks, DefaultBufferSize) { }

		public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
		{
			Initialize (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
		}

		public StreamReader(string path)
			: this (path, Encoding.UTF8Unmarked, true, DefaultFileBufferSize) { }

		public StreamReader(string path, bool detectEncodingFromByteOrderMarks)
			: this (path, Encoding.UTF8Unmarked, detectEncodingFromByteOrderMarks, DefaultFileBufferSize) { }

		public StreamReader(string path, Encoding encoding)
			: this (path, encoding, true, DefaultFileBufferSize) { }

		public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
			: this (path, encoding, detectEncodingFromByteOrderMarks, DefaultFileBufferSize) { }

		//
		// Opens `path' for reading and wraps the resulting stream.  All
		// arguments are validated before the file is opened.
		//
		public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
		{
			if (null == path)
				throw new ArgumentNullException("path");
			if (path.Length == 0)
				throw new ArgumentException("Empty path not allowed");
			if (path.IndexOfAny (Path.InvalidPathChars) != -1)
				throw new ArgumentException("path contains invalid characters");
			if (null == encoding)
				throw new ArgumentNullException ("encoding");
			if (bufferSize <= 0)
				throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");

			Stream stream = (Stream) File.OpenRead (path);
			Initialize (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
		}

		//
		// Validates the arguments, obtains the byte and char buffers
		// (recycled ones when available) and resets the reader state.
		// A bufferSize below MinimumBufferSize is raised to that minimum.
		//
		internal void Initialize (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
		{
			if (null == stream)
				throw new ArgumentNullException ("stream");
			if (null == encoding)
				throw new ArgumentNullException ("encoding");
			if (!stream.CanRead)
				throw new ArgumentException ("Cannot read stream");
			if (bufferSize <= 0)
				throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");

			if (bufferSize < MinimumBufferSize)
				bufferSize = MinimumBufferSize;

			// GetChars() might also emit a character the decoder was still
			// holding, so reserve one extra slot on top of the maximum.
			// NOTE(review): the original author was not sure that a single
			// extra char is always enough.
			var decoded_buffer_size = encoding.GetMaxCharCount (bufferSize) + 1;

			//
			// Instead of allocating a new default buffer use the
			// last one if there is any available.  The unlocked test is
			// only a cheap pre-check; it is repeated under the lock.
			//
			if (bufferSize <= DefaultBufferSize && input_buffer_recycle != null) {
				lock (input_buffer_recycle_lock) {
					if (input_buffer_recycle != null) {
						input_buffer = input_buffer_recycle;
						input_buffer_recycle = null;
					}

					if (decoded_buffer_recycle != null && decoded_buffer_size <= decoded_buffer_recycle.Length) {
						decoded_buffer = decoded_buffer_recycle;
						decoded_buffer_recycle = null;
					}
				}
			}

			// Recycled buffers still hold the previous reader's data:
			// wipe the part this reader is going to use.
			if (input_buffer == null)
				input_buffer = new byte [bufferSize];
			else
				Array.Clear (input_buffer, 0, bufferSize);

			if (decoded_buffer == null)
				decoded_buffer = new char [decoded_buffer_size];
			else
				Array.Clear (decoded_buffer, 0, decoded_buffer_size);

			base_stream = stream;
			this.buffer_size = bufferSize;
			this.encoding = encoding;
			decoder = encoding.GetDecoder ();

			byte [] preamble = encoding.GetPreamble ();
			do_checks = detectEncodingFromByteOrderMarks ? 1 : 0;
			do_checks += (preamble.Length == 0) ? 0 : 2;

			decoded_count = 0;
			pos = 0;
		}

		public virtual Stream BaseStream
		{
			get {
				return base_stream;
			}
		}

		//
		// The encoding currently used for decoding.  It can differ from
		// the one given to the constructor once a byte order mark has
		// been detected on the first read.
		//
		public virtual Encoding CurrentEncoding
		{
			get {
				// The encoding is dropped by Dispose; report that with
				// a specific exception rather than a bare Exception.
				if (encoding == null)
					throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
				return encoding;
			}
		}

		// Like Peek, this may have to read from the base stream.
		public bool EndOfStream {
			get { return Peek () < 0; }
		}

		public override void Close ()
		{
			Dispose (true);
		}

		//
		// Closes the base stream (when disposing), offers default-sized
		// buffers for recycling and drops all references.
		//
		protected override void Dispose (bool disposing)
		{
			if (disposing && base_stream != null)
				base_stream.Close ();

			// Only input buffers of exactly DefaultBufferSize are recycled.
			// The decoded buffer is offered together with one regardless
			// of its length; Initialize checks the length before reuse.
			if (input_buffer != null && input_buffer.Length == DefaultBufferSize && input_buffer_recycle == null) {
				lock (input_buffer_recycle_lock) {
					if (input_buffer_recycle == null)
						input_buffer_recycle = input_buffer;

					if (decoded_buffer_recycle == null)
						decoded_buffer_recycle = decoded_buffer;
				}
			}

			input_buffer = null;
			decoded_buffer = null;
			encoding = null;
			decoder = null;
			base_stream = null;
			base.Dispose (disposing);
		}

		// Throws if the reader no longer has a base stream to read from.
		void CheckState ()
		{
			if (base_stream == null)
				throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
		}

		//
		// Provides auto-detection of the encoding, as well as skipping over
		// byte marks at the beginning of a stream.
		//
		// `count' is the number of valid bytes in input_buffer.  Returns
		// the number of leading bytes to skip (0 when no mark was found)
		// and may replace this.encoding.
		//
		int DoChecks (int count)
		{
			// The preamble of the current encoding wins over detection.
			if ((do_checks & 2) == 2){
				byte [] preamble = encoding.GetPreamble ();
				int c = preamble.Length;
				if (count >= c){
					int i;

					for (i = 0; i < c; i++)
						if (input_buffer [i] != preamble [i])
							break;

					if (i == c)
						return i;
				}
			}

			if ((do_checks & 1) == 1){
				if (count < 2)
					return 0;

				if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff){
					this.encoding = Encoding.BigEndianUnicode;
					return 2;
				}
				if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe && count < 4) {
					// If we don't have enough bytes we can't check for UTF32, so use Unicode
					this.encoding = Encoding.Unicode;
					return 2;
				}

				if (count < 3)
					return 0;

				if (input_buffer [0] == 0xef && input_buffer [1] == 0xbb && input_buffer [2] == 0xbf){
					this.encoding = Encoding.UTF8Unmarked;
					return 3;
				}

				// The four-byte marks below need four bytes.  The FF FE
				// case with fewer bytes has already been handled above.
				if (count < 4)
					return 0;

				if (input_buffer [0] == 0 && input_buffer [1] == 0
					&& input_buffer [2] == 0xfe && input_buffer [3] == 0xff)
				{
					this.encoding = Encoding.BigEndianUTF32;
					return 4;
				}

				if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe) {
					// FF FE 00 00 is UTF-32; FF FE followed by anything
					// else is UTF-16.
					if (input_buffer [2] == 0 && input_buffer[3] == 0) {
						this.encoding = Encoding.UTF32;
						return 4;
					}

					this.encoding = Encoding.Unicode;
					return 2;
				}
			}

			return 0;
		}

		//
		// Forgets everything that was buffered or partially decoded.
		// Does nothing to the decoder when the reader has been disposed.
		//
		public void DiscardBufferedData ()
		{
			pos = decoded_count = 0;
			mayBlock = false;
			// A pending CR belongs to the data being discarded.
			foundCR = false;
			// Discard internal state of the decoder too.
			if (encoding != null)
				decoder = encoding.GetDecoder ();
		}

		//
		// The buffer is empty, fill it again.  Returns the number of
		// chars now available in decoded_buffer, or 0 when the base
		// stream has no more data.
		//
		private int ReadBuffer ()
		{
			pos = 0;
			decoded_count = 0;
			int parse_start = 0;

			// keep looping until the decoder gives us some chars
			do {
				int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);

				if (cbEncoded <= 0)
					return 0;

				mayBlock = (cbEncoded < buffer_size);
				if (do_checks > 0){
					Encoding old = encoding;
					parse_start = DoChecks (cbEncoded);
					if (old != encoding){
						// Only reallocate when the detected encoding
						// can produce more chars than we have room for.
						int new_decoded_size = encoding.GetMaxCharCount (buffer_size) + 1;
						if (decoded_buffer.Length < new_decoded_size)
							decoded_buffer = new char [new_decoded_size];
						decoder = encoding.GetDecoder ();
					}
					do_checks = 0;
					cbEncoded -= parse_start;
				}

				decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
				parse_start = 0;
			} while (decoded_count == 0);

			return decoded_count;
		}

		//
		// Peek can block:
		// http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=96484
		//
		public override int Peek ()
		{
			CheckState ();
			if (pos >= decoded_count && ReadBuffer () == 0)
				return -1;

			return decoded_buffer [pos];
		}

		//
		// Used internally by our console, as it previously depended on Peek() being a
		// routine that would not block.
		//
		internal bool DataAvailable ()
		{
			return pos < decoded_count;
		}

		public override int Read ()
		{
			CheckState ();
			if (pos >= decoded_count && ReadBuffer () == 0)
				return -1;

			return decoded_buffer [pos++];
		}

		//
		// Copies up to `count' chars into `buffer' starting at `index'
		// and returns how many were copied (0 at the end of the stream).
		// Fewer than `count' chars are returned when the base stream
		// delivered a short read, so that we do not block asking for more.
		//
		public override int Read ([In, Out] char[] buffer, int index, int count)
		{
			CheckState ();
			if (buffer == null)
				throw new ArgumentNullException ("buffer");
			if (index < 0)
				throw new ArgumentOutOfRangeException ("index", "< 0");
			if (count < 0)
				throw new ArgumentOutOfRangeException ("count", "< 0");
			// re-ordered to avoid possible integer overflow
			if (index > buffer.Length - count)
				throw new ArgumentException ("index + count > buffer.Length");

			int chars_read = 0;
			while (count > 0) {
				if (pos >= decoded_count && ReadBuffer () == 0)
					break;

				int cch = Math.Min (decoded_count - pos, count);
				Array.Copy (decoded_buffer, pos, buffer, index, cch);
				pos += cch;
				index += cch;
				count -= cch;
				chars_read += cch;
				if (mayBlock)
					break;
			}
			return chars_read;
		}

		//
		// True when the last char examined by FindNextEOL was a CR whose
		// following char has not been seen yet.
		//
		bool foundCR;

		//
		// Scans decoded_buffer from `pos' for the end of the current line
		// and advances `pos' past the terminator.  Returns:
		//   >= 0  index one past the last char of the line
		//   -1    no terminator in the rest of the buffer
		//   -2    the previous buffer ended with a CR that terminated the
		//         line; the current buffer starts a new line
		//
		int FindNextEOL ()
		{
			char c = '\0';
			for (; pos < decoded_count; pos++) {
				c = decoded_buffer [pos];
				if (c == '\n') {
					pos++;
					int res = (foundCR) ? (pos - 2) : (pos - 1);
					if (res < 0)
						res = 0; // if a new buffer starts with a \n and there was a \r at
							// the end of the previous one, we get here.
					foundCR = false;
					return res;
				} else if (foundCR) {
					foundCR = false;
					if (pos == 0)
						return -2; // Need to flush the current buffered line.
							   // This is a \r at the end of the previous decoded buffer that
							   // is not followed by a \n in the current decoded buffer.
					return pos - 1;
				}

				foundCR = (c == '\r');
			}

			return -1;
		}

		//
		// Returns the text accumulated in line_builder, releasing the
		// builder when it has grown too large to be worth keeping.
		//
		string TakeBufferedLine ()
		{
			StringBuilder sb = line_builder;
			if (sb == null)
				return String.Empty;
			if (sb.Capacity > MaxCachedLineBuilderCapacity)
				line_builder = null;
			return sb.ToString (0, sb.Length);
		}

		//
		// Returns the next line without its terminator (LF, CR or CR LF),
		// or null when the end of the stream has been reached.
		//
		public override string ReadLine()
		{
			CheckState ();

			if (pos >= decoded_count && ReadBuffer () == 0)
				return null;

			// Fast path: the whole line is inside the current buffer.
			int begin = pos;
			int end = FindNextEOL ();
			if (end < decoded_count && end >= begin)
				return new string (decoded_buffer, begin, end - begin);
			if (end == -2)
				return TakeBufferedLine ();

			if (line_builder == null)
				line_builder = new StringBuilder ();
			else
				line_builder.Length = 0;

			while (true) {
				if (foundCR) // don't include the trailing CR if present
					decoded_count--;

				line_builder.Append (decoded_buffer, begin, decoded_count - begin);
				if (ReadBuffer () == 0) {
					// End of stream.  A trailing CR has been consumed as
					// this line's terminator; forget it, otherwise a
					// later call that finds new data would see -2 from
					// FindNextEOL and return this same line again.
					foundCR = false;
					return TakeBufferedLine ();
				}

				begin = pos;
				end = FindNextEOL ();
				if (end < decoded_count && end >= begin) {
					line_builder.Append (decoded_buffer, begin, end - begin);
					return TakeBufferedLine ();
				}
				if (end == -2)
					return TakeBufferedLine ();
			}
		}

		//
		// Returns everything from the current position to the end of
		// the stream (an empty string when nothing is left).
		//
		public override string ReadToEnd()
		{
			CheckState ();

			StringBuilder text = new StringBuilder ();

			// Append straight from the decoded buffer; ReadBuffer resets
			// pos and decoded_count before refilling it.
			do {
				if (pos < decoded_count)
					text.Append (decoded_buffer, pos, decoded_count - pos);
			} while (ReadBuffer () != 0);

			return text.ToString ();
		}
	}
}