Merge pull request #167 from konrad-kruczynski/send_async_fix
[mono.git] / mcs / class / corlib / System.IO / StreamReader.cs
1 //
2 // System.IO.StreamReader.cs
3 //
4 // Authors:
5 //   Dietmar Maurer (dietmar@ximian.com)
6 //   Miguel de Icaza (miguel@ximian.com) 
7 //   Marek Safar (marek.safar@gmail.com)
8 //
9 // (C) Ximian, Inc.  http://www.ximian.com
10 // Copyright (C) 2004 Novell (http://www.novell.com)
11 // Copyright 2011 Xamarin Inc.
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining
14 // a copy of this software and associated documentation files (the
15 // "Software"), to deal in the Software without restriction, including
16 // without limitation the rights to use, copy, modify, merge, publish,
17 // distribute, sublicense, and/or sell copies of the Software, and to
18 // permit persons to whom the Software is furnished to do so, subject to
19 // the following conditions:
20 // 
21 // The above copyright notice and this permission notice shall be
22 // included in all copies or substantial portions of the Software.
23 // 
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
28 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
29 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
30 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 //
32
33 using System;
34 using System.Text;
35 using System.Runtime.InteropServices;
36 #if NET_4_5
37 using System.Threading.Tasks;
38 #endif
39
40 namespace System.IO {
41         [Serializable]
42         [ComVisible (true)]
43         public class StreamReader : TextReader
44         {
45                 sealed class NullStreamReader : StreamReader
46                 {
47                         public override int Peek ()
48                         {
49                                 return -1;
50                         }
51
52                         public override int Read ()
53                         {
54                                 return -1;
55                         }
56
57                         public override int Read ([In, Out] char[] buffer, int index, int count)
58                         {
59                                 return 0;
60                         }
61
62                         public override string ReadLine ()
63                         {
64                                 return null;
65                         }
66
67                         public override string ReadToEnd ()
68                         {
69                                 return String.Empty;
70                         }
71
72                         public override Stream BaseStream {
73                                 get { return Stream.Null; }
74                         }
75
76                         public override Encoding CurrentEncoding {
77                                 get { return Encoding.Unicode; }
78                         }
79                 }
80
                // Buffer size used by the Stream-based constructors that take no size.
                const int DefaultBufferSize = 1024;
                // Buffer size used by the path-based constructors that take no size.
                const int DefaultFileBufferSize = 4096;
                // Initialize () raises any smaller requested size to this value.
                const int MinimumBufferSize = 128;

                //
                // The input buffer: raw bytes read from base_stream
                //
                byte [] input_buffer;
                
                // Input buffer handed back by a disposed reader, ready for recycling.
                // Taken and returned under input_buffer_recycle_lock.
                static byte [] input_buffer_recycle;
                static object input_buffer_recycle_lock = new object ();

                //
                // The decoded buffer: chars produced by the decoder from the
                // input buffer above
                //
                char [] decoded_buffer;
                // Decoded buffer handed back by a disposed reader; it is taken and
                // returned under the same lock as input_buffer_recycle.
                static char[] decoded_buffer_recycle;

                Encoding encoding;
                Decoder decoder;
                // Scratch builder reused by ReadLine () for lines that span buffers.
                StringBuilder line_builder;
                // Null once the reader has been disposed (see CheckState ()).
                Stream base_stream;

                //
                // Number of decoded chars currently held in decoded_buffer.
                //
                int decoded_count;

                //
                // Current position in the decoded_buffer
                //
                int pos;

                //
                // The buffer size that we are using
                //
                int buffer_size;

                // Bit flags consumed by the first ReadBuffer (): 1 = detect the
                // encoding from byte order marks, 2 = the encoding has a preamble
                // to skip. Cleared to 0 after the first buffer was examined.
                int do_checks;
                
                // True when the last stream read returned fewer bytes than requested;
                // Read (char[], int, int) then stops instead of reading again.
                bool mayBlock;

#if NET_4_5
                // Task of the most recently started asynchronous read, if any.
                Task async_task;
                // When true, Dispose () does not close base_stream.
                readonly bool leave_open;
#endif

                public new static readonly StreamReader Null =  new NullStreamReader ();
                
                // Leaves every field unset; NullStreamReader constructs itself through this.
                private StreamReader() {}
132
133                 public StreamReader(Stream stream)
134                         : this (stream, Encoding.UTF8Unmarked, true, DefaultBufferSize) { }
135
136                 public StreamReader(Stream stream, bool detectEncodingFromByteOrderMarks)
137                         : this (stream, Encoding.UTF8Unmarked, detectEncodingFromByteOrderMarks, DefaultBufferSize) { }
138
139                 public StreamReader(Stream stream, Encoding encoding)
140                         : this (stream, encoding, true, DefaultBufferSize) { }
141
142                 public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
143                         : this (stream, encoding, detectEncodingFromByteOrderMarks, DefaultBufferSize) { }
144
#if NET_4_5
                // 4.5 profile: the four-argument overload forwards to the five-argument
                // one with leaveOpen = false, so the stream is closed on Dispose ().
                public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
                        : this (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize, false)
                {
                }

                // 4.5 profile: leaveOpen is stored in leave_open, which Dispose ()
                // consults before closing the stream.
                public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize, bool leaveOpen)
#else
                // Older profiles have no leaveOpen parameter: Dispose () always
                // closes the stream.
                const bool leave_open = false;

                public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
#endif
                {
                        // Shared body of whichever signature was selected above.
#if NET_4_5
                        leave_open = leaveOpen;
#endif
                        // Validates the arguments and sets up buffers and decoder.
                        Initialize (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
                }
163
164                 public StreamReader(string path)
165                         : this (path, Encoding.UTF8Unmarked, true, DefaultFileBufferSize) { }
166
167                 public StreamReader(string path, bool detectEncodingFromByteOrderMarks)
168                         : this (path, Encoding.UTF8Unmarked, detectEncodingFromByteOrderMarks, DefaultFileBufferSize) { }
169
170                 public StreamReader(string path, Encoding encoding)
171                         : this (path, encoding, true, DefaultFileBufferSize) { }
172
173                 public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
174                         : this (path, encoding, detectEncodingFromByteOrderMarks, DefaultFileBufferSize) { }
175                 
176                 public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
177                 {
178                         if (null == path)
179                                 throw new ArgumentNullException("path");
180                         if (String.Empty == path)
181                                 throw new ArgumentException("Empty path not allowed");
182                         if (path.IndexOfAny (Path.InvalidPathChars) != -1)
183                                 throw new ArgumentException("path contains invalid characters");
184                         if (null == encoding)
185                                 throw new ArgumentNullException ("encoding");
186                         if (bufferSize <= 0)
187                                 throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");
188
189                         Stream stream = (Stream) File.OpenRead (path);
190                         Initialize (stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
191                 }
192
193                 internal void Initialize (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
194                 {
195                         if (null == stream)
196                                 throw new ArgumentNullException ("stream");
197                         if (null == encoding)
198                                 throw new ArgumentNullException ("encoding");
199                         if (!stream.CanRead)
200                                 throw new ArgumentException ("Cannot read stream");
201                         if (bufferSize <= 0)
202                                 throw new ArgumentOutOfRangeException ("bufferSize", "The minimum size of the buffer must be positive");
203
204                         if (bufferSize < MinimumBufferSize)
205                                 bufferSize = MinimumBufferSize;
206                         
207                         // since GetChars() might add flushed character, it 
208                         // should have additional char buffer for extra 1 
209                         // (probably 1 is ok, but might be insufficient. I'm not sure)
210                         var decoded_buffer_size = encoding.GetMaxCharCount (bufferSize) + 1;
211
212                         //
213                         // Instead of allocating a new default buffer use the
214                         // last one if there is any available
215                         //
216                         if (bufferSize <= DefaultBufferSize && input_buffer_recycle != null) {
217                                 lock (input_buffer_recycle_lock) {
218                                         if (input_buffer_recycle != null) {
219                                                 input_buffer = input_buffer_recycle;
220                                                 input_buffer_recycle = null;
221                                         }
222                                         
223                                         if (decoded_buffer_recycle != null && decoded_buffer_size <= decoded_buffer_recycle.Length) {
224                                                 decoded_buffer = decoded_buffer_recycle;
225                                                 decoded_buffer_recycle = null;
226                                         }
227                                 }
228                         }
229                         
230                         if (input_buffer == null)
231                                 input_buffer = new byte [bufferSize];
232                         else
233                                 Array.Clear (input_buffer, 0, bufferSize);
234                         
235                         if (decoded_buffer == null)
236                                 decoded_buffer = new char [decoded_buffer_size];
237                         else
238                                 Array.Clear (decoded_buffer, 0, decoded_buffer_size);
239
240                         base_stream = stream;           
241                         this.buffer_size = bufferSize;
242                         this.encoding = encoding;
243                         decoder = encoding.GetDecoder ();
244
245                         byte [] preamble = encoding.GetPreamble ();
246                         do_checks = detectEncodingFromByteOrderMarks ? 1 : 0;
247                         do_checks += (preamble.Length == 0) ? 0 : 2;
248                         
249                         decoded_count = 0;
250                         pos = 0;
251                 }
252
253                 public virtual Stream BaseStream {
254                         get {
255                                 return base_stream;
256                         }
257                 }
258
259                 public virtual Encoding CurrentEncoding {
260                         get {
261                                 if (encoding == null)
262                                         throw new Exception ();
263                                 return encoding;
264                         }
265                 }
266
267                 public bool EndOfStream {
268                         get { return Peek () < 0; }
269                 }
270
271                 public override void Close ()
272                 {
273                         Dispose (true);
274                 }
275
276                 protected override void Dispose (bool disposing)
277                 {
278                         if (disposing && base_stream != null && !leave_open)
279                                 base_stream.Close ();
280                         
281                         if (input_buffer != null && input_buffer.Length == DefaultBufferSize && input_buffer_recycle == null) {
282                                 lock (input_buffer_recycle_lock) {
283                                         if (input_buffer_recycle == null) {
284                                                 input_buffer_recycle = input_buffer;
285                                         }
286                                         
287                                         if (decoded_buffer_recycle == null) {
288                                                 decoded_buffer_recycle = decoded_buffer;
289                                         }
290                                 }
291                         }
292                         
293                         input_buffer = null;
294                         decoded_buffer = null;
295                         encoding = null;
296                         decoder = null;
297                         base_stream = null;
298                         base.Dispose (disposing);
299                 }
300
301                 //
302                 // Provides auto-detection of the encoding, as well as skipping over
303                 // byte marks at the beginning of a stream.
304                 //
305                 int DoChecks (int count)
306                 {
307                         if ((do_checks & 2) == 2){
308                                 byte [] preamble = encoding.GetPreamble ();
309                                 int c = preamble.Length;
310                                 if (count >= c){
311                                         int i;
312                                         
313                                         for (i = 0; i < c; i++)
314                                                 if (input_buffer [i] != preamble [i])
315                                                         break;
316
317                                         if (i == c)
318                                                 return i;
319                                 }
320                         }
321
322                         if ((do_checks & 1) == 1){
323                                 if (count < 2)
324                                         return 0;
325
326                                 if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff){
327                                         this.encoding = Encoding.BigEndianUnicode;
328                                         return 2;
329                                 }
330                                 if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe && count < 4) {
331                                         // If we don't have enough bytes we can't check for UTF32, so use Unicode
332                                         this.encoding = Encoding.Unicode;
333                                         return 2;
334                                 }
335
336                                 if (count < 3)
337                                         return 0;
338
339                                 if (input_buffer [0] == 0xef && input_buffer [1] == 0xbb && input_buffer [2] == 0xbf){
340                                         this.encoding = Encoding.UTF8Unmarked;
341                                         return 3;
342                                 }
343
344                                 if (count < 4) {
345                                         if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe && input_buffer [2] != 0) {
346                                                 this.encoding = Encoding.Unicode;
347                                                 return 2;
348                                         }
349                                         return 0;
350                                 }
351
352                                 if (input_buffer [0] == 0 && input_buffer [1] == 0
353                                         && input_buffer [2] == 0xfe && input_buffer [3] == 0xff)
354                                 {
355                                         this.encoding = Encoding.BigEndianUTF32;
356                                         return 4;
357                                 }
358
359                                 if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe) {
360                                         if (input_buffer [2] == 0 && input_buffer[3] == 0) {
361                                                 this.encoding = Encoding.UTF32;
362                                                 return 4;
363                                         }
364
365                                         this.encoding = Encoding.Unicode;
366                                         return 2;
367                                 }
368                         }
369
370                         return 0;
371                 }
372
373                 public void DiscardBufferedData ()
374                 {
375                         CheckState ();
376
377                         pos = decoded_count = 0;
378                         mayBlock = false;
379                         // Discard internal state of the decoder too.
380                         decoder = encoding.GetDecoder ();
381                 }
382                 
383                 // the buffer is empty, fill it again
384                 private int ReadBuffer ()
385                 {
386                         pos = 0;
387                         int cbEncoded = 0;
388
389                         // keep looping until the decoder gives us some chars
390                         decoded_count = 0;
391                         int parse_start = 0;
392                         do      
393                         {
394                                 cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
395                                 
396                                 if (cbEncoded <= 0)
397                                         return 0;
398
399                                 mayBlock = (cbEncoded < buffer_size);
400                                 if (do_checks > 0){
401                                         Encoding old = encoding;
402                                         parse_start = DoChecks (cbEncoded);
403                                         if (old != encoding){
404                                                 int old_decoded_size = old.GetMaxCharCount (buffer_size) + 1;
405                                                 int new_decoded_size = encoding.GetMaxCharCount (buffer_size) + 1;
406                                                 if (old_decoded_size != new_decoded_size)
407                                                         decoded_buffer = new char [new_decoded_size];
408                                                 decoder = encoding.GetDecoder ();
409                                         }
410                                         do_checks = 0;
411                                         cbEncoded -= parse_start;
412                                 }
413                                 
414                                 decoded_count += decoder.GetChars (input_buffer, parse_start, cbEncoded, decoded_buffer, 0);
415                                 parse_start = 0;
416                         } while (decoded_count == 0);
417
418                         return decoded_count;
419                 }
420
421                 //
422                 // Peek can block:
423                 // http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=96484
424                 //
425                 public override int Peek ()
426                 {
427                         CheckState ();
428
429                         if (pos >= decoded_count && ReadBuffer () == 0)
430                                 return -1;
431
432                         return decoded_buffer [pos];
433                 }
434
435                 //
436                 // Used internally by our console, as it previously depended on Peek() being a
437                 // routine that would not block.
438                 //
439                 internal bool DataAvailable ()
440                 {
441                         return pos < decoded_count;
442                 }
443                 
444                 public override int Read ()
445                 {
446                         CheckState ();
447
448                         if (pos >= decoded_count && ReadBuffer () == 0)
449                                 return -1;
450
451                         return decoded_buffer [pos++];
452                 }
453
454                 public override int Read ([In, Out] char[] buffer, int index, int count)
455                 {
456                         if (buffer == null)
457                                 throw new ArgumentNullException ("buffer");
458                         if (index < 0)
459                                 throw new ArgumentOutOfRangeException ("index", "< 0");
460                         if (count < 0)
461                                 throw new ArgumentOutOfRangeException ("count", "< 0");
462                         // re-ordered to avoid possible integer overflow
463                         if (index > buffer.Length - count)
464                                 throw new ArgumentException ("index + count > buffer.Length");
465
466                         CheckState ();
467
468                         int chars_read = 0;
469                         while (count > 0)
470                         {
471                                 if (pos >= decoded_count && ReadBuffer () == 0)
472                                         return chars_read > 0 ? chars_read : 0;
473
474                                 int cch = Math.Min (decoded_count - pos, count);
475                                 Array.Copy (decoded_buffer, pos, buffer, index, cch);
476                                 pos += cch;
477                                 index += cch;
478                                 count -= cch;
479                                 chars_read += cch;
480                                 if (mayBlock)
481                                         break;
482                         }
483                         return chars_read;
484                 }
485
486                 bool foundCR;
487                 int FindNextEOL ()
488                 {
489                         char c = '\0';
490                         for (; pos < decoded_count; pos++) {
491                                 c = decoded_buffer [pos];
492                                 if (c == '\n') {
493                                         pos++;
494                                         int res = (foundCR) ? (pos - 2) : (pos - 1);
495                                         if (res < 0)
496                                                 res = 0; // if a new buffer starts with a \n and there was a \r at
497                                                         // the end of the previous one, we get here.
498                                         foundCR = false;
499                                         return res;
500                                 } else if (foundCR) {
501                                         foundCR = false;
502                                         if (pos == 0)
503                                                 return -2; // Need to flush the current buffered line.
504                                                            // This is a \r at the end of the previous decoded buffer that
505                                                            // is not followed by a \n in the current decoded buffer.
506                                         return pos - 1;
507                                 }
508
509                                 foundCR = (c == '\r');
510                         }
511
512                         return -1;
513                 }
514
515                 public override string ReadLine()
516                 {
517                         CheckState ();
518
519                         if (pos >= decoded_count && ReadBuffer () == 0)
520                                 return null;
521
522                         int begin = pos;
523                         int end = FindNextEOL ();
524                         if (end < decoded_count && end >= begin)
525                                 return new string (decoded_buffer, begin, end - begin);
526                         else if (end == -2)
527                                 return line_builder.ToString (0, line_builder.Length);
528
529                         if (line_builder == null)
530                                 line_builder = new StringBuilder ();
531                         else
532                                 line_builder.Length = 0;
533
534                         while (true) {
535                                 if (foundCR) // don't include the trailing CR if present
536                                         decoded_count--;
537
538                                 line_builder.Append (decoded_buffer, begin, decoded_count - begin);
539                                 if (ReadBuffer () == 0) {
540                                         if (line_builder.Capacity > 32768) {
541                                                 StringBuilder sb = line_builder;
542                                                 line_builder = null;
543                                                 return sb.ToString (0, sb.Length);
544                                         }
545                                         return line_builder.ToString (0, line_builder.Length);
546                                 }
547
548                                 begin = pos;
549                                 end = FindNextEOL ();
550                                 if (end < decoded_count && end >= begin) {
551                                         line_builder.Append (decoded_buffer, begin, end - begin);
552                                         if (line_builder.Capacity > 32768) {
553                                                 StringBuilder sb = line_builder;
554                                                 line_builder = null;
555                                                 return sb.ToString (0, sb.Length);
556                                         }
557                                         return line_builder.ToString (0, line_builder.Length);
558                                 } else if (end == -2)
559                                         return line_builder.ToString (0, line_builder.Length);
560                         }
561                 }
562
563                 public override string ReadToEnd()
564                 {
565                         CheckState ();
566
567                         StringBuilder text = new StringBuilder ();
568
569                         int size = decoded_buffer.Length;
570                         char [] buffer = new char [size];
571                         int len;
572                         
573                         while ((len = Read (buffer, 0, size)) > 0)
574                                 text.Append (buffer, 0, len);
575
576                         return text.ToString ();
577                 }
578
579                 void CheckState ()
580                 {
581                         if (base_stream == null)
582                                 throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
583
584 #if NET_4_5
585                         if (async_task != null && async_task.IsCompleted)
586                                 throw new InvalidOperationException ();
587 #endif
588                 }
589
590 #if NET_4_5
591                 public override int ReadBlock ([In, Out] char[] buffer, int index, int count)
592                 {
593                         if (buffer == null)
594                                 throw new ArgumentNullException ("buffer");
595                         if (index < 0)
596                                 throw new ArgumentOutOfRangeException ("index", "< 0");
597                         if (count < 0)
598                                 throw new ArgumentOutOfRangeException ("count", "< 0");
599                         // re-ordered to avoid possible integer overflow
600                         if (index > buffer.Length - count)
601                                 throw new ArgumentException ("index + count > buffer.Length");
602
603                         CheckState ();
604
605                         return base.ReadBlock (buffer, index, count);
606                 }
607
608                 public override Task<int> ReadAsync (char[] buffer, int index, int count)
609                 {
610                         CheckState ();
611
612                         Task<int> res;
613                         async_task = res = base.ReadAsync (buffer, index, count);
614                         return res;
615                 }
616
617                 public override Task<int> ReadBlockAsync (char[] buffer, int index, int count)
618                 {
619                         CheckState ();
620
621                         Task<int> res;
622                         async_task = res = base.ReadBlockAsync (buffer, index, count);
623                         return res;
624                 }
625
626                 public override Task<string> ReadLineAsync ()
627                 {
628                         CheckState ();
629
630                         Task<string> res;
631                         async_task = res = base.ReadLineAsync ();
632                         return res;
633                 }
634
635                 public override Task<string> ReadToEndAsync ()
636                 {
637                         CheckState ();
638
639                         Task<string> res;
640                         async_task = res = base.ReadToEndAsync ();
641                         return res;
642                 }
643
644 #endif
645         }
646 }