3 // Copyright (C) 2001 Mike Krueger
\r
4 // Copyright (C) 2004 John Reilly
\r
6 // This file was translated from java, it was part of the GNU Classpath
\r
7 // Copyright (C) 2001 Free Software Foundation, Inc.
\r
9 // This program is free software; you can redistribute it and/or
\r
10 // modify it under the terms of the GNU General Public License
\r
11 // as published by the Free Software Foundation; either version 2
\r
12 // of the License, or (at your option) any later version.
\r
14 // This program is distributed in the hope that it will be useful,
\r
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
17 // GNU General Public License for more details.
\r
19 // You should have received a copy of the GNU General Public License
\r
20 // along with this program; if not, write to the Free Software
\r
21 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
\r
23 // Linking this library statically or dynamically with other modules is
\r
24 // making a combined work based on this library. Thus, the terms and
\r
25 // conditions of the GNU General Public License cover the whole
\r
28 // As a special exception, the copyright holders of this library give you
\r
29 // permission to link this library with independent modules to produce an
\r
30 // executable, regardless of the license terms of these independent
\r
31 // modules, and to copy and distribute the resulting executable under
\r
32 // terms of your choice, provided that you also meet, for each linked
\r
33 // independent module, the terms and conditions of the license of that
\r
34 // module. An independent module is a module which is not derived from
\r
35 // or based on this library. If you modify this library, you may extend
\r
36 // this exception to your version of the library, but you are not
\r
37 // obligated to do so. If you do not wish to do so, delete this
\r
38 // exception statement from your version.
\r
42 using ICSharpCode.SharpZipLib.Checksums;
\r
43 using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
\r
45 namespace ICSharpCode.SharpZipLib.Zip.Compression
\r
49 /// Inflater is used to decompress data that has been compressed according
\r
50 /// to the "deflate" standard described in rfc1951.
\r
52 /// By default Zlib (rfc1950) headers and footers are expected in the input.
\r
53 /// You can use constructor <code> public Inflater(bool noHeader)</code> passing true
\r
54 /// if there is no Zlib header information
\r
56 /// The usage is as follows. First you have to set some input with
\r
57 /// <code>setInput()</code>, then inflate() it. If inflate doesn't
\r
58 /// inflate any bytes there may be three reasons:
\r
60 /// <li>needsInput() returns true because the input buffer is empty.
\r
61 /// You have to provide more input with <code>setInput()</code>.
\r
62 /// NOTE: needsInput() also returns true when the stream is finished.
\r
64 /// <li>needsDictionary() returns true, you have to provide a preset
\r
65 /// dictionary with <code>setDictionary()</code>.</li>
\r
66 /// <li>finished() returns true, the inflater has finished.</li>
\r
68 /// Once the first output byte is produced, a dictionary will not be
\r
69 /// needed at a later stage.
\r
71 /// author of the original java version : John Leuner, Jochen Hoenicke
\r
73 public class Inflater
\r
76 /// Copy lengths for literal codes 257..285
\r
78 static int[] CPLENS = {
\r
79 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
\r
80 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
\r
84 /// Extra bits for literal codes 257..285
\r
86 static int[] CPLEXT = {
\r
87 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
\r
88 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
\r
92 /// Copy offsets for distance codes 0..29
\r
94 static int[] CPDIST = {
\r
95 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
\r
96 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
\r
97 8193, 12289, 16385, 24577
\r
101 /// Extra bits for distance codes
\r
103 static int[] CPDEXT = {
\r
104 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
\r
105 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
\r
110 /// These are the possible states for an inflater
\r
112 const int DECODE_HEADER = 0;
\r
113 const int DECODE_DICT = 1;
\r
114 const int DECODE_BLOCKS = 2;
\r
115 const int DECODE_STORED_LEN1 = 3;
\r
116 const int DECODE_STORED_LEN2 = 4;
\r
117 const int DECODE_STORED = 5;
\r
118 const int DECODE_DYN_HEADER = 6;
\r
119 const int DECODE_HUFFMAN = 7;
\r
120 const int DECODE_HUFFMAN_LENBITS = 8;
\r
121 const int DECODE_HUFFMAN_DIST = 9;
\r
122 const int DECODE_HUFFMAN_DISTBITS = 10;
\r
123 const int DECODE_CHKSUM = 11;
\r
124 const int FINISHED = 12;
\r
127 /// This variable contains the current state.
\r
132 /// The adler checksum of the dictionary or of the decompressed
\r
133 /// stream, as it is written in the header resp. footer of the
\r
134 /// compressed stream.
\r
135 /// Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
\r
140 /// The number of bits needed to complete the current state. This
\r
141 /// is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
\r
142 /// DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
\r
150 /// True, if the last block flag was set in the last block of the
\r
151 /// inflated stream. This means that the stream ends after the
\r
157 /// The total number of inflated bytes.
\r
162 /// The total number of bytes set with setInput(). This is not the
\r
163 /// value returned by the TotalIn property, since this also includes the
\r
164 /// unprocessed input.
\r
169 /// This variable stores the noHeader flag that was given to the constructor.
\r
170 /// True means, that the inflated stream doesn't contain a Zlib header or
\r
175 StreamManipulator input;
\r
176 OutputWindow outputWindow;
\r
177 InflaterDynHeader dynHeader;
\r
178 InflaterHuffmanTree litlenTree, distTree;
\r
182 /// Creates a new inflater or RFC1951 decompressor
\r
183 /// RFC1950/Zlib headers and footers will be expected in the input data
\r
185 public Inflater() : this(false)
\r
190 /// Creates a new inflater.
\r
192 /// <param name="noHeader">
\r
193 /// True if no RFC1950/Zlib header and footer fields are expected in the input data
\r
195 /// This is used for GZIPed/Zipped input.
\r
197 /// For compatibility with
\r
198 /// Sun JDK you should provide one byte of input more than needed in
\r
201 public Inflater(bool noHeader)
\r
203 this.noHeader = noHeader;
\r
204 this.adler = new Adler32();
\r
205 input = new StreamManipulator();
\r
206 outputWindow = new OutputWindow();
\r
207 mode = noHeader ? DECODE_BLOCKS : DECODE_HEADER;
\r
211 /// Resets the inflater so that a new stream can be decompressed. All
\r
212 /// pending input and output will be discarded.
\r
214 public void Reset()
\r
216 mode = noHeader ? DECODE_BLOCKS : DECODE_HEADER;
\r
217 totalIn = totalOut = 0;
\r
219 outputWindow.Reset();
\r
223 isLastBlock = false;
\r
228 /// Decodes a zlib/RFC1950 header.
\r
231 /// False if more input is needed.
\r
233 /// <exception cref="SharpZipBaseException">
\r
234 /// The header is invalid.
\r
236 private bool DecodeHeader()
\r
238 int header = input.PeekBits(16);
\r
242 input.DropBits(16);
\r
244 /* The header is written in "wrong" byte order */
\r
245 header = ((header << 8) | (header >> 8)) & 0xffff;
\r
246 if (header % 31 != 0) {
\r
247 throw new SharpZipBaseException("Header checksum illegal");
\r
250 if ((header & 0x0f00) != (Deflater.DEFLATED << 8)) {
\r
251 throw new SharpZipBaseException("Compression Method unknown");
\r
254 /* Maximum size of the backwards window in bits.
\r
255 * We currently ignore this, but we could use it to make the
\r
256 * inflater window more space efficient. On the other hand the
\r
257 * full window (15 bits) is needed most times, anyway.
\r
258 int max_wbits = ((header & 0x7000) >> 12) + 8;
\r
261 if ((header & 0x0020) == 0) { // Dictionary flag?
\r
262 mode = DECODE_BLOCKS;
\r
264 mode = DECODE_DICT;
\r
271 /// Decodes the dictionary checksum after the deflate header.
\r
274 /// False if more input is needed.
\r
276 private bool DecodeDict()
\r
278 while (neededBits > 0) {
\r
279 int dictByte = input.PeekBits(8);
\r
280 if (dictByte < 0) {
\r
284 readAdler = (readAdler << 8) | dictByte;
\r
291 /// Decodes the huffman encoded symbols in the input stream.
\r
294 /// false if more input is needed, true if output window is
\r
295 /// full or the current block ends.
\r
297 /// <exception cref="SharpZipBaseException">
\r
298 /// if deflated stream is invalid.
\r
300 private bool DecodeHuffman()
\r
302 int free = outputWindow.GetFreeSpace();
\r
303 while (free >= 258) {
\r
306 case DECODE_HUFFMAN:
\r
307 /* This is the inner loop so it is optimized a bit */
\r
308 while (((symbol = litlenTree.GetSymbol(input)) & ~0xff) == 0) {
\r
309 outputWindow.Write(symbol);
\r
310 if (--free < 258) {
\r
315 if (symbol < 257) {
\r
319 /* symbol == 256: end of block */
\r
322 mode = DECODE_BLOCKS;
\r
328 repLength = CPLENS[symbol - 257];
\r
329 neededBits = CPLEXT[symbol - 257];
\r
330 } catch (Exception) {
\r
331 throw new SharpZipBaseException("Illegal rep length code");
\r
333 goto case DECODE_HUFFMAN_LENBITS; /* fall through */
\r
335 case DECODE_HUFFMAN_LENBITS:
\r
336 if (neededBits > 0) {
\r
337 mode = DECODE_HUFFMAN_LENBITS;
\r
338 int i = input.PeekBits(neededBits);
\r
342 input.DropBits(neededBits);
\r
345 mode = DECODE_HUFFMAN_DIST;
\r
346 goto case DECODE_HUFFMAN_DIST;/* fall through */
\r
348 case DECODE_HUFFMAN_DIST:
\r
349 symbol = distTree.GetSymbol(input);
\r
355 repDist = CPDIST[symbol];
\r
356 neededBits = CPDEXT[symbol];
\r
357 } catch (Exception) {
\r
358 throw new SharpZipBaseException("Illegal rep dist code");
\r
361 goto case DECODE_HUFFMAN_DISTBITS;/* fall through */
\r
363 case DECODE_HUFFMAN_DISTBITS:
\r
364 if (neededBits > 0) {
\r
365 mode = DECODE_HUFFMAN_DISTBITS;
\r
366 int i = input.PeekBits(neededBits);
\r
370 input.DropBits(neededBits);
\r
374 outputWindow.Repeat(repLength, repDist);
\r
376 mode = DECODE_HUFFMAN;
\r
380 throw new SharpZipBaseException("Inflater unknown mode");
\r
387 /// Decodes the adler checksum after the deflate stream.
\r
390 /// false if more input is needed.
\r
392 /// <exception cref="SharpZipBaseException">
\r
393 /// If checksum doesn't match.
\r
395 private bool DecodeChksum()
\r
397 while (neededBits > 0) {
\r
398 int chkByte = input.PeekBits(8);
\r
403 readAdler = (readAdler << 8) | chkByte;
\r
406 if ((int) adler.Value != readAdler) {
\r
407 throw new SharpZipBaseException("Adler chksum doesn't match: " + (int)adler.Value + " vs. " + readAdler);
\r
414 /// Decodes the deflated stream.
\r
417 /// false if more input is needed, or if finished.
\r
419 /// <exception cref="SharpZipBaseException">
\r
420 /// if deflated stream is invalid.
\r
422 private bool Decode()
\r
425 case DECODE_HEADER:
\r
426 return DecodeHeader();
\r
428 return DecodeDict();
\r
429 case DECODE_CHKSUM:
\r
430 return DecodeChksum();
\r
432 case DECODE_BLOCKS:
\r
438 input.SkipToByteBoundary();
\r
440 mode = DECODE_CHKSUM;
\r
445 int type = input.PeekBits(3);
\r
451 if ((type & 1) != 0) {
\r
452 isLastBlock = true;
\r
454 switch (type >> 1){
\r
455 case DeflaterConstants.STORED_BLOCK:
\r
456 input.SkipToByteBoundary();
\r
457 mode = DECODE_STORED_LEN1;
\r
459 case DeflaterConstants.STATIC_TREES:
\r
460 litlenTree = InflaterHuffmanTree.defLitLenTree;
\r
461 distTree = InflaterHuffmanTree.defDistTree;
\r
462 mode = DECODE_HUFFMAN;
\r
464 case DeflaterConstants.DYN_TREES:
\r
465 dynHeader = new InflaterDynHeader();
\r
466 mode = DECODE_DYN_HEADER;
\r
469 throw new SharpZipBaseException("Unknown block type " + type);
\r
473 case DECODE_STORED_LEN1:
\r
475 if ((uncomprLen = input.PeekBits(16)) < 0) {
\r
478 input.DropBits(16);
\r
479 mode = DECODE_STORED_LEN2;
\r
481 goto case DECODE_STORED_LEN2; /* fall through */
\r
483 case DECODE_STORED_LEN2:
\r
485 int nlen = input.PeekBits(16);
\r
489 input.DropBits(16);
\r
490 if (nlen != (uncomprLen ^ 0xffff)) {
\r
491 throw new SharpZipBaseException("broken uncompressed block");
\r
493 mode = DECODE_STORED;
\r
495 goto case DECODE_STORED;/* fall through */
\r
497 case DECODE_STORED:
\r
499 int more = outputWindow.CopyStored(input, uncomprLen);
\r
500 uncomprLen -= more;
\r
501 if (uncomprLen == 0) {
\r
502 mode = DECODE_BLOCKS;
\r
505 return !input.IsNeedingInput;
\r
508 case DECODE_DYN_HEADER:
\r
509 if (!dynHeader.Decode(input)) {
\r
513 litlenTree = dynHeader.BuildLitLenTree();
\r
514 distTree = dynHeader.BuildDistTree();
\r
515 mode = DECODE_HUFFMAN;
\r
516 goto case DECODE_HUFFMAN; /* fall through */
\r
518 case DECODE_HUFFMAN:
\r
519 case DECODE_HUFFMAN_LENBITS:
\r
520 case DECODE_HUFFMAN_DIST:
\r
521 case DECODE_HUFFMAN_DISTBITS:
\r
522 return DecodeHuffman();
\r
528 throw new SharpZipBaseException("Inflater.Decode unknown mode");
\r
533 /// Sets the preset dictionary. This should only be called, if
\r
534 /// needsDictionary() returns true and it should set the same
\r
535 /// dictionary, that was used for deflating. The getAdler()
\r
536 /// function returns the checksum of the dictionary needed.
\r
538 /// <param name="buffer">
\r
539 /// The dictionary.
\r
541 public void SetDictionary(byte[] buffer)
\r
543 SetDictionary(buffer, 0, buffer.Length);
\r
547 /// Sets the preset dictionary. This should only be called, if
\r
548 /// needsDictionary() returns true and it should set the same
\r
549 /// dictionary, that was used for deflating. The getAdler()
\r
550 /// function returns the checksum of the dictionary needed.
\r
552 /// <param name="buffer">
\r
553 /// The dictionary.
\r
555 /// <param name="offset">
\r
556 /// The offset into buffer where the dictionary starts.
\r
558 /// <param name="len">
\r
559 /// The length of the dictionary.
\r
561 /// <exception cref="System.InvalidOperationException">
\r
562 /// No dictionary is needed.
\r
564 /// <exception cref="SharpZipBaseException">
\r
565 /// The adler checksum for the buffer is invalid
\r
567 public void SetDictionary(byte[] buffer, int offset, int len)
\r
569 if (!IsNeedingDictionary) {
\r
570 throw new InvalidOperationException();
\r
573 adler.Update(buffer, offset, len);
\r
574 if ((int)adler.Value != readAdler) {
\r
575 throw new SharpZipBaseException("Wrong adler checksum");
\r
578 outputWindow.CopyDict(buffer, offset, len);
\r
579 mode = DECODE_BLOCKS;
\r
583 /// Sets the input. This should only be called, if needsInput()
\r
586 /// <param name="buf">
\r
589 public void SetInput(byte[] buf)
\r
591 SetInput(buf, 0, buf.Length);
\r
595 /// Sets the input. This should only be called, if needsInput()
\r
598 /// <param name="buffer">
\r
599 /// The source of input data
\r
601 /// <param name="offset">
\r
602 /// The offset into buffer where the input starts.
\r
604 /// <param name="length">
\r
605 /// The number of bytes of input to use.
\r
607 /// <exception cref="System.InvalidOperationException">
\r
608 /// No input is needed.
\r
610 /// <exception cref="System.ArgumentOutOfRangeException">
\r
611 /// The offset and/or length are wrong.
\r
613 public void SetInput(byte[] buffer, int offset, int length)
\r
615 input.SetInput(buffer, offset, length);
\r
620 /// Inflates the compressed stream to the output buffer. If this
\r
621 /// returns 0, you should check, whether needsDictionary(),
\r
622 /// needsInput() or finished() returns true, to determine why no
\r
623 /// further output is produced.
\r
625 /// <param name = "buf">
\r
626 /// the output buffer.
\r
629 /// the number of bytes written to the buffer, 0 if no further
\r
630 /// output can be produced.
\r
632 /// <exception cref="System.ArgumentOutOfRangeException">
\r
633 /// if buf has length 0.
\r
635 /// <exception cref="SharpZipBaseException">
\r
636 /// if deflated stream is invalid.
\r
638 public int Inflate(byte[] buf)
\r
640 return Inflate(buf, 0, buf.Length);
\r
644 /// Inflates the compressed stream to the output buffer. If this
\r
645 /// returns 0, you should check, whether needsDictionary(),
\r
646 /// needsInput() or finished() returns true, to determine why no
\r
647 /// further output is produced.
\r
649 /// <param name = "buf">
\r
650 /// the output buffer.
\r
652 /// <param name = "offset">
\r
653 /// the offset into buffer where the output should start.
\r
655 /// <param name = "len">
\r
656 /// the maximum length of the output.
\r
659 /// the number of bytes written to the buffer, 0 if no further output can be produced.
\r
661 /// <exception cref="System.ArgumentOutOfRangeException">
\r
662 /// if len is less than 0.
\r
664 /// <exception cref="System.ArgumentOutOfRangeException">
\r
665 /// if the offset and/or len are wrong.
\r
667 /// <exception cref="SharpZipBaseException">
\r
668 /// if deflated stream is invalid.
\r
670 public int Inflate(byte[] buf, int offset, int len)
\r
673 throw new ArgumentOutOfRangeException("len < 0");
\r
676 // Special case: len may be zero
\r
678 if (IsFinished == false) { // -jr- 08-Nov-2003 INFLATE_BUG fix..
\r
684 // Check for correct buff, off, len triple
\r
685 if (off < 0 || off + len >= buf.Length) {
\r
686 throw new ArgumentException("off/len outside buf bounds");
\r
692 if (mode != DECODE_CHKSUM) {
\r
693 /* Don't give away any output, if we are waiting for the
\r
694 * checksum in the input stream.
\r
696 * With this trick we have always:
\r
697 * needsInput() and not finished()
\r
698 * implies more output can be produced.
\r
700 more = outputWindow.CopyOutput(buf, offset, len);
\r
701 adler.Update(buf, offset, more);
\r
710 } while (Decode() || (outputWindow.GetAvailable() > 0 && mode != DECODE_CHKSUM));
\r
715 /// Returns true, if the input buffer is empty.
\r
716 /// You should then call setInput().
\r
717 /// NOTE: This method also returns true when the stream is finished.
\r
719 public bool IsNeedingInput {
\r
721 return input.IsNeedingInput;
\r
726 /// Returns true, if a preset dictionary is needed to inflate the input.
\r
728 public bool IsNeedingDictionary {
\r
730 return mode == DECODE_DICT && neededBits == 0;
\r
735 /// Returns true, if the inflater has finished. This means, that no
\r
736 /// input is needed and no output can be produced.
\r
738 public bool IsFinished {
\r
740 return mode == FINISHED && outputWindow.GetAvailable() == 0;
\r
745 /// Gets the adler checksum. This is either the checksum of all
\r
746 /// uncompressed bytes returned by inflate(), or if needsDictionary()
\r
747 /// returns true (and thus no output was yet produced) this is the
\r
748 /// adler checksum of the expected dictionary.
\r
751 /// the adler checksum.
\r
755 return IsNeedingDictionary ? readAdler : (int) adler.Value;
\r
760 /// Gets the total number of output bytes returned by inflate().
\r
763 /// the total number of output bytes.
\r
765 public int TotalOut {
\r
772 /// Gets the total number of processed compressed input bytes.
\r
775 /// The total number of processed input bytes.
\r
777 public int TotalIn {
\r
779 return totalIn - RemainingInput;
\r
785 /// -jr test hack trying to figure out a bug
\r
787 public int UnseenInput {
\r
789 return totalIn - ((input.AvailableBits + 7) >> 3);
\r
794 /// -jr test hack trying to figure out a bug
\r
796 public int PlainTotalIn {
\r
804 /// Gets the number of unprocessed input bytes. Useful, if the end of the
\r
805 /// stream is reached and you want to further process the bytes after
\r
806 /// the deflate stream.
\r
809 /// The number of bytes of the input which have not been processed.
\r
811 public int RemainingInput {
\r
813 return input.AvailableBytes;
\r