2 // Copyright (C) 2001 Mike Krueger
\r
4 // This file was translated from java, it was part of the GNU Classpath
\r
5 // Copyright (C) 2001 Free Software Foundation, Inc.
\r
7 // This program is free software; you can redistribute it and/or
\r
8 // modify it under the terms of the GNU General Public License
\r
9 // as published by the Free Software Foundation; either version 2
\r
10 // of the License, or (at your option) any later version.
\r
12 // This program is distributed in the hope that it will be useful,
\r
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
15 // GNU General Public License for more details.
\r
17 // You should have received a copy of the GNU General Public License
\r
18 // along with this program; if not, write to the Free Software
\r
19 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
\r
21 // Linking this library statically or dynamically with other modules is
\r
22 // making a combined work based on this library. Thus, the terms and
\r
23 // conditions of the GNU General Public License cover the whole
\r
26 // As a special exception, the copyright holders of this library give you
\r
27 // permission to link this library with independent modules to produce an
\r
28 // executable, regardless of the license terms of these independent
\r
29 // modules, and to copy and distribute the resulting executable under
\r
30 // terms of your choice, provided that you also meet, for each linked
\r
31 // independent module, the terms and conditions of the license of that
\r
32 // module. An independent module is a module which is not derived from
\r
33 // or based on this library. If you modify this library, you may extend
\r
34 // this exception to your version of the library, but you are not
\r
35 // obligated to do so. If you do not wish to do so, delete this
\r
36 // exception statement from your version.
\r
40 using ICSharpCode.SharpZipLib.Checksums;
\r
41 using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
\r
43 namespace ICSharpCode.SharpZipLib.Zip.Compression
\r
47 /// Inflater is used to decompress data that has been compressed according
\r
48 /// to the "deflate" standard described in rfc1950.
\r
50 /// The usage is as following. First you have to set some input with
\r
51 /// <code>setInput()</code>, then inflate() it. If inflate doesn't
\r
52 /// inflate any bytes there may be three reasons:
\r
54 /// <li>needsInput() returns true because the input buffer is empty.
\r
55 /// You have to provide more input with <code>setInput()</code>.
\r
56 /// NOTE: needsInput() also returns true when, the stream is finished.
\r
58 /// <li>needsDictionary() returns true, you have to provide a preset
\r
59 /// dictionary with <code>setDictionary()</code>.</li>
\r
60 /// <li>finished() returns true, the inflater has finished.</li>
\r
62 /// Once the first output byte is produced, a dictionary will not be
\r
63 /// needed at a later stage.
\r
65 /// author of the original java version : John Leuner, Jochen Hoenicke
\r
67 public class Inflater
\r
70 /// Copy lengths for literal codes 257..285
\r
72 private static int[] CPLENS = {
\r
73 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
\r
74 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
\r
78 /// Extra bits for literal codes 257..285
\r
80 private static int[] CPLEXT = {
\r
81 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
\r
82 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
\r
86 /// Copy offsets for distance codes 0..29
\r
88 private static int[] CPDIST = {
\r
89 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
\r
90 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
\r
91 8193, 12289, 16385, 24577
\r
95 /// Extra bits for distance codes
\r
97 private static int[] CPDEXT = {
\r
98 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
\r
99 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
\r
104 /// This are the state in which the inflater can be.
\r
106 private const int DECODE_HEADER = 0;
\r
107 private const int DECODE_DICT = 1;
\r
108 private const int DECODE_BLOCKS = 2;
\r
109 private const int DECODE_STORED_LEN1 = 3;
\r
110 private const int DECODE_STORED_LEN2 = 4;
\r
111 private const int DECODE_STORED = 5;
\r
112 private const int DECODE_DYN_HEADER = 6;
\r
113 private const int DECODE_HUFFMAN = 7;
\r
114 private const int DECODE_HUFFMAN_LENBITS = 8;
\r
115 private const int DECODE_HUFFMAN_DIST = 9;
\r
116 private const int DECODE_HUFFMAN_DISTBITS = 10;
\r
117 private const int DECODE_CHKSUM = 11;
\r
118 private const int FINISHED = 12;
\r
121 /// This variable contains the current state.
\r
126 /// The adler checksum of the dictionary or of the decompressed
\r
127 /// stream, as it is written in the header resp. footer of the
\r
128 /// compressed stream.
\r
129 /// Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
\r
131 private int readAdler;
\r
134 /// The number of bits needed to complete the current state. This
\r
135 /// is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
\r
136 /// DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
\r
138 private int neededBits;
\r
139 private int repLength, repDist;
\r
140 private int uncomprLen;
\r
143 /// True, if the last block flag was set in the last block of the
\r
144 /// inflated stream. This means that the stream ends after the
\r
147 private bool isLastBlock;
\r
150 /// The total number of inflated bytes.
\r
152 private int totalOut;
\r
155 /// The total number of bytes set with setInput(). This is not the
\r
156 /// value returned by getTotalIn(), since this also includes the
\r
157 /// unprocessed input.
\r
159 private int totalIn;
\r
162 /// This variable stores the nowrap flag that was given to the constructor.
\r
163 /// True means, that the inflated stream doesn't contain a header nor the
\r
164 /// checksum in the footer.
\r
166 private bool nowrap;
\r
168 private StreamManipulator input;
\r
169 private OutputWindow outputWindow;
\r
170 private InflaterDynHeader dynHeader;
\r
171 private InflaterHuffmanTree litlenTree, distTree;
\r
172 private Adler32 adler;
\r
175 /// Creates a new inflater.
\r
177 public Inflater() : this(false)
\r
182 /// Creates a new inflater.
\r
184 /// <param name="nowrap">
\r
185 /// true if no header and checksum field appears in the
\r
186 /// stream. This is used for GZIPed input. For compatibility with
\r
187 /// Sun JDK you should provide one byte of input more than needed in
\r
190 public Inflater(bool nowrap)
\r
192 this.nowrap = nowrap;
\r
193 this.adler = new Adler32();
\r
194 input = new StreamManipulator();
\r
195 outputWindow = new OutputWindow();
\r
196 mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
\r
200 /// Resets the inflater so that a new stream can be decompressed. All
\r
201 /// pending input and output will be discarded.
\r
203 public void Reset()
\r
205 mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
\r
206 totalIn = totalOut = 0;
\r
208 outputWindow.Reset();
\r
212 isLastBlock = false;
\r
217 /// Decodes the deflate header.
\r
220 /// false if more input is needed.
\r
222 /// <exception cref="System.FormatException">
\r
223 /// if header is invalid.
\r
225 private bool DecodeHeader()
\r
227 int header = input.PeekBits(16);
\r
231 input.DropBits(16);
\r
232 /* The header is written in "wrong" byte order */
\r
233 header = ((header << 8) | (header >> 8)) & 0xffff;
\r
234 if (header % 31 != 0) {
\r
235 throw new FormatException("Header checksum illegal");
\r
238 if ((header & 0x0f00) != (Deflater.DEFLATED << 8)) {
\r
239 throw new FormatException("Compression Method unknown");
\r
242 /* Maximum size of the backwards window in bits.
\r
243 * We currently ignore this, but we could use it to make the
\r
244 * inflater window more space efficient. On the other hand the
\r
245 * full window (15 bits) is needed most times, anyway.
\r
246 int max_wbits = ((header & 0x7000) >> 12) + 8;
\r
249 if ((header & 0x0020) == 0) { // Dictionary flag?
\r
250 mode = DECODE_BLOCKS;
\r
252 mode = DECODE_DICT;
\r
259 /// Decodes the dictionary checksum after the deflate header.
\r
262 /// false if more input is needed.
\r
264 private bool DecodeDict()
\r
266 while (neededBits > 0) {
\r
267 int dictByte = input.PeekBits(8);
\r
268 if (dictByte < 0) {
\r
272 readAdler = (readAdler << 8) | dictByte;
\r
279 /// Decodes the huffman encoded symbols in the input stream.
\r
282 /// false if more input is needed, true if output window is
\r
283 /// full or the current block ends.
\r
285 /// <exception cref="System.FormatException">
\r
286 /// if deflated stream is invalid.
\r
288 private bool DecodeHuffman()
\r
290 int free = outputWindow.GetFreeSpace();
\r
291 while (free >= 258) {
\r
294 case DECODE_HUFFMAN:
\r
295 /* This is the inner loop so it is optimized a bit */
\r
296 while (((symbol = litlenTree.GetSymbol(input)) & ~0xff) == 0) {
\r
297 outputWindow.Write(symbol);
\r
298 if (--free < 258) {
\r
302 if (symbol < 257) {
\r
306 /* symbol == 256: end of block */
\r
309 mode = DECODE_BLOCKS;
\r
315 repLength = CPLENS[symbol - 257];
\r
316 neededBits = CPLEXT[symbol - 257];
\r
317 } catch (Exception) {
\r
318 throw new FormatException("Illegal rep length code");
\r
320 goto case DECODE_HUFFMAN_LENBITS;/* fall through */
\r
321 case DECODE_HUFFMAN_LENBITS:
\r
322 if (neededBits > 0) {
\r
323 mode = DECODE_HUFFMAN_LENBITS;
\r
324 int i = input.PeekBits(neededBits);
\r
328 input.DropBits(neededBits);
\r
331 mode = DECODE_HUFFMAN_DIST;
\r
332 goto case DECODE_HUFFMAN_DIST;/* fall through */
\r
333 case DECODE_HUFFMAN_DIST:
\r
334 symbol = distTree.GetSymbol(input);
\r
339 repDist = CPDIST[symbol];
\r
340 neededBits = CPDEXT[symbol];
\r
341 } catch (Exception) {
\r
342 throw new FormatException("Illegal rep dist code");
\r
345 goto case DECODE_HUFFMAN_DISTBITS;/* fall through */
\r
346 case DECODE_HUFFMAN_DISTBITS:
\r
347 if (neededBits > 0) {
\r
348 mode = DECODE_HUFFMAN_DISTBITS;
\r
349 int i = input.PeekBits(neededBits);
\r
353 input.DropBits(neededBits);
\r
356 outputWindow.Repeat(repLength, repDist);
\r
358 mode = DECODE_HUFFMAN;
\r
361 throw new FormatException();
\r
368 /// Decodes the adler checksum after the deflate stream.
\r
371 /// false if more input is needed.
\r
373 /// <exception cref="System.FormatException">
\r
374 /// DataFormatException, if checksum doesn't match.
\r
376 private bool DecodeChksum()
\r
378 while (neededBits > 0) {
\r
379 int chkByte = input.PeekBits(8);
\r
384 readAdler = (readAdler << 8) | chkByte;
\r
387 if ((int) adler.Value != readAdler) {
\r
388 throw new FormatException("Adler chksum doesn't match: " + (int)adler.Value + " vs. " + readAdler);
\r
395 /// Decodes the deflated stream.
\r
398 /// false if more input is needed, or if finished.
\r
400 /// <exception cref="System.FormatException">
\r
401 /// DataFormatException, if deflated stream is invalid.
\r
403 private bool Decode()
\r
406 case DECODE_HEADER:
\r
407 return DecodeHeader();
\r
409 return DecodeDict();
\r
410 case DECODE_CHKSUM:
\r
411 return DecodeChksum();
\r
413 case DECODE_BLOCKS:
\r
419 input.SkipToByteBoundary();
\r
421 mode = DECODE_CHKSUM;
\r
426 int type = input.PeekBits(3);
\r
432 if ((type & 1) != 0) {
\r
433 isLastBlock = true;
\r
435 switch (type >> 1){
\r
436 case DeflaterConstants.STORED_BLOCK:
\r
437 input.SkipToByteBoundary();
\r
438 mode = DECODE_STORED_LEN1;
\r
440 case DeflaterConstants.STATIC_TREES:
\r
441 litlenTree = InflaterHuffmanTree.defLitLenTree;
\r
442 distTree = InflaterHuffmanTree.defDistTree;
\r
443 mode = DECODE_HUFFMAN;
\r
445 case DeflaterConstants.DYN_TREES:
\r
446 dynHeader = new InflaterDynHeader();
\r
447 mode = DECODE_DYN_HEADER;
\r
450 throw new FormatException("Unknown block type "+type);
\r
454 case DECODE_STORED_LEN1:
\r
456 if ((uncomprLen = input.PeekBits(16)) < 0) {
\r
459 input.DropBits(16);
\r
460 mode = DECODE_STORED_LEN2;
\r
462 goto case DECODE_STORED_LEN2; /* fall through */
\r
463 case DECODE_STORED_LEN2:
\r
465 int nlen = input.PeekBits(16);
\r
469 input.DropBits(16);
\r
470 if (nlen != (uncomprLen ^ 0xffff)) {
\r
471 throw new FormatException("broken uncompressed block");
\r
473 mode = DECODE_STORED;
\r
475 goto case DECODE_STORED;/* fall through */
\r
476 case DECODE_STORED:
\r
478 int more = outputWindow.CopyStored(input, uncomprLen);
\r
479 uncomprLen -= more;
\r
480 if (uncomprLen == 0) {
\r
481 mode = DECODE_BLOCKS;
\r
484 return !input.IsNeedingInput;
\r
487 case DECODE_DYN_HEADER:
\r
488 if (!dynHeader.Decode(input)) {
\r
492 litlenTree = dynHeader.BuildLitLenTree();
\r
493 distTree = dynHeader.BuildDistTree();
\r
494 mode = DECODE_HUFFMAN;
\r
495 goto case DECODE_HUFFMAN; /* fall through */
\r
496 case DECODE_HUFFMAN:
\r
497 case DECODE_HUFFMAN_LENBITS:
\r
498 case DECODE_HUFFMAN_DIST:
\r
499 case DECODE_HUFFMAN_DISTBITS:
\r
500 return DecodeHuffman();
\r
504 throw new FormatException();
\r
509 /// Sets the preset dictionary. This should only be called, if
\r
510 /// needsDictionary() returns true and it should set the same
\r
511 /// dictionary, that was used for deflating. The getAdler()
\r
512 /// function returns the checksum of the dictionary needed.
\r
514 /// <param name="buffer">
\r
515 /// the dictionary.
\r
517 /// <exception cref="System.InvalidOperationException">
\r
518 /// if no dictionary is needed.
\r
520 /// <exception cref="System.ArgumentException">
\r
521 /// if the dictionary checksum is wrong.
\r
523 public void SetDictionary(byte[] buffer)
\r
525 SetDictionary(buffer, 0, buffer.Length);
\r
529 /// Sets the preset dictionary. This should only be called, if
\r
530 /// needsDictionary() returns true and it should set the same
\r
531 /// dictionary, that was used for deflating. The getAdler()
\r
532 /// function returns the checksum of the dictionary needed.
\r
534 /// <param name="buffer">
\r
535 /// the dictionary.
\r
537 /// <param name="off">
\r
538 /// the offset into buffer where the dictionary starts.
\r
540 /// <param name="len">
\r
541 /// the length of the dictionary.
\r
543 /// <exception cref="System.InvalidOperationException">
\r
544 /// if no dictionary is needed.
\r
546 /// <exception cref="System.ArgumentException">
\r
547 /// if the dictionary checksum is wrong.
\r
549 /// <exception cref="System.ArgumentOutOfRangeException">
\r
550 /// if the off and/or len are wrong.
\r
552 public void SetDictionary(byte[] buffer, int off, int len)
\r
554 if (!IsNeedingDictionary) {
\r
555 throw new InvalidOperationException();
\r
558 adler.Update(buffer, off, len);
\r
559 if ((int)adler.Value != readAdler) {
\r
560 throw new ArgumentException("Wrong adler checksum");
\r
563 outputWindow.CopyDict(buffer, off, len);
\r
564 mode = DECODE_BLOCKS;
\r
568 /// Sets the input. This should only be called, if needsInput()
\r
571 /// <param name="buf">
\r
574 /// <exception cref="System.InvalidOperationException">
\r
575 /// if no input is needed.
\r
577 public void SetInput(byte[] buf)
\r
579 SetInput(buf, 0, buf.Length);
\r
583 /// Sets the input. This should only be called, if needsInput()
\r
586 /// <param name="buf">
\r
589 /// <param name="off">
\r
590 /// the offset into buffer where the input starts.
\r
592 /// <param name="len">
\r
593 /// the length of the input.
\r
595 /// <exception cref="System.InvalidOperationException">
\r
596 /// if no input is needed.
\r
598 /// <exception cref="System.ArgumentOutOfRangeException">
\r
599 /// if the off and/or len are wrong.
\r
601 public void SetInput(byte[] buf, int off, int len)
\r
603 input.SetInput(buf, off, len);
\r
608 /// Inflates the compressed stream to the output buffer. If this
\r
609 /// returns 0, you should check, whether needsDictionary(),
\r
610 /// needsInput() or finished() returns true, to determine why no
\r
611 /// further output is produced.
\r
613 /// <param name = "buf">
\r
614 /// the output buffer.
\r
617 /// the number of bytes written to the buffer, 0 if no further
\r
618 /// output can be produced.
\r
620 /// <exception cref="System.ArgumentOutOfRangeException">
\r
621 /// if buf has length 0.
\r
623 /// <exception cref="System.FormatException">
\r
624 /// if deflated stream is invalid.
\r
626 public int Inflate(byte[] buf)
\r
628 return Inflate(buf, 0, buf.Length);
\r
632 /// Inflates the compressed stream to the output buffer. If this
\r
633 /// returns 0, you should check, whether needsDictionary(),
\r
634 /// needsInput() or finished() returns true, to determine why no
\r
635 /// further output is produced.
\r
637 /// <param name = "buf">
\r
638 /// the output buffer.
\r
640 /// <param name = "off">
\r
641 /// the offset into buffer where the output should start.
\r
643 /// <param name = "len">
\r
644 /// the maximum length of the output.
\r
647 /// the number of bytes written to the buffer, 0 if no further output can be produced.
\r
649 /// <exception cref="System.ArgumentOutOfRangeException">
\r
650 /// if len is <= 0.
\r
652 /// <exception cref="System.ArgumentOutOfRangeException">
\r
653 /// if the off and/or len are wrong.
\r
655 /// <exception cref="System.FormatException">
\r
656 /// if deflated stream is invalid.
\r
658 public int Inflate(byte[] buf, int off, int len)
\r
661 throw new ArgumentOutOfRangeException("len < 0");
\r
663 // Special case: len may be zero
\r
665 if (IsFinished == false) {// -jr- 08-Nov-2003 INFLATE_BUG fix..
\r
670 /* // Check for correct buff, off, len triple
\r
671 if (off < 0 || off + len >= buf.Length) {
\r
672 throw new ArgumentException("off/len outside buf bounds");
\r
677 if (mode != DECODE_CHKSUM) {
\r
678 /* Don't give away any output, if we are waiting for the
\r
679 * checksum in the input stream.
\r
681 * With this trick we have always:
\r
682 * needsInput() and not finished()
\r
683 * implies more output can be produced.
\r
685 more = outputWindow.CopyOutput(buf, off, len);
\r
686 adler.Update(buf, off, more);
\r
695 } while (Decode() || (outputWindow.GetAvailable() > 0 && mode != DECODE_CHKSUM));
\r
700 /// Returns true, if the input buffer is empty.
\r
701 /// You should then call setInput().
\r
702 /// NOTE: This method also returns true when the stream is finished.
\r
704 public bool IsNeedingInput {
\r
706 return input.IsNeedingInput;
\r
711 /// Returns true, if a preset dictionary is needed to inflate the input.
\r
713 public bool IsNeedingDictionary {
\r
715 return mode == DECODE_DICT && neededBits == 0;
\r
720 /// Returns true, if the inflater has finished. This means, that no
\r
721 /// input is needed and no output can be produced.
\r
723 public bool IsFinished {
\r
725 return mode == FINISHED && outputWindow.GetAvailable() == 0;
\r
730 /// Gets the adler checksum. This is either the checksum of all
\r
731 /// uncompressed bytes returned by inflate(), or if needsDictionary()
\r
732 /// returns true (and thus no output was yet produced) this is the
\r
733 /// adler checksum of the expected dictionary.
\r
736 /// the adler checksum.
\r
740 return IsNeedingDictionary ? readAdler : (int) adler.Value;
\r
745 /// Gets the total number of output bytes returned by inflate().
\r
748 /// the total number of output bytes.
\r
750 public int TotalOut {
\r
757 /// Gets the total number of processed compressed input bytes.
\r
760 /// the total number of bytes of processed input bytes.
\r
762 public int TotalIn {
\r
764 return totalIn - RemainingInput;
\r
769 /// Gets the number of unprocessed input. Useful, if the end of the
\r
770 /// stream is reached and you want to further process the bytes after
\r
771 /// the deflate stream.
\r
774 /// the number of bytes of the input which were not processed.
\r
776 public int RemainingInput {
\r
778 return input.AvailableBytes;
\r