5 #include "../../../Common/Defs.h"
6 #include "../../Common/StreamUtils.h"
8 #include "LZMAEncoder.h"
10 // for minimal compressing code size define these:
11 // #define COMPRESS_MF_BT
12 // #define COMPRESS_MF_BT4
14 #if !defined(COMPRESS_MF_BT) && !defined(COMPRESS_MF_HC)
15 #define COMPRESS_MF_BT
16 #define COMPRESS_MF_HC
20 #if !defined(COMPRESS_MF_BT2) && !defined(COMPRESS_MF_BT3) && !defined(COMPRESS_MF_BT4)
21 #define COMPRESS_MF_BT2
22 #define COMPRESS_MF_BT3
23 #define COMPRESS_MF_BT4
25 #ifdef COMPRESS_MF_BT2
26 #include "../LZ/BinTree/BinTree2.h"
28 #ifdef COMPRESS_MF_BT3
29 #include "../LZ/BinTree/BinTree3.h"
31 #ifdef COMPRESS_MF_BT4
32 #include "../LZ/BinTree/BinTree4.h"
37 #include "../LZ/HashChain/HC4.h"
41 #include "../LZ/MT/MT.h"
47 const int kDefaultDictionaryLogSize = 22;
48 const UInt32 kNumFastBytesDefault = 0x20;
58 static const wchar_t *kMatchFinderIDs[] =
66 Byte g_FastPos[1 << 11];
71 CFastPosInit() { Init(); }
74 const Byte kFastSlots = 22;
79 for (Byte slotFast = 2; slotFast < kFastSlots; slotFast++)
81 UInt32 k = (1 << ((slotFast >> 1) - 1));
82 for (UInt32 j = 0; j < k; j++, c++)
83 g_FastPos[c] = slotFast;
89 void CLiteralEncoder2::Encode(NRangeCoder::CEncoder *rangeEncoder, Byte symbol)
96 UInt32 bit = (symbol >> i) & 1;
97 _encoders[context].Encode(rangeEncoder, bit);
98 context = (context << 1) | bit;
103 void CLiteralEncoder2::EncodeMatched(NRangeCoder::CEncoder *rangeEncoder,
104 Byte matchByte, Byte symbol)
111 UInt32 bit = (symbol >> i) & 1;
112 UInt32 matchBit = (matchByte >> i) & 1;
113 _encoders[0x100 + (matchBit << 8) + context].Encode(rangeEncoder, bit);
114 context = (context << 1) | bit;
120 UInt32 bit = (symbol >> i) & 1;
121 _encoders[context].Encode(rangeEncoder, bit);
122 context = (context << 1) | bit;
130 UInt32 CLiteralEncoder2::GetPrice(bool matchMode, Byte matchByte, Byte symbol) const
140 UInt32 matchBit = (matchByte >> i) & 1;
141 UInt32 bit = (symbol >> i) & 1;
142 price += _encoders[0x100 + (matchBit << 8) + context].GetPrice(bit);
143 context = (context << 1) | bit;
152 UInt32 bit = (symbol >> i) & 1;
153 price += _encoders[context].GetPrice(bit);
154 context = (context << 1) | bit;
162 void CEncoder::Init(UInt32 numPosStates)
166 for (UInt32 posState = 0; posState < numPosStates; posState++)
168 _lowCoder[posState].Init();
169 _midCoder[posState].Init();
174 void CEncoder::Encode(NRangeCoder::CEncoder *rangeEncoder, UInt32 symbol, UInt32 posState)
176 if(symbol < kNumLowSymbols)
178 _choice.Encode(rangeEncoder, 0);
179 _lowCoder[posState].Encode(rangeEncoder, symbol);
183 _choice.Encode(rangeEncoder, 1);
184 if(symbol < kNumLowSymbols + kNumMidSymbols)
186 _choice2.Encode(rangeEncoder, 0);
187 _midCoder[posState].Encode(rangeEncoder, symbol - kNumLowSymbols);
191 _choice2.Encode(rangeEncoder, 1);
192 _highCoder.Encode(rangeEncoder, symbol - kNumLowSymbols - kNumMidSymbols);
197 void CEncoder::SetPrices(UInt32 posState, UInt32 numSymbols, UInt32 *prices) const
199 UInt32 a0 = _choice.GetPrice0();
200 UInt32 a1 = _choice.GetPrice1();
201 UInt32 b0 = a1 + _choice2.GetPrice0();
202 UInt32 b1 = a1 + _choice2.GetPrice1();
204 for (i = 0; i < kNumLowSymbols; i++)
208 prices[i] = a0 + _lowCoder[posState].GetPrice(i);
210 for (; i < kNumLowSymbols + kNumMidSymbols; i++)
214 prices[i] = b0 + _midCoder[posState].GetPrice(i - kNumLowSymbols);
216 for (; i < numSymbols; i++)
217 prices[i] = b1 + _highCoder.GetPrice(i - kNumLowSymbols - kNumMidSymbols);
221 CEncoder::CEncoder():
222 _numFastBytes(kNumFastBytesDefault),
223 _distTableSize(kDefaultDictionaryLogSize * 2),
225 _posStateMask(4 - 1),
226 _numLiteralPosStateBits(0),
227 _numLiteralContextBits(3),
228 _dictionarySize(1 << kDefaultDictionaryLogSize),
229 _dictionarySizePrev(UInt32(-1)),
230 _numFastBytesPrev(UInt32(-1)),
231 _matchFinderCycles(0),
232 _matchFinderIndex(kBT4),
233 #ifdef COMPRESS_MF_MT
236 _writeEndMark(false),
243 HRESULT CEncoder::Create()
245 if (!_rangeEncoder.Create(1 << 20))
246 return E_OUTOFMEMORY;
249 switch(_matchFinderIndex)
251 #ifdef COMPRESS_MF_BT
252 #ifdef COMPRESS_MF_BT2
255 NBT2::CMatchFinder *mfSpec = new NBT2::CMatchFinder;
256 setMfPasses = mfSpec;
257 _matchFinder = mfSpec;
261 #ifdef COMPRESS_MF_BT3
264 NBT3::CMatchFinder *mfSpec = new NBT3::CMatchFinder;
265 setMfPasses = mfSpec;
266 _matchFinder = mfSpec;
270 #ifdef COMPRESS_MF_BT4
273 NBT4::CMatchFinder *mfSpec = new NBT4::CMatchFinder;
274 setMfPasses = mfSpec;
275 _matchFinder = mfSpec;
281 #ifdef COMPRESS_MF_HC
284 NHC4::CMatchFinder *mfSpec = new NHC4::CMatchFinder;
285 setMfPasses = mfSpec;
286 _matchFinder = mfSpec;
291 if (_matchFinder == 0)
292 return E_OUTOFMEMORY;
294 #ifdef COMPRESS_MF_MT
295 if (_multiThread && !(_fastMode && (_matchFinderIndex == kHC4)))
297 CMatchFinderMT *mfSpec = new CMatchFinderMT;
299 return E_OUTOFMEMORY;
300 CMyComPtr<IMatchFinder> mf = mfSpec;
301 RINOK(mfSpec->SetMatchFinder(_matchFinder));
302 _matchFinder.Release();
308 if (!_literalEncoder.Create(_numLiteralPosStateBits, _numLiteralContextBits))
309 return E_OUTOFMEMORY;
311 if (_dictionarySize == _dictionarySizePrev && _numFastBytesPrev == _numFastBytes)
313 RINOK(_matchFinder->Create(_dictionarySize, kNumOpts, _numFastBytes, kMatchMaxLen + 1)); // actually it's + _numFastBytes - _numFastBytes
314 if (_matchFinderCycles != 0 && setMfPasses != 0)
315 setMfPasses->SetNumPasses(_matchFinderCycles);
316 _dictionarySizePrev = _dictionarySize;
317 _numFastBytesPrev = _numFastBytes;
321 static bool AreStringsEqual(const wchar_t *base, const wchar_t *testString)
325 wchar_t c = *testString;
326 if (c >= 'a' && c <= 'z')
337 static int FindMatchFinder(const wchar_t *s)
339 for (int m = 0; m < (int)(sizeof(kMatchFinderIDs) / sizeof(kMatchFinderIDs[0])); m++)
340 if (AreStringsEqual(kMatchFinderIDs[m], s))
345 STDMETHODIMP CEncoder::SetCoderProperties(const PROPID *propIDs,
346 const PROPVARIANT *properties, UInt32 numProperties)
348 for (UInt32 i = 0; i < numProperties; i++)
350 const PROPVARIANT &prop = properties[i];
353 case NCoderPropID::kNumFastBytes:
355 if (prop.vt != VT_UI4)
357 UInt32 numFastBytes = prop.ulVal;
358 if(numFastBytes < 5 || numFastBytes > kMatchMaxLen)
360 _numFastBytes = numFastBytes;
363 case NCoderPropID::kMatchFinderCycles:
365 if (prop.vt != VT_UI4)
367 _matchFinderCycles = prop.ulVal;
370 case NCoderPropID::kAlgorithm:
372 if (prop.vt != VT_UI4)
374 UInt32 maximize = prop.ulVal;
375 _fastMode = (maximize == 0);
376 // _maxMode = (maximize >= 2);
379 case NCoderPropID::kMatchFinder:
381 if (prop.vt != VT_BSTR)
383 int matchFinderIndexPrev = _matchFinderIndex;
384 int m = FindMatchFinder(prop.bstrVal);
387 _matchFinderIndex = m;
388 if (_matchFinder && matchFinderIndexPrev != _matchFinderIndex)
390 _dictionarySizePrev = (UInt32)-1;
391 ReleaseMatchFinder();
395 #ifdef COMPRESS_MF_MT
396 case NCoderPropID::kMultiThread:
398 if (prop.vt != VT_BOOL)
400 bool newMultiThread = (prop.boolVal == VARIANT_TRUE);
401 if (newMultiThread != _multiThread)
403 _dictionarySizePrev = (UInt32)-1;
404 ReleaseMatchFinder();
405 _multiThread = newMultiThread;
409 case NCoderPropID::kNumThreads:
411 if (prop.vt != VT_UI4)
413 bool newMultiThread = (prop.ulVal > 1);
414 if (newMultiThread != _multiThread)
416 _dictionarySizePrev = (UInt32)-1;
417 ReleaseMatchFinder();
418 _multiThread = newMultiThread;
423 case NCoderPropID::kDictionarySize:
425 const int kDicLogSizeMaxCompress = 30;
426 if (prop.vt != VT_UI4)
428 UInt32 dictionarySize = prop.ulVal;
429 if (dictionarySize < UInt32(1 << kDicLogSizeMin) ||
430 dictionarySize > UInt32(1 << kDicLogSizeMaxCompress))
432 _dictionarySize = dictionarySize;
434 for(dicLogSize = 0; dicLogSize < (UInt32)kDicLogSizeMaxCompress; dicLogSize++)
435 if (dictionarySize <= (UInt32(1) << dicLogSize))
437 _distTableSize = dicLogSize * 2;
440 case NCoderPropID::kPosStateBits:
442 if (prop.vt != VT_UI4)
444 UInt32 value = prop.ulVal;
445 if (value > (UInt32)NLength::kNumPosStatesBitsEncodingMax)
447 _posStateBits = value;
448 _posStateMask = (1 << _posStateBits) - 1;
451 case NCoderPropID::kLitPosBits:
453 if (prop.vt != VT_UI4)
455 UInt32 value = prop.ulVal;
456 if (value > (UInt32)kNumLitPosStatesBitsEncodingMax)
458 _numLiteralPosStateBits = value;
461 case NCoderPropID::kLitContextBits:
463 if (prop.vt != VT_UI4)
465 UInt32 value = prop.ulVal;
466 if (value > (UInt32)kNumLitContextBitsMax)
468 _numLiteralContextBits = value;
471 case NCoderPropID::kEndMarker:
473 if (prop.vt != VT_BOOL)
475 SetWriteEndMarkerMode(prop.boolVal == VARIANT_TRUE);
485 STDMETHODIMP CEncoder::WriteCoderProperties(ISequentialOutStream *outStream)
487 const UInt32 kPropSize = 5;
488 Byte properties[kPropSize];
489 properties[0] = (_posStateBits * 5 + _numLiteralPosStateBits) * 9 + _numLiteralContextBits;
490 for (int i = 0; i < 4; i++)
491 properties[1 + i] = Byte(_dictionarySize >> (8 * i));
492 return WriteStream(outStream, properties, kPropSize, NULL);
495 STDMETHODIMP CEncoder::SetOutStream(ISequentialOutStream *outStream)
497 _rangeEncoder.SetStream(outStream);
501 STDMETHODIMP CEncoder::ReleaseOutStream()
503 _rangeEncoder.ReleaseStream();
507 HRESULT CEncoder::Init()
511 // RINOK(_matchFinder->Init(inStream));
512 _rangeEncoder.Init();
514 for(int i = 0; i < kNumStates; i++)
516 for (UInt32 j = 0; j <= _posStateMask; j++)
518 _isMatch[i][j].Init();
519 _isRep0Long[i][j].Init();
527 _literalEncoder.Init();
530 for(UInt32 i = 0; i < kNumLenToPosStates; i++)
531 _posSlotEncoder[i].Init();
534 for(UInt32 i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
535 _posEncoders[i].Init();
538 _lenEncoder.Init(1 << _posStateBits);
539 _repMatchLenEncoder.Init(1 << _posStateBits);
541 _posAlignEncoder.Init();
543 _longestMatchWasFound = false;
544 _optimumEndIndex = 0;
545 _optimumCurrentIndex = 0;
546 _additionalOffset = 0;
551 HRESULT CEncoder::MovePos(UInt32 num)
555 _additionalOffset += num;
556 return _matchFinder->Skip(num);
559 UInt32 CEncoder::Backward(UInt32 &backRes, UInt32 cur)
561 _optimumEndIndex = cur;
562 UInt32 posMem = _optimum[cur].PosPrev;
563 UInt32 backMem = _optimum[cur].BackPrev;
566 if (_optimum[cur].Prev1IsChar)
568 _optimum[posMem].MakeAsChar();
569 _optimum[posMem].PosPrev = posMem - 1;
570 if (_optimum[cur].Prev2)
572 _optimum[posMem - 1].Prev1IsChar = false;
573 _optimum[posMem - 1].PosPrev = _optimum[cur].PosPrev2;
574 _optimum[posMem - 1].BackPrev = _optimum[cur].BackPrev2;
577 UInt32 posPrev = posMem;
578 UInt32 backCur = backMem;
580 backMem = _optimum[posPrev].BackPrev;
581 posMem = _optimum[posPrev].PosPrev;
583 _optimum[posPrev].BackPrev = backCur;
584 _optimum[posPrev].PosPrev = cur;
588 backRes = _optimum[0].BackPrev;
589 _optimumCurrentIndex = _optimum[0].PosPrev;
590 return _optimumCurrentIndex;
595 (lenRes == 1) && (backRes == 0xFFFFFFFF) means Literal
598 HRESULT CEncoder::GetOptimum(UInt32 position, UInt32 &backRes, UInt32 &lenRes)
600 if(_optimumEndIndex != _optimumCurrentIndex)
602 const COptimal &optimum = _optimum[_optimumCurrentIndex];
603 lenRes = optimum.PosPrev - _optimumCurrentIndex;
604 backRes = optimum.BackPrev;
605 _optimumCurrentIndex = optimum.PosPrev;
608 _optimumCurrentIndex = _optimumEndIndex = 0;
610 UInt32 lenMain, numDistancePairs;
611 if (!_longestMatchWasFound)
613 RINOK(ReadMatchDistances(lenMain, numDistancePairs));
617 lenMain = _longestMatchLength;
618 numDistancePairs = _numDistancePairs;
619 _longestMatchWasFound = false;
622 const Byte *data = _matchFinder->GetPointerToCurrentPos() - 1;
623 UInt32 numAvailableBytes = _matchFinder->GetNumAvailableBytes() + 1;
624 if (numAvailableBytes < 2)
626 backRes = (UInt32)(-1);
630 if (numAvailableBytes > kMatchMaxLen)
631 numAvailableBytes = kMatchMaxLen;
633 UInt32 reps[kNumRepDistances];
634 UInt32 repLens[kNumRepDistances];
635 UInt32 repMaxIndex = 0;
637 for(i = 0; i < kNumRepDistances; i++)
639 reps[i] = _repDistances[i];
640 UInt32 backOffset = reps[i] + 1;
641 if (data[0] != data[(size_t)0 - backOffset] || data[1] != data[(size_t)1 - backOffset])
647 for (lenTest = 2; lenTest < numAvailableBytes &&
648 data[lenTest] == data[(size_t)lenTest - backOffset]; lenTest++);
649 repLens[i] = lenTest;
650 if (lenTest > repLens[repMaxIndex])
653 if(repLens[repMaxIndex] >= _numFastBytes)
655 backRes = repMaxIndex;
656 lenRes = repLens[repMaxIndex];
657 return MovePos(lenRes - 1);
660 UInt32 *matchDistances = _matchDistances + 1;
661 if(lenMain >= _numFastBytes)
663 backRes = matchDistances[numDistancePairs - 1] + kNumRepDistances;
665 return MovePos(lenMain - 1);
667 Byte currentByte = *data;
668 Byte matchByte = data[(size_t)0 - reps[0] - 1];
670 if(lenMain < 2 && currentByte != matchByte && repLens[repMaxIndex] < 2)
672 backRes = (UInt32)-1;
677 _optimum[0].State = _state;
679 UInt32 posState = (position & _posStateMask);
681 _optimum[1].Price = _isMatch[_state.Index][posState].GetPrice0() +
682 _literalEncoder.GetSubCoder(position, _previousByte)->GetPrice(!_state.IsCharState(), matchByte, currentByte);
683 _optimum[1].MakeAsChar();
685 UInt32 matchPrice = _isMatch[_state.Index][posState].GetPrice1();
686 UInt32 repMatchPrice = matchPrice + _isRep[_state.Index].GetPrice1();
688 if(matchByte == currentByte)
690 UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(_state, posState);
691 if(shortRepPrice < _optimum[1].Price)
693 _optimum[1].Price = shortRepPrice;
694 _optimum[1].MakeAsShortRep();
697 UInt32 lenEnd = ((lenMain >= repLens[repMaxIndex]) ? lenMain : repLens[repMaxIndex]);
701 backRes = _optimum[1].BackPrev;
706 _optimum[1].PosPrev = 0;
707 for (i = 0; i < kNumRepDistances; i++)
708 _optimum[0].Backs[i] = reps[i];
712 _optimum[len--].Price = kIfinityPrice;
715 for(i = 0; i < kNumRepDistances; i++)
717 UInt32 repLen = repLens[i];
720 UInt32 price = repMatchPrice + GetPureRepPrice(i, _state, posState);
723 UInt32 curAndLenPrice = price + _repMatchLenEncoder.GetPrice(repLen - 2, posState);
724 COptimal &optimum = _optimum[repLen];
725 if (curAndLenPrice < optimum.Price)
727 optimum.Price = curAndLenPrice;
729 optimum.BackPrev = i;
730 optimum.Prev1IsChar = false;
733 while(--repLen >= 2);
736 UInt32 normalMatchPrice = matchPrice + _isRep[_state.Index].GetPrice0();
738 len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
742 while (len > matchDistances[offs])
746 UInt32 distance = matchDistances[offs + 1];
747 UInt32 curAndLenPrice = normalMatchPrice + GetPosLenPrice(distance, len, posState);
748 COptimal &optimum = _optimum[len];
749 if (curAndLenPrice < optimum.Price)
751 optimum.Price = curAndLenPrice;
753 optimum.BackPrev = distance + kNumRepDistances;
754 optimum.Prev1IsChar = false;
756 if (len == matchDistances[offs])
759 if (offs == numDistancePairs)
772 lenRes = Backward(backRes, cur);
775 UInt32 newLen, numDistancePairs;
776 RINOK(ReadMatchDistances(newLen, numDistancePairs));
777 if(newLen >= _numFastBytes)
779 _numDistancePairs = numDistancePairs;
780 _longestMatchLength = newLen;
781 _longestMatchWasFound = true;
782 lenRes = Backward(backRes, cur);
786 COptimal &curOptimum = _optimum[cur];
787 UInt32 posPrev = curOptimum.PosPrev;
789 if (curOptimum.Prev1IsChar)
792 if (curOptimum.Prev2)
794 state = _optimum[curOptimum.PosPrev2].State;
795 if (curOptimum.BackPrev2 < kNumRepDistances)
801 state = _optimum[posPrev].State;
805 state = _optimum[posPrev].State;
806 if (posPrev == cur - 1)
808 if (curOptimum.IsShortRep())
809 state.UpdateShortRep();
816 if (curOptimum.Prev1IsChar && curOptimum.Prev2)
818 posPrev = curOptimum.PosPrev2;
819 pos = curOptimum.BackPrev2;
824 pos = curOptimum.BackPrev;
825 if (pos < kNumRepDistances)
830 const COptimal &prevOptimum = _optimum[posPrev];
831 if (pos < kNumRepDistances)
833 reps[0] = prevOptimum.Backs[pos];
835 for(i = 1; i <= pos; i++)
836 reps[i] = prevOptimum.Backs[i - 1];
837 for(; i < kNumRepDistances; i++)
838 reps[i] = prevOptimum.Backs[i];
842 reps[0] = (pos - kNumRepDistances);
843 for(UInt32 i = 1; i < kNumRepDistances; i++)
844 reps[i] = prevOptimum.Backs[i - 1];
847 curOptimum.State = state;
848 for(UInt32 i = 0; i < kNumRepDistances; i++)
849 curOptimum.Backs[i] = reps[i];
850 UInt32 curPrice = curOptimum.Price;
851 const Byte *data = _matchFinder->GetPointerToCurrentPos() - 1;
852 const Byte currentByte = *data;
853 const Byte matchByte = data[(size_t)0 - reps[0] - 1];
855 UInt32 posState = (position & _posStateMask);
857 UInt32 curAnd1Price = curPrice +
858 _isMatch[state.Index][posState].GetPrice0() +
859 _literalEncoder.GetSubCoder(position, data[(size_t)0 - 1])->GetPrice(!state.IsCharState(), matchByte, currentByte);
861 COptimal &nextOptimum = _optimum[cur + 1];
863 bool nextIsChar = false;
864 if (curAnd1Price < nextOptimum.Price)
866 nextOptimum.Price = curAnd1Price;
867 nextOptimum.PosPrev = cur;
868 nextOptimum.MakeAsChar();
872 UInt32 matchPrice = curPrice + _isMatch[state.Index][posState].GetPrice1();
873 UInt32 repMatchPrice = matchPrice + _isRep[state.Index].GetPrice1();
875 if(matchByte == currentByte &&
876 !(nextOptimum.PosPrev < cur && nextOptimum.BackPrev == 0))
878 UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(state, posState);
879 if(shortRepPrice <= nextOptimum.Price)
881 nextOptimum.Price = shortRepPrice;
882 nextOptimum.PosPrev = cur;
883 nextOptimum.MakeAsShortRep();
888 if(newLen == 2 && matchDistances[2] >= kDistLimit2) // test it maybe set 2000 ?
892 UInt32 numAvailableBytesFull = _matchFinder->GetNumAvailableBytes() + 1;
893 numAvailableBytesFull = MyMin(kNumOpts - 1 - cur, numAvailableBytesFull);
894 UInt32 numAvailableBytes = numAvailableBytesFull;
896 if (numAvailableBytes < 2)
898 if (numAvailableBytes > _numFastBytes)
899 numAvailableBytes = _numFastBytes;
900 if (!nextIsChar && matchByte != currentByte) // speed optimization
902 // try Literal + rep0
903 UInt32 backOffset = reps[0] + 1;
904 UInt32 limit = MyMin(numAvailableBytesFull, _numFastBytes + 1);
906 for (temp = 1; temp < limit &&
907 data[temp] == data[(size_t)temp - backOffset]; temp++);
908 UInt32 lenTest2 = temp - 1;
911 CState state2 = state;
913 UInt32 posStateNext = (position + 1) & _posStateMask;
914 UInt32 nextRepMatchPrice = curAnd1Price +
915 _isMatch[state2.Index][posStateNext].GetPrice1() +
916 _isRep[state2.Index].GetPrice1();
917 // for (; lenTest2 >= 2; lenTest2--)
919 UInt32 offset = cur + 1 + lenTest2;
920 while(lenEnd < offset)
921 _optimum[++lenEnd].Price = kIfinityPrice;
922 UInt32 curAndLenPrice = nextRepMatchPrice + GetRepPrice(
923 0, lenTest2, state2, posStateNext);
924 COptimal &optimum = _optimum[offset];
925 if (curAndLenPrice < optimum.Price)
927 optimum.Price = curAndLenPrice;
928 optimum.PosPrev = cur + 1;
929 optimum.BackPrev = 0;
930 optimum.Prev1IsChar = true;
931 optimum.Prev2 = false;
937 UInt32 startLen = 2; // speed optimization
938 for(UInt32 repIndex = 0; repIndex < kNumRepDistances; repIndex++)
940 // UInt32 repLen = _matchFinder->GetMatchLen(0 - 1, reps[repIndex], newLen); // test it;
941 UInt32 backOffset = reps[repIndex] + 1;
942 if (data[0] != data[(size_t)0 - backOffset] ||
943 data[1] != data[(size_t)1 - backOffset])
946 for (lenTest = 2; lenTest < numAvailableBytes &&
947 data[lenTest] == data[(size_t)lenTest - backOffset]; lenTest++);
948 while(lenEnd < cur + lenTest)
949 _optimum[++lenEnd].Price = kIfinityPrice;
950 UInt32 lenTestTemp = lenTest;
951 UInt32 price = repMatchPrice + GetPureRepPrice(repIndex, state, posState);
954 UInt32 curAndLenPrice = price + _repMatchLenEncoder.GetPrice(lenTest - 2, posState);
955 COptimal &optimum = _optimum[cur + lenTest];
956 if (curAndLenPrice < optimum.Price)
958 optimum.Price = curAndLenPrice;
959 optimum.PosPrev = cur;
960 optimum.BackPrev = repIndex;
961 optimum.Prev1IsChar = false;
964 while(--lenTest >= 2);
965 lenTest = lenTestTemp;
968 startLen = lenTest + 1;
972 UInt32 lenTest2 = lenTest + 1;
973 UInt32 limit = MyMin(numAvailableBytesFull, lenTest2 + _numFastBytes);
974 for (; lenTest2 < limit &&
975 data[lenTest2] == data[(size_t)lenTest2 - backOffset]; lenTest2++);
976 lenTest2 -= lenTest + 1;
979 CState state2 = state;
981 UInt32 posStateNext = (position + lenTest) & _posStateMask;
982 UInt32 curAndLenCharPrice =
983 price + _repMatchLenEncoder.GetPrice(lenTest - 2, posState) +
984 _isMatch[state2.Index][posStateNext].GetPrice0() +
985 _literalEncoder.GetSubCoder(position + lenTest, data[(size_t)lenTest - 1])->GetPrice(
986 true, data[(size_t)lenTest - backOffset], data[lenTest]);
988 posStateNext = (position + lenTest + 1) & _posStateMask;
989 UInt32 nextRepMatchPrice = curAndLenCharPrice +
990 _isMatch[state2.Index][posStateNext].GetPrice1() +
991 _isRep[state2.Index].GetPrice1();
993 // for(; lenTest2 >= 2; lenTest2--)
995 UInt32 offset = cur + lenTest + 1 + lenTest2;
996 while(lenEnd < offset)
997 _optimum[++lenEnd].Price = kIfinityPrice;
998 UInt32 curAndLenPrice = nextRepMatchPrice + GetRepPrice(
999 0, lenTest2, state2, posStateNext);
1000 COptimal &optimum = _optimum[offset];
1001 if (curAndLenPrice < optimum.Price)
1003 optimum.Price = curAndLenPrice;
1004 optimum.PosPrev = cur + lenTest + 1;
1005 optimum.BackPrev = 0;
1006 optimum.Prev1IsChar = true;
1007 optimum.Prev2 = true;
1008 optimum.PosPrev2 = cur;
1009 optimum.BackPrev2 = repIndex;
1016 // for(UInt32 lenTest = 2; lenTest <= newLen; lenTest++)
1017 if (newLen > numAvailableBytes)
1019 newLen = numAvailableBytes;
1020 for (numDistancePairs = 0; newLen > matchDistances[numDistancePairs]; numDistancePairs += 2);
1021 matchDistances[numDistancePairs] = newLen;
1022 numDistancePairs += 2;
1024 if (newLen >= startLen)
1026 UInt32 normalMatchPrice = matchPrice + _isRep[state.Index].GetPrice0();
1027 while(lenEnd < cur + newLen)
1028 _optimum[++lenEnd].Price = kIfinityPrice;
1031 while(startLen > matchDistances[offs])
1033 UInt32 curBack = matchDistances[offs + 1];
1034 UInt32 posSlot = GetPosSlot2(curBack);
1035 for(UInt32 lenTest = /*2*/ startLen; ; lenTest++)
1037 UInt32 curAndLenPrice = normalMatchPrice;
1038 UInt32 lenToPosState = GetLenToPosState(lenTest);
1039 if (curBack < kNumFullDistances)
1040 curAndLenPrice += _distancesPrices[lenToPosState][curBack];
1042 curAndLenPrice += _posSlotPrices[lenToPosState][posSlot] + _alignPrices[curBack & kAlignMask];
1044 curAndLenPrice += _lenEncoder.GetPrice(lenTest - kMatchMinLen, posState);
1046 COptimal &optimum = _optimum[cur + lenTest];
1047 if (curAndLenPrice < optimum.Price)
1049 optimum.Price = curAndLenPrice;
1050 optimum.PosPrev = cur;
1051 optimum.BackPrev = curBack + kNumRepDistances;
1052 optimum.Prev1IsChar = false;
1055 if (/*_maxMode && */lenTest == matchDistances[offs])
1057 // Try Match + Literal + Rep0
1058 UInt32 backOffset = curBack + 1;
1059 UInt32 lenTest2 = lenTest + 1;
1060 UInt32 limit = MyMin(numAvailableBytesFull, lenTest2 + _numFastBytes);
1061 for (; lenTest2 < limit &&
1062 data[lenTest2] == data[(size_t)lenTest2 - backOffset]; lenTest2++);
1063 lenTest2 -= lenTest + 1;
1066 CState state2 = state;
1067 state2.UpdateMatch();
1068 UInt32 posStateNext = (position + lenTest) & _posStateMask;
1069 UInt32 curAndLenCharPrice = curAndLenPrice +
1070 _isMatch[state2.Index][posStateNext].GetPrice0() +
1071 _literalEncoder.GetSubCoder(position + lenTest, data[(size_t)lenTest - 1])->GetPrice(
1072 true, data[(size_t)lenTest - backOffset], data[lenTest]);
1073 state2.UpdateChar();
1074 posStateNext = (posStateNext + 1) & _posStateMask;
1075 UInt32 nextRepMatchPrice = curAndLenCharPrice +
1076 _isMatch[state2.Index][posStateNext].GetPrice1() +
1077 _isRep[state2.Index].GetPrice1();
1079 // for(; lenTest2 >= 2; lenTest2--)
1081 UInt32 offset = cur + lenTest + 1 + lenTest2;
1082 while(lenEnd < offset)
1083 _optimum[++lenEnd].Price = kIfinityPrice;
1084 UInt32 curAndLenPrice = nextRepMatchPrice + GetRepPrice(0, lenTest2, state2, posStateNext);
1085 COptimal &optimum = _optimum[offset];
1086 if (curAndLenPrice < optimum.Price)
1088 optimum.Price = curAndLenPrice;
1089 optimum.PosPrev = cur + lenTest + 1;
1090 optimum.BackPrev = 0;
1091 optimum.Prev1IsChar = true;
1092 optimum.Prev2 = true;
1093 optimum.PosPrev2 = cur;
1094 optimum.BackPrev2 = curBack + kNumRepDistances;
1099 if (offs == numDistancePairs)
1101 curBack = matchDistances[offs + 1];
1102 if (curBack >= kNumFullDistances)
1103 posSlot = GetPosSlot2(curBack);
1110 static inline bool ChangePair(UInt32 smallDist, UInt32 bigDist)
1112 return ((bigDist >> 7) > smallDist);
1116 HRESULT CEncoder::ReadMatchDistances(UInt32 &lenRes, UInt32 &numDistancePairs)
1119 RINOK(_matchFinder->GetMatches(_matchDistances));
1120 numDistancePairs = _matchDistances[0];
1121 if (numDistancePairs > 0)
1123 lenRes = _matchDistances[1 + numDistancePairs - 2];
1124 if (lenRes == _numFastBytes)
1125 lenRes += _matchFinder->GetMatchLen(lenRes - 1, _matchDistances[1 + numDistancePairs - 1],
1126 kMatchMaxLen - lenRes);
1128 _additionalOffset++;
1132 HRESULT CEncoder::GetOptimumFast(UInt32 position, UInt32 &backRes, UInt32 &lenRes)
1134 UInt32 lenMain, numDistancePairs;
1135 if (!_longestMatchWasFound)
1137 RINOK(ReadMatchDistances(lenMain, numDistancePairs));
1141 lenMain = _longestMatchLength;
1142 numDistancePairs = _numDistancePairs;
1143 _longestMatchWasFound = false;
1146 const Byte *data = _matchFinder->GetPointerToCurrentPos() - 1;
1147 UInt32 numAvailableBytes = _matchFinder->GetNumAvailableBytes() + 1;
1148 if (numAvailableBytes > kMatchMaxLen)
1149 numAvailableBytes = kMatchMaxLen;
1150 if (numAvailableBytes < 2)
1152 backRes = (UInt32)(-1);
1157 UInt32 repLens[kNumRepDistances];
1158 UInt32 repMaxIndex = 0;
1160 for(UInt32 i = 0; i < kNumRepDistances; i++)
1162 UInt32 backOffset = _repDistances[i] + 1;
1163 if (data[0] != data[(size_t)0 - backOffset] || data[1] != data[(size_t)1 - backOffset])
1169 for (len = 2; len < numAvailableBytes && data[len] == data[(size_t)len - backOffset]; len++);
1170 if(len >= _numFastBytes)
1174 return MovePos(lenRes - 1);
1177 if (len > repLens[repMaxIndex])
1180 UInt32 *matchDistances = _matchDistances + 1;
1181 if(lenMain >= _numFastBytes)
1183 backRes = matchDistances[numDistancePairs - 1] + kNumRepDistances;
1185 return MovePos(lenMain - 1);
1188 UInt32 backMain = 0;
1191 backMain = matchDistances[numDistancePairs - 1];
1192 while (numDistancePairs > 2 && lenMain == matchDistances[numDistancePairs - 4] + 1)
1194 if (!ChangePair(matchDistances[numDistancePairs - 3], backMain))
1196 numDistancePairs -= 2;
1197 lenMain = matchDistances[numDistancePairs - 2];
1198 backMain = matchDistances[numDistancePairs - 1];
1200 if (lenMain == 2 && backMain >= 0x80)
1204 if (repLens[repMaxIndex] >= 2)
1206 if (repLens[repMaxIndex] + 1 >= lenMain ||
1207 repLens[repMaxIndex] + 2 >= lenMain && (backMain > (1 << 9)) ||
1208 repLens[repMaxIndex] + 3 >= lenMain && (backMain > (1 << 15)))
1210 backRes = repMaxIndex;
1211 lenRes = repLens[repMaxIndex];
1212 return MovePos(lenRes - 1);
1216 if (lenMain >= 2 && numAvailableBytes > 2)
1218 RINOK(ReadMatchDistances(_longestMatchLength, _numDistancePairs));
1219 if (_longestMatchLength >= 2)
1221 UInt32 newDistance = matchDistances[_numDistancePairs - 1];
1222 if (_longestMatchLength >= lenMain && newDistance < backMain ||
1223 _longestMatchLength == lenMain + 1 && !ChangePair(backMain, newDistance) ||
1224 _longestMatchLength > lenMain + 1 ||
1225 _longestMatchLength + 1 >= lenMain && lenMain >= 3 && ChangePair(newDistance, backMain))
1227 _longestMatchWasFound = true;
1228 backRes = UInt32(-1);
1234 numAvailableBytes--;
1235 for(UInt32 i = 0; i < kNumRepDistances; i++)
1237 UInt32 backOffset = _repDistances[i] + 1;
1238 if (data[1] != data[(size_t)1 - backOffset] || data[2] != data[(size_t)2 - backOffset])
1244 for (len = 2; len < numAvailableBytes && data[len] == data[(size_t)len - backOffset]; len++);
1245 if (len + 1 >= lenMain)
1247 _longestMatchWasFound = true;
1248 backRes = UInt32(-1);
1253 backRes = backMain + kNumRepDistances;
1255 return MovePos(lenMain - 2);
1257 backRes = UInt32(-1);
1262 HRESULT CEncoder::Flush(UInt32 nowPos)
1265 WriteEndMarker(nowPos & _posStateMask);
1266 _rangeEncoder.FlushData();
1267 return _rangeEncoder.FlushStream();
1270 void CEncoder::WriteEndMarker(UInt32 posState)
1272 // This function for writing End Mark for stream version of LZMA.
1273 // In current version this feature is not used.
1277 _isMatch[_state.Index][posState].Encode(&_rangeEncoder, 1);
1278 _isRep[_state.Index].Encode(&_rangeEncoder, 0);
1279 _state.UpdateMatch();
1280 UInt32 len = kMatchMinLen; // kMatchMaxLen;
1281 _lenEncoder.Encode(&_rangeEncoder, len - kMatchMinLen, posState, !_fastMode);
1282 UInt32 posSlot = (1 << kNumPosSlotBits) - 1;
1283 UInt32 lenToPosState = GetLenToPosState(len);
1284 _posSlotEncoder[lenToPosState].Encode(&_rangeEncoder, posSlot);
1285 UInt32 footerBits = 30;
1286 UInt32 posReduced = (UInt32(1) << footerBits) - 1;
1287 _rangeEncoder.EncodeDirectBits(posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
1288 _posAlignEncoder.ReverseEncode(&_rangeEncoder, posReduced & kAlignMask);
1291 HRESULT CEncoder::CodeReal(ISequentialInStream *inStream,
1292 ISequentialOutStream *outStream,
1293 const UInt64 *inSize, const UInt64 *outSize,
1294 ICompressProgressInfo *progress)
1296 _needReleaseMFStream = false;
1297 CCoderReleaser coderReleaser(this);
1298 RINOK(SetStreams(inStream, outStream, inSize, outSize));
1301 UInt64 processedInSize;
1302 UInt64 processedOutSize;
1304 RINOK(CodeOneBlock(&processedInSize, &processedOutSize, &finished));
1309 RINOK(progress->SetRatioInfo(&processedInSize, &processedOutSize));
1314 HRESULT CEncoder::SetStreams(ISequentialInStream *inStream,
1315 ISequentialOutStream *outStream,
1316 const UInt64 *inSize, const UInt64 *outSize)
1318 _inStream = inStream;
1321 RINOK(SetOutStream(outStream));
1324 // CCoderReleaser releaser(this);
1327 if (_matchFinder->GetNumAvailableBytes() == 0)
1333 FillDistancesPrices();
1337 _lenEncoder.SetTableSize(_numFastBytes + 1 - kMatchMinLen);
1338 _lenEncoder.UpdateTables(1 << _posStateBits);
1339 _repMatchLenEncoder.SetTableSize(_numFastBytes + 1 - kMatchMinLen);
1340 _repMatchLenEncoder.UpdateTables(1 << _posStateBits);
1346 HRESULT CEncoder::CodeOneBlock(UInt64 *inSize, UInt64 *outSize, Int32 *finished)
1350 RINOK(_matchFinder->SetStream(_inStream));
1351 RINOK(_matchFinder->Init());
1352 _needReleaseMFStream = true;
1364 if (_matchFinder->GetNumAvailableBytes() == 0)
1365 return Flush(UInt32(nowPos64));
1366 UInt32 len, numDistancePairs;
1367 RINOK(ReadMatchDistances(len, numDistancePairs));
1368 UInt32 posState = UInt32(nowPos64) & _posStateMask;
1369 _isMatch[_state.Index][posState].Encode(&_rangeEncoder, 0);
1370 _state.UpdateChar();
1371 Byte curByte = _matchFinder->GetIndexByte(0 - _additionalOffset);
1372 _literalEncoder.GetSubCoder(UInt32(nowPos64), _previousByte)->Encode(&_rangeEncoder, curByte);
1373 _previousByte = curByte;
1374 _additionalOffset--;
1378 UInt32 nowPos32 = (UInt32)nowPos64;
1379 UInt32 progressPosValuePrev = nowPos32;
1381 if (_matchFinder->GetNumAvailableBytes() == 0)
1382 return Flush(nowPos32);
1386 #ifdef _NO_EXCEPTIONS
1387 if (_rangeEncoder.Stream.ErrorCode != S_OK)
1388 return _rangeEncoder.Stream.ErrorCode;
1393 result = GetOptimumFast(nowPos32, pos, len);
1395 result = GetOptimum(nowPos32, pos, len);
1398 UInt32 posState = nowPos32 & _posStateMask;
1399 if(len == 1 && pos == 0xFFFFFFFF)
1401 _isMatch[_state.Index][posState].Encode(&_rangeEncoder, 0);
1402 Byte curByte = _matchFinder->GetIndexByte(0 - _additionalOffset);
1403 CLiteralEncoder2 *subCoder = _literalEncoder.GetSubCoder(nowPos32, _previousByte);
1404 if(_state.IsCharState())
1405 subCoder->Encode(&_rangeEncoder, curByte);
1408 Byte matchByte = _matchFinder->GetIndexByte(0 - _repDistances[0] - 1 - _additionalOffset);
1409 subCoder->EncodeMatched(&_rangeEncoder, matchByte, curByte);
1411 _state.UpdateChar();
1412 _previousByte = curByte;
1416 _isMatch[_state.Index][posState].Encode(&_rangeEncoder, 1);
1417 if(pos < kNumRepDistances)
1419 _isRep[_state.Index].Encode(&_rangeEncoder, 1);
1422 _isRepG0[_state.Index].Encode(&_rangeEncoder, 0);
1423 _isRep0Long[_state.Index][posState].Encode(&_rangeEncoder, ((len == 1) ? 0 : 1));
1427 UInt32 distance = _repDistances[pos];
1428 _isRepG0[_state.Index].Encode(&_rangeEncoder, 1);
1430 _isRepG1[_state.Index].Encode(&_rangeEncoder, 0);
1433 _isRepG1[_state.Index].Encode(&_rangeEncoder, 1);
1434 _isRepG2[_state.Index].Encode(&_rangeEncoder, pos - 2);
1436 _repDistances[3] = _repDistances[2];
1437 _repDistances[2] = _repDistances[1];
1439 _repDistances[1] = _repDistances[0];
1440 _repDistances[0] = distance;
1443 _state.UpdateShortRep();
1446 _repMatchLenEncoder.Encode(&_rangeEncoder, len - kMatchMinLen, posState, !_fastMode);
1452 _isRep[_state.Index].Encode(&_rangeEncoder, 0);
1453 _state.UpdateMatch();
1454 _lenEncoder.Encode(&_rangeEncoder, len - kMatchMinLen, posState, !_fastMode);
1455 pos -= kNumRepDistances;
1456 UInt32 posSlot = GetPosSlot(pos);
1457 _posSlotEncoder[GetLenToPosState(len)].Encode(&_rangeEncoder, posSlot);
1459 if (posSlot >= kStartPosModelIndex)
1461 UInt32 footerBits = ((posSlot >> 1) - 1);
1462 UInt32 base = ((2 | (posSlot & 1)) << footerBits);
1463 UInt32 posReduced = pos - base;
1465 if (posSlot < kEndPosModelIndex)
1466 NRangeCoder::ReverseBitTreeEncode(_posEncoders + base - posSlot - 1,
1467 &_rangeEncoder, footerBits, posReduced);
1470 _rangeEncoder.EncodeDirectBits(posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
1471 _posAlignEncoder.ReverseEncode(&_rangeEncoder, posReduced & kAlignMask);
1475 _repDistances[3] = _repDistances[2];
1476 _repDistances[2] = _repDistances[1];
1477 _repDistances[1] = _repDistances[0];
1478 _repDistances[0] = pos;
1481 _previousByte = _matchFinder->GetIndexByte(len - 1 - _additionalOffset);
1483 _additionalOffset -= len;
1485 if (_additionalOffset == 0)
1489 if (_matchPriceCount >= (1 << 7))
1490 FillDistancesPrices();
1491 if (_alignPriceCount >= kAlignTableSize)
1494 if (_matchFinder->GetNumAvailableBytes() == 0)
1495 return Flush(nowPos32);
1496 if (nowPos32 - progressPosValuePrev >= (1 << 14))
1498 nowPos64 += nowPos32 - progressPosValuePrev;
1500 *outSize = _rangeEncoder.GetProcessedSize();
1509 STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
1510 ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize,
1511 ICompressProgressInfo *progress)
1513 #ifndef _NO_EXCEPTIONS
1517 return CodeReal(inStream, outStream, inSize, outSize, progress);
1518 #ifndef _NO_EXCEPTIONS
1520 catch(const COutBufferException &e) { return e.ErrorCode; }
1521 catch(...) { return E_FAIL; }
1525 void CEncoder::FillDistancesPrices()
1527 UInt32 tempPrices[kNumFullDistances];
1528 for (UInt32 i = kStartPosModelIndex; i < kNumFullDistances; i++)
1530 UInt32 posSlot = GetPosSlot(i);
1531 UInt32 footerBits = ((posSlot >> 1) - 1);
1532 UInt32 base = ((2 | (posSlot & 1)) << footerBits);
1533 tempPrices[i] = NRangeCoder::ReverseBitTreeGetPrice(_posEncoders +
1534 base - posSlot - 1, footerBits, i - base);
1537 for (UInt32 lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
1540 NRangeCoder::CBitTreeEncoder<kNumMoveBits, kNumPosSlotBits> &encoder = _posSlotEncoder[lenToPosState];
1541 UInt32 *posSlotPrices = _posSlotPrices[lenToPosState];
1542 for (posSlot = 0; posSlot < _distTableSize; posSlot++)
1543 posSlotPrices[posSlot] = encoder.GetPrice(posSlot);
1544 for (posSlot = kEndPosModelIndex; posSlot < _distTableSize; posSlot++)
1545 posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << NRangeCoder::kNumBitPriceShiftBits);
1547 UInt32 *distancesPrices = _distancesPrices[lenToPosState];
1549 for (i = 0; i < kStartPosModelIndex; i++)
1550 distancesPrices[i] = posSlotPrices[i];
1551 for (; i < kNumFullDistances; i++)
1552 distancesPrices[i] = posSlotPrices[GetPosSlot(i)] + tempPrices[i];
1554 _matchPriceCount = 0;
1557 void CEncoder::FillAlignPrices()
1559 for (UInt32 i = 0; i < kAlignTableSize; i++)
1560 _alignPrices[i] = _posAlignEncoder.ReverseGetPrice(i);
1561 _alignPriceCount = 0;