util/cbfstool/lzma/lzma.cc - coreboot - Gitiles

 #include "endian.hh" /* For R64 */

 extern "C" {
 #include "C/LzmaDec.h"
 #include "C/LzmaEnc.h"
 }

 #include "lzma.hh"

 #include <algorithm> // min,max,swap
 #include <vector>
 #include <string>
 #include <cstring> // std::memcpy


 #include <cstdio>

 #include <stdint.h>

 /* We don't want threads */
 #ifdef linux
 #include <sched.h>
 #define ForceSwitchThread() sched_yield()
 #else
 #define ForceSwitchThread()
 #endif


 int LZMA_verbose = 0;

 // -fb
 unsigned LZMA_NumFastBytes = 273;
 /*from lzma.txt:
           Set number of fast bytes - [5, 273], default: 273
           Usually big number gives a little bit better compression ratio
           and slower compression process.
   from anonymous:
 This one is hard to explain... To my knowledge (please correct me if I
 am wrong), this refers to the optimal parsing algorithm. The algorithm
 tries many different combinations of matches to find the best one. If a
 match is found that is over the fb value, then it will not be optimised,
 and will just be used straight.
 This speeds up corner cases such as pic.
 */

 /* apparently, 0 and 1 are valid values. 0 = fast mode */
 unsigned LZMA_AlgorithmNo  = 1;

 unsigned LZMA_MatchFinderCycles = 0; // default: 0

 // -pb
 unsigned LZMA_PosStateBits = 0; // default: 2, range: 0..4
 /*from lzma.txt:
           pb switch is intended for periodical data
           when period is equal 2^N.
 */


 // -lp
 unsigned LZMA_LiteralPosStateBits = 0; // default: 0, range: 0..4
 /*from lzma.txt:
           lp switch is intended for periodical data when period is
           equal 2^N. For example, for 32-bit (4 bytes)
           periodical data you can use lp=2.
           Often it's better to set lc0, if you change lp switch.
 */

 // -lc
 unsigned LZMA_LiteralContextBits = 1; // default: 3, range: 0..8
 /*from lzma.txt:
           Sometimes lc=4 gives gain for big files.
   from anonymous:
 The context for the literal coder is 2^(lc) long. The longer it is, the
 better the statistics, but also the slower it adapts. A tradeoff, which
 is why 3 or 4 is reccommended.
 */

 /*

 Discoveries:

  INODES:
     Best LZMA for raw_inotab_inode(40->48): pb0 lp0 lc0
     Best LZMA for raw_root_inode(28->32): pb0 lp0 lc0

     Start LZMA(rootdir, 736 bytes)
     Yay result with pb0 lp0 lc0: 218
     Yay result with pb0 lp0 lc1: 217
     Best LZMA for rootdir(736->217): pb0 lp0 lc1

     Start LZMA(inotab, 379112 bytes)
     Yay result with pb0 lp0 lc0: 24504
     Best LZMA for inotab(379112->24504): pb0 lp0 lc0

  BLKTAB:
     Best LZMA for raw_blktab(10068->2940): pb2 lp2 lc0

     ---with fastbytes=128---
     Start LZMA(blktab, 12536608 bytes)
     Yay result with pb0 lp0 lc0: 1386141
     Yay result with pb0 lp1 lc0: 1308137
     Yay result with pb0 lp2 lc0: 1305403
     Yay result with pb0 lp3 lc0: 1303072
     Yay result with pb1 lp1 lc0: 1238990
     Yay result with pb1 lp2 lc0: 1227973
     Yay result with pb1 lp3 lc0: 1221205
     Yay result with pb2 lp1 lc0: 1197035
     Yay result with pb2 lp2 lc0: 1188979
     Yay result with pb2 lp3 lc0: 1184531
     Yay result with pb3 lp1 lc0: 1183866
     Yay result with pb3 lp2 lc0: 1172994
     Yay result with pb3 lp3 lc0: 1169048
     Best LZMA for blktab(12536608->1169048): pb3 lp3 lc0

     It seems, lc=0 and pb=lp=N is a wise choice,
     where N is 2 for packed blktab and 3 for unpacked.

  FBLOCKS:
     For SPC sound+code data, the best results
      are between:
       pb0 lp0 lc0 (10%)
       pb0 lp0 lc1 (90%)
      For inotab, these were observed:
       pb1 lp0 lc1
       pb2 lp0 lc0
       pb1 lp1 lc0
       pb3 lp1 lc0
       pb1 lp2 lc0
       pb2 lp1 lc0

     For C source code data, the best results
      are between:
       pb1 lp0 lc3 (10%)
       pb0 lp0 lc3 (90%)
      Occasionally:
       pb0 lp1 lc0
       pb0 lp0 lc3 (mostly)
       pb0 lp0 lc2
       pb0 lp0 lc4
      Occasionally 2:
       pb0 lp0 lc8
       pb0 lp0 lc4

     BUT:
     Best LZMA for fblock(204944->192060): pb0 lp4 lc8 -- surprise! (INOTAB PROBABLY)

 */

 static UInt32 SelectDictionarySizeFor(unsigned datasize)
 {
    #if 1
     if(datasize >= (1 << 30U)) return 1 << 30U;
     return datasize;
    #else
 #ifdef __GNUC__
     /* gnu c can optimize this switch statement into a fast binary
      * search, but it cannot do so for the list of the if statements.
      */
     switch(datasize)
     {
         case 0 ... 512 : return 512;
         case 513 ... 1024: return 2048;
         case 1025 ... 4096: return 8192;
         case 4097 ... 16384: return 32768;
         case 16385 ... 65536: return 528288;
         case 65537 ... 528288: return 1048576*4;
         case 528289 ... 786432: return 1048576*16;
         default: return 1048576*32;
     }
 #else
     if(datasize <= 512) return 512;
     if(datasize <= 1024) return 1024;
     if(datasize <= 4096) return 4096;
     if(datasize <= 16384) return 32768;
     if(datasize <= 65536) return 528288;
     if(datasize <= 528288) return 1048576*4;
     if(datasize <= 786432) reutrn 1048576*16;
     return 32*1048576;
 #endif
    #endif
 }

 static void *SzAlloc(void*, size_t size)
     { return new unsigned char[size]; }
 static void SzFree(void*, void *address)
     { unsigned char*a = (unsigned char*)address; delete[] a; }
 static ISzAlloc LZMAalloc = { SzAlloc, SzFree };

 class MemReader: public ISeqInStream
 {
 public:
     const unsigned char* const indata;
     const size_t         inlength;
     size_t pos;
 public:
     MemReader(const unsigned char* d, size_t l)
         : ISeqInStream(), indata(d), inlength(l), pos(0)
     {
         Read = ReadMethod;
     }
     static SRes ReadMethod(void *pp, void *buf, size_t *size)
     {
         MemReader& p = *(MemReader*)pp;
         size_t rem = p.inlength-p.pos;
         size_t read = *size;
         if(read > rem) read= rem;
         std::memcpy(buf, &p.indata[p.pos], read);
         *size = read;
         p.pos += read;
         return SZ_OK;
     }
 };
 class MemWriter: public ISeqOutStream
 {
 public:
     std::vector<unsigned char> buf;
 public:
     MemWriter(): ISeqOutStream(), buf() { Write = WriteMethod; }

     static size_t WriteMethod(void*pp, const void* from, size_t size)
     {
         MemWriter& p = *(MemWriter*)pp;
         const unsigned char* i = (const unsigned char*)from;
         p.buf.insert(p.buf.end(), i, i+size);
         return size;
     }
 };

 const std::vector<unsigned char> LZMACompress(const unsigned char* data, size_t length,
     unsigned pb,
     unsigned lp,
     unsigned lc)
 {
     return LZMACompress(data,length, pb,lp,lc,
         SelectDictionarySizeFor(length));
 }

 const std::vector<unsigned char> LZMACompress(
     const unsigned char* data, size_t length,
     unsigned pb,
     unsigned lp,
     unsigned lc,
     unsigned dictionarysize)
 {
     if(!length) return std::vector<unsigned char>();

     CLzmaEncProps props;
     LzmaEncProps_Init(&props);
     props.dictSize = dictionarysize;
     props.pb       = pb;
     props.lp       = lp;
     props.lc       = lc;
     props.fb       = LZMA_NumFastBytes;
     props.mc       = LZMA_MatchFinderCycles;
     props.algo     = LZMA_AlgorithmNo;
     props.numThreads = 1;

     switch(LZMA_AlgorithmNo)
     {
         case 0: // quick: HC4
             props.btMode = 0;
             props.level = 1;
             break;
         case 1: // full: BT4
         default:
             props.level = 9;
             props.btMode       = 1;
             props.numHashBytes = 4;
             break;
     }

     CLzmaEncHandle p = LzmaEnc_Create(&LZMAalloc);
     struct AutoReleaseLzmaEnc
     {
         AutoReleaseLzmaEnc(CLzmaEncHandle pp) : p(pp) { }
         ~AutoReleaseLzmaEnc()
         { LzmaEnc_Destroy(p, &LZMAalloc, &LZMAalloc); }
         CLzmaEncHandle p;


         AutoReleaseLzmaEnc(const AutoReleaseLzmaEnc&);
         void operator=(const AutoReleaseLzmaEnc&);

     } AutoReleaser(p); // Create a destructor that ensures
     // that the CLzmaEncHandle is not leaked, even if an
     // exception happens

     int res = LzmaEnc_SetProps(p, &props);
     if(res != SZ_OK)
     {
     Error:
         return std::vector<unsigned char> ();
     }

     unsigned char propsEncoded[LZMA_PROPS_SIZE + 8];
     size_t propsSize = sizeof propsEncoded;
     res = LzmaEnc_WriteProperties(p, propsEncoded, &propsSize);
     if(res != SZ_OK) goto Error;

     MemReader is(data, length);
     MemWriter os;
     put_64(propsEncoded+LZMA_PROPS_SIZE, length);
     os.buf.insert(os.buf.end(), propsEncoded, propsEncoded+LZMA_PROPS_SIZE+8);

     res = LzmaEnc_Encode(p, &os, &is, 0, &LZMAalloc, &LZMAalloc);
     if(res != SZ_OK) goto Error;

     return os.buf;
 }

 const std::vector<unsigned char> LZMACompress(const unsigned char* data, size_t length)
 {
     return LZMACompress(data, length,
         LZMA_PosStateBits,
         LZMA_LiteralPosStateBits,
         LZMA_LiteralContextBits);
 }

 #undef RC_NORMALIZE

 const std::vector<unsigned char> LZMADeCompress
     (const unsigned char* data, size_t length, bool& ok)
 {
     if(length <= LZMA_PROPS_SIZE+8)
     {
     /*clearly_not_ok:*/
         ok = false;
         return std::vector<unsigned char> ();
     }

     uint_least64_t out_sizemax = get_64(&data[LZMA_PROPS_SIZE]);

     /*if(out_sizemax >= (size_t)~0ULL)
     {
         // cannot even allocate a vector this large.
         goto clearly_not_ok;
     }*/

     std::vector<unsigned char> result(out_sizemax);

     ELzmaStatus status;
     SizeT destlen = result.size();
     SizeT srclen = length-(LZMA_PROPS_SIZE+8);
     int res = LzmaDecode(
         &result[0], &destlen,
         &data[LZMA_PROPS_SIZE+8], &srclen,
         &data[0], LZMA_PROPS_SIZE,
         LZMA_FINISH_END,
         &status,
         &LZMAalloc);

     /*
     std::fprintf(stderr, "res=%d, status=%d, in_done=%d (buf=%d), out_done=%d (max=%d)\n",
         res,
         (int)status,
         (int)srclen, (int)length,
         (int)destlen, (int)out_sizemax);
     */

     ok = res == SZ_OK && (status == LZMA_STATUS_FINISHED_WITH_MARK
                        || status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
       && srclen == (length-(LZMA_PROPS_SIZE+8))
       && destlen == out_sizemax;
     return result;
 }

 const std::vector<unsigned char> LZMADeCompress
     (const unsigned char* data, size_t length)
 {
     bool ok_unused;
     return LZMADeCompress(data, length, ok_unused);
 }

 #if 0
 #include <stdio.h>
 int main(void)
 {
     char Buf[2048*2048];
     int s = fread(Buf,1,sizeof(Buf),stdin);
     std::vector<unsigned char> result = LZMADeCompress(std::vector<unsigned char>(Buf,Buf+s));
     fwrite(&result[0],1,result.size(),stdout);
 }
 #endif

 const std::vector<unsigned char> LZMACompressHeavy(const unsigned char* data, size_t length,
     const char* why)
 {
     std::vector<unsigned char> bestresult;
     char best[512];
     bool first = true;
     if(LZMA_verbose >= 1)
     {
         std::fprintf(stderr, "Start LZMA(%s, %u bytes)\n", why, (unsigned)length);
         std::fflush(stderr);
     }

     unsigned minresultsize=0, maxresultsize=0;
     unsigned sizemap[5][5][9] = {{{0}}};

     bool use_small_dict = false;

     for(int compress_mode = 0; compress_mode < (5*5*9); ++compress_mode)
     {
         const unsigned pb = compress_mode % 5;
         const unsigned lp = (compress_mode / 5) % 5;
         const unsigned lc = (compress_mode / 5 / 5) % 9;

         std::vector<unsigned char>
             result = use_small_dict
                 ? LZMACompress(data,length,pb,lp,lc, 4096)
                 : LZMACompress(data,length,pb,lp,lc);

        {
         sizemap[pb][lp][lc] = result.size();

         if(first || result.size() < minresultsize) minresultsize = result.size();
         if(first || result.size() > maxresultsize) maxresultsize = result.size();
         if(first || result.size() < bestresult.size())
         {
             sprintf(best, "pb%u lp%u lc%u",
                 pb,lp,lc);
             if(LZMA_verbose >= 1)
                 std::fprintf(stderr, "Yay result with %s: %u\n", best, (unsigned)result.size());
             bestresult.swap(result);
             first = false;
         }
         else
         {
             char tmp[512];
             sprintf(tmp, "pb%u lp%u lc%u",
                 pb,lp,lc);
             if(LZMA_verbose >= 2)
                 std::fprintf(stderr, "Blaa result with %s: %u\n", tmp, (unsigned)result.size());
         }
         if(LZMA_verbose >= 2)
         {
             std::fprintf(stderr, "%*s\n", (5 * (4+9+2)), "");
             /* Visualize the size map: */
             std::string lines[6] = {};
             for(unsigned pbt = 0; pbt <= 4; ++pbt)
             {
                 char buf[64]; sprintf(buf, "pb%u:%11s", pbt,"");
                 lines[0] += buf;

                 for(unsigned lpt = 0; lpt <= 4; ++lpt)
                 {
                     char buf[64]; sprintf(buf, "lp%u:", lpt);
                     std::string line;
                     line += buf;
                     for(unsigned lct = 0; lct <= 8; ++lct)
                     {
                         unsigned s = sizemap[pbt][lpt][lct];
                         char c;
                         if(!s) c = '.';
                         else c = 'a' + ('z'-'a'+1)
                                      * (s - minresultsize)
                                      / (maxresultsize-minresultsize+1);
                         line += c;
                     }
                     lines[1 + lpt] += line + "  ";
                 }
             }
             for(unsigned a=0; a<6; ++a) std::fprintf(stderr, "%s\n", lines[a].c_str());
             std::fprintf(stderr, "\33[%uA", 7);
         }
        }
     }
     if(LZMA_verbose >= 2)
         std::fprintf(stderr, "\n\n\n\n\n\n\n\n");

     if(LZMA_verbose >= 1)
     {
         std::fprintf(stderr, "Best LZMA for %s(%u->%u): %s\n",
             why,
             (unsigned)length,
             (unsigned)bestresult.size(),
             best);
     }
     std::fflush(stderr);
     return bestresult;
 }

 /*

 The LZMA compression power is controlled by these parameters:
   Dictionary size (we use the maximum)
   Compression algorithm (we use BT4, the heaviest available)
   Number of fast bytes (we use the maximum)
   pb (0..4), lp (0..4) and lc (0..8) -- the effect of these depends on data.

 Since the only parameters whose effect depends on the data to be compressed
 are the three (pb, lp, lc), the "auto" and "full" compression algorithms
 only try to find the optimal values for those.

 The "auto" LZMA compression algorithm is based on these two assumptions:
   - It is possible to find the best value for each component (pb, lp, lc)
     by individually testing the most effective one of them while keeping
     the others static.
     I.e.,    step 1: pb=<find best>, lp=0, lc=0
              step 2: pb=<use result>, lp=<find best>, lc=0
              step 3: pb=<use result>, lp=<use result>, lc=<find best>
              final: pb=<use result>, lp=<use result>, lc=<use result>
   - That the effect of each of these components forms a parabolic function
     that has a starting point, ending point, and possibly a mountain or a
     valley somewhere in the middle, but never a valley _and_ a mountain, nor
     two valleys nor two mountains.
 These assumptions are not always true, but it gets very close to the optimum.

 The ParabolicFinder class below finds the lowest point in a parabolic curve
 with a small number of tests, determining the shape of the curve by sampling
 a few cue values as needed.

 The algorithm is like this:
   Never check any value more than once.
   Check the first two values.
   If they differ, then check the last in sequence.
     If not, then check everything in sequential order.
   If the first two values and the last form an ascending sequence, accept the first value.
     If they form a descending sequence, start Focus Mode
     such that the focus lower limit is index 2 and upper
     limit is the second last. Then check the second last.
       If they don't, then check the third value of sequence,
       and everything else in sequential order.
   If in Focus Mode, check if being in the lower or upper end of the focus.
     If in upper end, check if the current value is bigger than the next one.
       If it is, end the process, because the smallest value has already been found.
         If not, next check the value at focus_low, and increase focus_low.
     If in lower end, check if the current value is bigger than the previous one.
       If it is, end the process, because the smallest value has already been found.
         If not, next check the value at focus_high, and decrease focus_high.

 For any sample space, it generally does 3 tests, but if it detects a curve
 forming a valley, it may do more.

 Note that ParabolicFinder does not _indicate_ the lowest value. It leaves that
 to the caller. It just stops searching when it thinks that no lower value will
 be found.

 Note: The effect of pb, lp and lc depend also on the dictionary size setting
 and compression algorithm. You cannot estimate the optimal value for those
 parameters reliably using different compression settings than in the actual case.

 */
 class ParabolicFinder
 {
 public:
     enum QueryState      { Unknown, Pending, Done };
     enum InstructionType { HereYouGo, WaitingResults, End };
 public:
     ParabolicFinder(unsigned Start, unsigned End)
         : begin(Start),
           results(End-Start+1, 0),
           state  (End-Start+1, Unknown),
           LeftRightSwap(false)
     {
     }

     InstructionType GetNextInstruction(unsigned& attempt)
     {
       InstructionType result = End;

       const int Last  = begin + results.size()-1;

       #define RetIns(n) do{ result = (n); goto DoneCrit; }while(0)
       #define RetVal(n) do{ state[attempt = (n)] = Pending; RetIns(HereYouGo); }while(0)

       {
         /*
         std::fprintf(stderr, "NextInstruction...");
         for(unsigned a=0; a<state.size(); ++a)
             std::fprintf(stderr, " %u=%s", a,
                 state[a]==Unknown?"??"
                :state[a]==Done?"Ok"
                :"..");
         std::fprintf(stderr, "\n");*/

         if(CountUnknown() == 0)
         {
             // No unassigned slots remain. Don't need more workers.
             RetIns(End);
         }

         if(1) // scope for local variables
         {
             // Alternate which side to do next if both are available.
             bool LeftSideFirst = LeftRightSwap ^= 1;

             // Check left side descend type
             int LeftSideNext = -1; bool LeftSideDoable = false;
             for(int c=0; c<=Last; ++c)
                 switch(state[c])
                 {
                     case Unknown: LeftSideNext = c; LeftSideDoable = true; goto ExitLeftSideFor;
                     case Pending: LeftSideNext = c; LeftSideDoable = false; goto ExitLeftSideFor;
                     case Done:
                         if(c == 0) continue;
                         if(results[c] > results[c-1])
                         {
                             // Left side stopped descending.
                             if(state[Last] != Unknown) RetIns(End);
                             goto ExitLeftSideFor;
                         }
                         else if(results[c] == results[c-1])
                             LeftSideFirst = true;
                 }
         ExitLeftSideFor: ;

             // Check right side descend type
             int RightSideNext = -1; bool RightSideDoable = false;
             for(int c=Last; c>=0; --c)
                 switch(state[c])
                 {
                     case Unknown: RightSideNext = c; RightSideDoable = true; goto ExitRightSideFor;
                     case Pending: RightSideNext = c; RightSideDoable = false; goto ExitRightSideFor;
                     case Done:
                         if(c == Last) continue;
                         if(results[c] > results[c+1])
                         {
                             // Right side stopped descending.
                             if(state[0] != Unknown) RetIns(End);
                             goto ExitRightSideFor;
                         }
                         else if(results[c] == results[c+1])
                             LeftSideFirst = false;
                 }
         ExitRightSideFor: ;

             if(!LeftSideFirst)
                  { std::swap(LeftSideDoable, RightSideDoable);
                    std::swap(LeftSideNext,   RightSideNext); }

             if(LeftSideDoable) RetVal(LeftSideNext);
             if(RightSideDoable) RetVal(RightSideNext);

             // If we have excess threads and work to do, give them something
             if(CountHandled() > 2) if(LeftSideNext >= 0) RetVal(LeftSideNext);
             if(CountHandled() > 3) if(RightSideNext >= 0) RetVal(RightSideNext);

             RetIns(WaitingResults);
         }

       DoneCrit: ;
       }
       return result;
     }

     void GotResult(unsigned attempt, unsigned value)
     {
       {
         results[attempt] = value;
         state[attempt]   = Done;
       }
     }

 private:
     unsigned CountUnknown() const
     {
         unsigned result=0;
         for(size_t a=0, b=state.size(); a<b; ++a)
             if(state[a] == Unknown) ++result;
         return result;
     }
     unsigned CountHandled() const
     {
         return state.size() - CountUnknown();
     }
 private:
     unsigned begin;
     std::vector<unsigned>   results;
     std::vector<QueryState> state;
     bool LeftRightSwap;
 };

 static void LZMACompressAutoHelper(
     const unsigned char* data, size_t length,
     bool use_small_dict,
     const char* why,
     unsigned& pb, unsigned& lp, unsigned& lc,
     unsigned& which_iterate, ParabolicFinder& finder,
     bool&first, std::vector<unsigned char>& bestresult)
 {
     for(;;)
     {
         unsigned t=0;
         switch(finder.GetNextInstruction(t))
         {
             case ParabolicFinder::End:
                 return;
             case ParabolicFinder::HereYouGo:
                 break;
             case ParabolicFinder::WaitingResults:
                 ForceSwitchThread();
                 continue;
         }

         const unsigned try_pb = &which_iterate == &pb ? t : pb;
         const unsigned try_lp = &which_iterate == &lp ? t : lp;
         const unsigned try_lc = &which_iterate == &lc ? t : lc;

         if(LZMA_verbose >= 2)
             std::fprintf(stderr, "%s:Trying pb%u lp%u lc%u\n",
                 why,try_pb,try_lp,try_lc);

         std::vector<unsigned char> result = use_small_dict
             ? LZMACompress(data,length,try_pb,try_lp,try_lc, 65536)
             : LZMACompress(data,length,try_pb,try_lp,try_lc);

         if(LZMA_verbose >= 2)
             std::fprintf(stderr, "%s:       pb%u lp%u lc%u -> %u\n",
                 why,try_pb,try_lp,try_lc, (unsigned)result.size());

         finder.GotResult(t, result.size());

       {
         if(first || result.size() <= bestresult.size())
         {
             first    = false;
             bestresult.swap(result);
             which_iterate = t;
         }
       }
     }
 }


 const std::vector<unsigned char> LZMACompressAuto(const unsigned char* data, size_t length,
     const char* why)
 {
     if(LZMA_verbose >= 1)
     {
         std::fprintf(stderr, "Start LZMA(%s, %u bytes)\n", why, (unsigned)length);
         std::fflush(stderr);
     }

     unsigned backup_algorithm = LZMA_AlgorithmNo;

     bool use_small_dict = false;//length >= 1048576;

     if(use_small_dict) LZMA_AlgorithmNo = 0;

     unsigned pb=0, lp=0, lc=0;

     std::vector<unsigned char> bestresult;

   {
     ParabolicFinder pb_finder(0,4);
     ParabolicFinder lp_finder(0,4);
     ParabolicFinder lc_finder(0,8);
     bool first=true;
    {
     /* Using parallelism here. However, we need barriers after
      * each step, because the comparisons are made based on the
      * result size, and if the pb/lp/lc values other than the
      * one being focused change, it won't work. Only one parameter
      * must change in the loop.
      */

     /* step 1: find best value in pb axis */
     LZMACompressAutoHelper(data,length,use_small_dict,why,
         pb, lp, lc,
         pb, pb_finder, first, bestresult);


     lp_finder.GotResult(lp, bestresult.size());

     /* step 2: find best value in lp axis */
     LZMACompressAutoHelper(data,length,use_small_dict,why,
         pb, lp, lc,
         lp, lp_finder, first, bestresult);

     lc_finder.GotResult(lc, bestresult.size());

     /* step 3: find best value in lc axis */
     LZMACompressAutoHelper(data,length,use_small_dict,why,
         pb, lp, lc,
         lc, lc_finder, first, bestresult);
    }
   }

     if(use_small_dict || LZMA_AlgorithmNo != backup_algorithm)
     {
         LZMA_AlgorithmNo = backup_algorithm;
         bestresult = LZMACompress(data,length, pb,lp,lc);
     }

     if(LZMA_verbose >= 1)
     {
         std::fprintf(stderr, "Best LZMA for %s(%u->%u): pb%u lp%u lc%u\n",
             why,
             (unsigned)length,
             (unsigned)bestresult.size(),
             pb,lp,lc);
     }
     std::fflush(stderr);

     return bestresult;
 }

 const std::vector<unsigned char>
     DoLZMACompress(int HeavyLevel,
         const unsigned char* data, size_t length,
         const char* why)
 {
     if(HeavyLevel >= 2) return LZMACompressHeavy(data,length, why);
     if(HeavyLevel >= 1) return LZMACompressAuto(data,length, why);
     return LZMACompress(data,length);
 }

 extern "C" {

 /**
  * Compress a buffer with lzma
  * Don't copy the result back if it is too large.
  * @param in a pointer to the buffer
  * @param in_len the length in bytes
  * @param out a pointer to a buffer of at least size in_len
  * @param out_len a pointer to the compressed length of in
  */

 void do_lzma_compress(char *in, int in_len, char *out, int *out_len) {
 	std::vector<unsigned char> result;
 	result = LZMACompress(std::vector<unsigned char>(in, in + in_len));
 	*out_len = result.size();
 	if (*out_len < in_len)
 		std::memcpy(out, &result[0], *out_len);
 }

 void do_lzma_uncompress(char *dst, int dst_len, char *src, int src_len) {
 	std::vector<unsigned char> result;
 	result = LZMADeCompress(std::vector<unsigned char>(src, src + src_len));
 	if (result.size() <= (SizeT)dst_len)
 		std::memcpy(dst, &result[0], result.size());
 	else
 	{
 		fprintf(stderr, "Not copying %d bytes to %d-byte buffer!\n",
 			(unsigned int)result.size(), dst_len);
 		exit(1);
 	}
 }

 }