什么是长度代码超过错误以及为什么我会收到此错误?

问题描述 投票:0回答:1

我正在开发一个改进的程序,尝试恢复PKZIP密钥,以支持CUDA,因此我集成了infgen的解压缩算法来解压缩字节数组而不是文件流,并查找特定的字节序列,在这种情况下

哑剧

,如果找到这些,则说明密钥是正确的。

该程序在以下假设下运行:

  • 给出的字节流不是最终块的
  • 区块是动态的
  • 区块头在128字节范围内

因此,对于这个测试,我只是读取前 128 个字节的文件,没有 zip 标头,然后尝试解压缩它并查看它是否有效,但不幸的是,动态函数由于某种原因返回

-4
,但如果我运行
 ./infgen -dd file
使用相同的文件我到达目标字节

这是我的代码:

#define IG_INCOMPLETE 1
#define IG_OK 0
#define IG_BLOCK_TYPE_ERR -1
#define IG_STORED_LENGTH_ERR -2
#define IG_TOO_MANY_CODES_ERR -3
#define IG_CODE_LENGTHS_CODE_OVER_ERR -4
#define IG_CODE_LENGTHS_CODE_UNDER_ERR -5
#define IG_REPEAT_NO_FIRST_ERR -6
#define IG_REPEAT_TOO_MANY_ERR -7
#define IG_LITLEN_CODE_OVER_ERR -8
#define IG_LITLEN_CODE_UNDER_ERR -9
#define IG_DIST_CODE_OVER_ERR -10
#define IG_DIST_CODE_UNDER_ERR -11
#define IG_NO_END_CODE_ERR -12
#define IG_BAD_CODE_ERR -13

struct state {
    int info;
    int data;
    int tree;
    int draw;
    unsigned max;
    unsigned win;
    // Input data buffer.
    const uint8_t *in;     // Input data array
    size_t in_size;             // Size of the input data array
    size_t in_pos;              // Current read position in input data array
    // Bit and chunk processing.
    int bitcnt;
    int bitbuf;
    int seqs;
    short seq[MAXSEQS];
    char len[MAXSEQS];
    // Block and total statistics.
    unsigned reach;
    unsigned headlen;
    uintmax_t blockin;
    uintmax_t blockout;
    uintmax_t symbols;
    uintmax_t matches;
    uintmax_t matchlen;
    uintmax_t litbits;
    uintmax_t matbits;
    uintmax_t blocks;
    uintmax_t inbits;
    uintmax_t outbytes;
    uintmax_t symbnum;
    uintmax_t matchnum;
    uintmax_t matchtot;
    uintmax_t littot;
};

int get(struct state *s) {
    if (s->in_pos >= s->in_size) return -1;  // End of input array
    return s->in[s->in_pos++];           // Return next byte and increment position
}

int bits(struct state *s, int need) {
    long val = s->bitbuf;
    while (s->bitcnt < need) {
        int next = get(s);
        if (next == -1)
            return -1;                 // out of input
        val |= (long)(next) << s->bitcnt;       // load eight bits
        s->bitcnt += 8;
    }
    s->bitbuf = (int)(val >> need);
    s->bitcnt -= need;
    s->blockin += need;
    val &= (1L << need) - 1;
    if (s->draw > 1 && s->seqs < MAXSEQS) {
        s->seq[s->seqs] = val;
        s->len[s->seqs] = need;
        s->seqs++;
    }
    return (int)val;
}

// Show and accumulate statistics at end of block.
void end(struct state *s) {
    s->blocks++;
    s->inbits += s->blockin;
    s->outbytes += s->blockout;
    s->symbnum += s->symbols;
}

struct huffman {
    short *count;       // number of symbols of each length
    short *symbol;      // canonically ordered symbols
};

int decode(struct state *s, struct huffman *h) {
    int bitbuf = s->bitbuf;     // bits to decode from the input
    int left = s->bitcnt;       // number of bits in bitbuf
    int len = 1;                // length of code in consideration
    int code = 0;               // len bits pulled from bitbuf
    int first = 0;              // first code of length len
    int index = 0;              // index of that code in the symbol table
    short *next = h->count + 1; // pointer to number of codes of next length
    for (;;) {
        while (left--) {
            code |= bitbuf & 1;
            bitbuf >>= 1;
            int count = *next++;
            if (code < first + count) {
                if (s->draw > 1 && s->seqs < MAXSEQS) {
                    int rev = 0;
                    for (int i = 0; i < len; i++)
                        rev = (rev << 1) | ((code >> i) & 1);
                    s->seq[s->seqs] = rev;
                    s->len[s->seqs] = len;
                    s->seqs++;
                }
                s->bitbuf = bitbuf;
                s->bitcnt = (s->bitcnt - len) & 7;
                s->blockin += len;
                return h->symbol[index + (code - first)];
            }
            index += count;
            first += count;
            first <<= 1;
            code <<= 1;
            len++;
        }
        left = (MAXBITS+1) - len;
        if (left == 0)
            break;
        bitbuf = get(s);
        if (bitbuf == -1)
            return -1;         // out of input
        if (left > 8)
            left = 8;
    }
    return IG_BAD_CODE_ERR;             // ran out of codes
}

int construct(struct huffman *h, short *length, int n) {
    // Count the number of codes of each length.
    for (int len = 0; len <= MAXBITS; len++)
        h->count[len] = 0;
    for (int symbol = 0; symbol < n; symbol++)
        (h->count[length[symbol]])++;   // assumes lengths are within bounds
    if (h->count[0] == n)               // no codes!
        return 0;                       // complete, but decode() will fail

    // Check for an over-subscribed or incomplete set of lengths.
    int left = 1;                       // one possible code of zero length
    for (int len = 1; len <= MAXBITS; len++) {
        left <<= 1;                     // one more bit, double codes left
        left -= h->count[len];          // deduct count from possible codes
        if (left < 0)
            return left;                // over-subscribed--return negative
    }                                   // left > 0 means incomplete
    short offs[MAXBITS+1];      // offsets in symbol table for each length
    offs[1] = 0;
    for (int len = 1; len < MAXBITS; len++)
        offs[len + 1] = offs[len] + h->count[len];
    for (int symbol = 0; symbol < n; symbol++)
        if (length[symbol] != 0)
            h->symbol[offs[length[symbol]]++] = symbol;
    return left;
}
// Decode literal/length and distance codes until an end-of-block code.
int codes(struct state *s,struct huffman *lencode,struct huffman *distcode, int *found,int *trak) {
    static const short lens[29] = { // size base for length codes 257..285
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
    static const short lext[29] = { // extra bits for length codes 257..285
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
    static const short dists[30] = { // offset base for distance codes 0..29
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
        8193, 12289, 16385, 24577};
    static const short dext[30] = { // extra bits for distance codes 0..29
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
        12, 12, 13, 13};
    int symbol;
    do {
        uintmax_t beg = s->blockin;
        symbol = decode(s, lencode);
        s->symbols++;
        if (symbol < 0)
            return symbol;              // invalid symbol
        if (symbol < 256) {             // literal: symbol is the byte
            // Write out the literal.
            if (symbol == 0x4D && trak ==0){
                *trak++;
            }
            if(symbol == 0x49 && trak == 1){
                *trak++;
            } 
            if (symbol == 0x4D && trak ==2){
                *trak++;
            }
            if(symbol == 0x45 && trak == 3){
                *found = 1;
                return IG_OK;
            } 
            s->blockout += 1;
            if (s->max < s->win)
                s->max++;
            s->litbits += s->blockin - beg;
        }
        else if (symbol > 256) {        // length
            // Get and compute length.
            if (symbol >= MAXLCODES)
                return IG_BAD_CODE_ERR;         // invalid fixed code
            symbol -= 257;
            int len = lens[symbol] + bits(s, lext[symbol]);

            // Get distance.
            symbol = decode(s, distcode);
            if (symbol < 0)
                return symbol;                  // invalid symbol
            unsigned dist = dists[symbol] + bits(s, dext[symbol]);
            if (dist > s->max) {
                s->max = MAXDIST;       // issue warning only once
            }
            // Update state for match.
            if (dist > s->blockout) {
                dist -= s->blockout;
                if (dist > s->reach)
                    s->reach = dist;
            }
            s->blockout += len;
            s->matches++;
            s->matchlen += len;
            if (s->max < s->win) {
                if (len > (int)(s->win - s->max))
                    s->max = s->win;
                else
                    s->max += len;
            }
            s->matbits += s->blockin - beg;
        }
    } while (symbol != 256);            // end of block symbol
    s->symbols--;
    // Done with a valid fixed or dynamic block.
    return IG_OK;
}

// Process a dynamic codes block.
int dynamic(struct state *s, int *found, int* trak) {
    // Get number of lengths in each table, check lengths.
    int nlen = bits(s, 5) + 257;
    int ndist = bits(s, 5) + 1;
    int ncode = bits(s, 4) + 4;
    if (nlen > MAXLCODES || ndist > MAXDCODES)
        return IG_TOO_MANY_CODES_ERR;       // bad counts

    // Read code length code lengths (really), missing lengths are zero.
    short lengths[MAXCODES];            // descriptor code lengths
    static const short order[19] =      // permutation of code length codes
        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
    int index;
    for (index = 0; index < ncode; index++) {
        int len = bits(s, 3);
        lengths[order[index]] = len;
    }
    for (; index < 19; index++)
        lengths[order[index]] = 0;
    // Build huffman table for code lengths codes (use lencode temporarily).
    short lencnt[MAXBITS+1], lensym[MAXLCODES];         // lencode memory
    struct huffman lencode = {lencnt, lensym};          // length code
    int err = construct(&lencode, lengths, 19);
    if (err < 0)
        return IG_CODE_LENGTHS_CODE_OVER_ERR;   // oversubscribed
    else if (err > 0)
        return IG_CODE_LENGTHS_CODE_UNDER_ERR;  // incomplete

    // Read length/literal and distance code length tables.
    index = 0;
    while (index < nlen + ndist) {
        int symbol = decode(s, &lencode);
        if (symbol < 0)
            return symbol;              // invalid symbol
        if (symbol < 16) {              // length in 0..15
            lengths[index++] = symbol;
        }
        else {                          // repeat instruction
            int len = -1;               // assume repeating zeros
            if (symbol == 16) {         // repeat last length 3..6 times
                if (index == 0)
                    return IG_REPEAT_NO_FIRST_ERR;  // no last length!
                len = lengths[index - 1];           // last length
                symbol = 3 + bits(s, 2);
            }
            else if (symbol == 17)      // repeat zero 3..10 times
                symbol = 3 + bits(s, 3);
            else                        // == 18, repeat zero 11..138 times
                symbol = 11 + bits(s, 7);
            if (index + symbol > nlen + ndist)
                return IG_REPEAT_TOO_MANY_ERR;  // too many lengths!
            if (len == -1)
                len = 0;
            while (symbol--)            // repeat last or zero symbol times
                lengths[index++] = len;
        }
    }
    if (lengths[256] == 0)
        return IG_NO_END_CODE_ERR;
    err = construct(&lencode, lengths, nlen);
    if (err < 0)
        return IG_LITLEN_CODE_OVER_ERR;
    else if (err > 0 && nlen - lencode.count[0] != 1)
        return IG_LITLEN_CODE_UNDER_ERR;    // incomplete with one code ok.
    short distcnt[MAXBITS+1], distsym[MAXDCODES];       // distcode memory
    struct huffman distcode = {distcnt, distsym};       // distance code
    err = construct(&distcode, lengths + nlen, ndist);
    if (err < 0)
        return IG_DIST_CODE_OVER_ERR;
    else if (err > 0 && ndist - distcode.count[0] != 1)
        return IG_DIST_CODE_UNDER_ERR;      // incomplete with one code ok
    s->headlen = s->blockin;
    return codes(s, &lencode, &distcode, found, trak);
}

int infgen(struct state *s, int *found, int * trak) {
    // Initialize input state.
    s->bitcnt = 0;
    s->bitbuf = 0;
    s->seqs = 0;
    s->max = 0;
    // Initialize statistics.
    s->blocks = 0;
    s->inbits = 0;
    s->outbytes = 0;
    s->symbnum = 0;
    s->matchnum = 0;
    s->matchtot = 0;
    s->littot = 0;

    // Return if bits() or decode() tries to read past available input.
    int err = 0;
    if (*found != 0) {  // if came back here via longjmp()
        err = IG_INCOMPLETE;     // then skip do-loop, return error
    } else {
        // Process blocks until last block or error.
        int last;
        do {
            s->reach = 0;
            s->blockin = 0;
            s->blockout = 0;
            s->symbols = 0;
            s->matches = 0;
            s->matchlen = 0;
            s->litbits = 0;
            s->matbits = 0;

            last = bits(s, 1);  // one if last block
            int type = bits(s, 2);  // block type 0..3
            printf("type is %d\n",type);
            switch (type) {
            case 0:
                err = IG_BLOCK_TYPE_ERR;
                break;
            case 1:
                err = IG_BLOCK_TYPE_ERR;
                break;
            case 2:
                err = dynamic(s,found);
                break;
            default:  // case 3
                err = IG_BLOCK_TYPE_ERR;
            }

            if (err != IG_OK) {
                break;  // return with error
            }
        } while (!last);
    }
    // Return error state.
    return err;
}


void clean(uint8_t *input, size_t length, int *found){
struct state s;
    s.info = 0;
    s.data = 1;
    s.tree = 1;
    s.draw = 0;
    s.win = MAXDIST;
    s.in_pos = 0;

    // Set input to the provided byte array
    s.in = input;
    s.in_size = length; // Assuming you have a way to track the length of the input array

    // Process the deflate data
    int ret =0;
    do {
        // Read bytes and process them as deflate data
        int ch = get(&s);
        //printf("%d\n",*found);
        if (ch == -1) {
            ret = -1;
            break; // End of input
        }
        // Handle deflate data processing
        ret = infgen(&s,found);
        printf("%d\n",ret);
        if (ret < 0) {
            break;
        }
    } while (ret > 0);

    // Finalize processing
    if (ret == 0) {
    }
    
}


int main(int argc, char **argv){
    int     cryptlength, plainlength;
    time_t      now;
    int     i,x,y,z, offset=12;
    char        *cryptname=NULL, *plainname=NULL;
    size_t start,port;
    uint16_t p;
    uint8_t ch;
    for( i = 1; i < argc; i++ )
    {
        if( !strcmp( "-C", argv[i] ) )
        { /* Check byte */
            ch = atoi( argv[++i] );
        }
        if( !strcmp( "-o", argv[i] ) )
        { /* offset */
            offset += atoi( argv[++i] );
        }
        else if( !strcmp( "-c", argv[i] ) )
        { /* ciphertext filename */
            cryptname = argv[++i];
        }
        else if( !strcmp( "-p", argv[i] ) )
        { /* plaintext filename */
            plainname = argv[++i];
        }
    }
    if( !cryptname || !plainname )
    {
        printf("Please pass ciphertext with -c first then plaintext with -p\n");
        exit(1);
    }

    if(cryptname )
    {
        cryptlength = get_size(cryptname);
        ciphertext = malloc( cryptlength );
    }
    if(plainname )
    {
        plainlength = get_size(plainname);
        plaintext = malloc( plainlength + offset );
    }
    
    if( plainlength > cryptlength-offset )
    {
        x =readdata(plainname,plaintext,plainlength);
        int frt =0;
        printf("frt is %d\n",frt);
        clean(plaintext,plainlength,&frt);
        if(frt == 1){
            printf("its working correctly\n");
            exit(0);
        }
        fprintf( stderr, "Warning! Plaintext is longer than Ciphertext!\n" );
    }
    if( plainlength < 1 )
    {
        fprintf( stderr, "Plaintext must be at least 3 bytes! Aborting.\n" );
        exit(1);
    }

    //cryptlength = plainlength + offset;
    if( !ciphertext || !plaintext )
    {
        fprintf( stderr, "Not enough memory!\n" );
        exit(1);
    }
    x=0;
    x = readdata(cryptname,ciphertext,cryptlength);
    if(x != cryptlength){
        printf("Couldn't read ciphertext.. exiting\n");
        exit(1);
    }
    key2i = malloc( sizeof(uint32_t)*KEY2SPACE );
    x =0;
    x =readdata(plainname,&plaintext[offset],plainlength);
    if(x != plainlength){
        printf("Couldn't read plaintext.. exiting\n");
        exit(1);
    }
    fprintf( stderr, "Finished on %s", ctime(&now) );
    exit(0);    
}

现在,我尝试了我所知道的一切,但减压很难理解,所以我不明白为什么会发生这种情况。

还有直播:

\BD\C7\A4H\A4\BB/\B3z\87\DE33\E8\FEZk\CDH \D1Z>\FD\A5\EE\A2e]D\9C\E3\FE9   \91\BA\AC\F3\FF7(\96\B5\87\FF\FD\FF?\E8\EF.݊\FF\FD\E7\FD\F6\FF\F3\8C\FD\A7\A4\C3C\FF\C1\D0\FFp\F8\F8\E7\BF\FF\E1\D0\FBׄe\EC\FF\F7\DF\DF?nDT̸/~x\92\9DL\DF\DC̾\80\A3\E7{N
c gzip deflate pkzip
1个回答
0
投票

这一行:

`return left;`

在构造函数中执行。意味着给定的

HDIST
HLEN
对于给定的流来说是不正确的,因此会出现错误
LENGTH CODE OVER
。 在 deflate 压缩流中,前 2 个字节包含 17 位块头中的 16 位,因此在上面给出的代码中,问题是

int ch = get(&s);

从流中提取字节并重置光标或将这些位放入缓冲区然后调用

ret = infgen(&s,found);

不会从第二个字节开始处理该流。

删除这个:

int ch = get(&s);
//printf("%d\n",*found);
if (ch == -1) {
   ret = -1;
   break; // End of input
   }

解决了问题并让我有机会改进代码以仅处理一个块。

© www.soinside.com 2019 - 2024. All rights reserved.