2012-11-18 16:27:29Morris

[演算法][程式作業] huffman code 壓縮與解壓縮




根據Huffman code 編碼法

寫 一壓縮程式  與 一解壓縮程式 

能將任一檔案給定檔名的檔案壓縮後存成  另一新檔

此新檔案能再解壓縮還原成原檔案

並 附note檔 說明 如何儲存 codeword tree. 處理Header 等等


用中序的做法存 codeword tree,0就是葉節點,1就是內節點。

header 不必處理,只要用二元檔的方式讀進來,就不用考慮 header 得問題。

上次學長們好幾個做出非遞迴的 merge sort 實在讓我大開眼界,原來學長們那麼厲害。


#include <cstdio>
#include <vector>
#include <queue>
#include <iostream>
using namespace std;
struct ele {
    int nd;
    long long v;
};
struct NODE {
    int l, r;
    long long v;
    unsigned char c;
};
struct cmp {
    bool operator() (ele a, ele b) {
        return a.v > b.v;
    }
};
NODE ND[1024];
unsigned char code[1000];
unsigned char mapcode[512][515];
FILE *fin, *fout;
int push_buf(int bit) {
    static int idx = 7, now = 0;
    static unsigned char bufcode = 0;
    bufcode |= bit<<idx;
    idx--;
    if(idx < 0) {
        fwrite(&bufcode, 1, 1, fout);
        idx = 7;
        bufcode = 0;
        now++;
    }
}
void dfs(int nd, int dep) {
    if(ND[nd].l == 0 && ND[nd].r == 0) {
        code[dep] = '\0';
        for(int i = 0; i <= dep; i++)
            mapcode[ND[nd].c][i] = code[i];
        push_buf(0);
        for(int i = 7; i >= 0; i--)
            push_buf((ND[nd].c>>i)&1);
        return;
    }
    push_buf(1);
    code[dep] = '0';
    dfs(ND[nd].l, dep+1);
    code[dep] = '1';
    dfs(ND[nd].r, dep+1);
}
int main(int argc, char* argv[]) {
    if(argc != 2) {
        puts("argc error");
        return 0;
    }
    int file_length;
    unsigned char *buf;

    fin = fopen(argv[1], "rb");
    string filename(argv[1]);
    filename += ".morris";
    fout = fopen(filename.c_str(), "wb");

    if(fin == NULL || fout == NULL) {
        puts("fopen r/w fail");
        return 0;
    }
    fseek(fin, 0, SEEK_END);
    file_length = ftell(fin);
    rewind(fin);

    printf("file size %d\n", file_length);
    buf = new unsigned char[file_length];
    fread(buf, 1, file_length, fin);

    int i, j, ASCI[256] = {}, n;
    for(i = 0; i < file_length; i++)
        ASCI[buf[i]]++;
    ele l, r, e;
    priority_queue<ele, vector<ele>, cmp> pQ;
    for(i = 0, n = 0; i < 256; i++) {
        if(ASCI[i]) {
            e.nd = ++n, e.v = ASCI[i];
            ND[n].l = 0, ND[n].r = 0;
            ND[n].v = e.v;
            ND[n].c = i;
            pQ.push(e);
        }
    }
    int size = n;
    while(pQ.size() > 1) {
        l = pQ.top(), pQ.pop();
        r = pQ.top(), pQ.pop();
        size++;
        e.nd = size, e.v = l.v+r.v;
        ND[size].l = l.nd;
        ND[size].r = r.nd;
        ND[size].v = l.v+r.v;
        pQ.push(e);
    }

    dfs(size, 0);

    for(i = 31; i >= 0; i--)
        push_buf((file_length>>i)&1);
    int percent = 0;
    for(i = 0; i < file_length; i++) {
        for(j = 0; mapcode[buf[i]][j]; j++)
            push_buf(mapcode[buf[i]][j]-'0');
        if(i*100/file_length > percent) {
            percent += 10;
            printf("%d%% ...\n", percent);
        }
    }
    for(i = 0; i < 8; i++)
        push_buf(1);
    fclose(fin);
    fclose(fout);
    delete[] buf;
    return 0;
}




#include <cstdio>
#include <cstdlib>
#include <vector>
#include <queue>
using namespace std;
struct NODE {
    int l, r;
    unsigned char c;
};
NODE ND[1024];
unsigned char mapcode[515][515];
FILE *fin, *fout;
unsigned char *buf;
int pop_buf() {
    static int idx = 7, now = 0, ans;
    ans = (buf[now]>>idx)&1;
    idx--;
    if(idx < 0)
        idx = 7, now++;
    return ans;
}
int size;
void build(int nd) {
    int l, r;
    l = r = pop_buf();
    if(l == 0 && r == 0) {
        ND[nd].l = ND[nd].r = 0;
        ND[nd].c = 0;
        for(int i = 7; i >= 0; i--)
            ND[nd].c |= pop_buf()<<i;
        return;
    }
    ND[nd].l = ++size;
    build(size);
    ND[nd].r = ++size;
    build(size);
}
int main(int argc, char* argv[]) {
    if(argc != 3) {
        puts("argc error");
        return 0;
    }
    int file_length;

    fin = fopen(argv[1], "rb");
    fout = fopen(argv[2], "wb");

    if(fin == NULL || fout == NULL) {
        puts("fopen r/w fail");
        return 0;
    }

    fseek(fin, 0, SEEK_END);
    file_length = ftell(fin);
    rewind(fin);

    buf = new unsigned char[file_length];

    fread(buf, 1, file_length, fin);

    size = 1;
    build(size);
    int i, j, n, idx;
    for(i = 31, n = 0; i >= 0; i--)
        n |= (1&pop_buf())<<i;

    int percent = 100, on = n;
    while(n--) {
        idx = 1;
        while(1) {
            if(!ND[idx].l && !ND[idx].r) {
                fwrite(&ND[idx].c, 1, 1, fout);
                break;
            }
            i = pop_buf();
            if(i)   idx = ND[idx].r;
            else    idx = ND[idx].l;
        }
        if(n*100/on < percent) {
            percent -= 10;
            printf("%d%% ...\n", 100-percent);
        }
    }
    fclose(fin);
    fclose(fout);
    delete[] buf;
    return 0;
}

春藥 2020-02-20 14:03:49

很讚的分享~~

start 2016-04-05 10:16:38

Decode 顯示 argc error 是什麼原因呢?

版主回應
請確定壓縮檔案名稱和輸出的解壓縮檔案名稱 2016-04-05 10:23:31