c語言壓縮算法?
方法1:最簡單就是將所有字符加起來,代碼如下:
// Additive string hash: sums the byte values of a NUL-terminated string and
// reduces the sum modulo the table size.
//
// pString   - NUL-terminated input; a null pointer is treated as an empty string.
// tableSize - number of buckets in the hash table.
// Returns a bucket index in [0, tableSize), or 0 when tableSize is 0.
unsigned long HashString(const char *pString, unsigned long tableSize)
{
    // A zero-sized table has no valid bucket, and "% 0" is undefined behaviour.
    if (!pString || tableSize == 0)
        return 0;

    unsigned long hashValue = 0;
    while (*pString)
    {
        // Read each byte as unsigned: plain char may be signed, in which case
        // bytes >= 0x80 would be sign-extended and wrap the unsigned sum,
        // making the result differ between platforms.
        const unsigned char byte = *pString++;
        hashValue += byte;
    }
    return hashValue % tableSize;
}
分析:如果字符串的長度有限,而散列表比較大的話,浪費比較大。例如,如果字符串最長為16字節,那么用到的僅僅是散列表的前16*127=2032項。假如散列表含2729項,那么2032以后的項都用不到。
方法2:將上次計算出來的hash值左移5位(乘以32),再和當前關鍵字相加,能得到較好的均勻分布的效果。
// Shift-add string hash: for every byte, multiplies the running hash by 32
// (left shift by 5) and adds the byte value, then reduces modulo the table size.
// The arithmetic is unsigned, so it wraps instead of overflowing on long inputs.
//
// pString   - NUL-terminated input; a null pointer is treated as an empty string.
// tableSize - number of buckets in the hash table.
// Returns a bucket index in [0, tableSize), or 0 when tableSize is 0.
unsigned long HashString(const char *pString, unsigned long tableSize)
{
    // A zero-sized table has no valid bucket, and "% 0" is undefined behaviour.
    if (!pString || tableSize == 0)
        return 0;

    unsigned long hashValue = 0;
    while (*pString)
    {
        // Read each byte as unsigned: plain char may be signed, in which case
        // bytes >= 0x80 would be sign-extended and effectively subtracted,
        // making the result differ between platforms.
        const unsigned char byte = *pString++;
        hashValue = (hashValue << 5) + byte;
    }
    return hashValue % tableSize;
}
分析:這種方法需要遍歷整個字符串,如果字符串比較大,效率比較低。
方法3:利用哈夫曼算法,假設只有0-9這十個字符組成的字符串,我們借助哈夫曼算法,直接來看實例:
// Number of distinct symbols in the Huffman example; it sizes the two tables below.
#define Size 10
// Per-symbol occurrence counts; zeroed and then filled by init().
int freq[Size];
// Per-symbol code strings; dfs() stores the '0'/'1' path of each leaf here.
string code[Size];
// Input text whose characters init() counts.
string word;
// Node of the Huffman tree. A freshly constructed node has id == -1 and no
// children; BuildTree() assigns a non-negative id to the nodes it uses as leaves.
struct Node
{
    int id;      // symbol index, or -1 when no symbol has been assigned
    int freq;    // frequency weight carried by this node
    Node *left;  // left child, NULL if absent
    Node *right; // right child, NULL if absent

    // Creates a childless node with the given weight and no symbol assigned.
    Node(int freq_in)
        : id(-1), freq(freq_in), left(NULL), right(NULL)
    {
    }
};
struct NodeLess
{
bool operator()(const Node *a, const Node *b) const
{
return a->freq < b->freq;
}
};
void init()
{
for(int i = 0; i < Size; ++i)
freq[i] = 0;
for(int i = 0; i < word.size(); ++i)
++freq[word[i]];
}
// Walks the tree rooted at `root` and records, for every node whose id is
// non-negative, the path taken to reach it in the global `code` table.
// `res` is the path accumulated so far: "0" is appended when descending to
// the left child and "1" when descending to the right child.
// `root` must not be null.
void dfs(Node *root, string res)
{
    // A node carrying a symbol ends the walk on this branch.
    if (root->id >= 0)
    {
        code[root->id] = res;
        return;
    }

    if (root->left != NULL)
        dfs(root->left, res + "0");
    if (root->right != NULL)
        dfs(root->right, res + "1");
}
// Frees the whole subtree rooted at `root`, children before parent.
// A null `root` is accepted and ignored.
void deleteNodes(Node *root)
{
    if (root == NULL)
        return;

    // For a leaf both calls return immediately on the null children,
    // so no separate leaf case is needed.
    deleteNodes(root->left);
    deleteNodes(root->right);
    delete root;
}
void BuildTree()
{
priority_queue<Node*, vector<Node*>, NodeLess> nodes;
for(int i = 0; i < Size; ++i)
{
//0 == freq[i] 的情況未處理
Node *newNode = new Node(freq[i]);
newNode->id = i;
nodes.push(newNode);
}
while(nodes.size() > 1)
{
Node *left = nodes.top();
nodes.pop();
Node *right = nodes.top();
nodes.pop();
Node *newNode = new Node(left->freq + right->freq);
newNode->left = left;
newNode->right = right;
nodes.push(newNode);
}
Node *root = nodes.top();
dfs(root, string(""));deleteNodes(root);
}