c语言可以说是一门让人既爱又恨的语言,其效率是众所周知的,但其出现的bug也是大名鼎鼎。对于高手来说,它是多么优雅,可是对门外汉而言,可以掐着指头一年一年数着啥时候能入门。或许很多人不同意这句话,可在愚钝的我身上是适得其所。最近学习c语言,通过实现一些数据结构练手,为一些bug恼火过,也为解决一些bug而拍案而起。没怎么写过博客,写得不好敬请大家谅解。下面分享一下哈夫曼编码过程。最后附上代码。
首先声明代码有bug,当输入的字符串只含一种字符时发生异常。本来想修正这个问题的,但这两天写累了,想歇歇。如果有高手其中不足,小的不胜感激。废话不多说,步入正题吧。
哈夫曼编码思路比较明了,就是每次根据最小的两个节点(每次找到最小权值节点时都要马上删掉该节点)生成新的根节点,根节点的权重是这两个节点权重之和,然后把新生成的根节点放进节点堆里,待下一轮筛选选择最小值,直到节点堆里只剩下一个节点,也就是哈夫曼树的树根啦。我的实现思路是,为输入的每一种字符统计其出现的次数作为权重做成链表,然后根据链表生成哈夫曼树,接着根据构造好的哈夫曼树进行编码。到这里就可以设计数据结构了,数据结构设计如下:
typedef struct tNode { char ch; int weight; struct tNode *left;//左孩子 struct tNode *right;//右孩子 } tnode; typedef struct cNode { char ch; int count; struct cNode *next; tnode *tree;//指向当前节点的左子树的跟节点 } clist; typedef struct endeNode { char ch; char code[20]; struct endeNode *next; } codelist;
接下来一步就是对输入字符进行统计,构建单链表:
/************************************************** Description:将字符添加进链表里(出现相同字符则不用新建节点,仅仅更改该字符出现的次数即可) **************************************************/ void pull_in_list(char in_string[]) { int i; clist **link; clist *current; clist *new_node; char ch; int exist;//判断字符是否已经存在,0表示不存在,1表示存在 char_count = strlen(in_string); for(i = 0; (ch = in_string[i]) != '\0'; i += 1) { char_mount += 1; exist = 0; link = &clistp; while((current = *link) != NULL) { link = ¤t->next; if(current->ch == ch) { current->count += 1; exist = 1; continue; } } //in_string += 1; if(exist == 1) { continue; } new_node = (clist *)malloc(sizeof(clist)); assert(new_node != NULL); new_node->ch = ch; new_node->count = 1; new_node->next = NULL; new_node->tree = NULL; *link = new_node; } current = clistp; char_num = count_rest_list(); printf("各个字符出现的次数为:\n"); while(current != NULL) { printf("%c: %d 次\t", current->ch, current->count); current = current->next; } printf("\n"); }
好了,既然链表已经创建完成,是时候构建哈夫曼树了:
/************************************************** Description:构建哈夫曼树 **************************************************/ void create_huffman_tree() { int min1;//链表最小值 int min2;//链表次小值 clist *current;//指向链表节点的指针 clist *pre;//最小节点的前一节点 clist *min1_l; clist *min2_l; clist **link; clist *temp_pre; clist *new_lnode; tnode *new_tnode; tnode *left; tnode *right; while(count_rest_list() >= 2) { current = clistp; min1 = clistp->count; min1_l = clistp; while(current != NULL) { if(min1 > current->count) { min1 = current->count; temp_pre = pre; min1_l = pre->next; } pre = current; current = current->next; } //删除最小值节点 if(min1_l == clistp) { clistp = min1_l->next; } else { temp_pre->next = min1_l->next; } current = clistp; min2 = clistp->count; min2_l = clistp; while(current != NULL) { if(min2 > current->count) { min2 = current->count; temp_pre = pre; min2_l = pre->next; } pre = current; current = current->next; } //删除次小值节点 if(min2_l == clistp) { clistp = min2_l->next; } else { temp_pre->next = min2_l->next; } //左子树 if(min1_l->tree == NULL) { left = (tnode *)malloc(sizeof(tnode)); assert(left != NULL); left->ch = min1_l->ch; left->left = NULL; left->right = NULL; left->weight = min1_l->count; } else { left = min1_l->tree; } //右子树 if(min2_l->tree == NULL) { right = (tnode *)malloc(sizeof(tnode)); assert(right != NULL); right->ch = min2_l->ch; right->left = NULL; right->right = NULL; right->weight = min2_l->count; } else { right = min2_l->tree; } //生成新的树根节点 new_tnode = (tnode *)malloc(sizeof(tnode)); assert(new_tnode != NULL); new_tnode->ch = ' '; new_tnode->left = left; new_tnode->right = right; new_tnode->weight = left->weight + right->weight; //生成新的链表节点 new_lnode = (clist *)malloc(sizeof(clist)); assert(new_lnode != NULL); new_lnode->ch = new_tnode->ch; new_lnode->count = new_tnode->weight; new_lnode->next = NULL; new_lnode->tree = new_tnode; //将新生成的链表节点添加到链表中 link = &clistp; while((current = *link) != NULL) { link = ¤t->next; } *link = new_lnode; free(min1_l); free(min2_l); } huff_tree = new_tnode; }
万事俱备,只欠东风啦,最后一部当然是根据哈夫曼树进行哈夫曼编码了:
/************************************************** Description:实现哈夫曼编码(递归遍历树) **************************************************/ void encoding(tnode *bt) { codelist *new_node; codelist *current; codelist **link; if(bt->left == NULL && bt->right == NULL) { int i; new_node = (codelist *)malloc(sizeof(codelist)); assert(new_node != NULL); new_node->ch = bt->ch; for(i = 0; i < strlen(stack); i += 1) { new_node->code[i] = stack[i]; } new_node->code[i] = '\0'; new_node ->next = NULL; //将新生成的编码节点添加进链表中 link = &head; while((current = *link) != NULL) { link = ¤t->next; } *link = new_node; //init_stack(); } else { if(bt->left != NULL) { stack[++top] = '0'; encoding(bt->left); stack[top] = '\0'; top -= 1; } if(bt->right != NULL) { stack[++top] = '1'; encoding(bt->right); stack[top] = '\0'; top -= 1; } } }
刚接触时间不长,很多问题都欠缺考虑,欢迎各位大神指正,谢谢
本文为原创文章,转载请注明出处!
admin:支持一下,感谢分享!,+10,