汉字字频统计[行稳书苑].doc

上传人:scccc 文档编号:11845093 上传时间:2021-09-24 格式:DOC 页数:8 大小:52.50KB
返回 下载 相关 举报
汉字字频统计[行稳书苑].doc_第1页
第1页 / 共8页
汉字字频统计[行稳书苑].doc_第2页
第2页 / 共8页
汉字字频统计[行稳书苑].doc_第3页
第3页 / 共8页
亲,该文档总共8页,到这儿已超出免费预览范围,如果喜欢就下载吧!
资源描述

《汉字字频统计[行稳书苑].doc》由会员分享,可在线阅读,更多相关《汉字字频统计[行稳书苑].doc(8页珍藏版)》请在三一文库上搜索。

1、import java.awt.List;import java.io.*;import jxl.*;import jxl.write.*;import java.text.DecimalFormat;import java.util.ArrayList;public class statistics {public static void main(String[] args) {// 读字表ArrayList chtable = readFromTable("CHTable.txt");System.out.println("字表大小为:" + chtable.size());// 读文件ArrayList n

2、umlist = readFromFile("10.txt", chtable);// 排序ArrayList chlist = sort(chtable, numlist);// 计算汉字的总数int sum = 0;for (int i = 0; i < numlist.size(); i++) sum = sum + (Integer) numlist.get(i);System.out.println("-显示结果-");// 返回指定个数的汉字频率统计结果ArrayList freqlist = frequency(chlist, numlist, sum, 100);// 计算熵值float sh =

3、entropy(freqlist);/ 计算指定个汉字的字频总和float fre1 = freqSum(freqlist, 1);float fre2 = freqSum(freqlist, 20);float fre3 = freqSum(freqlist, 100);float fre4 = freqSum(freqlist, 600);float fre5 = freqSum(freqlist, 2000);float fre6 = freqSum(freqlist, 3000);float fre7 = freqSum(freqlist, 6000);ArrayList freal

4、= new ArrayList();freal.add(fre1);freal.add(fre2);freal.add(fre3);freal.add(fre4);freal.add(fre5);freal.add(fre6);freal.add(fre7);ArrayList nal = new ArrayList();nal.add(1);nal.add(20);nal.add(100);nal.add(600);nal.add(2000);nal.add(3000);nal.add(6000);System.out.println("-程序结束-");// 生成Excel的类 try {// 打开

5、文件 WritableWorkbook book = Workbook.createWorkbook(new File("统计结果.xls"));// 生成工作表,参数0表示这是第一页 WritableSheet sheet = book.createSheet(sum+"字", 0);/* * 生成一个保存数字的单元格 必须使用Number的完整包路径,否则有语法歧义 */ // 表头Label label1 = new Label(0, 0, "字符");sheet.addCell(label1);Label label2 = new Label(1, 0, "频率");sheet.addCell(label2);f

6、or(int i=0;i<100;i++){// 中文字符 Label label = new Label(0, i+1, chlist.get(i).toString());sheet.addCell(label);// 出现的频率 jxl.write.Number number = new jxl.write.Number(1, i+1, (Float)freqlist.get(i));sheet.addCell(number);}//写入熵值Label lsh = new Label(0, 101, "熵值");sheet.addCell(lsh);jxl.write.Number nsh = new jxl

7、.write.Number(1, 101, sh);sheet.addCell(nsh);/写入字频总和for(int i=0;i= u4e00 & tempint = uf900 & tempint = ufa2d) char tempchar = (char) tempint;/ System.out.println(tempchar);/ System.out.println(list.size: + chlist.size();/ 判断该字符是否出现过int i = 0;for (i = 0; i chlist.size(); i+) / 一旦重复,跳出循环char c = ;Obje

8、ct ob = chlist.get(i);if (ob instanceof Character) c = (Character) ob;/ System.out.println(c: + c);if (tempchar = c) / System.out.println(重复!);break;/ 字符从未出现过if (i = chlist.size() / System.out.println(新字符!);chlist.add(tempchar);reader.close(); catch (Exception e) e.printStackTrace();return chlist;/*

9、 * 该函数用于从文件中读取中文字符,并返回它出现的次数 * * param filename * return */public static ArrayList readFromFile(String filename, ArrayList chtable) File file = new File(filename);Reader reader = null;ArrayList numlist = new ArrayList();/ 初始化字符出现的次数集合for (int i = 0; i = u4e00 & tempint = uf900 & tempint = ufa2d) cha

10、r tempchar = (char) tempint;/ System.out.println(tempchar);/ System.out.println(list.size: + chlist.size();/ 判断该字符是否在字表里int i = 0;for (i = 0; i chtable.size(); i+) / 在字表里,统计重复次数并跳出循环char c = ;Object ob = chtable.get(i);if (ob instanceof Character) c = (Character) ob;/ System.out.println(c: + c);if (

11、tempchar = c) int num = (Integer) numlist.get(i) + 1;numlist.set(i, num);break;reader.close(); catch (Exception e) e.printStackTrace();return numlist;/* * 该函数用来对汉字出现的次数进行从大到小的排序,返回排序结果 * * param chlist * param numlist */public static ArrayList sort(ArrayList chtable, ArrayList numlist) ArrayList chl

12、ist = chtable;for (int i = 0; i < numlist.size(); i++) {for (int j = i + 1; j < numlist.size(); j++) {int listi = (Integer) numlist.get(i);int listj = (Integer) numlist.get(j);if (listi < listj) {numlist.set(i, listj);numlist.set(j, listi);char chi = (Character) chlist.get(i);char chj = (Character) chlist.get(

13、j);chlist.set(i, chj);chlist.set(j, chi);return chlist;/* * 该函数用来计算各个汉字出现的频率,并且显示出指定个数的结果 * * param chlist * param numlist * param sum * param count */public static ArrayList frequency(ArrayList chlist, ArrayList numlist, int sum,int count) ArrayList freqlist = new ArrayList();/ 计算频率for (int j = 0;

14、j < chlist.size(); j++) {float freq = (Integer) numlist.get(j) / (float) sum;freqlist.add(freq);}// 按指定格式输出(保留6位有效数字)for (int j = 0; j < freqlist.size() && j < count; j++) {System.out.println("字符:" + chlist.get(j));System.out.println("出现次数:" + numlist.get(j));System.out.println("频率:" + freqlist.get(j));System.out.println

15、(-);System.out.println(中文字符总数: + sum);return freqlist;/* * 该函数用来计算熵值 * * param freqlist */public static float entropy(ArrayList freqlist) float sum = 0f;for (int i = 0; i freqlist.size()return 0f;for (int i = 0; i count; i+) freqsum += (Float) freqlist.get(i);System.out.println(前 + count + 个汉字字频总和为: + freqsum);return freqsum;8基础教学c

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 社会民生


经营许可证编号:宁ICP备18001539号-1