资源简介

基于深度学习与语音识别的结巴分词 Jieba-Analysis 项目,可在 Eclipse 和 Android Studio 下运行;在识别语音的同时,可将关键词分割并输出。

资源截图

代码片段和文件信息

package com.huaban.analysis.jieba;

import java.util.regex.Pattern;


/**
 * Static helpers for classifying and normalizing single characters.
 *
 * <p>All methods operate on one UTF-16 {@code char} at a time and keep no state.
 */
public class CharacterUtil {
    /**
     * Matches either a decimal number with a fractional part (e.g. {@code 3.14})
     * or a run of ASCII letters and digits.
     */
    public static Pattern reSkip = Pattern.compile("(\\d+\\.\\d+|[a-zA-Z0-9]+)");

    /** Punctuation characters that {@link #isConnector(char)} accepts. */
    private static final char[] CONNECTORS = { '+', '#', '&', '.', '_', '-' };


    /**
     * Tests whether {@code ch} lies in the range U+4E00..U+9FA5.
     *
     * @param ch character to test
     * @return {@code true} if {@code ch} is within that range
     */
    public static boolean isChineseLetter(char ch) {
        return ch >= 0x4E00 && ch <= 0x9FA5;
    }


    /**
     * Tests whether {@code ch} is an ASCII letter, {@code A-Z} or {@code a-z}.
     *
     * @param ch character to test
     * @return {@code true} if {@code ch} is an ASCII letter
     */
    public static boolean isEnglishLetter(char ch) {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    }


    /**
     * Tests whether {@code ch} is an ASCII digit, {@code 0-9}.
     *
     * @param ch character to test
     * @return {@code true} if {@code ch} is an ASCII digit
     */
    public static boolean isDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }


    /**
     * Tests whether {@code ch} is one of the connector characters
     * {@code + # & . _ -}.
     *
     * @param ch character to test
     * @return {@code true} if {@code ch} is a connector character
     */
    public static boolean isConnector(char ch) {
        for (char connector : CONNECTORS) {
            if (ch == connector) {
                return true;
            }
        }
        return false;
    }


    /**
     * Tests whether {@code ch} is accepted by any of {@link #isChineseLetter(char)},
     * {@link #isEnglishLetter(char)}, {@link #isDigit(char)} or
     * {@link #isConnector(char)}.
     *
     * @param ch character to test
     * @return {@code true} if at least one of those checks accepts {@code ch}
     */
    public static boolean ccFind(char ch) {
        return isChineseLetter(ch)
                || isEnglishLetter(ch)
                || isDigit(ch)
                || isConnector(ch);
    }


    /**
     * Converts a full-width character to half-width, or an ASCII uppercase letter
     * to lowercase.
     *
     * <p>Exactly one rule is applied per call:
     * <ul>
     *   <li>U+3000 becomes an ASCII space (32);</li>
     *   <li>a character strictly between U+FF00 and U+FF5F is shifted down by
     *       0xFEE0;</li>
     *   <li>{@code 'A'..'Z'} becomes {@code 'a'..'z'};</li>
     *   <li>anything else is returned unchanged.</li>
     * </ul>
     * Because the rules are mutually exclusive, a full-width uppercase letter is
     * returned as half-width uppercase; it is not lowercased in the same call.
     *
     * @param input
     *            input character
     * @return the converted character
     */
    public static char regularize(char input) {
        if (input == 0x3000) {
            return ' ';
        }
        if (input > 0xFF00 && input < 0xFF5F) {
            return (char) (input - 0xFEE0);
        }
        if (input >= 'A' && input <= 'Z') {
            // 'a' - 'A' == 32
            return (char) (input + 32);
        }
        return input;
    }

}

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2017-09-18 10:59  Jieba-Analysis\
     文件          77  2016-08-23 01:23  Jieba-Analysis\.gitignore
     目录           0  2017-09-18 10:59  Jieba-Analysis\bin\
     文件          98  2016-08-23 01:23  Jieba-Analysis\bin\build.sh
     目录           0  2017-09-18 10:59  Jieba-Analysis\conf\
     文件     1006092  2016-08-23 01:23  Jieba-Analysis\conf\sougou.dict
     文件          85  2016-08-23 01:23  Jieba-Analysis\conf\user.dict
     文件       10273  2016-08-23 01:23  Jieba-Analysis\LICENSE
     文件        7194  2016-08-23 01:23  Jieba-Analysis\pom.xml
     文件        4886  2016-08-23 01:23  Jieba-Analysis\README.md
     文件        4621  2016-08-23 01:23  Jieba-Analysis\README.org
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\java\
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\java\com\
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\java\com\huaban\
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\java\com\huaban\analysis\
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\
     文件        1717  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\CharacterUtil.java
     文件        9967  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\DictSegment.java
     文件        2735  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\Hit.java
     文件        8023  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\Jiebasegmenter.java
     文件         223  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\Node.java
     文件         301  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\Pair.java
     文件         446  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\SegToken.java
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\viterbi\
     文件        8232  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\viterbi\FinalSeg.java
     文件        6163  2016-08-23 01:23  Jieba-Analysis\src\main\java\com\huaban\analysis\jieba\WordDictionary.java
     目录           0  2017-09-18 10:59  Jieba-Analysis\src\main\resources\
     文件     8860738  2016-08-23 01:23  Jieba-Analysis\src\main\resources\dict.big.txt
     文件     5071839  2016-08-23 01:23  Jieba-Analysis\src\main\resources\dict.txt
............此处省略11个文件信息

评论

共有 条评论