• 大小:
    文件类型: .rar
    金币: 2
    下载: 1 次
    发布日期: 2021-12-14
  • 语言: 其他
  • 标签: 入门  

资源简介

知识图谱入门pdf

资源截图

代码片段和文件信息

import java.io.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by Dell on 2017/11/3.
 */
public class Assignment4Class3 {

    //任务2中过滤实体用的可选属性
    enum CleanAttr {
        ABSTRACT(“摘要“0) CATEGORY(“类别“1) NAME(“名字“2) SECTION(“宗派“3);

        private String name ;
        private int index ;

        private CleanAttr( String name  int index ){
            this.name = name ;
            this.index = index ;
        }

        public String getName() {
            return name;
        }
        public int getIndex() {
            return index;
        }
    }

    /**********************************************************************
     * 给定字符串与正则表达式,打印所有匹配的子串
     * String str : 带匹配的字符串
     * String regex : 模板(正则表达式)
     **********************************************************************/
    public void extract(String str String regex) {
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }

    /**********************************************************************
     * 给定知识库的abstract文件与正则表达式,使用正则从abstract中抽取属性值,每抽出一条属性值打印一行abstract一行属性的主语、宾语对
     * String input : 实体abstract属性文件的完整路径
     * String regex : 模板
     **********************************************************************/
    public void extractFromFile(String input String regex) throws IOException {
        Pattern pattern = Pattern.compile(regex);
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(input) “utf-8“));
        String line = ““;
        while ((line=br.readLine())!=null) {
            String subject = line.split(“/resource/“)[1].split(“> <“)[0];
            String sentence = line.split(“> \““)[1].split(“\““)[0];
            Matcher matcher = pattern.matcher(sentence);
            while (matcher.find()) {
                System.out.println(“sentence: “+sentence);
                System.out.println(“relation: “+subject+“\t:\t“+matcher.group());
            }
        }
        br.close();
    }

    /**********************************************************************
     * 给定人工标注文件路径和阈值,被标注为正确实体的次数不小于阈值则被视为正确实体返回,否则被视为错误实体打印出来
     * String path : 人工标注结果文件的路径
     * int threshold : 阈值
     **********************************************************************/
    public HashSet clean(String path int threshold) throws IOException {
        HashSet result = new HashSet<>();
        HashMap countMap = new HashMap<>();
        File[] fileList = new File(path).listFiles();
        for (File file:fileList){
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path+file.getName())“utf-8“));
            String line = ““;
            while ((li

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        273  2017-11-03 18:41  资料\Assignment\Assignment\.idea\misc.xml

     文件        260  2017-11-03 18:40  资料\Assignment\Assignment\.idea\modules.xml

     文件      31996  2017-11-03 22:51  资料\Assignment\Assignment\.idea\workspace.xml

     文件        502  2017-11-03 18:42  资料\Assignment\Assignment\Assignment.iml

     文件     632610  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment1\abstracts.ttl

     文件    1773471  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\abstracts.ttl

     文件     152910  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\aliases.ttl

     文件    2292411  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\categories.ttl

     文件     199701  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\entities.txt

     文件      86476  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\entities_labeled\1.txt

     文件     142152  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\entities_labeled\2.txt

     文件      69384  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\entities_labeled\3.txt

     文件     101518  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\entities_labeled\4.txt

     文件     101740  2017-11-03 20:23  资料\Assignment\Assignment\out\production\Assignment\Assignment2\sections.ttl

     文件       1524  2017-11-03 22:51  资料\Assignment\Assignment\out\production\Assignment\Assignment4Class3$CleanAttr.class

     文件       6349  2017-11-03 22:51  资料\Assignment\Assignment\out\production\Assignment\Assignment4Class3.class

     文件       3073  2017-11-03 22:48  资料\Assignment\Assignment\readme.md

     文件     632610  2017-08-09 19:16  资料\Assignment\Assignment\resource\Assignment1\abstracts.ttl

     文件    1773471  2017-08-14 10:56  资料\Assignment\Assignment\resource\Assignment2\abstracts.ttl

     文件     152910  2017-10-31 18:07  资料\Assignment\Assignment\resource\Assignment2\aliases.ttl

     文件    2292411  2017-08-14 10:58  资料\Assignment\Assignment\resource\Assignment2\categories.ttl

     文件     199701  2017-08-14 10:37  资料\Assignment\Assignment\resource\Assignment2\entities.txt

     文件      86476  2017-08-14 10:38  资料\Assignment\Assignment\resource\Assignment2\entities_labeled\1.txt

     文件     142152  2017-08-14 10:39  资料\Assignment\Assignment\resource\Assignment2\entities_labeled\2.txt

     文件      69384  2017-08-14 10:39  资料\Assignment\Assignment\resource\Assignment2\entities_labeled\3.txt

     文件     101518  2017-08-14 10:39  资料\Assignment\Assignment\resource\Assignment2\entities_labeled\4.txt

     文件     101740  2017-10-31 18:56  资料\Assignment\Assignment\resource\Assignment2\sections.ttl

     文件       8455  2017-11-03 22:50  资料\Assignment\Assignment\src\Assignment4Class3.java

     文件        182  2017-11-03 20:25  资料\Assignment\__MACOSX\._Assignment

     文件        182  2017-11-03 18:41  资料\Assignment\__MACOSX\Assignment\.idea\._misc.xml

............此处省略405个文件信息

评论

共有 条评论