|
16 | 16 | import data.lab.ongdb.result.MapResult; |
17 | 17 | import data.lab.ongdb.schema.auto.AutoCypher; |
18 | 18 | import data.lab.ongdb.structure.AdjacencyNode; |
| 19 | +import data.lab.ongdb.util.ArrayUtils; |
19 | 20 | import data.lab.ongdb.util.CypherBuilder; |
20 | 21 | import data.lab.ongdb.util.FileUtil; |
21 | 22 | import data.lab.wltea.analyzer.cfg.Configuration; |
22 | 23 | import data.lab.wltea.analyzer.core.IKSegmenter; |
23 | 24 | import data.lab.wltea.analyzer.core.Lexeme; |
24 | 25 | import org.neo4j.graphdb.GraphDatabaseService; |
25 | | -import org.neo4j.procedure.Context; |
26 | | -import org.neo4j.procedure.Description; |
27 | | -import org.neo4j.procedure.Name; |
28 | | -import org.neo4j.procedure.UserFunction; |
| 26 | +import org.neo4j.procedure.*; |
29 | 27 |
|
30 | 28 | import java.io.IOException; |
31 | 29 | import java.io.StringReader; |
|
37 | 35 | import java.util.stream.Stream; |
38 | 36 |
|
39 | 37 | /** |
40 | | - * Intelligent search based on graph data |
| 38 | + * Intelligent search based on contextual semantics in graph data |
41 | 39 | * |
42 | 40 | * @author Yc-Ma |
43 | 41 | * @PACKAGE_NAME: data.lab.ongdb.inferencing |
@@ -72,13 +70,13 @@ public class Inference implements SemanticInter { |
72 | 70 | @Description("RETURN olab.entity.recognition({graphDataSchema},{nodeHitsRules},{askString},{operator}) AS entityRecognitionHits") |
73 | 71 | public Map<String, Object> entityRecognition(@Name("graphDataSchema") String graphDataSchema, @Name("nodeHitsRules") String nodeHitsRules, @Name("askString") String askString, @Name("operator") String operator, @Name(value = "words", defaultValue = "[]") List<String> words) throws IOException { |
74 | 72 | if (words != null && !words.isEmpty()) { |
75 | | - return executeEntityRecognition(graphDataSchema, nodeHitsRules, operator,words); |
| 73 | + return executeEntityRecognition(graphDataSchema, nodeHitsRules, operator, words); |
76 | 74 | } else { |
77 | 75 | /* |
78 | 76 | * Get the word-segmentation result |
79 | 77 | * */ |
80 | 78 | words = getSemanticSmartIKSegmenter(askString); |
81 | | - return executeEntityRecognition(graphDataSchema, nodeHitsRules, operator,words); |
| 79 | + return executeEntityRecognition(graphDataSchema, nodeHitsRules, operator, words); |
82 | 80 | } |
83 | 81 | } |
84 | 82 |
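
For orientation, here is a minimal sketch of how the `olab.entity.recognition` user function shown in this hunk might be invoked from Cypher. Only the function name and argument order come from the `@Description` and `@Name` annotations above; every parameter value below is a hypothetical placeholder.

```cypher
// Hedged sketch — all values are illustrative placeholders.
WITH '<graph data schema JSON>'      AS graphDataSchema,
     '<node hit rules JSON>'         AS nodeHitsRules,
     '<natural-language question>'   AS askString
RETURN olab.entity.recognition(graphDataSchema, nodeHitsRules, askString, 'AND', []) AS entityRecognitionHits;
```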
|
@@ -763,6 +761,113 @@ private List<Map<String, Object>> executeIntentSchemaParse(String graphDataSchem |
763 | 761 | }).collect(Collectors.toList()); |
764 | 762 | } |
765 | 763 |
|
| 764 | + /** |
| 765 | + * @param graphDataSchemaStr: graph data model definition (the schema mainly defines node labels and the relationship types between them) |
| 766 | + * @param entityRecognitionHits : entity recognition result {semantic_schema} |
| 767 | + * @param inferenceWeightStr : weight configuration of the search ontology |
| 768 | + * @return entityRecognitionHits: sorted entity recognition result {semantic_schema} |
| 769 | + * @Description: TODO(Permute and combine entity pairs according to the entity recognition result and the ontology weight configuration [weighted search-queue process]) |
| 770 | + * // inferenceWeight format (currently only the LABEL configuration is supported) |
| 771 | + * { |
| 772 | + * "LABEL": |
| 773 | + * { |
| 774 | + * "label1": "weight", |
| 775 | + * "label2": "weight" |
| 776 | + * } |
| 777 | + * } |
| 778 | + */ |
| 779 | + @Override |
| 780 | + @Procedure(name = "olab.entity.ptmd.queue") |
| 781 | + @Description("RETURN olab.entity.ptmd.queue({graphDataSchemaStr},{entityRecognitionHits},{inferenceWeightStr}) AS entityRecognitionHits") |
| 782 | + public Stream<MapResult> entityPtmdQueue(@Name("graphDataSchemaStr") String graphDataSchemaStr, @Name("entityRecognitionHits") Map<String, Object> entityRecognitionHits, @Name("inferenceWeightStr") String inferenceWeightStr) { |
| 783 | + |
| 784 | + Map<String, Object> graphDataSchema = JSONObject.parseObject(graphDataSchemaStr); |
| 785 | + |
| 786 | + List<Map<String, Object>> nodes = getGraphObject(graphDataSchema, "nodes"); |
| 787 | + // All node labels in the ontology model; when processing entities, each node's labels must be intersected with this label list |
| 788 | + List<String> labels = nodes.stream().map(v -> { |
| 789 | + List<String> list = (List<String>) v.get("labels"); |
| 790 | + return list.get(0); |
| 791 | + }).collect(Collectors.toList()); |
| 792 | + |
| 793 | + Map<String, List<Map<String, Object>>> entities = (Map<String, List<Map<String, Object>>>) entityRecognitionHits.get("entities"); |
| 794 | + |
| 795 | + // Attach the word to every object in its hits list |
| 796 | + Set<String> words = entities.keySet(); |
| 797 | + for (String word : words) { |
| 798 | + List<Map<String, Object>> list = entities.get(word); |
| 799 | + List<Map<String, Object>> listReset = list.stream().peek(v -> v.put("word", word)).collect(Collectors.toList()); |
| 800 | + entities.put(word, listReset); |
| 801 | + } |
| 802 | + |
| 803 | + // 1. Without removing any word, compute the Cartesian product directly |
| 804 | + List<List<Map<String, Object>>> descartesL = new ArrayList<>(new ArrayUtils().descartesStringKey(entities)); |
| 805 | + |
| 806 | + // 2. Remove one word at a time and compute the Cartesian product |
| 807 | + List<String> arrWords = Arrays.asList(words.toArray(new String[0])); |
| 808 | + int size = arrWords.size(); |
| 809 | + for (int i = size - 1; i >= 0; i--) { |
| 810 | + // Remove one word |
| 811 | + Map<String, List<Map<String, Object>>> entitiesTemp = new HashMap<>(entities); |
| 812 | + entitiesTemp.remove(arrWords.get(i)); |
| 813 | + // Compute the Cartesian product |
| 814 | + List<List<Map<String, Object>>> descartesList = new ArrayUtils().descartesStringKey(entitiesTemp); |
| 815 | + descartesL.addAll(descartesList); |
| 816 | + } |
| 817 | + |
| 818 | + // 3. Remove two adjacent words at a time and compute the Cartesian product |
| 819 | +// List<String> arrWords = Arrays.asList(words.toArray(new String[0])); |
| 820 | + for (int i = size - 1; i >= 0; i--) { |
| 821 | + // Remove two words |
| 822 | + Map<String, List<Map<String, Object>>> entitiesTemp = new HashMap<>(entities); |
| 823 | + entitiesTemp.remove(arrWords.get(i)); |
| 824 | + if (i > 0) { |
| 825 | + entitiesTemp.remove(arrWords.get(i - 1)); |
| 826 | + } |
| 827 | + // Compute the Cartesian product |
| 828 | + List<List<Map<String, Object>>> descartesList = new ArrayUtils().descartesStringKey(entitiesTemp); |
| 829 | + descartesL.addAll(descartesList); |
| 830 | + } |
| 831 | + |
| 832 | + // Sort the Cartesian-product combination lists by weight |
| 833 | + List<List<Map<String, Object>>> descartesLSort = descartesLSort(descartesL, inferenceWeightStr, labels); |
| 834 | + |
| 835 | + List<MapResult> entityRecognitionHitsList = new ArrayList<>(); |
| 836 | + for (List<Map<String, Object>> list : descartesLSort) { |
| 837 | + Map<String, List<Map<String, Object>>> entitiesMap = new HashMap<>(); |
| 838 | + for (Map<String, Object> map : list) { |
| 839 | + String word = String.valueOf(map.get("word")); |
| 840 | + entitiesMap.put(word, new ArrayList<Map<String, Object>>() {{ |
| 841 | + add(map); |
| 842 | + }}); |
| 843 | + } |
| 844 | + entityRecognitionHitsList.add(new MapResult(new HashMap<String, Object>() {{ |
| 845 | + put("entities", entitiesMap); |
| 846 | + }})); |
| 847 | + } |
| 848 | + return entityRecognitionHitsList.stream(); |
| 849 | + } |
| 850 | + |
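
A minimal usage sketch for the new `olab.entity.ptmd.queue` routine, assuming it is invoked as a standalone procedure call (it is registered with `@Procedure`, even though the `@Description` string shows a RETURN-style expression). `$graphDataSchemaStr` and `$entityRecognitionHits` are hypothetical query parameters, e.g. the schema string and the map produced by `olab.entity.recognition`; the label names and weights in the JSON follow the format documented in the Javadoc above but are invented for illustration.

```cypher
// Hedged sketch — standalone call with hypothetical parameters.
// $entityRecognitionHits is expected to be shaped like {entities: {word -> [hit, ...]}}.
CALL olab.entity.ptmd.queue(
  $graphDataSchemaStr,
  $entityRecognitionHits,
  '{"LABEL": {"label1": 2, "label2": 1}}'
)
```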
| 851 | + private List<List<Map<String, Object>>> descartesLSort(List<List<Map<String, Object>>> descartesL, String inferenceWeightStr, List<String> labels) { |
| 852 | + JSONObject inferenceWeightObj = JSONObject.parseObject(inferenceWeightStr); |
| 853 | + JSONObject labelsWeight = inferenceWeightObj.getJSONObject("LABEL"); |
| 854 | + return descartesL.stream().sorted((v1, v2) -> { |
| 855 | + // Compute the weight; reduce with an identity of 0 so an empty combination does not throw |
| 856 | + Integer v1f = v1.stream().map(v -> getWeight(labelsWeight, v, labels)).reduce(0, Integer::sum); |
| 857 | + Integer v2f = v2.stream().map(v -> getWeight(labelsWeight, v, labels)).reduce(0, Integer::sum); |
| 858 | + Integer v1Score = v1.size() + v1f; |
| 859 | + Integer v2Score = v2.size() + v2f; |
| 860 | + return v2Score.compareTo(v1Score); |
| 861 | + }).collect(Collectors.toList()); |
| 862 | + } |
| 863 | + |
| 864 | + private int getWeight(JSONObject labelsWeight, Map<String, Object> v, List<String> labels) { |
| 865 | + List<String> labelsRe = new ArrayList<>(labels); |
| 866 | + labelsRe.retainAll((List<String>) v.get("labels")); |
| 867 | + Integer wei = labelsWeight.getInteger(labelsRe.size() > 0 ? labelsRe.get(0) : null); |
| 868 | + return wei == null ? 0 : wei; |
| 869 | + } |
| 870 | + |
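
To make the ranking in `descartesLSort` concrete, a hypothetical worked example (label names and weights invented for illustration): with `inferenceWeightStr = '{"LABEL":{"Movie":3,"Person":1}}'`, a combination holding one `Movie` hit and one `Person` hit scores 2 (its size) + 3 + 1 = 6, whereas a combination holding only a `Person` hit scores 1 + 1 = 2, so the larger, heavier combination is emitted earlier in the stream; labels absent from the LABEL map contribute 0 via `getWeight`.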
766 | 871 | /** |
767 | 872 | * Execute QUERY |
768 | 873 | **/ |
|