fix: 优化全文召回部分条件

This commit is contained in:
haokai
2025-05-26 15:21:46 +08:00
parent 4ee8328671
commit 8b18d6081c
2 changed files with 49 additions and 31 deletions

View File

@@ -41,18 +41,23 @@ public class EntityConstants {
// Search-index field names used when building query filters.
// NOTE(review): the ES_ prefix suggests these are Elasticsearch mapping fields — confirm against the index mapping.
// Values with a suffix (".text", ".1gram", ".colon") appear to address sub-fields of the base field;
// the ".1gram" variants are queried together with the "1gram" analyzer in VertexSearchReqWrapUtils.
public static final String ES_FOREIGN_NAME = "foreign_name";
public static final String ES_FOREIGN_NAME_TEXT = "foreign_name.text";
public static final String ES_ALIAS = "alias";
public static final String ES_ALIAS_1GRAM = "alias.1gram";
public static final String ES_URL = "url";
public static final String ES_STYLE_NAME = "style_name";
public static final String ES_PSEUDONYM = "pseudonym";
public static final String ES_PSEUDONYM_1GRAM = "pseudonym.1gram";
public static final String ES_SUMMARY = "summary";
public static final String ES_SUMMARY_1GRAM = "summary.1gram";
public static final String ES_CONTENT = "content";
public static final String ES_CONTENT_1GRAM = "content.1gram";
public static final String ES_TAGS = "tags";
public static final String ES_TAGS_COLON = "tags.colon";
public static final String ES_SPINFO = "spinfo";
public static final String ES_SPINFO_TEXT = "spinfo.text";
public static final String ES_BIZINFO = "bizinfo";
// NOTE(review): the value is "infoboxs" (trailing "s"), unlike the constant name — presumably it mirrors the index field; confirm before "fixing".
public static final String ES_INFOBOX = "infoboxs";
public static final String ES_CATEGORIES = "categories";
// The two constants below already carry the ".text" suffix in their value even though the constant name does not.
public static final String ES_PPL_OCCUPATION = "occupation.text";
public static final String ES_PPL_IDENTITY = "identity.text";

View File

@@ -30,8 +30,14 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_ALIAS;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_ALIAS_1GRAM;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CATEGORIES;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CONTENT;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CONTENT_1GRAM;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CULTURE_AWARD_YEAR;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_NAME;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_NAME_1GRAM;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_NAME_TEXT;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_PSEUDONYM;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_PSEUDONYM_1GRAM;
@@ -39,6 +45,9 @@ import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_SPINFO_TEXT;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_STYLE_NAME;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_SUMMARY;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_SUMMARY_1GRAM;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_TAGS;
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_TAGS_COLON;
import static com.shuwen.data.entity.manage.common.entity.constant.GraphConstants.Label.LABEL_CREATIVE_WORK;
import static com.shuwen.data.entity.manage.common.entity.constant.GraphConstants.Label.LABEL_CULTURE;
import static com.shuwen.data.entity.manage.common.entity.constant.GraphConstants.Label.LABEL_MULTI;
@@ -204,20 +213,20 @@ public class VertexSearchReqWrapUtils {
fieldText.setSearchType("cross_fields");
Map<String, Double> boost = new HashMap<>();
if (queryField.contains("name")) {
boost.put("name", GraphComplexSearchParams.getFieldBoost("cross", "name"));
boost.put("name.1gram", GraphComplexSearchParams.getFieldBoost("cross", "name.1gram"));
boost.put(ES_NAME, GraphComplexSearchParams.getFieldBoost("cross", ES_NAME));
boost.put(ES_NAME_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_NAME_1GRAM));
}
if (queryField.contains("alias")) {
boost.put("alias", GraphComplexSearchParams.getFieldBoost("cross", "alias"));
boost.put("alias.1gram", GraphComplexSearchParams.getFieldBoost("cross", "alias.1gram"));
boost.put(ES_ALIAS, GraphComplexSearchParams.getFieldBoost("cross", ES_ALIAS));
boost.put(ES_ALIAS_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_ALIAS_1GRAM));
}
if (queryField.contains("summary")) {
boost.put("summary", GraphComplexSearchParams.getFieldBoost("cross", "summary"));
boost.put("summary.1gram", GraphComplexSearchParams.getFieldBoost("cross", "summary.1gram"));
boost.put(ES_SUMMARY, GraphComplexSearchParams.getFieldBoost("cross", ES_SUMMARY));
boost.put(ES_SUMMARY_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_SUMMARY_1GRAM));
}
if (queryField.contains("content")) {
boost.put("content", GraphComplexSearchParams.getFieldBoost("cross", "content"));
boost.put("content.1gram", GraphComplexSearchParams.getFieldBoost("cross", "content.1gram"));
boost.put(ES_CONTENT, GraphComplexSearchParams.getFieldBoost("cross", ES_CONTENT));
boost.put(ES_CONTENT_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_CONTENT_1GRAM));
}
fieldText.setFieldBoostAppend(boost);
String minimumShouldMatch = GraphComplexSearchParams.getMinimumShouldMatch("cross");
@@ -232,24 +241,24 @@ public class VertexSearchReqWrapUtils {
private static void wrapKeyword(BoolQuery queryItem, Set<String> queryField, List<String> keywords) {
for (String keyword : keywords) {
if (queryField.contains("name")) {
queryItem.should(new FieldFilter("name.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
queryItem.should(new FieldFilter(ES_NAME_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
.analyzer("1gram")
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "name.1gram")));
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_NAME_1GRAM)));
}
if (queryField.contains("alias")) {
queryItem.should(new FieldFilter("alias.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
queryItem.should(new FieldFilter(ES_ALIAS_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
.analyzer("1gram")
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "alias.1gram")));
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_ALIAS_1GRAM)));
}
if (queryField.contains("summary")) {
queryItem.should(new FieldFilter("summary.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
queryItem.should(new FieldFilter(ES_SUMMARY_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
.analyzer("1gram")
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "summary.1gram")));
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_SUMMARY_1GRAM)));
}
if (queryField.contains("content")) {
queryItem.should(new FieldFilter("content.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
queryItem.should(new FieldFilter(ES_CONTENT_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
.analyzer("1gram")
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "content.1gram")));
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_CONTENT_1GRAM)));
}
}
}
@@ -257,23 +266,23 @@ public class VertexSearchReqWrapUtils {
/**
 * Adds a single optional (should) clause that matches any of the given tags.
 *
 * <p>Each tag contributes exactly one TERM filter and one PREFIX filter on
 * {@link EntityConstants#ES_TAGS}, boosted by the values configured under the
 * {@code "term"} and {@code "prefix"} groups respectively. The per-tag filters are
 * collected in a nested bool query which is then attached to {@code queryItem}.
 *
 * @param queryItem bool query that receives the nested tags clause
 * @param tags      tag values to match; the nested clause is attached even when this is empty
 */
private static void wrapTags(BoolQuery queryItem, Set<String> tags) {
    BoolQuery tagsBool = new BoolQuery();
    for (String tag : tags) {
        // One TERM and one PREFIX clause per tag; registering them twice would double their score weight.
        tagsBool.should(new FieldFilter(ES_TAGS, FieldFilterTypeEnum.TERM, tag)
                .boost((float) GraphComplexSearchParams.getFieldBoost("term", ES_TAGS)));
        tagsBool.should(new FieldFilter(ES_TAGS, FieldFilterTypeEnum.PREFIX, tag)
                .boost((float) GraphComplexSearchParams.getFieldBoost("prefix", ES_TAGS)));
    }
    queryItem.should(new FieldFilter(FieldFilterTypeEnum.BOOL, tagsBool));
}
/**
 * Adds a single optional (should) clause that matches any of the given categories.
 *
 * <p>Each category contributes one TERM filter and one PREFIX filter on
 * {@link EntityConstants#ES_CATEGORIES}, boosted by the values configured under the
 * {@code "term"} and {@code "prefix"} groups respectively. The per-category filters are
 * collected in one nested bool query which is attached to {@code queryItem} once.
 *
 * @param queryItem  bool query that receives the nested categories clause
 * @param categories category values to match; the nested clause is attached even when this is empty
 */
private static void wrapCategories(BoolQuery queryItem, Set<String> categories) {
    // A single nested bool query is built; a second identical one would only duplicate the clause.
    BoolQuery categoryBool = new BoolQuery();
    for (String category : categories) {
        categoryBool.should(new FieldFilter(ES_CATEGORIES, FieldFilterTypeEnum.TERM, category)
                .boost((float) GraphComplexSearchParams.getFieldBoost("term", ES_CATEGORIES)));
        categoryBool.should(new FieldFilter(ES_CATEGORIES, FieldFilterTypeEnum.PREFIX, category)
                .boost((float) GraphComplexSearchParams.getFieldBoost("prefix", ES_CATEGORIES)));
    }
    queryItem.should(new FieldFilter(FieldFilterTypeEnum.BOOL, categoryBool));
}
private static void wrapNer(BoolQuery queryItem, JSONObject ner) {
@@ -332,10 +341,14 @@ public class VertexSearchReqWrapUtils {
//别名搜索
if (queryField.contains("alias")) {
queryItem.should(new FieldFilter(EntityConstants.ES_ALIAS, FieldFilterTypeEnum.TERM, queryEntity));
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
contentPhrase.setSlop(0);
queryItem.should(new FieldFilter(ES_ALIAS_1GRAM, FieldFilterTypeEnum.PHRASE, contentPhrase).analyzer("1gram").boost(0.5f));
}
if (mode.equalsIgnoreCase(COMPLEX)) {
//正文搜索
//副标题搜索
if (queryField.contains("spinfo")) {
queryItem.should(new FieldFilter(ES_SPINFO, FieldFilterTypeEnum.TERM, queryEntity));
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
@@ -359,21 +372,21 @@ public class VertexSearchReqWrapUtils {
if (queryField.contains("summary")) {
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
contentPhrase.setSlop(0);
queryItem.should(new FieldFilter(ES_SUMMARY, FieldFilterTypeEnum.PHRASE, contentPhrase).boost(0.3f));
queryItem.should(new FieldFilter(ES_SUMMARY_1GRAM, FieldFilterTypeEnum.PHRASE, contentPhrase).analyzer("1gram").boost(0.5f));
}
//正文搜索
if (queryField.contains("content")) {
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
contentPhrase.setSlop(0);
queryItem.should(new FieldFilter(ES_CONTENT, FieldFilterTypeEnum.PHRASE, contentPhrase).boost(0.3f));
queryItem.should(new FieldFilter(ES_CONTENT_1GRAM, FieldFilterTypeEnum.PHRASE, contentPhrase).analyzer("1gram").boost(0.5f));
}
//标签搜索
if (queryField.contains("tags")) {
queryItem.should(new FieldFilter("tags", FieldFilterTypeEnum.TERM, queryEntity));
queryItem.should(new FieldFilter(ES_TAGS, FieldFilterTypeEnum.TERM, queryEntity));
if (StringUtils.isNotEmpty(label) && LABEL_CREATIVE_WORK.equals(label)) {
queryItem.should(new FieldFilter("tags.colon", FieldFilterTypeEnum.TERM, queryEntity));
queryItem.should(new FieldFilter(ES_TAGS_COLON, FieldFilterTypeEnum.TERM, queryEntity));
}
}
}