fix: 优化全文召回部分条件
This commit is contained in:
@@ -41,18 +41,23 @@ public class EntityConstants {
|
||||
// Field-name constants passed as the field argument when building search filters
// (see the FieldFilter usages later in this commit). Values containing a dot, such as
// "alias.1gram" or "spinfo.text", presumably address a sub-field of the base field in the
// search index (the ES_ prefix suggests Elasticsearch) — TODO confirm against the index mapping.
// NOTE(review): this is a slice of a diff hunk with +/- markers stripped; the "|" and "||||"
// lines are table residue from the web view, not Java.
public static final String ES_FOREIGN_NAME = "foreign_name";
|
||||
public static final String ES_FOREIGN_NAME_TEXT = "foreign_name.text";
|
||||
public static final String ES_ALIAS = "alias";
|
||||
public static final String ES_ALIAS_1GRAM = "alias.1gram";
|
||||
public static final String ES_URL = "url";
|
||||
|
||||
public static final String ES_STYLE_NAME = "style_name";
|
||||
public static final String ES_PSEUDONYM = "pseudonym";
|
||||
public static final String ES_PSEUDONYM_1GRAM = "pseudonym.1gram";
|
||||
public static final String ES_SUMMARY = "summary";
|
||||
public static final String ES_SUMMARY_1GRAM = "summary.1gram";
|
||||
public static final String ES_CONTENT = "content";
|
||||
public static final String ES_CONTENT_1GRAM = "content.1gram";
|
||||
public static final String ES_TAGS = "tags";
|
||||
public static final String ES_TAGS_COLON = "tags.colon";
|
||||
public static final String ES_SPINFO = "spinfo";
|
||||
public static final String ES_SPINFO_TEXT = "spinfo.text";
|
||||
public static final String ES_BIZINFO = "bizinfo";
|
||||
// NOTE(review): the constant name is singular but the value is spelled "infoboxs";
// confirm this matches the actual index field name before "correcting" it.
public static final String ES_INFOBOX = "infoboxs";
|
||||
public static final String ES_CATEGORIES = "categories";
|
||||
|
||||
// Person-specific fields; both values point at ".text" variants.
public static final String ES_PPL_OCCUPATION = "occupation.text";
|
||||
public static final String ES_PPL_IDENTITY = "identity.text";
|
||||
|
||||
@@ -30,8 +30,14 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_ALIAS;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_ALIAS_1GRAM;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CATEGORIES;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CONTENT;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CONTENT_1GRAM;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_CULTURE_AWARD_YEAR;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_NAME;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_NAME_1GRAM;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_NAME_TEXT;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_PSEUDONYM;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_PSEUDONYM_1GRAM;
|
||||
@@ -39,6 +45,9 @@ import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_SPINFO_TEXT;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_STYLE_NAME;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_SUMMARY;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_SUMMARY_1GRAM;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_TAGS;
|
||||
import static com.shuwen.data.entity.manage.api.model.constants.EntityConstants.ES_TAGS_COLON;
|
||||
import static com.shuwen.data.entity.manage.common.entity.constant.GraphConstants.Label.LABEL_CREATIVE_WORK;
|
||||
import static com.shuwen.data.entity.manage.common.entity.constant.GraphConstants.Label.LABEL_CULTURE;
|
||||
import static com.shuwen.data.entity.manage.common.entity.constant.GraphConstants.Label.LABEL_MULTI;
|
||||
@@ -204,20 +213,20 @@ public class VertexSearchReqWrapUtils {
|
||||
fieldText.setSearchType("cross_fields");
|
||||
Map<String, Double> boost = new HashMap<>();
|
||||
if (queryField.contains("name")) {
|
||||
boost.put("name", GraphComplexSearchParams.getFieldBoost("cross", "name"));
|
||||
boost.put("name.1gram", GraphComplexSearchParams.getFieldBoost("cross", "name.1gram"));
|
||||
boost.put(ES_NAME, GraphComplexSearchParams.getFieldBoost("cross", ES_NAME));
|
||||
boost.put(ES_NAME_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_NAME_1GRAM));
|
||||
}
|
||||
if (queryField.contains("alias")) {
|
||||
boost.put("alias", GraphComplexSearchParams.getFieldBoost("cross", "alias"));
|
||||
boost.put("alias.1gram", GraphComplexSearchParams.getFieldBoost("cross", "alias.1gram"));
|
||||
boost.put(ES_ALIAS, GraphComplexSearchParams.getFieldBoost("cross", ES_ALIAS));
|
||||
boost.put(ES_ALIAS_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_ALIAS_1GRAM));
|
||||
}
|
||||
if (queryField.contains("summary")) {
|
||||
boost.put("summary", GraphComplexSearchParams.getFieldBoost("cross", "summary"));
|
||||
boost.put("summary.1gram", GraphComplexSearchParams.getFieldBoost("cross", "summary.1gram"));
|
||||
boost.put(ES_SUMMARY, GraphComplexSearchParams.getFieldBoost("cross", ES_SUMMARY));
|
||||
boost.put(ES_SUMMARY_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_SUMMARY_1GRAM));
|
||||
}
|
||||
if (queryField.contains("content")) {
|
||||
boost.put("content", GraphComplexSearchParams.getFieldBoost("cross", "content"));
|
||||
boost.put("content.1gram", GraphComplexSearchParams.getFieldBoost("cross", "content.1gram"));
|
||||
boost.put(ES_CONTENT, GraphComplexSearchParams.getFieldBoost("cross", ES_CONTENT));
|
||||
boost.put(ES_CONTENT_1GRAM, GraphComplexSearchParams.getFieldBoost("cross", ES_CONTENT_1GRAM));
|
||||
}
|
||||
fieldText.setFieldBoostAppend(boost);
|
||||
String minimumShouldMatch = GraphComplexSearchParams.getMinimumShouldMatch("cross");
|
||||
@@ -232,24 +241,24 @@ public class VertexSearchReqWrapUtils {
|
||||
/**
 * Adds keyword phrase clauses to {@code queryItem}.
 *
 * <p>For every keyword, and for each of "name", "alias", "summary" and "content" that is
 * contained in {@code queryField}, one {@code should} clause is added: a PHRASE filter on
 * that field's ".1gram" variant, built from {@code new FieldPhrase(keyword)}, with analyzer
 * "1gram" and a boost taken from
 * {@code GraphComplexSearchParams.getFieldBoost("keyword", <field>)} and narrowed to float.
 *
 * <p>NOTE(review): this text is a commit diff whose +/- markers were lost. Each
 * {@code should(...)} line and each {@code .boost(...)} line appears twice in a row: first
 * with a string literal (e.g. "alias.1gram"), then with a constant (e.g. ES_ALIAS_1GRAM,
 * which EntityConstants defines as "alias.1gram"). The literal lines look like the removed
 * side and the constant lines like the added side — confirm against the repository. The
 * "|" and "||||" lines are web-view residue, not Java.
 *
 * @param queryItem  bool query that receives the should clauses
 * @param queryField set tested for membership of "name", "alias", "summary", "content"
 * @param keywords   keywords, each wrapped in its own FieldPhrase
 */
private static void wrapKeyword(BoolQuery queryItem, Set<String> queryField, List<String> keywords) {
|
||||
for (String keyword : keywords) {
|
||||
if (queryField.contains("name")) {
|
||||
queryItem.should(new FieldFilter("name.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
queryItem.should(new FieldFilter(ES_NAME_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
.analyzer("1gram")
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "name.1gram")));
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_NAME_1GRAM)));
|
||||
}
|
||||
if (queryField.contains("alias")) {
|
||||
queryItem.should(new FieldFilter("alias.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
queryItem.should(new FieldFilter(ES_ALIAS_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
.analyzer("1gram")
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "alias.1gram")));
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_ALIAS_1GRAM)));
|
||||
}
|
||||
if (queryField.contains("summary")) {
|
||||
queryItem.should(new FieldFilter("summary.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
queryItem.should(new FieldFilter(ES_SUMMARY_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
.analyzer("1gram")
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "summary.1gram")));
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_SUMMARY_1GRAM)));
|
||||
}
|
||||
if (queryField.contains("content")) {
|
||||
queryItem.should(new FieldFilter("content.1gram", FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
queryItem.should(new FieldFilter(ES_CONTENT_1GRAM, FieldFilterTypeEnum.PHRASE, new FieldPhrase(keyword))
|
||||
.analyzer("1gram")
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", "content.1gram")));
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("keyword", ES_CONTENT_1GRAM)));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -257,23 +266,23 @@ public class VertexSearchReqWrapUtils {
|
||||
/**
 * Adds one nested bool clause to {@code queryItem} that matches any of the given tags.
 *
 * <p>A fresh {@code BoolQuery} collects, per tag, two {@code should} clauses on the "tags"
 * field: a TERM filter boosted by {@code getFieldBoost("term", ...)} and a PREFIX filter
 * boosted by {@code getFieldBoost("prefix", ...)}. That nested query is then wrapped in a
 * BOOL-typed FieldFilter and added to {@code queryItem} as a single should clause.
 *
 * <p>NOTE(review): this text is a commit diff whose +/- markers were lost. Inside the loop
 * the first four lines use the literal "tags" and the next four use ES_TAGS (defined as
 * "tags" in EntityConstants); the literal group looks like the removed side — confirm
 * against the repository. The "|" and "||||" lines are web-view residue, not Java.
 *
 * @param queryItem bool query that receives the nested tag clause
 * @param tags      tag values to match exactly or by prefix
 */
private static void wrapTags(BoolQuery queryItem, Set<String> tags) {
|
||||
BoolQuery tagsBool = new BoolQuery();
|
||||
for (String tag : tags) {
|
||||
tagsBool.should(new FieldFilter("tags", FieldFilterTypeEnum.TERM, tag)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("term", "tags")));
|
||||
tagsBool.should(new FieldFilter("tags", FieldFilterTypeEnum.PREFIX, tag)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("prefix", "tags")));
|
||||
tagsBool.should(new FieldFilter(ES_TAGS, FieldFilterTypeEnum.TERM, tag)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("term", ES_TAGS)));
|
||||
tagsBool.should(new FieldFilter(ES_TAGS, FieldFilterTypeEnum.PREFIX, tag)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("prefix", ES_TAGS)));
|
||||
}
|
||||
queryItem.should(new FieldFilter(FieldFilterTypeEnum.BOOL, tagsBool));
|
||||
}
|
||||
|
||||
/**
 * Adds one nested bool clause to {@code queryItem} that matches any of the given categories.
 *
 * <p>A fresh {@code BoolQuery} collects, per category, two {@code should} clauses on the
 * "categories" field: a TERM filter boosted by {@code getFieldBoost("term", ...)} and a
 * PREFIX filter boosted by {@code getFieldBoost("prefix", ...)}. That nested query is then
 * wrapped in a BOOL-typed FieldFilter and added to {@code queryItem} as a single should
 * clause.
 *
 * <p>NOTE(review): this text is a commit diff whose +/- markers were lost, so two versions
 * are interleaved: one using the local {@code projectBool} with the literal "categories",
 * and one using {@code categoryBool} with ES_CATEGORIES (defined as "categories" in
 * EntityConstants). The {@code projectBool} lines look like the removed side (the commit
 * apparently renames the local) — confirm against the repository. The "|" and "||||" lines
 * are web-view residue, not Java.
 *
 * @param queryItem  bool query that receives the nested category clause
 * @param categories category values to match exactly or by prefix
 */
private static void wrapCategories(BoolQuery queryItem, Set<String> categories) {
|
||||
BoolQuery projectBool = new BoolQuery();
|
||||
BoolQuery categoryBool = new BoolQuery();
|
||||
for (String category : categories) {
|
||||
projectBool.should(new FieldFilter("categories", FieldFilterTypeEnum.TERM, category)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("term", "categories")));
|
||||
projectBool.should(new FieldFilter("categories", FieldFilterTypeEnum.PREFIX, category)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("prefix", "categories")));
|
||||
categoryBool.should(new FieldFilter(ES_CATEGORIES, FieldFilterTypeEnum.TERM, category)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("term", ES_CATEGORIES)));
|
||||
categoryBool.should(new FieldFilter(ES_CATEGORIES, FieldFilterTypeEnum.PREFIX, category)
|
||||
.boost((float) GraphComplexSearchParams.getFieldBoost("prefix", ES_CATEGORIES)));
|
||||
}
|
||||
queryItem.should(new FieldFilter(FieldFilterTypeEnum.BOOL, projectBool));
|
||||
queryItem.should(new FieldFilter(FieldFilterTypeEnum.BOOL, categoryBool));
|
||||
}
|
||||
|
||||
private static void wrapNer(BoolQuery queryItem, JSONObject ner) {
|
||||
@@ -332,10 +341,14 @@ public class VertexSearchReqWrapUtils {
|
||||
//别名搜索
|
||||
if (queryField.contains("alias")) {
|
||||
queryItem.should(new FieldFilter(EntityConstants.ES_ALIAS, FieldFilterTypeEnum.TERM, queryEntity));
|
||||
|
||||
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
|
||||
contentPhrase.setSlop(0);
|
||||
queryItem.should(new FieldFilter(ES_ALIAS_1GRAM, FieldFilterTypeEnum.PHRASE, contentPhrase).analyzer("1gram").boost(0.5f));
|
||||
}
|
||||
|
||||
if (mode.equalsIgnoreCase(COMPLEX)) {
|
||||
//正文搜索
|
||||
//副标题搜索
|
||||
if (queryField.contains("spinfo")) {
|
||||
queryItem.should(new FieldFilter(ES_SPINFO, FieldFilterTypeEnum.TERM, queryEntity));
|
||||
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
|
||||
@@ -359,21 +372,21 @@ public class VertexSearchReqWrapUtils {
|
||||
if (queryField.contains("summary")) {
|
||||
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
|
||||
contentPhrase.setSlop(0);
|
||||
queryItem.should(new FieldFilter(ES_SUMMARY, FieldFilterTypeEnum.PHRASE, contentPhrase).boost(0.3f));
|
||||
queryItem.should(new FieldFilter(ES_SUMMARY_1GRAM, FieldFilterTypeEnum.PHRASE, contentPhrase).analyzer("1gram").boost(0.5f));
|
||||
}
|
||||
|
||||
//正文搜索
|
||||
if (queryField.contains("content")) {
|
||||
FieldPhrase contentPhrase = new FieldPhrase(queryEntity);
|
||||
contentPhrase.setSlop(0);
|
||||
queryItem.should(new FieldFilter(ES_CONTENT, FieldFilterTypeEnum.PHRASE, contentPhrase).boost(0.3f));
|
||||
queryItem.should(new FieldFilter(ES_CONTENT_1GRAM, FieldFilterTypeEnum.PHRASE, contentPhrase).analyzer("1gram").boost(0.5f));
|
||||
}
|
||||
|
||||
//标签搜索
|
||||
if (queryField.contains("tags")) {
|
||||
queryItem.should(new FieldFilter("tags", FieldFilterTypeEnum.TERM, queryEntity));
|
||||
queryItem.should(new FieldFilter(ES_TAGS, FieldFilterTypeEnum.TERM, queryEntity));
|
||||
if (StringUtils.isNotEmpty(label) && LABEL_CREATIVE_WORK.equals(label)) {
|
||||
queryItem.should(new FieldFilter("tags.colon", FieldFilterTypeEnum.TERM, queryEntity));
|
||||
queryItem.should(new FieldFilter(ES_TAGS_COLON, FieldFilterTypeEnum.TERM, queryEntity));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user