我正在尝试为我的前端应用程序实现一个灵活的搜索器。
我的文档结构如下所示:
{
  "student": { "id": 1, "name": "Joe" },
  "city": { "id": 102, "name": "London" },
  "tags": [
    { "id": 33, "name": "football" },
    { "id": 34, "name": "basketball" },
    { "id": 35, "name": "music" },
    ...
  ],
  "skills": [
    { "id": 302, "name": "Active listening" },
    { "id": 23, "name": "Collaboration" },
    { "id": 34, "name": "Communication" },
    ...
  ]
}
我上传了多个结构相同但数据不同的文档。不同的学生可能有不同的标签,但同一个标签也可以分配给多个学生。技能也遵循同样的关系。
假设我有三个 <Select /> 组件:第一个用于选择城市,第二个用于选择标签,第三个用于选择技能。当我在第一个 <Select /> 中选择伦敦时,我希望获取居住在伦敦的所有学生的标签列表。我还希望能够根据在 <Select /> 中输入的文本(按标签名称)过滤这些标签。此外,该列表应当支持分页。
我当前的映射如下:
{
  "mappings": {
    "properties": {
      "student": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": { "type": "keyword" }
        }
      },
      "city": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": { "type": "keyword" }
        }
      },
      "tags": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": { "type": "keyword" }
        }
      },
      "skills": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": { "type": "keyword" }
        }
      }
    }
  }
}
这是我解决该问题的第一次尝试。它支持分页,但不支持根据 <Select /> 中输入的文本过滤结果:
{
  "size": 0,
  "aggs": {
    "aggregatorField": {
      "nested": { "path": "tags" },
      "aggs": {
        "aggregator": {
          "composite": {
            "size": 11,
            "sources": [
              { "aggregator": { "terms": { "field": "tags.id" } } }
            ]
          },
          "aggs": {
            "item": {
              "top_hits": {
                "size": 1,
                "sort": [
                  { "tags.id": { "order": "desc" } }
                ],
                "_source": {
                  "includes": ["tags"]
                }
              }
            }
          }
        }
      }
    }
  }
}
为了支持按文本过滤,我在索引设置中定义了一个规范化器(normalizer),并将其应用于标签和技能的名称字段:
"settings": {
  "analysis": {
    "normalizer": {
      "lowercase_normalizer": {
        "type": "custom",
        "char_filter": [],
        "filter": [
          "lowercase",
          "asciifolding"
        ]
      }
    }
  }
},
现在我的搜索主体如下所示:
{
  "size": 0,
  "aggs": {
    "aggregatorField": {
      "nested": { "path": "tags" },
      "aggs": {
        "aggregator": {
          "terms": {
            "field": "tags.name",
            "include": ".*ball.*",
            "size": 10000
          },
          "aggs": {
            "item": {
              "top_hits": {
                "size": 1,
                "sort": [
                  { "tags.id": { "order": "desc" } }
                ],
                "_source": {
                  "includes": ["tags"]
                }
              }
            },
            "bucks": {
              "bucket_sort": {
                "from": 0,
                "size": 11
              }
            }
          }
        }
      }
    }
  }
}
它支持文本搜索,但不支持分页。此外,在我看来这个查询并不优雅,所以我想也许有更好的方法来处理这种情况。也许可以将所有标签和技能提取到单独的索引中,然后执行类似于 SQL JOIN 的操作?
感谢您的帮助!
我为这些字段添加了带有 edge_ngram 过滤器的分析器。该过滤器使用户只需输入城市名称、标签或技能的开头部分即可进行匹配。
映射
PUT /city_tags_skills
{
  "settings": {
    "max_ngram_diff": 4,
    "analysis": {
      "analyzer": {
        "lowercase_asciifolding_edgengrams_analyzer": {
          "tokenizer": "whitespace",
          "filter": [
            "lowercase",
            "asciifolding",
            "edgegrams"
          ]
        }
      },
      "filter": {
        "edgegrams": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 5
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "student": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": {
            "type": "text",
            "analyzer": "lowercase_asciifolding_edgengrams_analyzer"
          }
        }
      },
      "city": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": {
            "type": "text",
            "analyzer": "lowercase_asciifolding_edgengrams_analyzer",
            "fields": {
              "to_keyword": { "type": "keyword" }
            }
          }
        }
      },
      "tags": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": {
            "type": "text",
            "analyzer": "lowercase_asciifolding_edgengrams_analyzer",
            "fields": {
              "to_keyword": { "type": "keyword" }
            }
          }
        }
      },
      "skills": {
        "type": "nested",
        "properties": {
          "id": { "type": "integer" },
          "name": {
            "type": "text",
            "analyzer": "lowercase_asciifolding_edgengrams_analyzer",
            "fields": {
              "to_keyword": { "type": "keyword" }
            }
          }
        }
      }
    }
  }
}
示例文档(不包含技能)
PUT /city_tags_skills/_bulk
{"create":{"_id":1}}
{"city":{"id":301,"name":"London"},"tags":[{"id":33,"name":"football"},{"id":2,"name":"bcde"},{"id":4,"name":"defg"},{"id":5,"name":"efgh"}]}
{"create":{"_id":2}}
{"city":{"id":303,"name":"Londonium"},"tags":[{"id":33,"name":"football"},{"id":4,"name":"defg"},{"id":5,"name":"efgh"},{"id":6,"name":"fghi"},{"id":7,"name":"ghij"}]}
{"create":{"_id":3}}
{"city":{"id":302,"name":"New York"},"tags":[{"id":33,"name":"football"},{"id":5,"name":"efgh"},{"id":7,"name":"ghij"},{"id":8,"name":"hijk"},{"id":9,"name":"ijkl"}]}
{"create":{"_id":4}}
{"city":{"id":301,"name":"London"},"tags":[{"id":4,"name":"defg"},{"id":5,"name":"efgh"},{"id":7,"name":"ghij"},{"id":8,"name":"hijk"},{"id":9,"name":"ijkl"}]}
第一个查询用于选择城市。名为 pagination 的聚合通过 from 和 size 参数来选择页面。
GET /city_tags_skills/_search?filter_path=aggregations
{
  "query": {
    "nested": {
      "path": "city",
      "query": {
        "match": {
          "city.name": { "query": "lond" }
        }
      }
    }
  },
  "aggs": {
    "inside_city": {
      "nested": { "path": "city" },
      "aggs": {
        "by_city": {
          "terms": {
            "field": "city.name.to_keyword",
            "size": 10
          },
          "aggs": {
            "pagination": {
              "bucket_sort": {
                "sort": [
                  { "_key": { "order": "asc" } }
                ],
                "from": 0,
                "size": 3
              }
            }
          }
        }
      }
    }
  }
}
该查询有效,响应如下:
{
  "aggregations": {
    "inside_city": {
      "doc_count": 3,
      "by_city": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          { "key": "London", "doc_count": 2 },
          { "key": "Londonium", "doc_count": 1 }
        ]
      }
    }
  }
}
第二个分页查询
GET /city_tags_skills/_search?filter_path=aggregations
{
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "city",
            "query": {
              "term": {
                "city.name.to_keyword": { "value": "London" }
              }
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "inside_city": {
      "nested": { "path": "tags" },
      "aggs": {
        "autocomplete_filter": {
          "filter": {
            "match": {
              "tags.name": { "query": "foo" }
            }
          },
          "aggs": {
            "by_tags": {
              "terms": {
                "field": "tags.name.to_keyword",
                "size": 10
              },
              "aggs": {
                "pagination": {
                  "bucket_sort": {
                    "sort": [
                      { "_key": { "order": "asc" } }
                    ],
                    "from": 0,
                    "size": 3
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
响应
{
  "aggregations": {
    "inside_city": {
      "doc_count": 9,
      "autocomplete_filter": {
        "doc_count": 2,
        "by_tags": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            { "key": "football", "doc_count": 1 },
            { "key": "footwork", "doc_count": 1 }
          ]
        }
      }
    }
  }
}
您可以为技能编写类似的查询。
您还可以通过转换(transform)将标签提取到单独的索引中。
创建转换
PUT _transform/tag_extraction
{
  "source": { "index": "city_tags_skills" },
  "dest": { "index": "city_tags_skills_tags_extracted" },
  "pivot": {
    "group_by": {
      "tags.name": {
        "terms": {
          "script": {
            "source": """
              List tags = params['_source']['tags'];
              List tagNames = new LinkedList();
              for (Map tag : tags) {
                tagNames.add(tag.name);
              }
              return tagNames;
            """
          }
        }
      }
    },
    "aggregations": {
      "fictitious": {
        "terms": { "field": "tags" }
      }
    }
  }
}
启动转换
POST _transform/tag_extraction/_start
让我们看看提取的标签
GET /city_tags_skills_tags_extracted/_search?filter_path=hits.hits._source.tags
响应
{
  "hits": {
    "hits": [
      { "_source": { "tags": { "name": "abcd" } } },
      { "_source": { "tags": { "name": "bcde" } } },
      { "_source": { "tags": { "name": "cdef" } } },
      { "_source": { "tags": { "name": "defg" } } },
      { "_source": { "tags": { "name": "efgh" } } },
      { "_source": { "tags": { "name": "fghi" } } },
      { "_source": { "tags": { "name": "ghij" } } },
      { "_source": { "tags": { "name": "hijk" } } },
      { "_source": { "tags": { "name": "ijkl" } } }
    ]
  }
}
您可以通过以下请求停止转换
POST _transform/tag_extraction/_stop