我在 MongoDB 中尝试 Atlas Search,发现了一个奇怪的行为。
考虑 100000 个文档的集合,如下所示:
{
_id: "1",
description: "Lorem Ipsum",
creator: "UserA"
}
使用具有以下基本定义的 Atlas 搜索索引:
{
mappings: { dynamic: true }
}
出于示例的目的,Atlas Search 索引是此集合上唯一创建的索引。
下面是几个聚合管道,以及每个管道的大致执行时间:
单独使用 $search ~100ms
[
{
$search: {
wildcard: {
query: "*b*",
path: {
wildcard: "*"
},
allowAnalyzedField: true
}
}
}
]
$search 加上一个不返回任何结果的简单 $match ~25 秒(请记住,这里只有 100000 个文档;如果不必考虑网络开销,此时在客户端过滤反而会更快)
[
{
$search: {
wildcard: {
query: "*b*",
path: {
wildcard: "*"
},
allowAnalyzedField: true
}
}
},
{
$match:{creator:null}
},
{
$limit: 100
}
]
单独使用 $match(不返回任何结果)~100ms
[
{
$match:{creator:null}
},
{
$limit: 100
}
]
假设所有文档都与 $search 匹配,则这些 $match 都需要扫描所有文档。
我想也许是因为 $match 是第一阶段,Mongo 可以直接在集合上工作,但不,这个故意未优化的管道工作得很好:
$set 加 $match(用 $set 强制 $match 在管道上执行,而不是直接在集合上执行)~200ms
[
{
$set:
{
creator: {
$concat: ["$creator", "ABC"]
}
}
},
{
$match: {
creator: null
}
},
{
$limit: 100
}
]
用 $sort 替换 $match 得到类似的结果
我知道 Atlas Search 不鼓励使用 $match 和 $sort 并提供替代方案,但看起来性能应该不会那么糟糕。我有一个非常具体的用例,非常希望能够在 $search 后使用 $match 或 $sort,而 Mongo 提出的替代方案并不完全是我所需要的。
什么可以解释这一点?是Mongo缺乏优化吗?这是一个错误吗?
编辑:我比较了单独 $search 与 $search + $match 的 explain() 输出,两者几乎没有区别......就好像 Mongo 声称两次做的事情几乎相同,但实际得到的结果却大不相同。
$search
{
"explainVersion": "1",
"stages": [
{
"$_internalSearchMongotRemote": {
"mongotQuery": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
},
"explain": {
"query": {
"type": "BooleanQuery",
"stats": {
"context": {
"millisElapsed": 83.683183,
"invocationCounts": {
"createWeight": 1,
"createScorer": 8
}
},
"match": {
"millisElapsed": 0.292426,
"invocationCounts": {
"nextDoc": 1005,
"refineRoughMatch": 1001
}
},
"score": {
"millisElapsed": 0.170923,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"must": [],
"mustNot": [],
"should": [
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 0.312473,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.04765,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "creator",
"value": "*u*"
}
}
]
}
},
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 83.198421,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.007195,
"invocationCounts": {
"score": 117,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "description",
"value": "*u*"
}
}
]
}
}
],
"filter": [],
"minimumShouldMatch": 0
}
},
"collectStats": {
"millisElapsed": 0.31243,
"invocationCounts": {
"collect": 1001,
"competitiveIterator": 4,
"setScorer": 4
}
},
"resourceUsage": {
"majorFaults": 131,
"minorFaults": 116,
"userTimeMs": 40,
"systemTimeMs": 10,
"reportingThreads": 1
}
},
"mongotDocsRequested": 100
},
"nReturned": 0,
"executionTimeMillisEstimate": 92
},
{
"$_internalSearchIdLookup": {
"limit": 100
},
"nReturned": 0,
"executionTimeMillisEstimate": 92
},
{
"$limit": 100,
"nReturned": 0,
"executionTimeMillisEstimate": 92
}
],
"serverInfo": {
"host": "******.mongodb.net",
"port": 27017,
"version": "7.0.11",
"gitVersion": "f451220f0df2b9dfe073f1521837f8ec5c208a8c"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
"internalQueryFrameworkControl": "trySbeRestricted"
},
"command": {
"aggregate": "test",
"pipeline": [
{
"$search": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
}
},
{ "$limit": 100 }
],
"cursor": {},
"maxTimeMS": 60000,
"$db": "jeansam"
},
"ok": 1,
"$clusterTime": {
"clusterTime": {
"$timestamp": "7381870393121833030"
},
"signature": {
"hash": "rJnaSn2/sBcXYnvz1aQZLl1wkr8=",
"keyId": {
"low": 6,
"high": 1706112144,
"unsigned": false
}
}
},
"operationTime": {
"$timestamp": "7381870393121833030"
}
}
$search + $match
{
"explainVersion": "1",
"stages": [
{
"$_internalSearchMongotRemote": {
"mongotQuery": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
},
"explain": {
"query": {
"type": "BooleanQuery",
"stats": {
"context": {
"millisElapsed": 46.088005,
"invocationCounts": {
"createWeight": 1,
"createScorer": 8
}
},
"match": {
"millisElapsed": 0.255412,
"invocationCounts": {
"nextDoc": 1005,
"refineRoughMatch": 1001
}
},
"score": {
"millisElapsed": 0.164519,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"must": [],
"mustNot": [],
"should": [
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 0.255677,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.045942,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "creator",
"value": "*u*"
}
}
]
}
},
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 45.70447,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.006891,
"invocationCounts": {
"score": 117,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "description",
"value": "*u*"
}
}
]
}
}
],
"filter": [],
"minimumShouldMatch": 0
}
},
"collectStats": {
"millisElapsed": 0.317097,
"invocationCounts": {
"collect": 1001,
"competitiveIterator": 4,
"setScorer": 4
}
},
"resourceUsage": {
"majorFaults": 0,
"minorFaults": 0,
"userTimeMs": 40,
"systemTimeMs": 0,
"reportingThreads": 1
}
}
},
"nReturned": 0,
"executionTimeMillisEstimate": 51
},
{
"$_internalSearchIdLookup": {},
"nReturned": 0,
"executionTimeMillisEstimate": 51
},
{
"$match": { "creator": { "$eq": null } },
"nReturned": 0,
"executionTimeMillisEstimate": 51
},
{
"$limit": 100,
"nReturned": 0,
"executionTimeMillisEstimate": 51
}
],
"serverInfo": {
"host": "******.mongodb.net",
"port": 27017,
"version": "7.0.11",
"gitVersion": "f451220f0df2b9dfe073f1521837f8ec5c208a8c"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
"internalQueryFrameworkControl": "trySbeRestricted"
},
"command": {
"aggregate": "test",
"pipeline": [
{
"$search": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
}
},
{ "$match": { "creator": null } },
{ "$limit": 100 }
],
"cursor": {},
"maxTimeMS": 60000,
"$db": "jeansam"
},
"ok": 1,
"$clusterTime": {
"clusterTime": {
"$timestamp": "7381870560625557505"
},
"signature": {
"hash": "FoPtxZnHvY1wEjkiaNL5jLwqxbA=",
"keyId": {
"low": 6,
"high": 1706112144,
"unsigned": false
}
}
},
"operationTime": {
"$timestamp": "7381870560625557505"
}
}
我确实发现使用 storedSource 可以显著提高性能(~500ms),但我仍然不明白为什么原本会这么慢......
编辑:我在另一个数据库中尝试了完全相同的操作,但它仍然很慢,所以我更加困惑......