Milvus 搜索时返回值为空

问题描述 投票:0回答:1

我尝试使用下面的这个函数创建一个 Milvus 集合

def __generate_collection(self):
    """Create the collection ``self.collection_name`` unless it already exists.

    The schema has an auto-generated INT64 primary key ``id``, a VARCHAR
    ``source`` field and a FLOAT_VECTOR ``embeddings`` field whose dimension
    is ``self.truncate_dim``. An ``STL_SORT`` index is requested on ``id`` and
    an ``IVF_FLAT`` index (L2 metric, ``nlist=1024``) on ``embeddings``.

    Returns:
        None. The method does nothing when the collection is already listed.
    """
    existing_collections = self.milvusClient.list_collections()
    if self.collection_name in existing_collections:
        return

    schema = MilvusClient.create_schema(auto_id=True, enable_dynamic_field=True, primary_field="id")

    # Fields are registered in declaration order: primary key, payload, vector.
    field_specs = (
        {"field_name": "id", "datatype": DataType.INT64, "is_primary": True},
        {"field_name": "source", "datatype": DataType.VARCHAR, "max_length": 50000},
        {"field_name": "embeddings", "datatype": DataType.FLOAT_VECTOR, "dim": self.truncate_dim},
    )
    for spec in field_specs:
        schema.add_field(**spec)

    index_params = MilvusClient.prepare_index_params()

    # Scalar index on the primary key, then the vector index used by searches.
    index_params.add_index(field_name="id", index_type="STL_SORT")
    vector_index = {
        "field_name": "embeddings",
        "index_type": "IVF_FLAT",
        "metric_type": "L2",
        "params": {"nlist": 1024},
    }
    index_params.add_index(**vector_index)

    self.milvusClient.create_collection(
        collection_name=self.collection_name,
        schema=schema,
        index_params=index_params,
    )

然后我继续使用下面所示的代码将数据插入到集合中

# Example payload: a single row with a "source" string and an "embeddings" vector.
# NOTE: `etc..` stands in for the remaining vector components; it is a
# placeholder from the original post, not runnable Python.
entities = [
    {"source": "hello", "embeddings": [1,2,3,4, etc..]}
]
insert_result = self.milvusClient.insert(collection_name=self.collection_name, data=entities)
print(insert_result)

# Printed output: {'insert_count': 1, 'ids': [450065725884255501], 'cost': 0}

根据输出,将数据插入集合似乎成功。但是,当我尝试使用 Milvus 进行搜索时,没有返回任何内容。

def search_and_query(
    self,
    search_vectors: list,
    search_field: str = "embeddings",
    search_params=None,
    vector_search_limit=10,
):
    """Run a vector search on ``self.collection_name`` and return the result.

    Args:
        search_vectors: Query vectors, passed to the client as ``data``.
        search_field: Name of the vector field to search (``anns_field``).
        search_params: Search parameters forwarded to the client. When
            ``None`` (the default), ``{"metric_type": "L2", "params":
            {"nprobe": 10}}`` is used.
        vector_search_limit: Maximum number of hits requested (``limit``).

    Returns:
        Whatever ``self.milvusClient.search`` returns; it is also printed.
    """
    if search_params is None:
        # Build the default per call: a dict literal in the signature would be
        # a single object shared (and mutable) across every invocation.
        search_params = {"metric_type": "L2", "params": {"nprobe": 10}}

    result = self.milvusClient.search(
        collection_name=self.collection_name,
        data=search_vectors,
        search_params=search_params,
        anns_field=search_field,
        limit=vector_search_limit,
        output_fields=["source"],
    )

    print(result)
    return result

# output: data: ['[]'] , extra_info: {'cost': 0}

我很困惑是什么导致了这个问题。有谁可以帮我看一下吗?

search vector-database milvus
1个回答
0
投票

一个可能的根本原因是:您集合中的 "embeddings" 向量,或者 search_vectors 中,包含了无法用于计算距离的无效元素(例如 NaN)。

下面是一个简化的示例(说明:我是在本地运行的,并且我使用的 index_type 是 "FLAT"):

  • 对于距离非常大的 vector_for_db / vector_for_query 配对,搜索仍然返回了预期的已插入记录。
  • 当我传入会使 L2 距离计算结果为 NaN 的向量时(例如 vector_for_query=[1, 2, 3, float("nan")]),没有返回任何结果。

因此我怀疑这可能就是问题的根本原因。

from pymilvus import (
    MilvusClient,
    DataType
)


def test_search(vector_for_db, vector_for_query):
    """Insert one vector into a fresh collection and search it with another.

    The collection ``"test"`` is dropped if present and recreated on every
    call, so each invocation starts from an empty collection holding only
    ``vector_for_db``.

    Args:
        vector_for_db: The 4-dimensional vector stored in the collection.
        vector_for_query: The 4-dimensional vector used as the search query.

    Returns:
        The result of ``client.search`` for ``vector_for_query`` (L2 metric,
        up to 10 hits, with the ``source`` field in the output).
    """
    truncate_dim = 4
    collection_name = "test"
    vector_field_name = "embeddings"

    client = MilvusClient("test.db")
    try:
        schema = client.create_schema(auto_id=True, enable_dynamic_field=True, primary_field="id")

        schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
        schema.add_field(field_name="source", datatype=DataType.VARCHAR, max_length=50000)
        schema.add_field(field_name=vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=truncate_dim)

        index_params = client.prepare_index_params()
        # NOTE(review): "nlist" is an IVF-style parameter and is presumably
        # ignored by a FLAT index; kept to mirror the original reproduction.
        index_params.add_index(
            field_name=vector_field_name,
            index_type="FLAT",
            metric_type="L2",
            params={"nlist": 1024},
        )

        # Start from a clean collection so earlier runs cannot affect the hits.
        if client.has_collection(collection_name=collection_name):
            client.drop_collection(collection_name=collection_name)
        client.create_collection(
            collection_name=collection_name, schema=schema, index_params=index_params
        )

        entities = [
            {"source": "hello", vector_field_name: vector_for_db}
        ]
        client.insert(collection_name=collection_name, data=entities)

        return client.search(
            collection_name=collection_name,
            data=[vector_for_query],
            search_params={"metric_type": "L2", "params": {"nprobe": 10}},
            anns_field=vector_field_name,
            limit=10,
            output_fields=["source"],
        )
    finally:
        # Release the connection even if schema creation, insert or search fails.
        client.close()

# A finite query vector, even one extremely far from the stored vector,
# still yields the inserted record.
far_query_hits = test_search([1, 2, 3, 4], [1e10, 1e10, 1e10, 1e10])
print(far_query_hits)  # Returns the inserted record

# A query vector with a NaN component yields no hits at all.
nan_query_hits = test_search([1, 2, 3, 4], [1, 2, 3, float("nan")])
print(nan_query_hits)  # Results in 0 search results
© www.soinside.com 2019 - 2024. All rights reserved.