✏️ 编者按:
在历经 9 个 RC 版本的迭代与全球 1000 家用户的实战验证后,Milvus 2.0 正式 GA!Zilliz 质量保障团队负责人乔燕良撰文解析新功能、新亮点。 Milvus 2.0 背后,有哪些开发秘辛?文末预约直播,与开发者们在线畅聊!
删除功能(Entity Deletion)
import random

from pymilvus import connections, utility
from pymilvus import Collection, DataType, FieldSchema, CollectionSchema

# connect to milvus
host = 'x.x.x.x'  # placeholder: replace with the address of your Milvus server
connections.add_connection(default={"host": host, "port": 19530})
connections.connect(alias='default')

# create a collection with customized primary field: id_field
dim = 128
id_field = FieldSchema(name="cus_id", dtype=DataType.INT64, is_primary=True)
age_field = FieldSchema(name="age", dtype=DataType.INT64, description="age")
embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=dim)
schema = CollectionSchema(
    fields=[id_field, age_field, embedding_field],
    auto_id=False,  # primary keys are supplied by the caller, not generated
    description="hello MilMil",
)
collection_name = "hello_milmil"
collection = Collection(name=collection_name, schema=schema)

# insert data with customized ids
nb = 300
ids = list(range(nb))
ages = [random.randint(20, 40) for _ in range(nb)]
embeddings = [[random.random() for _ in range(dim)] for _ in range(nb)]
# one column per schema field, in the same order as the schema's field list
entities = [ids, ages, embeddings]
ins_res = collection.insert(entities)
print(f"insert entities primary keys: {ins_res.primary_keys}")
insert entities primary keys: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299]
# search
nq = 10
search_vec = [[random.random() for _ in range(dim)] for _ in range(nq)]
search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
limit = 3

# search 2 times to verify the vector persists
for _ in range(2):
    results = collection.search(search_vec, embedding_field.name, search_params, limit)
    ids = results[0].ids  # only the hits of the first query vector are inspected
    print(f"search result ids: {ids}")

    # query to verify the ids exist
    expr = f"cus_id in {ids}"
    query_res = collection.query(expr)
    print(f"query results: {query_res}")
search result ids: [76, 2, 246]
query results: [{'cus_id': 246}, {'cus_id': 2}, {'cus_id': 76}]
search result ids: [76, 2, 246]
query results: [{'cus_id': 246}, {'cus_id': 2}, {'cus_id': 76}]
# delete the first hit of the previous search by primary key
print(f"trying to delete one vector: id={ids[0]}")
collection.delete(expr=f"cus_id in {[ids[0]]}")

# search again right after the delete, using the same query vectors
results = collection.search(search_vec, embedding_field.name, search_params, limit)
ids = results[0].ids
print(f"after deleted: search result ids: {ids}")

# query to verify the id exists
expr = f"cus_id in {ids}"
query_res = collection.query(expr)
print(f"after deleted: query res: {query_res}")
print("completed")
trying to delete one vector: id=76
after deleted: search result ids: [76, 2, 246]
after deleted: query res: [{'cus_id': 246}, {'cus_id': 2}, {'cus_id': 76}]
completed
# query the three original ids again to check whether the deleted one is gone
expr = f"cus_id in {[76, 2, 246]}"
# query to verify the id exists
query_res = collection.query(expr)
print(f"after deleted: query res: {query_res}")
print("completed")
after deleted: query res: [{'cus_id': 246}, {'cus_id': 2}]
completed
一致性等级 (Consistency_level)
- 强一致性(CONSISTENCY_STRONG):GuaranteeTs 设为系统最新时间戳,QueryNodes 需要等待 ServiceTime 推进到当前最新时间戳才能执行该 Search 请求;
- 最终一致性(CONSISTENCY_EVENTUALLY):GuaranteeTs 设为一个特别小的值(比如说设为 1),跳过一致性检查,立刻在当前已有数据上执行 Search 查询;
- 有界一致性(CONSISTENCY_BOUNDED):GuaranteeTs 是一个比系统最新时间稍旧的时间,在可容忍范围内可以立刻执行查询;
- 客户端一致性(CONSISTENCY_SESSION):客户端使用上一次写入的时间戳作为 GuaranteeTs,那么每个客户端至少能看到自己插入的全部数据。
import time

# run the same search 5 times and print the wall-clock latency of each call
for _ in range(5):
    start = time.time()
    results = collection.search(search_vec, embedding_field.name, search_params, limit)
    end = time.time()
    print(f"search latency: {round(end-start, 4)}")
    ids = results[0].ids
    print(f"search result ids: {ids}")
# create a collection whose consistency level is set to strong
# NOTE(review): CONSISTENCY_STRONG is not imported in any visible snippet —
# presumably it comes from pymilvus; confirm the import before running.
collection_name = "hello_milmil_consist_strong"
collection = Collection(
    name=collection_name,
    schema=schema,
    consistency_level=CONSISTENCY_STRONG,
)
search latency: 0.3293
search latency: 0.1949
search latency: 0.1998
search latency: 0.2016
search latency: 0.198
completed
# create a collection whose consistency level is set to bounded
# NOTE(review): CONSISTENCY_BOUNDED is not imported in any visible snippet —
# presumably it comes from pymilvus; confirm the import before running.
collection_name = "hello_milmil_consist_bounded"
collection = Collection(
    name=collection_name,
    schema=schema,
    consistency_level=CONSISTENCY_BOUNDED,
)
search latency: 0.0144
search latency: 0.0104
search latency: 0.0107
search latency: 0.0104
search latency: 0.0102
completed
动态加载索引(Handoff)
# index
index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
collection.create_index(field_name=embedding_field.name, index_params=index_params)

# load
collection.load()
import random

# insert data with customized ids
nb = 50000
ids = list(range(nb))
ages = [random.randint(20, 40) for _ in range(nb)]
embeddings = [[random.random() for _ in range(dim)] for _ in range(nb)]
entities = [ids, ages, embeddings]

# insert the same batch 200 times so that enough rows accumulate
for _ in range(200):
    ins_res = collection.insert(entities)
# NOTE(review): the flattened original does not show whether this print sat
# inside the loop; it is placed after it here so only the last result prints.
print(f"insert entities primary keys: {ins_res.primary_keys}")
# I did this in another python console
>>> utility.get_query_segment_info("hello_milmil_handoff")
[segmentID: 430640405514551298
collectionID: 430640403705757697
partitionID: 430640403705757698
mem_size: 394463520
num_rows: 747090
index_name: "_default_idx"
indexID: 430640403745079297
nodeID: 7
state: Sealed
, segmentID: 430640405514551297
collectionID: 430640403705757697
partitionID: 430640403705757698
mem_size: 397536480
num_rows: 752910
index_name: "_default_idx"
indexID: 430640403745079297
nodeID: 7
state: Sealed
...
结语
- Milvus 项目地址:https://github.com/milvus-io/milvus
- Milvus 主页及文档地址:https://milvus.io/
- Milvus Slack Channel:milvusio.slack.com


阅读原文,解锁更多应用场景。
文章转载自 ZILLIZ,如果涉嫌侵权,请发送邮件至 contact@modb.pro 进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




