使用milvus-sdk-go的迭代器导出数据
创始人
2025-01-10 12:37:46
0

使用milvus-sdk-go的迭代器导出数据

迭代器是一种功能强大的工具,可帮助您使用主键值和布尔表达式迭代集合中的大量数据或全部数据,从而显著改善检索数据的方式。传统的 offset + limit 参数用法会随着偏移量的增大而变得效率低下,相比之下,迭代器提供了更具可扩展性的解决方案。

当表数据很大,需要全量导出,我们可以使用迭代器,例如每次只查询1000行数据,直到所有数据查询完成,同时也可以减少服务器压力。

需要注意的是迭代器是一个客户端实现。

下面列举一个例子:写入3000条数据,每次读取100条,直至全部读取完毕。

// Command hello_iterator demonstrates reading a whole Milvus collection in
// fixed-size batches with the milvus-sdk-go query iterator.
//
// The program creates a collection, builds an IVF_FLAT index, loads the
// collection, inserts nEntities random rows, flushes them, reads them back
// batchSize rows at a time, and finally drops the collection.
package main

import (
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"math/rand"
	"strconv"

	"github.com/milvus-io/milvus-sdk-go/v2/client"
	"github.com/milvus-io/milvus-sdk-go/v2/entity"
)

const (
	milvusAddr     = `192.168.230.71:19530`
	nEntities, dim = 3000, 128
	collectionName = "hello_iterator"

	// batchSize is the number of rows requested from the iterator per Next call.
	batchSize = 100
	// nlist is the nlist parameter passed to the IVF_FLAT index constructor.
	nlist = 128

	msgFmt                                     = "==== %s ====\n"
	idCol, randomCol, addressCol, embeddingCol = "ID", "random", "address", "embeddings"
)

// main runs the demo end to end. Any failure terminates the process through
// log.Fatal; note that log.Fatal exits immediately, so the deferred client
// Close only runs when the program completes normally.
func main() {
	ctx := context.Background()

	log.Printf(msgFmt, "start connecting to Milvus")
	c, err := client.NewClient(ctx, client.Config{
		Address: milvusAddr,
	})
	if err != nil {
		log.Fatal("failed to connect to milvus, err: ", err.Error())
	}
	defer c.Close()

	// Delete the collection if it is left over from a previous run.
	has, err := c.HasCollection(ctx, collectionName)
	if err != nil {
		log.Fatalf("failed to check collection exists, err: %v", err)
	}
	if has {
		// Do not ignore this error: a failed drop would make the
		// CreateCollection call below fail with a less obvious message.
		if err := c.DropCollection(ctx, collectionName); err != nil {
			log.Fatalf("failed to drop existing collection, err: %v", err)
		}
	}

	// Create the collection.
	log.Printf(msgFmt, fmt.Sprintf("create collection, `%s`", collectionName))
	schema := entity.NewSchema().WithName(collectionName).WithDescription("hello_milvus is the simplest demo to introduce the APIs").
		WithField(entity.NewField().WithName(idCol).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(false)).
		WithField(entity.NewField().WithName(randomCol).WithDataType(entity.FieldTypeDouble)).
		WithField(entity.NewField().WithName(addressCol).WithDataType(entity.FieldTypeVarChar).WithTypeParams(entity.TypeParamMaxLength, "50")).
		WithField(entity.NewField().WithName(embeddingCol).WithDataType(entity.FieldTypeFloatVector).WithDim(dim))

	if err := c.CreateCollection(ctx, schema, entity.DefaultShardNumber); err != nil { // use default shard number
		log.Fatalf("create collection failed, err: %v", err)
	}

	// Build the vector index.
	log.Printf(msgFmt, "start creating index IVF_FLAT")
	idx, err := entity.NewIndexIvfFlat(entity.L2, nlist)
	if err != nil {
		log.Fatalf("failed to create ivf flat index, err: %v", err)
	}
	if err := c.CreateIndex(ctx, collectionName, embeddingCol, idx, false); err != nil {
		log.Fatalf("failed to create index, err: %v", err)
	}

	log.Printf(msgFmt, "start loading collection")
	if err := c.LoadCollection(ctx, collectionName, false); err != nil {
		log.Fatalf("failed to load collection, err: %v", err)
	}

	// Generate and insert the random rows.
	log.Printf(msgFmt, "start inserting random entities")
	idList := make([]int64, 0, nEntities)
	randomList := make([]float64, 0, nEntities)
	addressList := make([]string, 0, nEntities)
	embeddingList := make([][]float32, 0, nEntities)

	for i := 0; i < nEntities; i++ {
		idList = append(idList, int64(i))
		randomList = append(randomList, rand.Float64())
		addressList = append(addressList, "wuhan"+strconv.Itoa(i))

		vec := make([]float32, 0, dim)
		for j := 0; j < dim; j++ {
			vec = append(vec, rand.Float32())
		}
		embeddingList = append(embeddingList, vec)
	}

	idColData := entity.NewColumnInt64(idCol, idList)
	randomColData := entity.NewColumnDouble(randomCol, randomList)
	addressColData := entity.NewColumnVarChar(addressCol, addressList)
	embeddingColData := entity.NewColumnFloatVector(embeddingCol, dim, embeddingList)

	if _, err := c.Insert(ctx, collectionName, "", idColData, randomColData, addressColData, embeddingColData); err != nil {
		log.Fatalf("failed to insert random data into `%s`, err: %v", collectionName, err)
	}

	if err := c.Flush(ctx, collectionName, false); err != nil {
		log.Fatalf("failed to flush data, err: %v", err)
	}

	// Use the iterator, reading batchSize rows per call.
	itr, err := c.QueryIterator(ctx, client.NewQueryIteratorOption(collectionName).WithOutputFields(idCol, randomCol, embeddingCol).WithBatchSize(batchSize))
	if err != nil {
		log.Fatal("failed to query iterator: ", err.Error())
	}
	for {
		rs, err := itr.Next(ctx)
		if err != nil {
			// io.EOF signals that the iterator has no more batches.
			if errors.Is(err, io.EOF) {
				log.Println("iterator reach EOF")
				break
			}
			log.Fatal("failed to query iterator. next: ", err.Error())
		}

		var idlist []int64
		var randomlist []float64
		for _, col := range rs {
			switch col.Name() {
			case idCol:
				// Checked assertion: report a clear error instead of panicking
				// if the returned column is not the expected type.
				idColumn, ok := col.(*entity.ColumnInt64)
				if !ok {
					log.Fatalf("column %q has unexpected type %T", col.Name(), col)
				}
				idlist = make([]int64, 0, col.Len())
				for i := 0; i < col.Len(); i++ {
					val, err := idColumn.ValueByIdx(i)
					if err != nil {
						log.Fatal(err)
					}
					idlist = append(idlist, val)
				}
			case randomCol:
				randomColumn, ok := col.(*entity.ColumnDouble)
				if !ok {
					log.Fatalf("column %q has unexpected type %T", col.Name(), col)
				}
				randomlist = make([]float64, 0, col.Len())
				for i := 0; i < col.Len(); i++ {
					val, err := randomColumn.ValueByIdx(i)
					if err != nil {
						log.Fatal(err)
					}
					randomlist = append(randomlist, val)
				}
			}
		}
		log.Printf("\tids: %#v\n", idlist)
		log.Printf("\trandoms: %#v\n", randomlist)
	}

	// Drop the demo collection.
	log.Printf(msgFmt, fmt.Sprintf("drop collection `%s`", collectionName))
	if err := c.DropCollection(ctx, collectionName); err != nil {
		log.Fatalf("failed to drop collection, err: %v", err)
	}
}

相关内容

热门资讯

据文件显示!吉祥填大坑辅助器攻... 据文件显示!吉祥填大坑辅助器攻略,竟然真的是有辅助下载(有挂分享)-哔哩哔哩1、上手简单,内置详细流...
黑科技技巧!菠萝辅助器免费版的... 黑科技技巧!菠萝辅助器免费版的特点,建德十三道辅助,指南辅助器(有挂细节)-哔哩哔哩1、让任何用户在...
据公告内容!内蒙麻将外卦神器下... 据公告内容!内蒙麻将外卦神器下载,好像是真的辅助器(有挂猫腻)-哔哩哔哩1、内蒙麻将外卦神器下载破解...
今天下午!哈糖大菠萝软件下载,... 您好,哈糖大菠萝软件下载这款游戏可以开挂的,确实是有挂的,需要了解加去威信【136704302】很多...
针对!暗宝辅助好用吗,本来有挂... 针对!暗宝辅助好用吗,本来有挂辅助修改器(有挂技巧)-哔哩哔哩小薇(辅助器软件下载)致您一封信;亲爱...
透视教学!智星德州可以透视吗,... 透视教学!智星德州可以透视吗,游戏大厅浙江脚本辅助,要领辅助软件(有挂方式)-哔哩哔哩1、游戏大厅浙...
此事备受玩家关注!闲闲辅助器,... 此事备受玩家关注!闲闲辅助器,一贯存在有辅助软件(有挂总结)-哔哩哔哩1、实时闲闲辅助器透视辅助更新...
透视肯定!智星菠萝可以辅助吗,... 透视肯定!智星菠萝可以辅助吗,来几局是正规平台吗,窍要辅助app(有挂分析)-哔哩哔哩1、智星菠萝可...
昨日!牌乐门插件,总是有挂辅助... 昨日!牌乐门插件,总是有挂辅助安装(有挂方法)-哔哩哔哩1、游戏颠覆性的策略玩法,独创攻略技巧玩法,...
截至发稿!约局吧能不能开挂,微... 截至发稿!约局吧能不能开挂,微信小程序财神破解版,方案辅助教程(有挂方法)-哔哩哔哩微信小程序财神破...