本文介绍向量检索服务DashVector的数据类型定义。
Doc
@dataclass(frozen=True)
class Doc:
    """A document record: primary key, vector data and optional extras.

    Instances are immutable because the dataclass is declared with
    ``frozen=True``.
    """

    # Primary key.
    id: str
    # Vector data.
    vector: Union[List[int], List[float], np.ndarray]
    # Sparse vector data (int keys mapped to float values); defaults to None.
    sparse_vector: Optional[Dict[int, float]] = None
    # User-defined fields of the Doc; defaults to None.
    fields: Optional[FieldDataType] = None
    # Vector similarity score; defaults to 0.0.
    score: float = 0.0
/**
 * Document record holding a primary key, vector data, optional sparse vector data,
 * user-defined fields and a similarity score.
 */
@Data
@Builder
public class Doc {
  /** Primary key. */
  @NonNull private String id;

  /** Vector data. */
  @NonNull private Vector vector;

  /** Sparse vector data. */
  private TreeMap<Integer, Float> sparseVector;

  /** User-defined document fields; the builder default is an empty {@link HashMap}. */
  @Builder.Default private Map<String, Object> fields = new HashMap<>();

  /** Vector similarity score. */
  private float score;

  /**
   * Stores a string-valued custom field.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, String value) {
    storeField(key, value);
  }

  /**
   * Stores an integer-valued custom field.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, Integer value) {
    storeField(key, value);
  }

  /**
   * Stores a float-valued custom field.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, Float value) {
    storeField(key, value);
  }

  /**
   * Stores a boolean-valued custom field.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, Boolean value) {
    storeField(key, value);
  }

  // Single write path shared by the typed addField overloads.
  private void storeField(String key, Object value) {
    fields.put(key, value);
  }
}
DocOpResult
@dataclass(frozen=True)
class DocOpResult:
    """Immutable record with four fields: doc_op, id, code and message.

    NOTE(review): presumably the outcome of one document operation; confirm
    against the code that produces it.
    """

    # Operation kind, expressed as a DocOp value.
    doc_op: DocOp
    # Presumably the primary key of the affected Doc -- TODO confirm.
    id: str
    # Integer result code (its value semantics are not defined here).
    code: int
    # Message text accompanying the code.
    message: str
/**
 * Serializable holder for the doc operation, id, code and message copied from a
 * {@code com.aliyun.dashvector.proto.DocOpResult}.
 */
@Getter
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class DocOpResult implements Serializable {
  /** Operation kind; mapped to the JSON property {@code doc_op}. */
  @JsonProperty("doc_op")
  private com.aliyun.dashvector.proto.DocOpResult.DocOp docOp;

  /** Id taken from the source result. */
  private String id;

  /** Result code taken from the source result. */
  private int code;

  /** Message taken from the source result. */
  private String message;

  /**
   * Copies every field from the given protobuf-package result object.
   *
   * @param docOpResult source object whose getters supply the field values
   */
  public DocOpResult(com.aliyun.dashvector.proto.DocOpResult docOpResult) {
    docOp = docOpResult.getDocOp();
    id = docOpResult.getId();
    code = docOpResult.getCode();
    message = docOpResult.getMessage();
  }
}
CollectionMeta
@dataclass(frozen=True)
class CollectionMeta:
    """Immutable description of a Collection's configuration and status."""

    # Collection name.
    name: str
    # Vector dimension.
    dimension: int
    # Vector data type: FLOAT or INT.
    dtype: str
    # Distance metric: euclidean, dotproduct or cosine.
    metric: str
    # Collection status.
    status: Status
    # Collection field definitions; each value is one of FLOAT/BOOL/INT/STRING.
    fields: Dict[str, str]
    # Collection partition information (a Status per str key).
    partitions: Dict[str, Status]
/** Read-only view of a Collection's metadata, populated from a {@code CollectionInfo}. */
@Getter
public class CollectionMeta {
  /** Collection name. */
  private final String name;

  /** Vector dimension. */
  private final int dimension;

  /** Vector data type: FLOAT or INT. */
  private final CollectionInfo.DataType dataType;

  /** Distance metric: euclidean, dotproduct or cosine. */
  private final CollectionInfo.Metric metric;

  /** Collection status, stored as the string returned by {@code getStatus().name()}. */
  private final String status;

  /** Collection field definitions; each value is one of FLOAT/BOOL/INT/STRING. */
  private final Map<String, FieldType> fieldsSchema;

  /** Collection partition information. */
  private final Map<String, Status> partitionStatus;

  /**
   * Copies the metadata exposed by the given {@code CollectionInfo}.
   *
   * @param collectionInfo source object whose getters supply every field
   */
  public CollectionMeta(CollectionInfo collectionInfo) {
    name = collectionInfo.getName();
    dimension = collectionInfo.getDimension();
    dataType = collectionInfo.getDtype();
    metric = collectionInfo.getMetric();
    status = collectionInfo.getStatus().name();
    fieldsSchema = collectionInfo.getFieldsSchemaMap();
    partitionStatus = collectionInfo.getPartitionsMap();
  }
}
CollectionStats
@dataclass(frozen=True)
class CollectionStats:
    """Immutable statistics for a Collection."""

    # Total amount of data inserted into the Collection.
    total_doc_count: int
    # Completeness of the data inserted into the Collection.
    index_completeness: float
    # Collection partition information (a PartitionStats per str key).
    partitions: Dict[str, PartitionStats]
/** Statistics for a Collection, populated from a {@code StatsCollectionResponse.CollectionStats}. */
@Getter
public class CollectionStats {
  /** Total amount of data inserted into the Collection. */
  private final long totalDocCount;

  /** Completeness of the data inserted into the Collection. */
  private final float indexCompleteness;

  /** Collection partition information, one {@link PartitionStats} per key. */
  private final Map<String, PartitionStats> partitions;

  /**
   * Copies the totals from the given stats object and wraps each entry of its partitions map in
   * a {@link PartitionStats}.
   *
   * @param collectionStats source object whose getters supply the values
   */
  public CollectionStats(StatsCollectionResponse.CollectionStats collectionStats) {
    totalDocCount = collectionStats.getTotalDocCount();
    indexCompleteness = collectionStats.getIndexCompleteness();

    // Fill a local map first, then publish it through the final field.
    Map<String, PartitionStats> converted = new HashMap<>();
    collectionStats
        .getPartitionsMap()
        .forEach((partitionKey, rawStats) -> converted.put(partitionKey, new PartitionStats(rawStats)));
    partitions = converted;
  }
}
PartitionStats
@dataclass(frozen=True)
class PartitionStats:
    """Immutable statistics for a single Partition."""

    # Total amount of data inside the Partition.
    total_doc_count: int
/** Statistics for a single Partition. */
@Getter
public class PartitionStats {
  /** Total amount of data inside the Partition. */
  private final long totalDocCount;

  /**
   * Copies the total document count from the given
   * {@code com.aliyun.dashvector.proto.PartitionStats}.
   *
   * @param partitionStats source object supplying {@code getTotalDocCount()}
   */
  public PartitionStats(com.aliyun.dashvector.proto.PartitionStats partitionStats) {
    totalDocCount = partitionStats.getTotalDocCount();
  }
}
Status
class Status(IntEnum):
    """Status of a Collection or Partition."""

    # Collection/Partition is being created.
    INITIALIZED = 0
    # Collection/Partition is serving.
    SERVING = 1
    # Collection/Partition is being deleted.
    DROPPING = 2
    # Collection/Partition is in an abnormal state.
    ERROR = 3
Group
@dataclass(frozen=True)
class Group:
    """Immutable group: a group identifier plus the documents under it."""

    # Group identifier.
    group_id: str
    # List of documents that belong to the group.
    docs: List[Doc]
// A group: a group identifier plus the list of documents under it.
// Getters and a builder are generated by the Lombok annotations below.
@Getter
@Builder
public class Group {
  // Group identifier; annotated @NonNull.
  @NonNull private String groupId;
  // Documents that belong to the group; @Singular applies to this builder property.
  @Singular private List<Doc> docs;
}
其他
# Type alias: a dict keyed by str whose values are one of the type objects
# str, int, float or bool.
# NOTE(review): Doc.fields is annotated with this alias, yet the value type here
# describes type objects rather than instances -- confirm this is intended.
FieldDataType = Dict[str, Union[Type[str], Type[int], Type[float], Type[bool]]]