diff --git a/client/column/columns.go b/client/column/columns.go index 3e04557d95d51..79669d8f279dd 100644 --- a/client/column/columns.go +++ b/client/column/columns.go @@ -284,6 +284,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { vector = append(vector, v) } return NewColumnBFloat16Vector(fd.GetFieldName(), dim, vector), nil + case schemapb.DataType_SparseFloatVector: sparseVectors := fd.GetVectors().GetSparseFloatVector() if sparseVectors == nil { @@ -303,6 +304,29 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { vectors = append(vectors, vector) } return NewColumnSparseVectors(fd.GetFieldName(), vectors), nil + + case schemapb.DataType_Int8Vector: + vectors := fd.GetVectors() + x, ok := vectors.GetData().(*schemapb.VectorField_Int8Vector) + if !ok { + return nil, errFieldDataTypeNotMatch + } + data := x.Int8Vector + dim := int(vectors.GetDim()) + if end < 0 { + end = len(data) / dim + } + vector := make([][]int8, 0, end-begin) // shall not have remanunt + // TODO caiyd: has better way to convert []byte to []int8 ? + for i := begin; i < end; i++ { + v := make([]int8, dim) + for j := 0; j < dim; j++ { + v[j] = int8(data[i*dim+j]) + } + vector = append(vector, v) + } + return NewColumnInt8Vector(fd.GetFieldName(), dim, vector), nil + default: return nil, fmt.Errorf("unsupported data type %s", fd.GetType()) } diff --git a/client/column/vector.go b/client/column/vector.go index e1f7882cbb730..3f912cdfd18db 100644 --- a/client/column/vector.go +++ b/client/column/vector.go @@ -213,3 +213,36 @@ func (c *ColumnBFloat16Vector) Slice(start, end int) Column { vectorBase: c.vectorBase.slice(start, end), } } + +/* int8 vector */ + +type ColumnInt8Vector struct { + *vectorBase[entity.Int8Vector] +} + +func NewColumnInt8Vector(fieldName string, dim int, data [][]int8) *ColumnInt8Vector { + vectors := lo.Map(data, func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }) + return &ColumnInt8Vector{ + vectorBase: newVectorBase(fieldName, dim, vectors, entity.FieldTypeInt8Vector), + } +} + +// AppendValue appends vector value into values. +// override default type constrains, add `[]int8` conversion +func (c *ColumnInt8Vector) AppendValue(i interface{}) error { + switch vector := i.(type) { + case entity.Int8Vector: + c.values = append(c.values, vector) + case []int8: + c.values = append(c.values, vector) + default: + return errors.Newf("unexpected append value type %T, field type %v", vector, c.fieldType) + } + return nil +} + +func (c *ColumnInt8Vector) Slice(start, end int) Column { + return &ColumnInt8Vector{ + vectorBase: c.vectorBase.slice(start, end), + } +} diff --git a/client/column/vector_test.go b/client/column/vector_test.go index 70acebb12119d..e1be32fefe93a 100644 --- a/client/column/vector_test.go +++ b/client/column/vector_test.go @@ -187,6 +187,38 @@ func (s *VectorSuite) TestBasic() { } } }) + + s.Run("int8_vector", func() { + name := fmt.Sprintf("field_%d", rand.Intn(1000)) + n := 3 + dim := rand.Intn(10) + 2 + data := make([][]int8, 0, n) + for i := 0; i < n; i++ { + row := lo.RepeatBy(dim, func(i int) int8 { + return int8(rand.Intn(256) - 128) + }) + data = append(data, row) + } + column := NewColumnInt8Vector(name, dim, data) + s.Equal(entity.FieldTypeInt8Vector, column.Type()) + s.Equal(name, column.Name()) + s.Equal(lo.Map(data, func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }), column.Data()) + s.Equal(dim, column.Dim()) + + fd := column.FieldData() + s.Equal(name, fd.GetFieldName()) + s.Equal(lo.Flatten(data), fd.GetVectors().GetInt8Vector()) + + result, err := FieldDataColumn(fd, 0, -1) + s.NoError(err) + parsed, ok := result.(*ColumnInt8Vector) + if s.True(ok) { + s.Equal(entity.FieldTypeInt8Vector, parsed.Type()) + s.Equal(name, parsed.Name()) + s.Equal(lo.Map(data, func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }), parsed.Data()) + s.Equal(dim, parsed.Dim()) + } + }) } func (s *VectorSuite) TestSlice() { @@ -277,6 +309,28 @@ func (s *VectorSuite) TestSlice() { s.Equal(lo.Map(data[:l], func(row []byte, _ int) entity.BFloat16Vector { return entity.BFloat16Vector(row) }), slicedColumn.Data()) } }) + + s.Run("int8_vector", func() { + name := fmt.Sprintf("field_%d", rand.Intn(1000)) + n := 100 + dim := rand.Intn(10) + 2 + data := make([][]int8, 0, n) + for i := 0; i < n; i++ { + row := lo.RepeatBy(dim, func(i int) int8 { + return int8(rand.Intn(256) - 128) + }) + data = append(data, row) + } + column := NewColumnInt8Vector(name, dim, data) + + l := rand.Intn(n) + sliced := column.Slice(0, l) + slicedColumn, ok := sliced.(*ColumnInt8Vector) + if s.True(ok) { + s.Equal(dim, slicedColumn.Dim()) + s.Equal(lo.Map(data[:l], func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }), slicedColumn.Data()) + } + }) } func TestVectors(t *testing.T) { diff --git a/client/entity/field.go b/client/entity/field.go index fa910d8e5c17a..9ef956ff8b32f 100644 --- a/client/entity/field.go +++ b/client/entity/field.go @@ -62,6 +62,8 @@ func (t FieldType) Name() string { return "Float16Vector" case FieldTypeBFloat16Vector: return "BFloat16Vector" + case FieldTypeInt8Vector: + return "Int8Vector" default: return "undefined" } @@ -100,6 +102,8 @@ func (t FieldType) String() string { return "[]byte" case FieldTypeBFloat16Vector: return "[]byte" + case FieldTypeInt8Vector: + return "[]int8" default: return "undefined" } diff --git a/client/entity/vectors.go b/client/entity/vectors.go index b8e101bc49c17..0b7bf002c4d27 100644 --- a/client/entity/vectors.go +++ b/client/entity/vectors.go @@ -56,7 +56,7 @@ func (fv FloatVector) ToBFloat16Vector() BFloat16Vector { return typeutil.Float32ArrayToBFloat16Bytes(fv) } -// FloatVector float32 vector wrapper. +// Float16Vector float16 vector wrapper. type Float16Vector []byte // Dim returns vector dimension. @@ -77,7 +77,7 @@ func (fv Float16Vector) ToFloat32Vector() FloatVector { return typeutil.Float16BytesToFloat32Vector(fv) } -// FloatVector float32 vector wrapper. +// BFloat16Vector bfloat16 vector wrapper. type BFloat16Vector []byte // Dim returns vector dimension. @@ -131,3 +131,21 @@ func (t Text) FieldType() FieldType { func (t Text) Serialize() []byte { return []byte(t) } + +// Int8Vector []int8 vector wrapper +type Int8Vector []int8 + +// Dim return vector dimension +func (iv Int8Vector) Dim() int { + return len(iv) +} + +// Serialize just return bytes +func (iv Int8Vector) Serialize() []byte { + return typeutil.Int8ArrayToBytes(iv) +} + +// entity.FieldType returns coresponding field type. +func (iv Int8Vector) FieldType() FieldType { + return FieldTypeInt8Vector +} diff --git a/client/entity/vectors_test.go b/client/entity/vectors_test.go index 861ab56563336..0c6eabbd2e5b3 100644 --- a/client/entity/vectors_test.go +++ b/client/entity/vectors_test.go @@ -92,4 +92,15 @@ func TestVectors(t *testing.T) { assert.Equal(t, dim*8, bv.Dim()) assert.ElementsMatch(t, raw, bv.Serialize()) }) + + t.Run("test int8 vector", func(t *testing.T) { + raw := make([]int8, dim) + for i := 0; i < dim; i++ { + raw[i] = int8(rand.Intn(256) - 128) + } + + iv := Int8Vector(raw) + assert.Equal(t, dim, iv.Dim()) + assert.Equal(t, dim, len(iv.Serialize())) + }) } diff --git a/client/go.mod b/client/go.mod index 07492c7b2bd9b..4a50c97f1ae50 100644 --- a/client/go.mod +++ b/client/go.mod @@ -6,14 +6,13 @@ require ( github.com/blang/semver/v4 v4.0.0 github.com/cockroachdb/errors v1.9.1 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 - github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b - github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 + github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f + github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/samber/lo v1.27.0 github.com/stretchr/testify v1.9.0 github.com/tidwall/gjson v1.17.1 go.uber.org/atomic v1.10.0 - golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 google.golang.org/grpc v1.65.0 google.golang.org/protobuf v1.34.2 ) @@ -99,6 +98,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.31.0 // indirect + golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect golang.org/x/net v0.33.0 // indirect golang.org/x/sync v0.10.0 // indirect golang.org/x/sys v0.28.0 // indirect diff --git a/client/go.sum b/client/go.sum index d4d7e5a1601a5..70171495c36d1 100644 --- a/client/go.sum +++ b/client/go.sum @@ -318,10 +318,10 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b h1:iPPhnFx+s7FF53UeWj7A4EYhPRMFPL6mHqyQw7qRjeQ= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 h1:EAFxmxUVp5yYFDCrX1MQoSxkTO+ycy8NXEqEDEB3cRM= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84/go.mod h1:RATa0GS4jhkPpsYOvQ/QvcNz8rd+TlRPDiSyXQnMMxs= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f h1:So6RKU5wqP/8EaKogicJP8gZ2SrzzS/JprusBaE3RKc= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 h1:WF9BkNk1XjLtwMbaB/cniRBMMNLnqG6e+AUbK8DciHQ= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3/go.mod h1:nxnHkDFB3jh27nTQJBaC4azAQO8chT03DkmoiZ5086s= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= diff --git a/internal/distributed/proxy/httpserver/handler_v1_test.go b/internal/distributed/proxy/httpserver/handler_v1_test.go index ef7d206c320d1..bb20a188a838e 100644 --- a/internal/distributed/proxy/httpserver/handler_v1_test.go +++ b/internal/distributed/proxy/httpserver/handler_v1_test.go @@ -1270,7 +1270,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1289,7 +1290,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1308,7 +1310,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1326,7 +1329,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1344,7 +1348,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0, 3], "bfloat16Vector": [4.4, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1363,7 +1368,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1382,7 +1388,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] }, { "book_id": 1, @@ -1391,7 +1398,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.1, 3.1], "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"3": 1.1, "2": 0.44} + "sparseFloatVector": {"3": 1.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index 44eeb6cb5640b..28d15678d7d6b 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -2000,10 +2000,13 @@ func generateCollectionSchemaWithVectorFields() *schemapb.CollectionSchema { bfloat16VectorField.Name = "bfloat16Vector" sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) sparseFloatVectorField.Name = "sparseFloatVector" + int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector) + int8VectorField.Name = "int8Vector" collSchema.Fields = append(collSchema.Fields, binaryVectorField) collSchema.Fields = append(collSchema.Fields, float16VectorField) collSchema.Fields = append(collSchema.Fields, bfloat16VectorField) collSchema.Fields = append(collSchema.Fields, sparseFloatVectorField) + collSchema.Fields = append(collSchema.Fields, int8VectorField) return collSchema } @@ -2031,7 +2034,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2050,7 +2054,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2068,7 +2073,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2085,7 +2091,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2102,7 +2109,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0, 3], "bfloat16Vector": [4.4, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2121,7 +2129,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2140,7 +2149,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] }, { "book_id": 1, @@ -2149,7 +2159,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.1, 3.1], "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"3": 1.1, "2": 0.44} + "sparseFloatVector": {"3": 1.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index 9ed396d2203f3..86a092e9d4c0b 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -397,6 +397,16 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, } else { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, "invalid vector field: "+fieldName), reallyDataArray, validDataMap } + case schemapb.DataType_Int8Vector: + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap + } + var vectorArray []int8 + err := json.Unmarshal([]byte(dataString), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap + } + reallyData[fieldName] = vectorArray case schemapb.DataType_Bool: result, err := cast.ToBoolE(dataString) if err != nil { @@ -664,6 +674,20 @@ func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.Da return binaryArray, nil } +func convertInt8VectorToArray(vector [][]int8, dim int64) ([]byte, error) { + byteArray := make([]byte, 0) + for _, arr := range vector { + if int64(len(arr)) != dim { + return nil, fmt.Errorf("[]int8 size %d doesn't equal to vector dimension %d of %s", + len(arr), dim, schemapb.DataType_name[int32(schemapb.DataType_Int8Vector)]) + } + for i := int64(0); i < dim; i++ { + byteArray = append(byteArray, byte(arr[i])) + } + } + return byteArray, nil +} + type fieldCandi struct { name string v reflect.Value @@ -770,6 +794,10 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, case schemapb.DataType_SparseFloatVector: data = make([][]byte, 0, rowsLen) nameDims[field.Name] = int64(0) + case schemapb.DataType_Int8Vector: + data = make([][]int8, 0, rowsLen) + dim, _ := getDim(field) + nameDims[field.Name] = dim default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -871,6 +899,8 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, nameDims[field.Name] = rowSparseDim } nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), content) + case schemapb.DataType_Int8Vector: + nameColumns[field.Name] = append(nameColumns[field.Name].([][]int8), candi.v.Interface().([]int8)) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -1074,6 +1104,20 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, }, }, } + case schemapb.DataType_Int8Vector: + dim := nameDims[name] + arr, err := convertInt8VectorToArray(column.([][]int8), dim) + if err != nil { + return nil, err + } + colData.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_Int8Vector{ + Int8Vector: arr, + }, + }, + } default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", colData.Type, name) } @@ -1163,6 +1207,24 @@ func serializeSparseFloatVectors(vectors []gjson.Result, dataType schemapb.DataT return values, nil } +func serializeInt8Vectors(vectorStr string, dataType schemapb.DataType, dimension int64, int8ArrayToBytesFunc func([]int8) []byte) ([][]byte, error) { + var int8Values [][]int8 + err := json.Unmarshal([]byte(vectorStr), &int8Values) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, err.Error()) + } + values := make([][]byte, 0, len(int8Values)) + for _, vectorArray := range int8Values { + if int64(len(vectorArray)) != dimension { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, + fmt.Sprintf("dimension: %d, but length of []int8: %d", dimension, len(vectorArray))) + } + vectorBytes := int8ArrayToBytesFunc(vectorArray) + values = append(values, vectorBytes) + } + return values, nil +} + func convertQueries2Placeholder(body string, dataType schemapb.DataType, dimension int64) (*commonpb.PlaceholderValue, error) { var valueType commonpb.PlaceholderType var values [][]byte @@ -1183,6 +1245,9 @@ func convertQueries2Placeholder(body string, dataType schemapb.DataType, dimensi case schemapb.DataType_SparseFloatVector: valueType = commonpb.PlaceholderType_SparseFloatVector values, err = serializeSparseFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType) + case schemapb.DataType_Int8Vector: + valueType = commonpb.PlaceholderType_Int8Vector + values, err = serializeInt8Vectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, typeutil.Int8ArrayToBytes) case schemapb.DataType_VarChar: valueType = commonpb.PlaceholderType_VarChar res := gjson.Get(body, HTTPRequestData).Array() @@ -1280,6 +1345,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap rowsNum = int64(len(fieldDataList[0].GetVectors().GetBfloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() case schemapb.DataType_SparseFloatVector: rowsNum = int64(len(fieldDataList[0].GetVectors().GetSparseFloatVector().Contents)) + case schemapb.DataType_Int8Vector: + rowsNum = int64(len(fieldDataList[0].GetVectors().GetInt8Vector())) / fieldDataList[0].GetVectors().GetDim() default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", fieldDataList[0].Type, fieldDataList[0].FieldName) } @@ -1374,6 +1441,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBfloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] case schemapb.DataType_SparseFloatVector: row[fieldDataList[j].FieldName] = typeutil.SparseFloatBytesToMap(fieldDataList[j].GetVectors().GetSparseFloatVector().Contents[i]) + case schemapb.DataType_Int8Vector: + row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetInt8Vector()[i*fieldDataList[j].GetVectors().GetDim() : (i+1)*fieldDataList[j].GetVectors().GetDim()] case schemapb.DataType_Array: if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { row[fieldDataList[j].FieldName] = nil diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 78004ebaea87c..939319c941015 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -287,6 +287,20 @@ func generateVectorFieldData(vectorType schemapb.DataType) schemapb.FieldData { }, IsDynamic: false, } + case schemapb.DataType_Int8Vector: + return schemapb.FieldData{ + Type: schemapb.DataType_Int8Vector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: 2, + Data: &schemapb.VectorField_Int8Vector{ + Int8Vector: []byte{0x00, 0x1, 0x2, 0x3, 0x4, 0x5}, + }, + }, + }, + IsDynamic: false, + } default: panic("unsupported vector type") } @@ -735,6 +749,8 @@ func TestCheckAndSetData(t *testing.T) { float16VectorField.Name = "float16Vector" bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = "bfloat16Vector" + int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector) + int8VectorField.Name = "int8Vector" err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ Name: DefaultCollectionName, Fields: []*schemapb.FieldSchema{ @@ -771,6 +787,15 @@ func TestCheckAndSetData(t *testing.T) { }) assert.Error(t, err) assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) + err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Fields: []*schemapb.FieldSchema{ + primaryField, int8VectorField, + }, + EnableDynamicField: true, + }) + assert.Error(t, err) + assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) }) t.Run("with pk when autoID == True when upsert", func(t *testing.T) { @@ -974,6 +999,27 @@ func TestSerialize(t *testing.T) { }) assert.Nil(t, err) } + + { + request := map[string]interface{}{ + HTTPRequestData: []interface{}{ + []int8{1, 2}, + }, + } + requestBody, _ := json.Marshal(request) + values, err = serializeInt8Vectors(gjson.Get(string(requestBody), HTTPRequestData).Raw, schemapb.DataType_Int8Vector, 2, typeutil.Int8ArrayToBytes) + assert.Nil(t, err) + placeholderValue = &commonpb.PlaceholderValue{ + Tag: "$0", + Values: values, + } + _, err = proto.Marshal(&commonpb.PlaceholderGroup{ + Placeholders: []*commonpb.PlaceholderValue{ + placeholderValue, + }, + }) + assert.Nil(t, err) + } } func TestConvertQueries2Placeholder(t *testing.T) { @@ -1611,6 +1657,9 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data case schemapb.DataType_BFloat16Vector: vectorField := generateVectorFieldData(firstFieldType) return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Int8Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} case schemapb.DataType_Array: return []*schemapb.FieldData{&fieldData10} case schemapb.DataType_JSON: @@ -1850,6 +1899,9 @@ func newNullableFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schem case schemapb.DataType_BFloat16Vector: vectorField := generateVectorFieldData(firstFieldType) return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Int8Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} case schemapb.DataType_Array: return []*schemapb.FieldData{&fieldData10} case schemapb.DataType_JSON: @@ -2047,6 +2099,7 @@ func TestVector(t *testing.T) { float16Vector := "vector-float16" bfloat16Vector := "vector-bfloat16" sparseFloatVector := "vector-sparse-float" + int8Vector := "vector-int8" testcaseRows := []map[string]interface{}{ { FieldBookID: int64(1), @@ -2055,6 +2108,7 @@ func TestVector(t *testing.T) { float16Vector: []byte{1, 1, 11, 11}, bfloat16Vector: []byte{1, 1, 11, 11}, sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11}, + int8Vector: []int8{1, 11}, }, { FieldBookID: int64(2), @@ -2063,6 +2117,7 @@ func TestVector(t *testing.T) { float16Vector: []byte{2, 2, 22, 22}, bfloat16Vector: []byte{2, 2, 22, 22}, sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44}, + int8Vector: []int8{2, 22}, }, { FieldBookID: int64(3), @@ -2071,6 +2126,7 @@ func TestVector(t *testing.T) { float16Vector: []byte{3, 3, 33, 33}, bfloat16Vector: []byte{3, 3, 33, 33}, sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001}, + int8Vector: []int8{3, 33}, }, { FieldBookID: int64(4), @@ -2079,6 +2135,7 @@ func TestVector(t *testing.T) { float16Vector: []float32{0.4, 0.44}, bfloat16Vector: []float32{0.4, 0.44}, sparseFloatVector: map[uint32]float32{25: 0.1, 1: 0.11}, + int8Vector: []int8{4, 44}, }, { FieldBookID: int64(5), @@ -2087,6 +2144,7 @@ func TestVector(t *testing.T) { float16Vector: []int64{99999999, -99999999}, bfloat16Vector: []int64{99999999, -99999999}, sparseFloatVector: map[uint32]float32{1121: 0.1, 3: 0.11}, + int8Vector: []int8{-128, 127}, }, } body, err := wrapRequestBody(testcaseRows) @@ -2102,6 +2160,8 @@ func TestVector(t *testing.T) { bfloat16VectorField.Name = bfloat16Vector sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) sparseFloatVectorField.Name = sparseFloatVector + int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector) + int8VectorField.Name = int8Vector collectionSchema := &schemapb.CollectionSchema{ Name: DefaultCollectionName, Description: "", @@ -2167,7 +2227,8 @@ func TestBuildQueryResps(t *testing.T) { } dataTypes := []schemapb.DataType{ - schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector, + schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, + schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector, schemapb.DataType_Int8Vector, schemapb.DataType_Bool, schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32, schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_String, schemapb.DataType_VarChar, diff --git a/internal/distributed/proxy/httpserver/wrap_request.go b/internal/distributed/proxy/httpserver/wrap_request.go index 2536d0fcd3678..3d04c886879b3 100644 --- a/internal/distributed/proxy/httpserver/wrap_request.go +++ b/internal/distributed/proxy/httpserver/wrap_request.go @@ -310,6 +310,37 @@ func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) { }, }, } + case schemapb.DataType_Int8Vector: + wrappedData := [][]int8{} + err := json.Unmarshal(raw, &wrappedData) + if err != nil { + return nil, newFieldDataError(f.FieldName, err) + } + if len(wrappedData) < 1 { + return nil, errors.New("at least one row for insert") + } + array0 := wrappedData[0] + dim := len(array0) + if dim < 1 { + return nil, errors.New("dim must >= 1") + } + data := make([]byte, len(wrappedData)*dim) + + var i int + for _, dataArray := range wrappedData { + for _, v := range dataArray { + data[i] = byte(v) + i++ + } + } + ret.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_Int8Vector{ + Int8Vector: data, + }, + }, + } default: return nil, errors.New("unsupported data type") } diff --git a/internal/distributed/proxy/httpserver/wrap_request_test.go b/internal/distributed/proxy/httpserver/wrap_request_test.go index e4026119029dd..0f05039bff344 100644 --- a/internal/distributed/proxy/httpserver/wrap_request_test.go +++ b/internal/distributed/proxy/httpserver/wrap_request_test.go @@ -345,6 +345,63 @@ func TestFieldData_AsSchemapb(t *testing.T) { _, err := fieldData.AsSchemapb() assert.Error(t, err) }) + + t.Run("int8vector_ok_1", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`[ + [1, 2, 3, 4], + [-11, -52, 37, 121], + [-128, -35, 31, 127] + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + t.Run("int8vector_ok_1", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`[ + [-200, 141] + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run("int8vector_empty_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(""), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run("int8vector_dim0_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`[]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run("int8vector_datatype_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`['a', 'b', 'c']`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) } func Test_vector2Bytes(t *testing.T) { diff --git a/tests/go_client/go.mod b/tests/go_client/go.mod index 26d4204b02752..29f330bf52d36 100644 --- a/tests/go_client/go.mod +++ b/tests/go_client/go.mod @@ -6,7 +6,7 @@ toolchain go1.21.11 require ( github.com/milvus-io/milvus/client/v2 v2.0.0-20241125024034-0b9edb62a92d - github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 + github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/stretchr/testify v1.9.0 github.com/x448/float16 v0.8.4 @@ -52,7 +52,7 @@ require ( github.com/kr/text v0.2.0 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect - github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b // indirect + github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect diff --git a/tests/go_client/go.sum b/tests/go_client/go.sum index d4d7e5a1601a5..70171495c36d1 100644 --- a/tests/go_client/go.sum +++ b/tests/go_client/go.sum @@ -318,10 +318,10 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b h1:iPPhnFx+s7FF53UeWj7A4EYhPRMFPL6mHqyQw7qRjeQ= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 h1:EAFxmxUVp5yYFDCrX1MQoSxkTO+ycy8NXEqEDEB3cRM= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84/go.mod h1:RATa0GS4jhkPpsYOvQ/QvcNz8rd+TlRPDiSyXQnMMxs= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f h1:So6RKU5wqP/8EaKogicJP8gZ2SrzzS/JprusBaE3RKc= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 h1:WF9BkNk1XjLtwMbaB/cniRBMMNLnqG6e+AUbK8DciHQ= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3/go.mod h1:nxnHkDFB3jh27nTQJBaC4azAQO8chT03DkmoiZ5086s= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=