bson/vector.go

// Copyright (C) MongoDB, Inc. 2024-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bson

import (
	"encoding/binary"
	"errors"
	"fmt"
	"math"
)

// BSON binary vector data types, as described in https://bsonspec.org/spec.html.
// The type tag is written as the first byte of a vector's binary payload.
const (
	// Int8Vector tags a vector whose elements are int8 values.
	Int8Vector byte = 0x03
	// Float32Vector tags a vector whose elements are float32 values.
	Float32Vector byte = 0x27
	// PackedBitVector tags a vector of bits packed into bytes.
	PackedBitVector byte = 0x10
)

// Sentinel errors returned when converting raw data into a Vector.
var (
	// errInsufficientVectorData reports input that is too short or whose
	// length does not hold a whole number of elements.
	errInsufficientVectorData = errors.New("insufficient data")
	// errNonZeroVectorPadding reports a non-zero padding value where only
	// zero is accepted.
	errNonZeroVectorPadding = errors.New("padding must be 0")
	// errVectorPaddingTooLarge reports a padding value greater than 7.
	errVectorPaddingTooLarge = errors.New("padding cannot be larger than 7")
)

// vectorTypeError is the panic value used when a typed accessor is called on
// a Vector that holds a different data type.
type vectorTypeError struct {
	// Method is the fully qualified name of the accessor that was called.
	Method string
	// Type is the data type tag the vector actually holds.
	Type byte
}

// Error implements the error interface. Unknown type tags are reported as
// "invalid".
func (e vectorTypeError) Error() string {
	var kind string
	switch e.Type {
	case Int8Vector:
		kind = "int8"
	case Float32Vector:
		kind = "float32"
	case PackedBitVector:
		kind = "packed bit"
	default:
		kind = "invalid"
	}
	return fmt.Sprintf("cannot call %s, on a type %s vector", e.Method, kind)
}

// Vector represents a densely packed array of numbers / bits. Only the data
// field matching dType is meaningful.
type Vector struct {
	// dType is the type tag: Int8Vector, Float32Vector, or PackedBitVector.
	dType byte
	// int8Data holds the elements of an int8 vector.
	int8Data []int8
	// float32Data holds the elements of a float32 vector.
	float32Data []float32
	// bitData holds the packed bytes of a packed bit vector.
	bitData []byte
	// bitPadding is the number of bits in the final byte of bitData that are
	// to be ignored.
	bitPadding uint8
}

// Type reports the data type tag stored in the vector.
func (v Vector) Type() byte { return v.dType }

// Int8 returns the int8 slice held by the vector.
// It panics with a vectorTypeError if v is not an int8 vector.
func (v Vector) Int8() []int8 {
	if data, ok := v.Int8OK(); ok {
		return data
	}
	panic(vectorTypeError{Method: "bson.Vector.Int8", Type: v.dType})
}

// Int8OK is the non-panicking form of Int8: the boolean is false, and the
// slice nil, when v is not an int8 vector.
func (v Vector) Int8OK() ([]int8, bool) {
	if v.dType == Int8Vector {
		return v.int8Data, true
	}
	return nil, false
}

// Float32 returns the float32 slice held by the vector.
// It panics with a vectorTypeError if v is not a float32 vector.
func (v Vector) Float32() []float32 {
	if data, ok := v.Float32OK(); ok {
		return data
	}
	panic(vectorTypeError{Method: "bson.Vector.Float32", Type: v.dType})
}

// Float32OK is the non-panicking form of Float32: the boolean is false, and
// the slice nil, when v is not a float32 vector.
func (v Vector) Float32OK() ([]float32, bool) {
	if v.dType == Float32Vector {
		return v.float32Data, true
	}
	return nil, false
}

// PackedBit returns the byte slice representing the binary quantized (packed
// bit) vector together with its padding, i.e. the number of bits in the final
// byte that are to be ignored.
// It panics with a vectorTypeError if v is not a packed bit vector.
func (v Vector) PackedBit() ([]byte, uint8) {
	if bits, padding, ok := v.PackedBitOK(); ok {
		return bits, padding
	}
	panic(vectorTypeError{Method: "bson.Vector.PackedBit", Type: v.dType})
}

// PackedBitOK is the non-panicking form of PackedBit: the boolean is false,
// the slice nil and the padding 0 when v is not a packed bit vector.
func (v Vector) PackedBitOK() ([]byte, uint8, bool) {
	if v.dType == PackedBitVector {
		return v.bitData, v.bitPadding, true
	}
	return nil, 0, false
}

// Binary returns the BSON Binary representation of the Vector.
// It panics if the vector's data type is not one of Int8Vector,
// Float32Vector, or PackedBitVector (for example, the zero Vector).
func (v Vector) Binary() Binary {
	// Each case reads the field that matches the type tag directly.
	switch v.dType {
	case Int8Vector:
		return binaryFromInt8Vector(v.int8Data)
	case Float32Vector:
		return binaryFromFloat32Vector(v.float32Data)
	case PackedBitVector:
		return binaryFromBitVector(v.bitData, v.bitPadding)
	}
	panic(fmt.Sprintf("invalid Vector data type: %d", v.dType))
}

// binaryFromInt8Vector encodes v as a vector Binary: a two-byte header (the
// Int8Vector tag followed by a zero padding byte) and then one byte per
// element.
func binaryFromInt8Vector(v []int8) Binary {
	payload := make([]byte, 2, 2+len(v))
	payload[0], payload[1] = Int8Vector, 0
	for _, elem := range v {
		payload = append(payload, byte(elem))
	}
	return Binary{Subtype: TypeBinaryVector, Data: payload}
}

// binaryFromFloat32Vector encodes v as a vector Binary: a two-byte header
// (the Float32Vector tag followed by a zero padding byte) and then each
// element as its 4-byte little-endian IEEE 754 bit pattern.
func binaryFromFloat32Vector(v []float32) Binary {
	const headerLen, elemLen = 2, 4

	payload := make([]byte, headerLen+elemLen*len(v))
	payload[0] = Float32Vector
	payload[1] = 0
	for i, elem := range v {
		// Write each element in place at its offset past the header.
		binary.LittleEndian.PutUint32(payload[headerLen+elemLen*i:], math.Float32bits(elem))
	}
	return Binary{Subtype: TypeBinaryVector, Data: payload}
}

// binaryFromBitVector encodes a packed bit vector as a vector Binary: a
// two-byte header (the PackedBitVector tag followed by the padding) and then
// a copy of bits.
func binaryFromBitVector(bits []byte, padding uint8) Binary {
	payload := make([]byte, 0, 2+len(bits))
	payload = append(payload, PackedBitVector, padding)
	payload = append(payload, bits...)
	return Binary{Subtype: TypeBinaryVector, Data: payload}
}

// NewVector constructs a Vector from a slice of int8 or float32. The elements
// are copied, so later changes to data do not affect the returned Vector.
func NewVector[T int8 | float32](data []T) Vector {
	switch src := any(data).(type) {
	case []int8:
		dup := make([]int8, len(src))
		copy(dup, src)
		return Vector{dType: Int8Vector, int8Data: dup}
	case []float32:
		dup := make([]float32, len(src))
		copy(dup, src)
		return Vector{dType: Float32Vector, float32Data: dup}
	}
	// The type constraint admits only the two cases above.
	panic(fmt.Errorf("unsupported type %T", data))
}

// NewPackedBitVector constructs a Vector from a byte slice and a value of
// byte padding. The bytes are copied into the returned Vector.
//
// It returns an error if padding is greater than 7, or if padding is non-zero
// while bits is empty.
func NewPackedBitVector(bits []byte, padding uint8) (Vector, error) {
	switch {
	case padding > 7:
		return Vector{}, errVectorPaddingTooLarge
	case padding > 0 && len(bits) == 0:
		return Vector{}, errNonZeroVectorPadding
	}

	stored := make([]byte, len(bits))
	copy(stored, bits)
	return Vector{
		dType:      PackedBitVector,
		bitData:    stored,
		bitPadding: padding,
	}, nil
}

// NewVectorFromBinary unpacks a BSON Binary into a Vector.
//
// It returns an error if b does not have the vector subtype, if its data is
// shorter than the two-byte header (type tag and padding), if the type tag is
// not recognized, or if the type-specific decoder rejects the payload.
func NewVectorFromBinary(b Binary) (Vector, error) {
	if b.Subtype != TypeBinaryVector {
		return Vector{}, errors.New("not a vector")
	}
	if len(b.Data) < 2 {
		return Vector{}, errInsufficientVectorData
	}

	// The decoders receive the padding byte followed by the element bytes.
	dType, rest := b.Data[0], b.Data[1:]
	switch dType {
	case Int8Vector:
		return newInt8Vector(rest)
	case Float32Vector:
		return newFloat32Vector(rest)
	case PackedBitVector:
		return newBitVector(rest)
	}
	return Vector{}, fmt.Errorf("invalid Vector data type: %d", dType)
}

// newInt8Vector decodes an int8 vector from b, whose first byte is the
// padding and whose remaining bytes are the elements. The padding must be 0.
func newInt8Vector(b []byte) (Vector, error) {
	if len(b) == 0 {
		return Vector{}, errInsufficientVectorData
	}
	if b[0] != 0 {
		return Vector{}, errNonZeroVectorPadding
	}

	raw := b[1:]
	elems := make([]int8, len(raw))
	for i, x := range raw {
		elems[i] = int8(x)
	}
	return NewVector(elems), nil
}

// newFloat32Vector decodes a float32 vector from b, whose first byte is the
// padding and whose remaining bytes are 4-byte little-endian elements. The
// padding must be 0 and the element bytes must be a multiple of 4 in length.
func newFloat32Vector(b []byte) (Vector, error) {
	if len(b) == 0 {
		return Vector{}, errInsufficientVectorData
	}
	if b[0] != 0 {
		return Vector{}, errNonZeroVectorPadding
	}

	const elemLen = 4
	raw := b[1:]
	if len(raw)%elemLen != 0 {
		// A trailing partial element cannot be decoded.
		return Vector{}, errInsufficientVectorData
	}

	elems := make([]float32, len(raw)/elemLen)
	for i := range elems {
		elems[i] = math.Float32frombits(binary.LittleEndian.Uint32(raw[elemLen*i:]))
	}
	return NewVector(elems), nil
}

// newBitVector decodes a packed bit vector from b, whose first byte is the
// padding and whose remaining bytes are the packed bits. Validation of the
// padding is delegated to NewPackedBitVector.
func newBitVector(b []byte) (Vector, error) {
	if len(b) < 1 {
		return Vector{}, errInsufficientVectorData
	}
	padding, bits := b[0], b[1:]
	return NewPackedBitVector(bits, padding)
}