bson/bson_value_reader.go

843 lines
18 KiB
Go

// Copyright (C) MongoDB, Inc. 2017-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
package bson
import (
"bufio"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
)
var _ ValueReader = &valueReader{}
// ErrEOA is the error returned when the end of a BSON array has been reached.
var ErrEOA = errors.New("end of array")
// ErrEOD is the error returned when the end of a BSON document has been reached.
var ErrEOD = errors.New("end of document")
type vrState struct {
mode mode
vType Type
end int64
}
// valueReader is for reading BSON values.
type valueReader struct {
r *bufio.Reader
offset int64
stack []vrState
frame int64
}
// NewDocumentReader returns a ValueReader using b for the underlying BSON
// representation.
func NewDocumentReader(r io.Reader) ValueReader {
return newDocumentReader(r)
}
// newValueReader returns a ValueReader that starts in the Value mode instead of in top
// level document mode. This enables the creation of a ValueReader for a single BSON value.
func newValueReader(t Type, r io.Reader) ValueReader {
vr := newDocumentReader(r)
if len(vr.stack) == 0 {
vr.stack = make([]vrState, 1, 5)
}
vr.stack[0].mode = mValue
vr.stack[0].vType = t
return vr
}
func newDocumentReader(r io.Reader) *valueReader {
stack := make([]vrState, 1, 5)
stack[0] = vrState{
mode: mTopLevel,
}
return &valueReader{
r: bufio.NewReader(r),
stack: stack,
}
}
func (vr *valueReader) advanceFrame() {
if vr.frame+1 >= int64(len(vr.stack)) { // We need to grow the stack
length := len(vr.stack)
if length+1 >= cap(vr.stack) {
// double it
buf := make([]vrState, 2*cap(vr.stack)+1)
copy(buf, vr.stack)
vr.stack = buf
}
vr.stack = vr.stack[:length+1]
}
vr.frame++
// Clean the stack
vr.stack[vr.frame].mode = 0
vr.stack[vr.frame].vType = 0
vr.stack[vr.frame].end = 0
}
func (vr *valueReader) pushDocument() error {
vr.advanceFrame()
vr.stack[vr.frame].mode = mDocument
length, err := vr.readLength()
if err != nil {
return err
}
vr.stack[vr.frame].end = int64(length) + vr.offset - 4
return nil
}
func (vr *valueReader) pushArray() error {
vr.advanceFrame()
vr.stack[vr.frame].mode = mArray
length, err := vr.readLength()
if err != nil {
return err
}
vr.stack[vr.frame].end = int64(length) + vr.offset - 4
return nil
}
func (vr *valueReader) pushElement(t Type) {
vr.advanceFrame()
vr.stack[vr.frame].mode = mElement
vr.stack[vr.frame].vType = t
}
func (vr *valueReader) pushValue(t Type) {
vr.advanceFrame()
vr.stack[vr.frame].mode = mValue
vr.stack[vr.frame].vType = t
}
func (vr *valueReader) pushCodeWithScope() (int64, error) {
vr.advanceFrame()
vr.stack[vr.frame].mode = mCodeWithScope
length, err := vr.readLength()
if err != nil {
return 0, err
}
vr.stack[vr.frame].end = int64(length) + vr.offset - 4
return int64(length), nil
}
func (vr *valueReader) pop() error {
var cnt int
switch vr.stack[vr.frame].mode {
case mElement, mValue:
cnt = 1
case mDocument, mArray, mCodeWithScope:
cnt = 2 // we pop twice to jump over the vrElement: vrDocument -> vrElement -> vrDocument/TopLevel/etc...
}
for i := 0; i < cnt && vr.frame > 0; i++ {
if vr.offset < vr.stack[vr.frame].end {
_, err := vr.r.Discard(int(vr.stack[vr.frame].end - vr.offset))
if err != nil {
return err
}
}
vr.frame--
}
if vr.frame == 0 {
if vr.stack[0].end > vr.offset {
vr.stack[0].end -= vr.offset
} else {
vr.stack[0].end = 0
}
vr.offset = 0
}
return nil
}
func (vr *valueReader) invalidTransitionErr(destination mode, name string, modes []mode) error {
te := TransitionError{
name: name,
current: vr.stack[vr.frame].mode,
destination: destination,
modes: modes,
action: "read",
}
if vr.frame != 0 {
te.parent = vr.stack[vr.frame-1].mode
}
return te
}
func (vr *valueReader) typeError(t Type) error {
return fmt.Errorf("positioned on %s, but attempted to read %s", vr.stack[vr.frame].vType, t)
}
func (vr *valueReader) invalidDocumentLengthError() error {
return fmt.Errorf("document is invalid, end byte is at %d, but null byte found at %d", vr.stack[vr.frame].end, vr.offset)
}
func (vr *valueReader) ensureElementValue(t Type, destination mode, callerName string) error {
switch vr.stack[vr.frame].mode {
case mElement, mValue:
if vr.stack[vr.frame].vType != t {
return vr.typeError(t)
}
default:
return vr.invalidTransitionErr(destination, callerName, []mode{mElement, mValue})
}
return nil
}
func (vr *valueReader) Type() Type {
return vr.stack[vr.frame].vType
}
func (vr *valueReader) appendNextElement(dst []byte) ([]byte, error) {
var length int32
var err error
switch vr.stack[vr.frame].vType {
case TypeArray, TypeEmbeddedDocument, TypeCodeWithScope:
length, err = vr.peekLength()
case TypeBinary:
length, err = vr.peekLength()
length += 4 + 1 // binary length + subtype byte
case TypeBoolean:
length = 1
case TypeDBPointer:
length, err = vr.peekLength()
length += 4 + 12 // string length + ObjectID length
case TypeDateTime, TypeDouble, TypeInt64, TypeTimestamp:
length = 8
case TypeDecimal128:
length = 16
case TypeInt32:
length = 4
case TypeJavaScript, TypeString, TypeSymbol:
length, err = vr.peekLength()
length += 4
case TypeMaxKey, TypeMinKey, TypeNull, TypeUndefined:
length = 0
case TypeObjectID:
length = 12
case TypeRegex:
for n := 0; n < 2; n++ { // Read two C strings.
str, err := vr.r.ReadBytes(0x00)
if err != nil {
return nil, err
}
dst = append(dst, str...)
vr.offset += int64(len(str))
}
return dst, nil
default:
return nil, fmt.Errorf("attempted to read bytes of unknown BSON type %v", vr.stack[vr.frame].vType)
}
if err != nil {
return nil, err
}
buf := make([]byte, length)
_, err = io.ReadFull(vr.r, buf)
if err != nil {
return nil, err
}
dst = append(dst, buf...)
vr.offset += int64(len(buf))
return dst, err
}
func (vr *valueReader) readValueBytes(dst []byte) (Type, []byte, error) {
switch vr.stack[vr.frame].mode {
case mTopLevel:
length, err := vr.peekLength()
if err != nil {
return Type(0), nil, err
}
dst, err = vr.appendBytes(dst, length)
if err != nil {
return Type(0), nil, err
}
return Type(0), dst, nil
case mElement, mValue:
dst, err := vr.appendNextElement(dst)
if err != nil {
return Type(0), dst, err
}
t := vr.stack[vr.frame].vType
err = vr.pop()
if err != nil {
return Type(0), nil, err
}
return t, dst, nil
default:
return Type(0), nil, vr.invalidTransitionErr(0, "ReadValueBytes", []mode{mElement, mValue})
}
}
func (vr *valueReader) Skip() error {
switch vr.stack[vr.frame].mode {
case mElement, mValue:
default:
return vr.invalidTransitionErr(0, "Skip", []mode{mElement, mValue})
}
_, err := vr.appendNextElement(nil)
if err != nil {
return err
}
return vr.pop()
}
func (vr *valueReader) ReadArray() (ArrayReader, error) {
if err := vr.ensureElementValue(TypeArray, mArray, "ReadArray"); err != nil {
return nil, err
}
err := vr.pushArray()
if err != nil {
return nil, err
}
return vr, nil
}
func (vr *valueReader) ReadBinary() (b []byte, btype byte, err error) {
if err := vr.ensureElementValue(TypeBinary, 0, "ReadBinary"); err != nil {
return nil, 0, err
}
length, err := vr.readLength()
if err != nil {
return nil, 0, err
}
btype, err = vr.readByte()
if err != nil {
return nil, 0, err
}
// Check length in case it is an old binary without a length.
if btype == 0x02 && length > 4 {
length, err = vr.readLength()
if err != nil {
return nil, 0, err
}
}
b = make([]byte, length)
err = vr.read(b)
if err != nil {
return nil, 0, err
}
if err := vr.pop(); err != nil {
return nil, 0, err
}
return b, btype, nil
}
func (vr *valueReader) ReadBoolean() (bool, error) {
if err := vr.ensureElementValue(TypeBoolean, 0, "ReadBoolean"); err != nil {
return false, err
}
b, err := vr.readByte()
if err != nil {
return false, err
}
if b > 1 {
return false, fmt.Errorf("invalid byte for boolean, %b", b)
}
if err := vr.pop(); err != nil {
return false, err
}
return b == 1, nil
}
func (vr *valueReader) ReadDocument() (DocumentReader, error) {
switch vr.stack[vr.frame].mode {
case mTopLevel:
length, err := vr.readLength()
if err != nil {
return nil, err
}
if length <= 4 {
return nil, fmt.Errorf("invalid string length: %d", length)
}
vr.stack[vr.frame].end = int64(length) + vr.offset - 4
return vr, nil
case mElement, mValue:
if vr.stack[vr.frame].vType != TypeEmbeddedDocument {
return nil, vr.typeError(TypeEmbeddedDocument)
}
default:
return nil, vr.invalidTransitionErr(mDocument, "ReadDocument", []mode{mTopLevel, mElement, mValue})
}
err := vr.pushDocument()
if err != nil {
return nil, err
}
return vr, nil
}
func (vr *valueReader) ReadCodeWithScope() (code string, dr DocumentReader, err error) {
if err := vr.ensureElementValue(TypeCodeWithScope, 0, "ReadCodeWithScope"); err != nil {
return "", nil, err
}
totalLength, err := vr.readLength()
if err != nil {
return "", nil, err
}
strLength, err := vr.readLength()
if err != nil {
return "", nil, err
}
if strLength <= 0 {
return "", nil, fmt.Errorf("invalid string length: %d", strLength)
}
strBytes := make([]byte, strLength)
err = vr.read(strBytes)
if err != nil {
return "", nil, err
}
code = string(strBytes[:len(strBytes)-1])
size, err := vr.pushCodeWithScope()
if err != nil {
return "", nil, err
}
// The total length should equal:
// 4 (total length) + strLength + 4 (the length of str itself) + (document length)
componentsLength := int64(4+strLength+4) + size
if int64(totalLength) != componentsLength {
return "", nil, fmt.Errorf(
"length of CodeWithScope does not match lengths of components; total: %d; components: %d",
totalLength, componentsLength,
)
}
return code, vr, nil
}
func (vr *valueReader) ReadDBPointer() (ns string, oid ObjectID, err error) {
if err := vr.ensureElementValue(TypeDBPointer, 0, "ReadDBPointer"); err != nil {
return "", oid, err
}
ns, err = vr.readString()
if err != nil {
return "", oid, err
}
err = vr.read(oid[:])
if err != nil {
return "", ObjectID{}, err
}
if err := vr.pop(); err != nil {
return "", ObjectID{}, err
}
return ns, oid, nil
}
func (vr *valueReader) ReadDateTime() (int64, error) {
if err := vr.ensureElementValue(TypeDateTime, 0, "ReadDateTime"); err != nil {
return 0, err
}
i, err := vr.readi64()
if err != nil {
return 0, err
}
if err := vr.pop(); err != nil {
return 0, err
}
return i, nil
}
func (vr *valueReader) ReadDecimal128() (Decimal128, error) {
if err := vr.ensureElementValue(TypeDecimal128, 0, "ReadDecimal128"); err != nil {
return Decimal128{}, err
}
var b [16]byte
err := vr.read(b[:])
if err != nil {
return Decimal128{}, err
}
l := binary.LittleEndian.Uint64(b[0:8])
h := binary.LittleEndian.Uint64(b[8:16])
if err := vr.pop(); err != nil {
return Decimal128{}, err
}
return NewDecimal128(h, l), nil
}
func (vr *valueReader) ReadDouble() (float64, error) {
if err := vr.ensureElementValue(TypeDouble, 0, "ReadDouble"); err != nil {
return 0, err
}
u, err := vr.readu64()
if err != nil {
return 0, err
}
if err := vr.pop(); err != nil {
return 0, err
}
return math.Float64frombits(u), nil
}
func (vr *valueReader) ReadInt32() (int32, error) {
if err := vr.ensureElementValue(TypeInt32, 0, "ReadInt32"); err != nil {
return 0, err
}
if err := vr.pop(); err != nil {
return 0, err
}
return vr.readi32()
}
func (vr *valueReader) ReadInt64() (int64, error) {
if err := vr.ensureElementValue(TypeInt64, 0, "ReadInt64"); err != nil {
return 0, err
}
if err := vr.pop(); err != nil {
return 0, err
}
return vr.readi64()
}
func (vr *valueReader) ReadJavascript() (string, error) {
if err := vr.ensureElementValue(TypeJavaScript, 0, "ReadJavascript"); err != nil {
return "", err
}
if err := vr.pop(); err != nil {
return "", err
}
return vr.readString()
}
func (vr *valueReader) ReadMaxKey() error {
if err := vr.ensureElementValue(TypeMaxKey, 0, "ReadMaxKey"); err != nil {
return err
}
return vr.pop()
}
func (vr *valueReader) ReadMinKey() error {
if err := vr.ensureElementValue(TypeMinKey, 0, "ReadMinKey"); err != nil {
return err
}
return vr.pop()
}
func (vr *valueReader) ReadNull() error {
if err := vr.ensureElementValue(TypeNull, 0, "ReadNull"); err != nil {
return err
}
return vr.pop()
}
func (vr *valueReader) ReadObjectID() (ObjectID, error) {
if err := vr.ensureElementValue(TypeObjectID, 0, "ReadObjectID"); err != nil {
return ObjectID{}, err
}
var oid ObjectID
err := vr.read(oid[:])
if err != nil {
return ObjectID{}, err
}
if err := vr.pop(); err != nil {
return ObjectID{}, err
}
return oid, nil
}
func (vr *valueReader) ReadRegex() (string, string, error) {
if err := vr.ensureElementValue(TypeRegex, 0, "ReadRegex"); err != nil {
return "", "", err
}
pattern, err := vr.readCString()
if err != nil {
return "", "", err
}
options, err := vr.readCString()
if err != nil {
return "", "", err
}
if err := vr.pop(); err != nil {
return "", "", err
}
return pattern, options, nil
}
func (vr *valueReader) ReadString() (string, error) {
if err := vr.ensureElementValue(TypeString, 0, "ReadString"); err != nil {
return "", err
}
if err := vr.pop(); err != nil {
return "", err
}
return vr.readString()
}
func (vr *valueReader) ReadSymbol() (string, error) {
if err := vr.ensureElementValue(TypeSymbol, 0, "ReadSymbol"); err != nil {
return "", err
}
if err := vr.pop(); err != nil {
return "", err
}
return vr.readString()
}
func (vr *valueReader) ReadTimestamp() (t uint32, i uint32, err error) {
if err := vr.ensureElementValue(TypeTimestamp, 0, "ReadTimestamp"); err != nil {
return 0, 0, err
}
i, err = vr.readu32()
if err != nil {
return 0, 0, err
}
t, err = vr.readu32()
if err != nil {
return 0, 0, err
}
if err := vr.pop(); err != nil {
return 0, 0, err
}
return t, i, nil
}
func (vr *valueReader) ReadUndefined() error {
if err := vr.ensureElementValue(TypeUndefined, 0, "ReadUndefined"); err != nil {
return err
}
return vr.pop()
}
func (vr *valueReader) ReadElement() (string, ValueReader, error) {
switch vr.stack[vr.frame].mode {
case mTopLevel, mDocument, mCodeWithScope:
default:
return "", nil, vr.invalidTransitionErr(mElement, "ReadElement", []mode{mTopLevel, mDocument, mCodeWithScope})
}
t, err := vr.readByte()
if err != nil {
return "", nil, err
}
if t == 0 {
if vr.offset != vr.stack[vr.frame].end {
return "", nil, vr.invalidDocumentLengthError()
}
_ = vr.pop() // Ignore the error because the call here never reads from the underlying reader.
return "", nil, ErrEOD
}
name, err := vr.readCString()
if err != nil {
return "", nil, err
}
vr.pushElement(Type(t))
return name, vr, nil
}
func (vr *valueReader) ReadValue() (ValueReader, error) {
switch vr.stack[vr.frame].mode {
case mArray:
default:
return nil, vr.invalidTransitionErr(mValue, "ReadValue", []mode{mArray})
}
t, err := vr.readByte()
if err != nil {
return nil, err
}
if t == 0 {
if vr.offset != vr.stack[vr.frame].end {
return nil, vr.invalidDocumentLengthError()
}
_ = vr.pop() // Ignore the error because the call here never reads from the underlying reader.
return nil, ErrEOA
}
if _, err := vr.readCString(); err != nil {
return nil, err
}
vr.pushValue(Type(t))
return vr, nil
}
func (vr *valueReader) read(p []byte) error {
n, err := io.ReadFull(vr.r, p)
if err != nil {
return err
}
vr.offset += int64(n)
return nil
}
func (vr *valueReader) appendBytes(dst []byte, length int32) ([]byte, error) {
buf := make([]byte, length)
err := vr.read(buf)
if err != nil {
return nil, err
}
return append(dst, buf...), nil
}
func (vr *valueReader) readByte() (byte, error) {
b, err := vr.r.ReadByte()
if err != nil {
return 0x0, err
}
vr.offset++
return b, nil
}
func (vr *valueReader) readCString() (string, error) {
str, err := vr.r.ReadString(0x00)
if err != nil {
return "", err
}
l := len(str)
vr.offset += int64(l)
return str[:l-1], nil
}
func (vr *valueReader) readString() (string, error) {
length, err := vr.readLength()
if err != nil {
return "", err
}
if length <= 0 {
return "", fmt.Errorf("invalid string length: %d", length)
}
buf := make([]byte, length)
err = vr.read(buf)
if err != nil {
return "", err
}
if buf[length-1] != 0x00 {
return "", fmt.Errorf("string does not end with null byte, but with %v", buf[length-1])
}
return string(buf[:length-1]), nil
}
func (vr *valueReader) peekLength() (int32, error) {
buf, err := vr.r.Peek(4)
if err != nil {
return 0, err
}
return int32(binary.LittleEndian.Uint32(buf)), nil
}
func (vr *valueReader) readLength() (int32, error) {
l, err := vr.readi32()
if err != nil {
return 0, err
}
if l < 0 {
return 0, fmt.Errorf("invalid negative length: %d", l)
}
return l, nil
}
func (vr *valueReader) readi32() (int32, error) {
var buf [4]byte
err := vr.read(buf[:])
if err != nil {
return 0, err
}
return int32(binary.LittleEndian.Uint32(buf[:])), nil
}
func (vr *valueReader) readu32() (uint32, error) {
var buf [4]byte
err := vr.read(buf[:])
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf[:]), nil
}
func (vr *valueReader) readi64() (int64, error) {
var buf [8]byte
err := vr.read(buf[:])
if err != nil {
return 0, err
}
return int64(binary.LittleEndian.Uint64(buf[:])), nil
}
func (vr *valueReader) readu64() (uint64, error) {
var buf [8]byte
err := vr.read(buf[:])
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint64(buf[:]), nil
}