From 16e98b76f096728e78f73327d9ad297cbb0ce723 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sun, 25 Aug 2019 23:25:13 -0400 Subject: [PATCH] Optimize computing codepoint indices with all ASCII data --- decode.go | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/decode.go b/decode.go index a8588d9..4467b2a 100644 --- a/decode.go +++ b/decode.go @@ -140,33 +140,40 @@ func newRawValue(bytes []byte, useCodepointIndices bool) (rawValue, error) { bytes: bytes, } if useCodepointIndices { - bytesIdx := 0 - // Lazily allocate this only if the value actually contains a - // multi-byte character. - codepointIndices := []int(nil) - for bytesIdx < len(bytes) { - _, codepointSize := utf8.DecodeRune(bytes[bytesIdx:]) - if codepointSize == 0 { - return rawValue{}, errors.New("fixedwidth: Invalid codepoint") + bytesIdx := findFirstMultiByteChar(bytes) + // If we've got multi-byte characters, fill in the rest of codepointIndices. + if bytesIdx < len(bytes) { + codepointIndices := make([]int, bytesIdx) + for i := 0; i < bytesIdx; i++ { + codepointIndices[i] = i } - // We have a multi-byte codepoint, we need to allocate - // codepointIndices - if codepointIndices == nil && codepointSize > 1 { - codepointIndices = make([]int, bytesIdx) - for i := 0; i < bytesIdx; i++ { - codepointIndices[i] = i + for bytesIdx < len(bytes) { + _, codepointSize := utf8.DecodeRune(bytes[bytesIdx:]) + if codepointSize == 0 { + return rawValue{}, errors.New("fixedwidth: Invalid codepoint") } - } - if codepointIndices != nil { codepointIndices = append(codepointIndices, bytesIdx) + bytesIdx += codepointSize } - bytesIdx += codepointSize + value.codepointIndices = codepointIndices } - value.codepointIndices = codepointIndices } return value, nil } +// Scans bytes, looking for multi-byte characters, returns either the index of +// the first multi-byte character or the length of the string if there are none.
+func findFirstMultiByteChar(bytes []byte) int { + for bytesIdx, b := range bytes { + // We have a multi-byte codepoint, we need to allocate + // codepointIndices + if b&0x80 == 0x80 { + return bytesIdx + } + } + return len(bytes) +} + func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) { var line []byte line, err = d.data.ReadBytes('\n')