Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly update and trim codepoint indices after trimming data #62

Merged
merged 6 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 43 additions & 10 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,20 +197,26 @@ func (d *Decoder) readLine(v reflect.Value) (err error, ok bool) {
}

func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawValue {
var trimFunc func(string) string
var trimFunc func(in string) (out string, leftRemoved int, rightRemoved int)

switch format.alignment {
case left:
trimFunc = func(s string) string {
return strings.TrimRight(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
out = strings.TrimRight(s, string(format.padChar))
return out, 0, len(s) - len(out)
}
case right:
trimFunc = func(s string) string {
return strings.TrimLeft(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
out = strings.TrimLeft(s, string(format.padChar))
return out, len(s) - len(out), 0
}
default:
trimFunc = func(s string) string {
return strings.Trim(s, string(format.padChar))
trimFunc = func(s string) (out string, leftRemoved int, rightRemoved int) {
leftTrimmed := strings.TrimLeft(s, string(format.padChar))
leftRemoved = len(s) - len(leftTrimmed)
rightTrimmed := strings.TrimRight(leftTrimmed, string(format.padChar))
rightRemoved = len(leftTrimmed) - len(rightTrimmed)
return rightTrimmed, leftRemoved, rightRemoved
}
}

Expand All @@ -227,9 +233,35 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
relevantIndices = value.codepointIndices[startPos-1 : endPos]
lineData = value.data[relevantIndices[0]:value.codepointIndices[endPos]]
}

newIndices := relevantIndices
if relevantIndices[0] > 0 {
// We trimmed data from the front of the string.
// We need to adjust the codepoint indices to reflect this, as they have shifted.
removedFromFront := relevantIndices[0]
newIndices = make([]int, 0, len(relevantIndices))
for _, idx := range relevantIndices {
newIndices = append(newIndices, idx-removedFromFront)
}
}

// Trim the new line data.
newLineData, leftRemovedBytes, rightRemovedBytes := trimFunc(lineData)
trimmedIndices := newIndices
if leftRemovedBytes > 0 || rightRemovedBytes > 0 {
// We must trim our codepoint indices list in order to match
// the newly trimmed line data string.
trimmedIndices = []int{}
for _, idx := range newIndices {
if idx >= leftRemovedBytes && idx < len(lineData)-rightRemovedBytes {
trimmedIndices = append(trimmedIndices, idx-leftRemovedBytes)
}
}
}

return rawValue{
data: trimFunc(lineData),
codepointIndices: relevantIndices,
data: newLineData,
codepointIndices: trimmedIndices,
}
} else {
if len(value.data) == 0 || startPos > len(value.data) {
Expand All @@ -238,8 +270,9 @@ func rawValueFromLine(value rawValue, startPos, endPos int, format format) rawVa
if endPos > len(value.data) {
endPos = len(value.data)
}
newLineData, _, _ := trimFunc(value.data[startPos-1 : endPos])
return rawValue{
data: trimFunc(value.data[startPos-1 : endPos]),
data: newLineData,
}
}
}
Expand Down
116 changes: 116 additions & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,122 @@ func TestDecodeSetUseCodepointIndices(t *testing.T) {

}

// TestDecodeSetUseCodepointIndices_Nested decodes a single 21-character line
// into a struct that interleaves plain string fields with nested struct
// fields while codepoint indexing is enabled. It runs once with ASCII-only
// input and once with input that contains a multi-byte character.
func TestDecodeSetUseCodepointIndices_Nested(t *testing.T) {
	// Nested splits a six-column window into two three-column strings.
	type Nested struct {
		First  string `fixed:"1,3"`
		Second string `fixed:"4,6"`
	}

	// Test alternates three-column strings with six-column nested windows.
	type Test struct {
		First  string `fixed:"1,3"`
		Second Nested `fixed:"4,9"`
		Third  string `fixed:"10,12"`
		Fourth Nested `fixed:"13,18"`
		Fifth  string `fixed:"19,21"`
	}

	type testCase struct {
		name     string
		raw      []byte
		expected Test
	}

	cases := []testCase{
		{
			name: "All ASCII characters",
			raw:  []byte("123ABC456DEF789GHI012\n"),
			expected: Test{
				First:  "123",
				Second: Nested{First: "ABC", Second: "456"},
				Third:  "DEF",
				Fourth: Nested{First: "789", Second: "GHI"},
				Fifth:  "012",
			},
		},
		{
			name: "Multi-byte characters",
			raw:  []byte("123x☃x456x☃x789x☃x012\n"),
			expected: Test{
				First:  "123",
				Second: Nested{First: "x☃x", Second: "456"},
				Third:  "x☃x",
				Fourth: Nested{First: "789", Second: "x☃x"},
				Fifth:  "012",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var got Test

			dec := NewDecoder(bytes.NewReader(tc.raw))
			dec.SetUseCodepointIndices(true)

			if err := dec.Decode(&got); err != nil {
				t.Errorf("Unexpected err: %v", err)
			}
			if !reflect.DeepEqual(tc.expected, got) {
				t.Errorf("Decode(%v) want %v, have %v", tc.raw, tc.expected, got)
			}
		})
	}
}

// TestDecodeSetUseCodepointIndices_PaddingTrimmed decodes a 10-column record
// whose nested struct (columns 1-8) consists of a zero-padded integer followed
// by three string fields that contain only padding, and whose trailing field
// (columns 9-10) holds real data. It checks, with codepoint indexing enabled,
// that the padding-only fields decode to empty strings and that the trailing
// field is still read from the correct columns, for both ASCII and multi-byte
// trailing data.
func TestDecodeSetUseCodepointIndices_PaddingTrimmed(t *testing.T) {
	type Nested struct {
		First  int64  `fixed:"1,2,right,0"`
		Second string `fixed:"3,4"`
		Third  string `fixed:"5,6"`
		Fourth string `fixed:"7,8"`
	}
	type Test struct {
		First  Nested `fixed:"1,8"`
		Second string `fixed:"9,10"`
	}

	for _, tt := range []struct {
		name     string
		raw      []byte
		expected Test
	}{
		{
			name: "All ASCII characters",
			// Layout: "00" (cols 1-2), six padding spaces (cols 3-8),
			// "11" (cols 9-10). The six spaces are significant: with fewer,
			// columns 9-10 would not exist and Second could not be "11".
			raw: []byte("00      11"),
			expected: Test{
				First: Nested{
					First:  0,
					Second: "",
					Third:  "",
					Fourth: "",
				},
				Second: "11",
			},
		},
		{
			name: "Multi-byte characters",
			// Same layout as above, but columns 9-10 hold multi-byte
			// codepoints, so byte offsets and column positions diverge.
			raw: []byte("00      ☃☃"),
			expected: Test{
				First: Nested{
					First:  0,
					Second: "",
					Third:  "",
					Fourth: "",
				},
				Second: "☃☃",
			},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			d := NewDecoder(bytes.NewReader(tt.raw))
			d.SetUseCodepointIndices(true)
			var s Test
			err := d.Decode(&s)
			if err != nil {
				t.Errorf("Unexpected err: %v", err)
			}
			if !reflect.DeepEqual(tt.expected, s) {
				// %q prints the raw input as readable text; %v on a []byte
				// would print a list of decimal byte values.
				t.Errorf("Decode(%q) want %v, have %v", tt.raw, tt.expected, s)
			}
		})
	}
}

// Verify the behavior of Decoder.Decode at the end of a file. See
// https://github.com/ianlopshire/go-fixedwidth/issues/6 for more details.
func TestDecode_EOF(t *testing.T) {
Expand Down
Loading