diff --git a/CHANGELOG.md b/CHANGELOG.md index a68f1575e..e2b0ff251 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +## [0.44.1] + +### Fixed +- Fixed a bug where `wrap_string` would not correctly split very long Unicode words. + + ## [0.44.0] ### Added diff --git a/Project.toml b/Project.toml index 5a35589f8..77b37816f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PromptingTools" uuid = "670122d1-24a8-4d70-bfce-740807c42192" authors = ["J S @svilupp and contributors"] -version = "0.44.0" +version = "0.44.1" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/utils.jl b/src/utils.jl index 5dc017a8d..20523a32b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -231,8 +231,10 @@ function wrap_string(str::AbstractString, current_line_length = 0 end while word_length > text_width - write(output, word[1:(text_width - 1)], "-$newline") - word = word[text_width:end] + chop_idx = prevind(word, text_width, 1) + write(output, word[1:(chop_idx)], "-$newline") + start_idx = nextind(word, chop_idx, 1) + word = word[start_idx:end] word_length -= text_width - 1 end end diff --git a/test/utils.jl b/test/utils.jl index a0495aeb7..93ff42899 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -102,6 +102,11 @@ end ## ensure newlines are not removed str = "This function\n will wrap\n words into lines" @test wrap_string(str, length(str)) == str + # Unicode testing + long_unicode_sentence = "Überraschenderweise ℕ𝕖𝕦𝕣𝕠𝕥𝕣𝕒𝕟𝕤𝕞𝕚𝕥𝕥𝕖𝕣 ℂ𝕙𝕣𝕪𝕤𝕒𝕟𝕥𝕙𝕖𝕞𝕦𝕞𝕤 𝕊𝕪𝕟𝕔𝕙𝕣𝕠𝕡𝕙𝕒𝕤𝕠𝕥𝕣𝕠𝕟 Ξ𝕩𝕥𝕣𝕒𝕠𝕣𝕕𝕚𝕟𝕒𝕚𝕣𝕖" + wrapped = wrap_string(long_unicode_sentence, 20) + @test all(length(line) ≤ 20 for line in split(wrapped, "\n")) + @test join(split(wrapped, "\n"), "") == replace(long_unicode_sentence, " " => "") end @testset
"length_longest_common_subsequence" begin