From bd1677eacf1c21eed3b610efc3f6fd6c291a21e2 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 6 May 2024 08:37:14 +0200 Subject: [PATCH 01/35] Add take_string! This function creates a string from a byte vector, truncating the vector and reusing its memory where possible. --- NEWS.md | 1 + base/exports.jl | 1 + base/strings/string.jl | 37 ++++++++++++++++++++++++++++++++----- doc/src/base/strings.md | 1 + 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index c12cc3c64300c..06bca20bde95a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,6 +103,7 @@ New library features * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) * `Lockable` is now exported ([#54595]) * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) +* `take_string!(v)` creates a `String` from `v`, truncating `v` and reusing its memory if possible. Standard library changes ------------------------ diff --git a/base/exports.jl b/base/exports.jl index daba9a010a9e6..a14e2aa43224f 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -638,6 +638,7 @@ export split, string, strip, + take_string!, textwidth, thisind, titlecase, diff --git a/base/strings/string.jl b/base/strings/string.jl index 90d6e5b26ccd3..1e6291cfea2e9 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -62,11 +62,8 @@ In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v)) -function String(v::Memory{UInt8}) - len = length(v) - len == 0 && return "" - return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) -end +String(v::Memory{UInt8}) = take_string!(copy(v)) + function String(v::Vector{UInt8}) #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) len = length(v) @@ -83,6 +80,36 @@ function String(v::Vector{UInt8}) return str end +""" + take_string!(v::Union{Memory{UInt8}, Vector{UInt8}}) -> String + +Create a string, truncating `v` to zero length. If `v` is a `Vector`, further +modification of `v` will not modify the string. +When possible, the returned string will reuse the memory of `v`. + +# Examples +```jldoctest +julia> v = Memory{UInt8}([0x61, 0x62, 0x63]); + +julia> s = take_string!(v) +"abc" + +julia> isempty(v) +true +``` +""" +function take_string!(v::Memory{UInt8}) + len = length(v) + len == 0 && return "" + return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) +end + +# Note: Currently, this constructor truncates for Vector, but not for AbstractVector. +# See issue 32528 +function take_string!(v::Vector{UInt8}) + String(v) +end + """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index b7d16ffc7d487..5fdd6aa459e23 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -34,6 +34,7 @@ Base.SubstitutionString Base.@s_str Base.@raw_str Base.@b_str +Base.take_string! Base.Docs.@html_str Base.Docs.@text_str Base.isvalid(::Any) From 71a030f349c8860f42e1baa343de3766acd14039 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 6 May 2024 14:28:48 +0200 Subject: [PATCH 02/35] Use take_string! on StringMemory uses in Base --- base/gmp.jl | 2 +- base/intfuncs.jl | 12 ++++++------ base/strings/string.jl | 2 +- base/strings/util.jl | 2 +- base/uuid.jl | 2 +- stdlib/FileWatching/src/pidfile.jl | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/base/gmp.jl b/base/gmp.jl index 1eaa20d6baecf..b0618e033e25d 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - String(sv) + take_string!(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 8d46fcffa3ad5..2cec055babcb6 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -766,7 +766,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + take_string!(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -780,7 +780,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + take_string!(a) end # 2-digit decimal characters ("00":"99") @@ -850,7 +850,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + take_string!(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -871,7 +871,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + take_string!(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -896,7 +896,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + take_string!(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -972,7 +972,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return String(str) + return take_string!(str) end """ diff --git a/base/strings/string.jl b/base/strings/string.jl index 1e6291cfea2e9..2b043ea7486a4 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -61,7 +61,7 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v)) +String(v::AbstractVector{UInt8}) = take_string!(copyto!(StringMemory(length(v)), v)) String(v::Memory{UInt8}) = take_string!(copy(v)) function String(v::Vector{UInt8}) diff --git a/base/strings/util.jl b/base/strings/util.jl index 0ba76e1c76fa0..7f8b069d8c8c4 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1214,7 +1214,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return String(b) + return take_string!(b) end function bytes2hex(io::IO, itr) diff --git a/base/uuid.jl b/base/uuid.jl index 9b2da3c6409db..9c13d2ea306d0 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -96,7 +96,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return String(a) + return take_string!(a) end end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 4c821a3d897e4..6819387d51d05 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -285,7 +285,7 @@ function _rand_filename(len::Int=4) # modified from Base.Libc for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] end - return String(slug) + return take_string!(slug) end function tryrmopenfile(path::String) From d247d9290e35104e61d2dd835f815ff88e87ec5d Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 17:24:58 +0200 Subject: [PATCH 03/35] Rename to takestring! --- NEWS.md | 2 +- base/exports.jl | 2 +- base/gmp.jl | 2 +- base/intfuncs.jl | 12 ++++++------ base/strings/string.jl | 12 ++++++------ base/strings/util.jl | 2 +- base/uuid.jl | 2 +- doc/src/base/strings.md | 2 +- stdlib/FileWatching/src/pidfile.jl | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/NEWS.md b/NEWS.md index 06bca20bde95a..f9e7756f25fd4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,7 +103,7 @@ New library features * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) * `Lockable` is now exported ([#54595]) * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) -* `take_string!(v)` creates a `String` from `v`, truncating `v` and reusing its memory if possible. +* `takestring!(v)` creates a `String` from `v`, truncating `v` and reusing its memory if possible. Standard library changes ------------------------ diff --git a/base/exports.jl b/base/exports.jl index a14e2aa43224f..b1eda2b60c52c 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -638,7 +638,7 @@ export split, string, strip, - take_string!, + takestring!, textwidth, thisind, titlecase, diff --git a/base/gmp.jl b/base/gmp.jl index b0618e033e25d..0c6de5355de62 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - take_string!(sv) + takestring!(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 2cec055babcb6..23e243a699276 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -766,7 +766,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - take_string!(a) + takestring!(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -780,7 +780,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - take_string!(a) + takestring!(a) end # 2-digit decimal characters ("00":"99") @@ -850,7 +850,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - take_string!(a) + takestring!(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -871,7 +871,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - take_string!(a) + takestring!(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -896,7 +896,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - take_string!(a) + takestring!(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -972,7 +972,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return take_string!(str) + return takestring!(str) end """ diff --git a/base/strings/string.jl b/base/strings/string.jl index 2b043ea7486a4..b63ba3357b15a 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -61,8 +61,8 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = take_string!(copyto!(StringMemory(length(v)), v)) -String(v::Memory{UInt8}) = take_string!(copy(v)) +String(v::AbstractVector{UInt8}) = takestring!(copyto!(StringMemory(length(v)), v)) +String(v::Memory{UInt8}) = takestring!(copy(v)) function String(v::Vector{UInt8}) #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) @@ -81,7 +81,7 @@ function String(v::Vector{UInt8}) end """ - take_string!(v::Union{Memory{UInt8}, Vector{UInt8}}) -> String + takestring!(v::Union{Memory{UInt8}, Vector{UInt8}}) -> String Create a string, truncating `v` to zero length. If `v` is a `Vector`, further modification of `v` will not modify the string. @@ -91,14 +91,14 @@ When possible, the returned string will reuse the memory of `v`. ```jldoctest julia> v = Memory{UInt8}([0x61, 0x62, 0x63]); -julia> s = take_string!(v) +julia> s = takestring!(v) "abc" julia> isempty(v) true ``` """ -function take_string!(v::Memory{UInt8}) +function takestring!(v::Memory{UInt8}) len = length(v) len == 0 && return "" return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) @@ -106,7 +106,7 @@ end # Note: Currently, this constructor truncates for Vector, but not for AbstractVector. # See issue 32528 -function take_string!(v::Vector{UInt8}) +function takestring!(v::Vector{UInt8}) String(v) end diff --git a/base/strings/util.jl b/base/strings/util.jl index 7f8b069d8c8c4..c6f853a54c940 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1214,7 +1214,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return take_string!(b) + return takestring!(b) end function bytes2hex(io::IO, itr) diff --git a/base/uuid.jl b/base/uuid.jl index 9c13d2ea306d0..d7a8f51a3c9d2 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -96,7 +96,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return take_string!(a) + return takestring!(a) end end diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 5fdd6aa459e23..b2ba92fc54981 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -34,7 +34,7 @@ Base.SubstitutionString Base.@s_str Base.@raw_str Base.@b_str -Base.take_string! +Base.takestring! Base.Docs.@html_str Base.Docs.@text_str Base.isvalid(::Any) diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 6819387d51d05..9fecf7d415fa7 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -285,7 +285,7 @@ function _rand_filename(len::Int=4) # modified from Base.Libc for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] end - return take_string!(slug) + return takestring!(slug) end function tryrmopenfile(path::String) From 699916e03251fe14ea1840c21bde8c6bb855e888 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 17:28:15 +0200 Subject: [PATCH 04/35] Use more efficient fallback for String(::Memory) as suggested --- base/strings/string.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/strings/string.jl b/base/strings/string.jl index b63ba3357b15a..a12ba1163b781 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -62,7 +62,6 @@ In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ String(v::AbstractVector{UInt8}) = takestring!(copyto!(StringMemory(length(v)), v)) -String(v::Memory{UInt8}) = takestring!(copy(v)) function String(v::Vector{UInt8}) #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) From 1ab178541c37629e274cd36e3be2a59c0f4bc8c0 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 18:09:11 +0200 Subject: [PATCH 05/35] Update docstring --- base/strings/string.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/base/strings/string.jl b/base/strings/string.jl index a12ba1163b781..40cf0415deaa4 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -80,11 +80,9 @@ function String(v::Vector{UInt8}) end """ - takestring!(v::Union{Memory{UInt8}, Vector{UInt8}}) -> String + takestring!(x) -> String -Create a string, truncating `v` to zero length. If `v` is a `Vector`, further -modification of `v` will not modify the string. -When possible, the returned string will reuse the memory of `v`. +Create a string from the content of `x`, emptying `x`. # Examples ```jldoctest @@ -97,6 +95,8 @@ julia> isempty(v) true ``` """ +function takestring! end + function takestring!(v::Memory{UInt8}) len = length(v) len == 0 && return "" From 50d62c828ee20a0b7eb039295416433c26de0417 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 18:09:26 +0200 Subject: [PATCH 06/35] Implement takestring(::IOBuffer) and unsafe_takestring! --- base/iobuffer.jl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index c0c2731eec08b..8e1c6b5879788 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -489,6 +489,52 @@ function take!(io::IOBuffer) return data end +"Internal method. Copies the data from the IOBuffer to a string, leaving the +IOBuffer in an unusable (erroneous) state. +This assumes the IOBuffer is writable and seekable" +function unsafe_takestring!(io::IOBuffer) + off = io.offset + nbytes = io.size - off + iszero(nbytes) && return "" + mem = StringMemory(nbytes) + unsafe_copyto!(mem, 1, io.data, off+1, nbytes) + return takestring!(mem) +end + +""" + takestring!(io::IOBuffer) -> String + +Return the content of `io` as a `String`, emptying the buffer. +This is preferred over calling `String(take!(io))` to create a string from +an `IOBuffer`. + +# Examples +```jldoctest +julia> io = IOBuffer(); + +julia> write(io, 0x61); write(io, 0x62); write(io, 0x63); + +julia> s = takestring!(io) +"abc" + +julia> isempty(take!(io)) # io is now empty +true +``` +""" +function takestring!(io::IOBuffer) + s = unsafe_takestring!(io) + + # Put the IO in a well-defined state after emptying its underlying memory + ismarked(io) && unmark(io) + if io.writable + io.reinit = true + io.ptr = 1 + io.size = 0 + io.offset = 0 + end + return s +end + """ _unsafe_take!(io::IOBuffer) From 9e14b924344c47e5aa78929f2398c5e615acf753 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 18:45:54 +0200 Subject: [PATCH 07/35] Use (unsafe_)takestring! throughout Base --- base/compiler/ssair/show.jl | 2 +- base/indices.jl | 2 +- base/int.jl | 2 +- base/io.jl | 12 ++++++------ base/iobuffer.jl | 4 ++-- base/iostream.jl | 4 ++-- base/pkgid.jl | 2 +- base/show.jl | 8 ++++---- base/stat.jl | 2 +- base/strings/annotated.jl | 4 ++-- base/strings/io.jl | 10 +++++----- base/strings/unicode.jl | 2 +- base/strings/util.jl | 4 ++-- base/task.jl | 2 +- base/util.jl | 4 ++-- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl index 7d936a1688aba..53799e980462b 100644 --- a/base/compiler/ssair/show.jl +++ b/base/compiler/ssair/show.jl @@ -312,7 +312,7 @@ function compute_ir_line_annotations(code::IRCode) loc_method = string(" "^printing_depth, loc_method) last_stack = stack end - push!(loc_annotations, String(take!(buf))) + push!(loc_annotations, takestring!(buf)) push!(loc_lineno, (lineno != 0 && lineno != last_lineno) ? string(lineno) : "") push!(loc_methods, loc_method) (lineno != 0) && (last_lineno = lineno) diff --git a/base/indices.jl b/base/indices.jl index 455bb0f7656a1..fbcf6b924a3a9 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -132,7 +132,7 @@ function throw_promote_shape_mismatch(a::Tuple, b::Union{Nothing,Tuple}, i = not if i ≢ nothing print(msg, ", mismatch at dim ", i) end - throw(DimensionMismatch(String(take!(msg)))) + throw(DimensionMismatch(unsafe_takestring!(msg))) end function promote_shape(a::Tuple{Int,}, b::Tuple{Int,}) diff --git a/base/int.jl b/base/int.jl index a25b17e2cc958..581a875af4253 100644 --- a/base/int.jl +++ b/base/int.jl @@ -718,7 +718,7 @@ macro big_str(s) is_prev_dot = (c == '.') end print(bf, s[end]) - s = String(take!(bf)) + s = unsafe_takestring!(bf) end n = tryparse(BigInt, s) n === nothing || return n diff --git a/base/io.jl b/base/io.jl index 83a215d6359fc..ebe4b4036264d 100644 --- a/base/io.jl +++ b/base/io.jl @@ -277,13 +277,13 @@ julia> io = IOBuffer(); julia> write(io, "JuliaLang is a GitHub organization.", " It has many members.") 56 -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization. It has many members." julia> write(io, "Sometimes those members") + write(io, " write documentation.") 44 -julia> String(take!(io)) +julia> takestring!(io) "Sometimes those members write documentation." ``` User-defined plain-data types without `write` methods can be written when wrapped in a `Ref`: @@ -566,10 +566,10 @@ Similar to [`readuntil`](@ref), which returns a `String`; in contrast, ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); -julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", 'L'))) +julia> takestring!(copyuntil(IOBuffer(), "my_file.txt", 'L')) "Julia" -julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true))) +julia> takestring!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true)) "JuliaLang is a GitHub organization." julia> rm("my_file.txt") @@ -642,10 +642,10 @@ See also [`copyuntil`](@ref) for reading until more general delimiters. ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); -julia> String(take!(copyline(IOBuffer(), "my_file.txt"))) +julia> takestring!(copyline(IOBuffer(), "my_file.txt")) "JuliaLang is a GitHub organization." -julia> String(take!(copyline(IOBuffer(), "my_file.txt", keep=true))) +julia> takestring!(copyline(IOBuffer(), "my_file.txt", keep=true)) "JuliaLang is a GitHub organization.\\n" julia> rm("my_file.txt") diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 8e1c6b5879788..aba892a4bea63 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -66,7 +66,7 @@ julia> io = IOBuffer(); julia> write(io, "JuliaLang is a GitHub organization.", " It has many members.") 56 -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization. It has many members." julia> io = IOBuffer(b"JuliaLang is a GitHub organization.") @@ -84,7 +84,7 @@ IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=fa julia> write(io, "JuliaLang is a GitHub organization.") 34 -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization" julia> length(read(IOBuffer(b"data", read=true, truncate=false))) diff --git a/base/iostream.jl b/base/iostream.jl index 762f881cfbecb..4f1f7d94593be 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -98,7 +98,7 @@ julia> write(io, "JuliaLang is a GitHub organization.") julia> truncate(io, 15) IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=15, maxsize=Inf, ptr=16, mark=-1) -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a " julia> io = IOBuffer(); @@ -107,7 +107,7 @@ julia> write(io, "JuliaLang is a GitHub organization."); julia> truncate(io, 40); -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization.\\0\\0\\0\\0\\0" ``` """ diff --git a/base/pkgid.jl b/base/pkgid.jl index 8c776d79a69cb..9878c0b0a8a40 100644 --- a/base/pkgid.jl +++ b/base/pkgid.jl @@ -32,7 +32,7 @@ function binpack(pkg::PkgId) uuid = pkg.uuid write(io, uuid === nothing ? UInt128(0) : UInt128(uuid)) write(io, pkg.name) - return String(take!(io)) + return unsafe_takestring!(io) end function binunpack(s::String) diff --git a/base/show.jl b/base/show.jl index 0a2976e7ebe42..91ee9d5d5a04b 100644 --- a/base/show.jl +++ b/base/show.jl @@ -375,12 +375,12 @@ julia> io = IOBuffer(); julia> printstyled(IOContext(io, :color => true), "string", color=:red) -julia> String(take!(io)) +julia> takestring!(io) "\\e[31mstring\\e[39m" julia> printstyled(io, "string", color=:red) -julia> String(take!(io)) +julia> takestring!(io) "string" ``` @@ -2718,7 +2718,7 @@ function type_depth_limit(str::String, n::Int; maxdepth = nothing) end prev = di end - return String(take!(output)) + return unsafe_takestring!(output) end function print_type_bicolor(io, type; kwargs...) @@ -3150,7 +3150,7 @@ summary(io::IO, x) = print(io, typeof(x)) function summary(x) io = IOBuffer() summary(io, x) - String(take!(io)) + unsafe_takestring!(io) end ## `summary` for AbstractArrays diff --git a/base/stat.jl b/base/stat.jl index 506b5644dccbc..48967bbae0035 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -301,7 +301,7 @@ function filemode_string(mode) end complete && write(str, "-") end - return String(take!(str)) + return unsafe_takestring!(str) end """ diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index be4c6887d4a6d..57f80e15b7a47 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -274,7 +274,7 @@ function annotatedstring(xs...) print(s, x) end end - str = String(take!(buf)) + str = unsafe_takestring!(buf) AnnotatedString(str, annotations) end @@ -457,7 +457,7 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin stop_offset = last(offsets[findlast(<=(stop) ∘ first, offsets)::Int]) push!(annots, ((start + start_offset):(stop + stop_offset), value)) end - AnnotatedString(String(take!(outstr)), annots) + AnnotatedString(unsafe_takestring!(outstr), annots) end ## AnnotatedIOBuffer diff --git a/base/strings/io.jl b/base/strings/io.jl index acbd945c8e137..4d39fdb9e8a71 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -25,7 +25,7 @@ julia> io = IOBuffer(); julia> print(io, "Hello", ' ', :World!) -julia> String(take!(io)) +julia> takestring!(io) "Hello World!" ``` """ @@ -68,7 +68,7 @@ julia> io = IOBuffer(); julia> println(io, "Hello", ',', " world.") -julia> String(take!(io)) +julia> takestring!(io) "Hello, world.\\n" ``` """ @@ -298,10 +298,10 @@ Create a read-only `IOBuffer` on the data underlying the given string. ```jldoctest julia> io = IOBuffer("Haho"); -julia> String(take!(io)) +julia> takestring!(io) "Haho" -julia> String(take!(io)) +julia> takestring!(io) "Haho" ``` """ @@ -777,7 +777,7 @@ function unindent(str::AbstractString, indent::Int; tabwidth=8) print(buf, ' ') end end - String(take!(buf)) + unsafe_takestring!(buf) end function String(a::AbstractVector{Char}) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index ad047514c85a6..68e4b3f09d1da 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -689,7 +689,7 @@ function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Boo end c0 = c end - return String(take!(b)) + return unsafe_takestring!(b) end # TODO: improve performance characteristics, room for a ~10x improvement. diff --git a/base/strings/util.jl b/base/strings/util.jl index c6f853a54c940..58a87a111de34 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1028,7 +1028,7 @@ function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N return String(str) end out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) - return String(take!(_replace_finish(out, str, count, e1, patterns, replaces, rs))) + return takestring!(_replace_finish(out, str, count, e1, patterns, replaces, rs)) end """ @@ -1260,5 +1260,5 @@ function Base.rest(s::AbstractString, st...) for c in Iterators.rest(s, st...) print(io, c) end - return String(take!(io)) + return unsafe_takestring!(io) end diff --git a/base/task.jl b/base/task.jl index 6cb1ff785eeee..f7439418f2762 100644 --- a/base/task.jl +++ b/base/task.jl @@ -95,7 +95,7 @@ function show_task_exception(io::IO, t::Task; indent = true) else show_exception_stack(IOContext(b, io), stack) end - str = String(take!(b)) + str = unsafe_takestring!(b) if indent str = replace(str, "\n" => "\n ") end diff --git a/base/util.jl b/base/util.jl index 95d62c4a16e1d..e606e092ef959 100644 --- a/base/util.jl +++ b/base/util.jl @@ -77,7 +77,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} iscolor = get(io, :color, false)::Bool try f(IOContext(buf, io), args...) finally - str = String(take!(buf)) + str = takestring!(buf) if !iscolor print(io, str) else @@ -109,7 +109,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} isempty(line) && continue print(buf, enable_ansi, line, disable_ansi) end - print(io, String(take!(buf))) + print(io, takestring!(buf)) end end end From 55f8d2307373e780dec80d173fa458902b64cd9a Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 19:45:30 +0200 Subject: [PATCH 08/35] Fixup String(::Vector) impl --- base/strings/string.jl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/base/strings/string.jl b/base/strings/string.jl index 40cf0415deaa4..b38733a898c96 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -64,15 +64,12 @@ to guarantee consistent behavior. String(v::AbstractVector{UInt8}) = takestring!(copyto!(StringMemory(length(v)), v)) function String(v::Vector{UInt8}) - #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) len = length(v) len == 0 && return "" - ref = v.ref - if ref.ptr_or_offset == ref.mem.ptr - str = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), ref.mem, len) - else - str = ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), ref, len) - end + # This method copies the content of the Memory such that the underlying + # Memory is unchanged, but then empties the Vector and re-assigns a new + # empty memory to the string. + str = String(StringMemory(len)) # optimized empty!(v); sizehint!(v, 0) calls setfield!(v, :size, (0,)) setfield!(v, :ref, memoryref(Memory{UInt8}())) @@ -100,6 +97,7 @@ function takestring! end function takestring!(v::Memory{UInt8}) len = length(v) len == 0 && return "" + # This function will truncate the memory to zero length return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) end From 46d0357054c6465a0c95721db570cb12b71a4767 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 19:50:16 +0200 Subject: [PATCH 09/35] Replace uses of String(_unsafe_take!(x)) --- base/io.jl | 9 ++++----- base/iostream.jl | 2 +- base/strings/basic.jl | 2 +- base/strings/io.jl | 6 +++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/base/io.jl b/base/io.jl index ebe4b4036264d..03d9ac9614616 100644 --- a/base/io.jl +++ b/base/io.jl @@ -544,7 +544,7 @@ julia> rm("my_file.txt") """ readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) -readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = unsafe_takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep) @@ -616,8 +616,7 @@ Logan """ readline(filename::AbstractString; keep::Bool=false) = open(io -> readline(io; keep), filename) -readline(s::IO=stdin; keep::Bool=false) = - String(_unsafe_take!(copyline(IOBuffer(sizehint=16), s; keep))) +readline(s::IO=stdin; keep::Bool=false) = unsafe_takestring!(copyline(IOBuffer(sizehint=16), s; keep)) """ copyline(out::IO, io::IO=stdin; keep::Bool=false) @@ -1291,7 +1290,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) empty!(chunks) # will cause next iteration to terminate seekend(r.itr.stream) # reposition to end of stream for isdone - s = String(_unsafe_take!(buf)) + s = unsafe_takestring!(buf) else # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline] if ichunk == jchunk # common case: current and previous newline in same chunk @@ -1308,7 +1307,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) end write(buf, view(chunks[jchunk], 1:jnewline)) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) - s = String(_unsafe_take!(buf)) + s = unsafe_takestring!(buf) # overwrite obsolete chunks (ichunk+1:jchunk) i = jchunk diff --git a/base/iostream.jl b/base/iostream.jl index 4f1f7d94593be..ff035ba384256 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -456,7 +456,7 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) end readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = isascii(delim) ? readuntil_string(s, delim % UInt8, keep) : - String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep))) + unsafe_takestring!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index bf11199143c1e..2710fc926c9a1 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -681,7 +681,7 @@ function filter(f, s::AbstractString) for c in s f(c) && write(out, c) end - String(_unsafe_take!(out)) + unsafe_takestring!(out) end ## string first and last ## diff --git a/base/strings/io.jl b/base/strings/io.jl index 4d39fdb9e8a71..72eb14d76f996 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -113,7 +113,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0) else f(s, args...) end - String(_unsafe_take!(s)) + unsafe_takestring!(s) end function _str_sizehint(x) @@ -147,7 +147,7 @@ function print_to_string(xs...) for x in xs print(s, x) end - String(_unsafe_take!(s)) + unsafe_takestring!(s) end function string_with_env(env, xs...) @@ -164,7 +164,7 @@ function string_with_env(env, xs...) for x in xs print(env_io, x) end - String(_unsafe_take!(s)) + unsafe_takestring!(s) end """ From 2f3691291cacd639e1052b3e75f3c945754e4049 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 20:02:28 +0200 Subject: [PATCH 10/35] Fixup IOBuffer --- base/iobuffer.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index aba892a4bea63..c88e68abe709e 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -522,7 +522,12 @@ true ``` """ function takestring!(io::IOBuffer) - s = unsafe_takestring!(io) + nbytes = filesize(io) + # If the memory is invalidated (and hence unused), or no bytes, return empty string + (io.reinit || iszero(nbytes)) && return "" + mem = StringMemory(nbytes) + start = io.seekable ? io.offset + 1 : io.ptr + unsafe_copyto!(mem, 1, io.data, start, nbytes) # Put the IO in a well-defined state after emptying its underlying memory ismarked(io) && unmark(io) From d7174bf9da290c38d0f97e0a2cb57409c7b388c3 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 20:22:16 +0200 Subject: [PATCH 11/35] Try fixing takestring of IOBuffer --- base/iobuffer.jl | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index c88e68abe709e..23c312dfbf6ec 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -523,19 +523,21 @@ true """ function takestring!(io::IOBuffer) nbytes = filesize(io) + # If the memory is invalidated (and hence unused), or no bytes, return empty string - (io.reinit || iszero(nbytes)) && return "" - mem = StringMemory(nbytes) - start = io.seekable ? io.offset + 1 : io.ptr - unsafe_copyto!(mem, 1, io.data, start, nbytes) + s = if iszero(nbytes) || io.reinit + "" + else + mem = StringMemory(nbytes) + start = io.seekable ? io.offset + 1 : io.ptr + unsafe_copyto!(mem, 1, io.data, start, nbytes) + end - # Put the IO in a well-defined state after emptying its underlying memory - ismarked(io) && unmark(io) - if io.writable - io.reinit = true - io.ptr = 1 - io.size = 0 - io.offset = 0 + # Empty the IOBuffer, resetting it. + io.mark = -1 + io.ptr = 1 + io.size = 0 + io.offset = 0 end return s end From 94485e6e3a93c63520f1061a797f0948c83c57d4 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 20:46:49 +0200 Subject: [PATCH 12/35] Fix IOBuffer again --- base/iobuffer.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 23c312dfbf6ec..ee063f1c68c68 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -525,12 +525,13 @@ function takestring!(io::IOBuffer) nbytes = filesize(io) # If the memory is invalidated (and hence unused), or no bytes, return empty string - s = if iszero(nbytes) || io.reinit + s = if (iszero(nbytes) || io.reinit) "" else mem = StringMemory(nbytes) start = io.seekable ? io.offset + 1 : io.ptr unsafe_copyto!(mem, 1, io.data, start, nbytes) + takestring!(mem) end # Empty the IOBuffer, resetting it. @@ -538,7 +539,6 @@ function takestring!(io::IOBuffer) io.ptr = 1 io.size = 0 io.offset = 0 - end return s end From a28c45d39fcd3f1d39964577608c585e1010683c Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 21:08:24 +0200 Subject: [PATCH 13/35] Fix string from vector --- base/strings/string.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/strings/string.jl b/base/strings/string.jl index b38733a898c96..2ed368bba3175 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -69,7 +69,9 @@ function String(v::Vector{UInt8}) # This method copies the content of the Memory such that the underlying # Memory is unchanged, but then empties the Vector and re-assigns a new # empty memory to the string. - str = String(StringMemory(len)) + mem = StringMemory(len) + GC.@preserve mem v unsafe_copyto!(pointer(mem), pointer(v), len) + str = takestring!(mem) # optimized empty!(v); sizehint!(v, 0) calls setfield!(v, :size, (0,)) setfield!(v, :ref, memoryref(Memory{UInt8}())) From ba78f8381771ab50e49cd53bedb8f144f74b6d42 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 21:26:06 +0200 Subject: [PATCH 14/35] Imports --- base/strings/unicode.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index 68e4b3f09d1da..ad903ecfb793d 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -6,7 +6,7 @@ module Unicode import Base: show, ==, hash, string, Symbol, isless, length, eltype, convert, isvalid, ismalformed, isoverlong, iterate, AnnotatedString, AnnotatedChar, annotated_chartransform, - @assume_effects, annotations + @assume_effects, annotations, unsafe_takestring! # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff From ac5753b32f525639b1d5ab9c2c4e8adaa2cb6959 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 21:43:42 +0200 Subject: [PATCH 15/35] Remove takestring!(::Memory) This commit builds upon #54438, re-using the old String(::Vector{UInt8}) impl, which now no longer truncates. Also remove takestring!(::Memory) - that function probably should not be defined --- base/strings/string.jl | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/base/strings/string.jl b/base/strings/string.jl index 2ed368bba3175..ea1078ce75b58 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -66,12 +66,12 @@ String(v::AbstractVector{UInt8}) = takestring!(copyto!(StringMemory(length(v)), function String(v::Vector{UInt8}) len = length(v) len == 0 && return "" - # This method copies the content of the Memory such that the underlying - # Memory is unchanged, but then empties the Vector and re-assigns a new - # empty memory to the string. - mem = StringMemory(len) - GC.@preserve mem v unsafe_copyto!(pointer(mem), pointer(v), len) - str = takestring!(mem) + ref = v.ref + if ref.ptr_or_offset == ref.mem.ptr + str = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), ref.mem, len) + else + str = ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), ref, len) + end # optimized empty!(v); sizehint!(v, 0) calls setfield!(v, :size, (0,)) setfield!(v, :ref, memoryref(Memory{UInt8}())) @@ -94,20 +94,7 @@ julia> isempty(v) true ``` """ -function takestring! end - -function takestring!(v::Memory{UInt8}) - len = length(v) - len == 0 && return "" - # This function will truncate the memory to zero length - return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) -end - -# Note: Currently, this constructor truncates for Vector, but not for AbstractVector. -# See issue 32528 -function takestring!(v::Vector{UInt8}) - String(v) -end +takestring!(v::Vector{UInt8}) = String(v) """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) From 4f838cddd2e15b45aaa3a37c562b53c5f262d7c9 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 21:51:48 +0200 Subject: [PATCH 16/35] Fold changes from #54438 into this PR --- src/genericmemory.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/genericmemory.c b/src/genericmemory.c index ea52fca66ba48..4a64b571ac367 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -193,7 +193,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_ } int how = jl_genericmemory_how(m); size_t mlength = m->length; - m->length = 0; if (how != 0) { jl_value_t *o = jl_genericmemory_data_owner_field(m); jl_genericmemory_data_owner_field(m) = NULL; @@ -208,8 +207,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_ JL_GC_PUSH1(&o); jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len); JL_GC_POP(); - if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon - jl_gc_count_freed(mlength); return str; } // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object From 1ef6b590740f4ca944e720ca1d82527f7a22dcea Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 10 May 2024 22:36:44 +0200 Subject: [PATCH 17/35] Update NEWS.md Co-authored-by: Steven G. Johnson --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f9e7756f25fd4..37edaa843a17e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,7 +103,7 @@ New library features * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) * `Lockable` is now exported ([#54595]) * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) -* `takestring!(v)` creates a `String` from `v`, truncating `v` and reusing its memory if possible. +* `takestring!(v)` creates a `String` from `v` (a `Vector{UInt8}` or `IOBuffer`), truncating `v` and reusing its memory if possible )([#54372]). Standard library changes ------------------------ From f3046c08b2a423de34f1a6e86d9c3ddc6d779521 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Sun, 12 May 2024 13:59:03 +0200 Subject: [PATCH 18/35] Define internal unsafe_takestring And use it unstead of takestring!(::Memory) --- base/gmp.jl | 2 +- base/int.jl | 2 +- base/intfuncs.jl | 12 ++++++------ base/iobuffer.jl | 11 +++++++++-- base/strings/string.jl | 10 ++++++++-- base/strings/util.jl | 2 +- base/uuid.jl | 2 +- stdlib/FileWatching/src/pidfile.jl | 2 +- test/iobuffer.jl | 11 +++++++++++ test/strings/basic.jl | 18 ++++++++++++++++++ 10 files changed, 57 insertions(+), 15 deletions(-) diff --git a/base/gmp.jl b/base/gmp.jl index 0c6de5355de62..25ba3901fe639 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - takestring!(sv) + unsafe_takestring(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/int.jl b/base/int.jl index 581a875af4253..a22728033abb8 100644 --- a/base/int.jl +++ b/base/int.jl @@ -718,7 +718,7 @@ macro big_str(s) is_prev_dot = (c == '.') end print(bf, s[end]) - s = unsafe_takestring!(bf) + s = unsafe_takestring(bf) end n = tryparse(BigInt, s) n === nothing || return n diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 23e243a699276..1515c2f763d7e 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -766,7 +766,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - takestring!(a) + unsafe_takestring(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -780,7 +780,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - takestring!(a) + unsafe_takestring(a) end # 2-digit decimal characters ("00":"99") @@ -850,7 +850,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - takestring!(a) + unsafe_takestring(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -871,7 +871,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - takestring!(a) + unsafe_takestring(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -896,7 +896,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - takestring!(a) + unsafe_takestring(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -972,7 +972,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return takestring!(str) + return unsafe_takestring(str) end """ diff --git a/base/iobuffer.jl b/base/iobuffer.jl index ee063f1c68c68..ea3c2417ab790 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -498,7 +498,7 @@ function unsafe_takestring!(io::IOBuffer) iszero(nbytes) && return "" mem = StringMemory(nbytes) unsafe_copyto!(mem, 1, io.data, off+1, nbytes) - return takestring!(mem) + return unsafe_takestring(mem) end """ @@ -531,7 +531,13 @@ function takestring!(io::IOBuffer) mem = StringMemory(nbytes) start = io.seekable ? io.offset + 1 : io.ptr unsafe_copyto!(mem, 1, io.data, start, nbytes) - takestring!(mem) + + # We can use unsafe_takestring here, because the IOBuffer + # gets its memory reallocated on next use due to setting + # io.reinit = true, so this can only cause UB if the user + # holds a reference to the internal .data field and mutates + # it. + unsafe_takestring(mem) end # Empty the IOBuffer, resetting it. @@ -539,6 +545,7 @@ function takestring!(io::IOBuffer) io.ptr = 1 io.size = 0 io.offset = 0 + io.reinit = true return s end diff --git a/base/strings/string.jl b/base/strings/string.jl index ea1078ce75b58..270716f4e37a3 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -61,7 +61,7 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = takestring!(copyto!(StringMemory(length(v)), v)) +String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v)) function String(v::Vector{UInt8}) len = length(v) @@ -85,7 +85,7 @@ Create a string from the content of `x`, emptying `x`. # Examples ```jldoctest -julia> v = Memory{UInt8}([0x61, 0x62, 0x63]); +julia> v = [0x61, 0x62, 0x63]; julia> s = takestring!(v) "abc" @@ -96,6 +96,12 @@ true """ takestring!(v::Vector{UInt8}) = String(v) +"Create a string re-using the memory, if possible. +Mutating the memory after calling this function is undefined behaviour." +function unsafe_takestring(m::Memory{UInt8}) + ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) +end + """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) diff --git a/base/strings/util.jl b/base/strings/util.jl index 58a87a111de34..5848d80d2cbd1 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1214,7 +1214,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return takestring!(b) + return unsafe_takestring(b) end function bytes2hex(io::IO, itr) diff --git a/base/uuid.jl b/base/uuid.jl index d7a8f51a3c9d2..217c0b55adf7d 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -96,7 +96,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return takestring!(a) + return unsafe_takestring(a) end end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 9fecf7d415fa7..f9085d67308e5 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -285,7 +285,7 @@ function _rand_filename(len::Int=4) # modified from Base.Libc for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] end - return takestring!(slug) + return unsafe_takestring(slug) end function tryrmopenfile(path::String) diff --git a/test/iobuffer.jl b/test/iobuffer.jl index b5b34a2dbed8c..89245878e25bd 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -54,6 +54,17 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size) @test_throws ArgumentError seek(io, 0) end +@testset "takestring!" begin + buf = IOBuffer() + write(buf, "abcø") + s = takestring!(buf) + @test isempty(takestring!(buf)) + @test s == "abcø" + write(buf, "xyz") + @test takestring!(buf) == "xyz" + buf = IOBuffer() +end + @testset "Read/write readonly IOBuffer" begin io = IOBuffer("hamster\nguinea pig\nturtle") @test position(io) == 0 diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 874607f3c1b20..f081d87a32d3e 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -49,6 +49,24 @@ using Random end end +@testset "takestring!" begin + v = [0x61, 0x62, 0x63] + old_mem = v.ref.mem + @test takestring!(v) == "abc" + @test isempty(v) + @test v.ref.mem !== old_mem # memory is changed + for v in [ + UInt8[], + [0x01, 0x02, 0x03], + collect(codeunits("æøå")) + ] + cp = copy(v) + s = takestring!(v) + @test isempty(v) + @test codeunits(s) == cp + end +end + @testset "{starts,ends}with" begin @test startswith("abcd", 'a') @test startswith('a')("abcd") From 10475772b8526b98f0668b042c69bae0fabc634e Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Sun, 12 May 2024 18:26:22 +0200 Subject: [PATCH 19/35] Fix imports --- base/gmp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/gmp.jl b/base/gmp.jl index 25ba3901fe639..df0d9fee49348 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -11,7 +11,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb, widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit, sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit, - clamp + clamp, unsafe_takestring if Clong == Int32 const ClongMax = Union{Int8, Int16, Int32} From 513038e64536e20da9961c6074551eea827fd0a3 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Sun, 12 May 2024 18:39:35 +0200 Subject: [PATCH 20/35] More uses of takestring --- base/errorshow.jl | 4 ++-- base/logging/ConsoleLogger.jl | 2 +- base/show.jl | 2 +- base/toml_parser.jl | 2 +- doc/src/devdocs/functions.md | 2 +- doc/src/manual/getting-started.md | 2 +- stdlib/Base64/src/encode.jl | 4 ++-- stdlib/InteractiveUtils/src/InteractiveUtils.jl | 2 +- stdlib/LibGit2/src/utils.jl | 2 +- stdlib/Markdown/src/Common/block.jl | 8 ++++---- stdlib/Markdown/src/parse/parse.jl | 4 ++-- stdlib/REPL/src/LineEdit.jl | 10 +++++----- stdlib/REPL/src/REPL.jl | 4 ++-- 13 files changed, 24 insertions(+), 24 deletions(-) diff --git a/base/errorshow.jl b/base/errorshow.jl index a3bf464439d44..1454193a11393 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -305,7 +305,7 @@ function showerror(io::IO, ex::MethodError) iob = IOContext(buf, io) # for type abbreviation as in #49795; some, like `convert(T, x)`, should not abbreviate show_signature_function(iob, Core.Typeof(f)) show_tuple_as_call(iob, :function, arg_types; hasfirst=false, kwargs = isempty(kwargs) ? nothing : kwargs) - str = String(take!(buf)) + str = takestring!(buf) str = type_limited_string_from_context(io, str) print(io, str) end @@ -596,7 +596,7 @@ function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) m = parentmodule_before_main(method) modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3) - push!(lines, String(take!(buf))) + push!(lines, takestring!(buf)) push!(line_score, -(right_matches * 2 + (length(arg_types_param) < 2 ? 1 : 0))) end end diff --git a/base/logging/ConsoleLogger.jl b/base/logging/ConsoleLogger.jl index c4596dd86c3f5..c0a98822b52d4 100644 --- a/base/logging/ConsoleLogger.jl +++ b/base/logging/ConsoleLogger.jl @@ -141,7 +141,7 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module for (key, val) in kwargs key === :maxlog && continue showvalue(valio, val) - vallines = split(String(take!(valbuf)), '\n') + vallines = split(takestring!(valbuf), '\n') if length(vallines) == 1 push!(msglines, (indent=2, msg=SubString("$key = $(vallines[1])"))) else diff --git a/base/show.jl b/base/show.jl index 91ee9d5d5a04b..def83e4eaf14e 100644 --- a/base/show.jl +++ b/base/show.jl @@ -2609,7 +2609,7 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type; end print_within_stacktrace(io, ")", bold=true) show_method_params(io, tv) - str = String(take!(buf)) + str = takestring!(buf) str = type_limited_string_from_context(out, str) print(out, str) nothing diff --git a/base/toml_parser.jl b/base/toml_parser.jl index 4d07cfed05d8a..7dac4af022eca 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -315,7 +315,7 @@ function point_to_line(str::AbstractString, a::Int, b::Int, context) c == '\n' && break print(io1, c) end - return String(take!(io1.io)), String(take!(io2.io)) + return takestring!(io1.io), takestring!(io2.io) end function Base.showerror(io::IO, err::ParserError) diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md index 777afaa56348d..fb67dfd17c3e2 100644 --- a/doc/src/devdocs/functions.md +++ b/doc/src/devdocs/functions.md @@ -117,7 +117,7 @@ function lines(words) n += length(w)+1 end end - String(take!(io)) + takestring!(io) end import Markdown [string(n) for n in names(Core;all=true) diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md index 2c69aabbda192..edec633245021 100644 --- a/doc/src/manual/getting-started.md +++ b/doc/src/manual/getting-started.md @@ -13,7 +13,7 @@ known as a read-eval-print loop or "REPL") by double-clicking the Julia executab using REPL io = IOBuffer() REPL.banner(io) -banner = String(take!(io)) +banner = takestring!(io) import Markdown Markdown.parse("```\n\$ julia\n\n$(banner)\njulia> 1 + 2\n3\n\njulia> ans\n3\n```") ``` diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl index 588b49aa28d97..958bd8ce061fc 100644 --- a/stdlib/Base64/src/encode.jl +++ b/stdlib/Base64/src/encode.jl @@ -24,7 +24,7 @@ julia> write(iob64_encode, "Hello!") julia> close(iob64_encode); -julia> str = String(take!(io)) +julia> str = takestring!(io) "SGVsbG8h" julia> String(base64decode(str)) @@ -211,6 +211,6 @@ function base64encode(f::Function, args...; context=nothing) f(IOContext(b, context), args...) end close(b) - return String(take!(s)) + return unsafe_takestring!(s) end base64encode(args...; context=nothing) = base64encode(write, args...; context=context) diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index 835988ddf149f..e7d10266677c6 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -147,7 +147,7 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) if verbose cpuio = IOBuffer() # print cpu_summary with correct alignment Sys.cpu_summary(cpuio) - for (i, line) in enumerate(split(chomp(String(take!(cpuio))), "\n")) + for (i, line) in enumerate(split(chomp(takestring!(cpuio)), "\n")) prefix = i == 1 ? " CPU: " : " " println(io, prefix, line) end diff --git a/stdlib/LibGit2/src/utils.jl b/stdlib/LibGit2/src/utils.jl index f62663a6ea4ca..a1edce7ab403a 100644 --- a/stdlib/LibGit2/src/utils.jl +++ b/stdlib/LibGit2/src/utils.jl @@ -162,7 +162,7 @@ function git_url(; end seekstart(io) - return String(take!(io)) + return takestring!(io) end function credential_identifier(scheme::AbstractString, host::AbstractString) diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl index 1b5cc1a752bcb..d68db483f999d 100644 --- a/stdlib/Markdown/src/Common/block.jl +++ b/stdlib/Markdown/src/Common/block.jl @@ -114,7 +114,7 @@ function indentcode(stream::IO, block::MD) break end end - code = String(take!(buffer)) + code = takestring!(buffer) !isempty(code) && (push!(block, Code(rstrip(code))); return true) return false end @@ -178,7 +178,7 @@ function blockquote(stream::IO, block::MD) end empty && return false - md = String(take!(buffer)) + md = takestring!(buffer) push!(block, BlockQuote(parse(md, flavor = config(block)).content)) return true end @@ -236,7 +236,7 @@ function admonition(stream::IO, block::MD) end end # Parse the nested block as markdown and create a new Admonition block. - nested = parse(String(take!(buffer)), flavor = config(block)) + nested = parse(takestring!(buffer), flavor = config(block)) push!(block, Admonition(category, title, nested.content)) return true end @@ -326,7 +326,7 @@ function list(stream::IO, block::MD) return true end end -pushitem!(list, buffer) = push!(list.items, parse(String(take!(buffer))).content) +pushitem!(list, buffer) = push!(list.items, parse(takestring!(buffer)).content) # –––––––––––––– # HorizontalRule diff --git a/stdlib/Markdown/src/parse/parse.jl b/stdlib/Markdown/src/parse/parse.jl index 389099b2984f6..371d52cfaf53f 100644 --- a/stdlib/Markdown/src/parse/parse.jl +++ b/stdlib/Markdown/src/parse/parse.jl @@ -65,7 +65,7 @@ function parseinline(stream::IO, md::MD, config::Config) char = peek(stream, Char) if haskey(config.inner, char) && (inner = parseinline(stream, md, config.inner[char])) !== nothing - c = String(take!(buffer)) + c = takestring!(buffer) !isempty(c) && push!(content, c) buffer = IOBuffer() push!(content, inner) @@ -73,7 +73,7 @@ function parseinline(stream::IO, md::MD, config::Config) write(buffer, read(stream, Char)) end end - c = String(take!(buffer)) + c = takestring!(buffer) !isempty(c) && push!(content, c) return content end diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index 5af03e0df9b6d..464fdab5a8337 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -166,7 +166,7 @@ region_active(s::PromptState) = s.region_active region_active(s::ModeState) = :off -input_string(s::PromptState) = String(take!(copy(s.input_buffer))) +input_string(s::PromptState) = takestring!(copy(s.input_buffer)) input_string_newlines(s::PromptState) = count(c->(c == '\n'), input_string(s)) function input_string_newlines_aftercursor(s::PromptState) @@ -1443,7 +1443,7 @@ function edit_input(s, f = (filename, line, column) -> InteractiveUtils.edit(fil end buf = buffer(s) pos = position(buf) - str = String(take!(buf)) + str = takestring!(buf) lines = readlines(IOBuffer(str); keep=true) # Compute line @@ -1672,7 +1672,7 @@ function normalize_key(key::Union{String,SubString{String}}) write(buf, c) end end - return String(take!(buf)) + return takestring!(buf) end function normalize_keys(keymap::Union{Dict{Char,Any},AnyDict}) @@ -2012,7 +2012,7 @@ function history_set_backward(s::SearchState, backward::Bool) nothing end -input_string(s::SearchState) = String(take!(copy(s.query_buffer))) +input_string(s::SearchState) = takestring!(copy(s.query_buffer)) function reset_state(s::SearchState) if s.query_buffer.size != 0 @@ -2100,7 +2100,7 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal, return ias end -input_string(s::PrefixSearchState) = String(take!(copy(s.response_buffer))) +input_string(s::PrefixSearchState) = takestring!(copy(s.response_buffer)) write_prompt(terminal, s::PrefixSearchState, color::Bool) = write_prompt(terminal, s.histprompt.parent_prompt, color) prompt_string(s::PrefixSearchState) = prompt_string(s.histprompt.parent_prompt.prompt) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index ddf2f55d0b9f7..5c62935488937 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -880,7 +880,7 @@ function hist_from_file(hp::REPLHistoryProvider, path::String) end function add_history(hist::REPLHistoryProvider, s::PromptState) - str = rstrip(String(take!(copy(s.input_buffer)))) + str = rstrip(takestring!(copy(s.input_buffer))) isempty(strip(str)) && return mode = mode_idx(hist, LineEdit.mode(s)) !isempty(hist.history) && @@ -1007,7 +1007,7 @@ function history_move_prefix(s::LineEdit.PrefixSearchState, prefix::AbstractString, backwards::Bool, cur_idx::Int = hist.cur_idx) - cur_response = String(take!(copy(LineEdit.buffer(s)))) + cur_response = takestring!(copy(LineEdit.buffer(s))) # when searching forward, start at last_idx if !backwards && hist.last_idx > 0 cur_idx = hist.last_idx From ec6e1289322b793cd1f653f879692b7999fb43ec Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 13 May 2024 08:14:04 +0200 Subject: [PATCH 21/35] Add imports --- stdlib/Base64/src/Base64.jl | 2 +- stdlib/FileWatching/src/FileWatching.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index f1fef096888ed..311ff68ce3c9a 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -8,7 +8,7 @@ a method to represent binary data using text, common on the web. """ module Base64 -using Base: require_one_based_indexing +using Base: require_one_based_indexing, unsafe_takestring! export Base64EncodePipe, diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 0c987ad01c828..27fc7231bcd2a 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -25,7 +25,7 @@ import Base: @handle_as, wait, close, eventloop, notify_error, IOError, _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct, preserve_handle, unpreserve_handle, isreadable, iswritable, isopen, - |, getproperty, propertynames + |, getproperty, propertynames, unsafe_takestring import Base.Filesystem.StatStruct if Sys.iswindows() import Base.WindowsRawSocket From 1d6715035ae1b45d9ff6155cfec4546707a4132d Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 13 May 2024 08:19:52 +0200 Subject: [PATCH 22/35] More uses --- stdlib/Markdown/src/GitHub/GitHub.jl | 4 ++-- stdlib/Markdown/src/Markdown.jl | 2 +- stdlib/Markdown/src/parse/util.jl | 4 ++-- stdlib/REPL/src/REPL.jl | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl index 61807d267511d..f53aaaa00e2aa 100644 --- a/stdlib/Markdown/src/GitHub/GitHub.jl +++ b/stdlib/Markdown/src/GitHub/GitHub.jl @@ -21,9 +21,9 @@ function fencedcode(stream::IO, block::MD) if startswith(stream, string(ch) ^ n) if !startswith(stream, string(ch)) if flavor == "math" - push!(block, LaTeX(String(take!(buffer)) |> chomp)) + push!(block, LaTeX(takestring!(buffer) |> chomp)) else - push!(block, Code(flavor, String(take!(buffer)) |> chomp)) + push!(block, Code(flavor, takestring!(buffer) |> chomp)) end return true else diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl index b9ff56297fe51..2503c1c01049b 100644 --- a/stdlib/Markdown/src/Markdown.jl +++ b/stdlib/Markdown/src/Markdown.jl @@ -9,7 +9,7 @@ literals `md"..."` and `doc"..."`. """ module Markdown -import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany +import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany, unsafe_takestring! using Base64: stringmime using StyledStrings: StyledStrings, Face, addface!, @styled_str, styled diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index aabfcbb3ddc62..6394351110656 100644 --- a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -141,7 +141,7 @@ function readuntil(stream::IO, delimiter; newlines = false, match = nothing) while !eof(stream) if startswith(stream, delimiter) if count == 0 - return String(take!(buffer)) + return unsafe_takestring!(buffer) else count -= 1 write(buffer, delimiter) @@ -187,7 +187,7 @@ function parse_inline_wrapper(stream::IO, delimiter::AbstractString; rep = false if !(char in whitespace || char == '\n' || char in delimiter) && startswith(stream, delimiter^n) trailing = 0 while startswith(stream, delimiter); trailing += 1; end - trailing == 0 && return String(take!(buffer)) + trailing == 0 && return takestring!(buffer) write(buffer, delimiter ^ (n + trailing)) end end diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 5c62935488937..7e84b83509041 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -1049,7 +1049,7 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo qpos = position(query_buffer) qpos > 0 || return true searchdata = beforecursor(query_buffer) - response_str = String(take!(copy(response_buffer))) + response_str = takestring!(copy(response_buffer)) # Alright, first try to see if the current match still works a = position(response_buffer) + 1 # position is zero-indexed @@ -1106,7 +1106,7 @@ end LineEdit.reset_state(hist::REPLHistoryProvider) = history_reset_state(hist) function return_callback(s) - ast = Base.parse_input_line(String(take!(copy(LineEdit.buffer(s)))), depwarn=false) + ast = Base.parse_input_line(takestring!(copy(LineEdit.buffer(s))), depwarn=false) return !(isa(ast, Expr) && ast.head === :incomplete) end @@ -1668,7 +1668,7 @@ let matchend = Dict("\"" => r"\"", "\"\"\"" => r"\"\"\"", "'" => r"'", pos = nextind(code, last(j)) end print(buf, SubString(code, pos, lastindex(code))) - return String(take!(buf)) + return takestring!(buf) end end From 1a498f459607dc32c8e1bd26a2daf197558ba066 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 13 May 2024 08:36:47 +0200 Subject: [PATCH 23/35] More imports --- stdlib/FileWatching/src/pidfile.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index f9085d67308e5..3a8b9430f4f68 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -5,7 +5,8 @@ export mkpidlock, trymkpidlock using Base: IOError, UV_EEXIST, UV_ESRCH, - Process + Process, + unsafe_takestring using Base.Filesystem: File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL, From 7c7ce9c34485b1744eb95c4e9acaecc4494a0a27 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 13 May 2024 09:03:06 +0200 Subject: [PATCH 24/35] Fix typo --- base/int.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/int.jl b/base/int.jl index a22728033abb8..581a875af4253 100644 --- a/base/int.jl +++ b/base/int.jl @@ -718,7 +718,7 @@ macro big_str(s) is_prev_dot = (c == '.') end print(bf, s[end]) - s = unsafe_takestring(bf) + s = unsafe_takestring!(bf) end n = tryparse(BigInt, s) n === nothing || return n From 0b9aff57b0c6a36b9bc2f909274e6e8a2212d3f9 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 13 May 2024 10:46:59 +0200 Subject: [PATCH 25/35] Imports --- base/filesystem.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/filesystem.jl b/base/filesystem.jl index bc1f4942877e8..9a5256f2d6478 100644 --- a/base/filesystem.jl +++ b/base/filesystem.jl @@ -141,7 +141,7 @@ import .Base: bytesavailable, position, read, read!, readavailable, seek, seekend, show, skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error, setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize, - isexecutable, isreadable, iswritable, MutableDenseArrayType + isexecutable, isreadable, iswritable, MutableDenseArrayType, unsafe_takestring! import .Base.RefValue From 8fc3cfd5e3fba580475b9053b57ad3309b18ff6a Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 13 May 2024 11:22:21 +0200 Subject: [PATCH 26/35] Don't reset non-writable streams --- base/iobuffer.jl | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index ea3c2417ab790..9a2b73100299a 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -504,7 +504,8 @@ end """ takestring!(io::IOBuffer) -> String -Return the content of `io` as a `String`, emptying the buffer. +Return the content of `io` as a `String`, resetting the buffer to its initial +state. This is preferred over calling `String(take!(io))` to create a string from an `IOBuffer`. @@ -512,7 +513,7 @@ an `IOBuffer`. ```jldoctest julia> io = IOBuffer(); -julia> write(io, 0x61); write(io, 0x62); write(io, 0x63); +julia> write(io, [0x61, 0x62, 0x63]); julia> s = takestring!(io) "abc" @@ -531,21 +532,16 @@ function takestring!(io::IOBuffer) mem = StringMemory(nbytes) start = io.seekable ? io.offset + 1 : io.ptr unsafe_copyto!(mem, 1, io.data, start, nbytes) - - # We can use unsafe_takestring here, because the IOBuffer - # gets its memory reallocated on next use due to setting - # io.reinit = true, so this can only cause UB if the user - # holds a reference to the internal .data field and mutates - # it. unsafe_takestring(mem) end - # Empty the IOBuffer, resetting it. - io.mark = -1 - io.ptr = 1 - io.size = 0 - io.offset = 0 - io.reinit = true + # Empty the IOBuffer, resetting it, if the buffer is writable + if io.writable + io.mark = -1 + io.ptr = 1 + io.size = 0 + io.offset = 0 + end return s end From 9948ec579893b8b6db23d7721a70a100dda90fb4 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Sat, 18 May 2024 14:07:53 +0200 Subject: [PATCH 27/35] Do not copy string when possible --- base/iobuffer.jl | 57 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 9a2b73100299a..48791ba22f114 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -489,16 +489,25 @@ function take!(io::IOBuffer) return data end -"Internal method. Copies the data from the IOBuffer to a string, leaving the -IOBuffer in an unusable (erroneous) state. -This assumes the IOBuffer is writable and seekable" +"Internal method. This method can be faster than takestring!, because it assumes +the buffer is writable and seekable, and because it does not reset the buffer +to a usable state. +Using the buffer after calling unsafe_takestring! may cause illegal behaviour. +This function is meant to be used when the buffer is only used as a temporary +string builder, which is discarded after the string is built." function unsafe_takestring!(io::IOBuffer) off = io.offset nbytes = io.size - off iszero(nbytes) && return "" - mem = StringMemory(nbytes) - unsafe_copyto!(mem, 1, io.data, off+1, nbytes) - return unsafe_takestring(mem) + # The C function can only copy from the start of the memory. + # Fortunately, in most cases, the offset will be zero. + return if iszero(off) + ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, nbytes) + else + mem = StringMemory(nbytes) + unsafe_copyto!(mem, 1, io.data, off + 1, nbytes) + unsafe_takestring!(mem) + end end """ @@ -526,23 +535,35 @@ function takestring!(io::IOBuffer) nbytes = filesize(io) # If the memory is invalidated (and hence unused), or no bytes, return empty string - s = if (iszero(nbytes) || io.reinit) + return if (iszero(nbytes) || io.reinit) "" else - mem = StringMemory(nbytes) start = io.seekable ? io.offset + 1 : io.ptr - unsafe_copyto!(mem, 1, io.data, start, nbytes) - unsafe_takestring(mem) - end + # The C function can only copy from the start of the memory. + # Fortunately, in most cases, start will be 1. + s = if isone(start) + ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, nbytes) + else + mem = StringMemory(nbytes) + unsafe_copyto!(mem, 1, io.data, start, nbytes) + unsafe_takestring!(mem) + end - # Empty the IOBuffer, resetting it, if the buffer is writable - if io.writable - io.mark = -1 - io.ptr = 1 - io.size = 0 - io.offset = 0 + # Empty the IOBuffer, resetting it, if the buffer is writable. + # By setting io.reinit and setting its size to zero, we ensure the memory + # is written. + # If the buffer is not writable, the assumption is that the memory will + # never be mutated, and as such, it's okay that a string shares memory + # with this object. + if io.writable + io.reinit = true + io.mark = -1 + io.ptr = 1 + io.size = 0 + io.offset = 0 + end + s end - return s end """ From 176506c4708cb68c07259f826a749a79c96635ba Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 27 May 2024 10:02:33 +0200 Subject: [PATCH 28/35] Add 1.12 compat --- base/iobuffer.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 48791ba22f114..b4e87a88d3e02 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -530,6 +530,9 @@ julia> s = takestring!(io) julia> isempty(take!(io)) # io is now empty true ``` + +!!! compat "Julia 1.12" + This function requires at least Julia 1.12. """ function takestring!(io::IOBuffer) nbytes = filesize(io) From 1689258e0c8e09f569c9b891d703ee3c5c4981c3 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 23 Jul 2024 10:39:07 +0200 Subject: [PATCH 29/35] Try using unsafe_takestring internally in takestring --- base/iobuffer.jl | 53 +++++++++++++++++------------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index b4e87a88d3e02..896900687839a 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -490,9 +490,8 @@ function take!(io::IOBuffer) end "Internal method. This method can be faster than takestring!, because it assumes -the buffer is writable and seekable, and because it does not reset the buffer -to a usable state. -Using the buffer after calling unsafe_takestring! may cause illegal behaviour. +the buffer is seekable, and because it does not reset the buffer to a usable state. +Using the buffer after calling unsafe_takestring! may cause undefined behaviour. This function is meant to be used when the buffer is only used as a temporary string builder, which is discarded after the string is built." function unsafe_takestring!(io::IOBuffer) @@ -535,38 +534,24 @@ true This function requires at least Julia 1.12. """ function takestring!(io::IOBuffer) - nbytes = filesize(io) - - # If the memory is invalidated (and hence unused), or no bytes, return empty string - return if (iszero(nbytes) || io.reinit) - "" - else - start = io.seekable ? io.offset + 1 : io.ptr - # The C function can only copy from the start of the memory. - # Fortunately, in most cases, start will be 1. - s = if isone(start) - ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, nbytes) - else - mem = StringMemory(nbytes) - unsafe_copyto!(mem, 1, io.data, start, nbytes) - unsafe_takestring!(mem) - end - - # Empty the IOBuffer, resetting it, if the buffer is writable. - # By setting io.reinit and setting its size to zero, we ensure the memory - # is written. - # If the buffer is not writable, the assumption is that the memory will - # never be mutated, and as such, it's okay that a string shares memory - # with this object. - if io.writable - io.reinit = true - io.mark = -1 - io.ptr = 1 - io.size = 0 - io.offset = 0 - end - s + # Note: Currently, `IOBuffer`s (as opposed to `GenericIOBuffer`) is always seekable, since all + # IOBuffer constructors return seekable buffers, and GenericIOBuffer is internal. + # This makes `unsafe_takestring!` valid. + s = unsafe_takestring!(io) + + # Restore the buffer to a usable state, making it no longer undefined behaviour to + # use the buffer after the `unsafe_takestring!` call. + # Note that if the buffer is not writable, there is no need to reinitialize the buffer, + # since it doesn't matter that the returned string looks into the same memory - + # this is because the buffer is not mutable through either the string nor the buffer. + if io.writable + io.reinit = true + io.mark = -1 + io.ptr = 1 + io.size = 0 + io.offset = 0 end + s end """ From 998c9f66e6004e58d962a32b80cf880dcb35df39 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 23 Jul 2024 10:46:40 +0200 Subject: [PATCH 30/35] Check for init in takestring --- base/iobuffer.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 896900687839a..5769893207638 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -534,7 +534,11 @@ true This function requires at least Julia 1.12. """ function takestring!(io::IOBuffer) - # Note: Currently, `IOBuffer`s (as opposed to `GenericIOBuffer`) is always seekable, since all + # If the buffer has been used up and needs to be replaced, there are no bytes, and + # we can return an empty string without interacting with the buffer at all. + io.reinit && return "" + + # Currently, `IOBuffer`s (as opposed to `GenericIOBuffer`) is always seekable, since all # IOBuffer constructors return seekable buffers, and GenericIOBuffer is internal. # This makes `unsafe_takestring!` valid. s = unsafe_takestring!(io) From c9bfe0956643d4a19e977de0a69adf6b3fbdb381 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Wed, 24 Jul 2024 10:27:58 +0200 Subject: [PATCH 31/35] Address reviewer comments * Do not assume io.seekable in unsafe_takestring!(::IOBuffer) * Add fallback definitions unsafe_takestring!(::GenericIOBuffer) and takestring!(::GenericIOBuffer) * Test takestring! with a nonzero offset in buffer --- base/iobuffer.jl | 21 +++++++++++---------- test/iobuffer.jl | 9 +++++++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 5769893207638..d715b4e45b2bb 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -489,23 +489,23 @@ function take!(io::IOBuffer) return data end -"Internal method. This method can be faster than takestring!, because it assumes -the buffer is seekable, and because it does not reset the buffer to a usable state. +"Internal method. This method can be faster than takestring!, because it does not +reset the buffer to a usable state, and it does not check for io.reinit. Using the buffer after calling unsafe_takestring! may cause undefined behaviour. This function is meant to be used when the buffer is only used as a temporary string builder, which is discarded after the string is built." function unsafe_takestring!(io::IOBuffer) - off = io.offset - nbytes = io.size - off + start = io.seekable ? io.offset + 1 : io.ptr + nbytes = io.size - start + 1 iszero(nbytes) && return "" # The C function can only copy from the start of the memory. # Fortunately, in most cases, the offset will be zero. - return if iszero(off) + return if isone(start) ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, nbytes) else mem = StringMemory(nbytes) - unsafe_copyto!(mem, 1, io.data, off + 1, nbytes) - unsafe_takestring!(mem) + unsafe_copyto!(mem, 1, io.data, start, nbytes) + unsafe_takestring(mem) end end @@ -538,9 +538,6 @@ function takestring!(io::IOBuffer) # we can return an empty string without interacting with the buffer at all. io.reinit && return "" - # Currently, `IOBuffer`s (as opposed to `GenericIOBuffer`) is always seekable, since all - # IOBuffer constructors return seekable buffers, and GenericIOBuffer is internal. - # This makes `unsafe_takestring!` valid. s = unsafe_takestring!(io) # Restore the buffer to a usable state, making it no longer undefined behaviour to @@ -558,6 +555,10 @@ function takestring!(io::IOBuffer) s end +# Fallback methods +unsafe_takestring!(io::GenericIOBuffer) = takestring!(io) +takestring!(io::GenericIOBuffer) = String(take!(io)) + """ _unsafe_take!(io::IOBuffer) diff --git a/test/iobuffer.jl b/test/iobuffer.jl index 89245878e25bd..843cc72b6b5a8 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -63,6 +63,15 @@ end write(buf, "xyz") @test takestring!(buf) == "xyz" buf = IOBuffer() + + # Test with a nonzero offset in the buffer + v = rand(UInt8, 8) + for i in 1:8 + pushfirst!(v, rand(UInt8)) + end + buf = IOBuffer(v) + s = String(copy(v)) + @test takestring!(buf) == s end @testset "Read/write readonly IOBuffer" begin From 67b4d9a146b07ac53afc105c97417bea6179a18b Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Wed, 31 Jul 2024 11:09:20 +0200 Subject: [PATCH 32/35] Use unsafe version of takestring more rarely Specifically, do not use it when a value of an unknown type prints to the buffer that is being mutated, since any user may define printing of these values to store references to the buffer somewhere else. --- base/indices.jl | 2 +- base/io.jl | 10 ++++++++-- base/iostream.jl | 2 +- base/show.jl | 2 +- base/strings/annotated.jl | 4 ++-- base/strings/basic.jl | 2 +- base/strings/io.jl | 8 ++++---- base/strings/unicode.jl | 2 +- base/strings/util.jl | 2 +- base/task.jl | 2 +- stdlib/Base64/src/encode.jl | 2 +- stdlib/Markdown/src/parse/util.jl | 2 +- 12 files changed, 23 insertions(+), 17 deletions(-) diff --git a/base/indices.jl b/base/indices.jl index fbcf6b924a3a9..4253755d552a5 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -132,7 +132,7 @@ function throw_promote_shape_mismatch(a::Tuple, b::Union{Nothing,Tuple}, i = not if i ≢ nothing print(msg, ", mismatch at dim ", i) end - throw(DimensionMismatch(unsafe_takestring!(msg))) + throw(DimensionMismatch(takestring!(msg))) end function promote_shape(a::Tuple{Int,}, b::Tuple{Int,}) diff --git a/base/io.jl b/base/io.jl index 03d9ac9614616..529c5c5fd17b4 100644 --- a/base/io.jl +++ b/base/io.jl @@ -543,8 +543,8 @@ julia> rm("my_file.txt") ``` """ readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) -readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) -readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = unsafe_takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) +readuntil(stream::IO, delim::UInt8; kw...) = takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep) @@ -616,7 +616,13 @@ Logan """ readline(filename::AbstractString; keep::Bool=false) = open(io -> readline(io; keep), filename) +<<<<<<< HEAD readline(s::IO=stdin; keep::Bool=false) = unsafe_takestring!(copyline(IOBuffer(sizehint=16), s; keep)) +||||||| parent of 8a7667fc90 (Use unsafe version of takestring more rarely) +readline(s::IO=stdin; keep::Bool=false) = unsafe_takestring!(copyline(IOBuffer(sizehint=70), s; keep)) +======= +readline(s::IO=stdin; keep::Bool=false) = takestring!(copyline(IOBuffer(sizehint=70), s; keep)) +>>>>>>> 8a7667fc90 (Use unsafe version of takestring more rarely) """ copyline(out::IO, io::IO=stdin; keep::Bool=false) diff --git a/base/iostream.jl b/base/iostream.jl index ff035ba384256..767ef8ba882c2 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -456,7 +456,7 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) end readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = isascii(delim) ? readuntil_string(s, delim % UInt8, keep) : - unsafe_takestring!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)) + takestring!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) diff --git a/base/show.jl b/base/show.jl index def83e4eaf14e..d39fb687f5dd8 100644 --- a/base/show.jl +++ b/base/show.jl @@ -3150,7 +3150,7 @@ summary(io::IO, x) = print(io, typeof(x)) function summary(x) io = IOBuffer() summary(io, x) - unsafe_takestring!(io) + takestring!(io) end ## `summary` for AbstractArrays diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 57f80e15b7a47..5bb32abde2b9a 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -274,7 +274,7 @@ function annotatedstring(xs...) print(s, x) end end - str = unsafe_takestring!(buf) + str = takestring!(buf) AnnotatedString(str, annotations) end @@ -457,7 +457,7 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin stop_offset = last(offsets[findlast(<=(stop) ∘ first, offsets)::Int]) push!(annots, ((start + start_offset):(stop + stop_offset), value)) end - AnnotatedString(unsafe_takestring!(outstr), annots) + AnnotatedString(takestring!(outstr), annots) end ## AnnotatedIOBuffer diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 2710fc926c9a1..927d86b79474a 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -681,7 +681,7 @@ function filter(f, s::AbstractString) for c in s f(c) && write(out, c) end - unsafe_takestring!(out) + takestring!(out) end ## string first and last ## diff --git a/base/strings/io.jl b/base/strings/io.jl index 72eb14d76f996..9b278122f5bc9 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -113,7 +113,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0) else f(s, args...) end - unsafe_takestring!(s) + takestring!(s) end function _str_sizehint(x) @@ -147,7 +147,7 @@ function print_to_string(xs...) for x in xs print(s, x) end - unsafe_takestring!(s) + takestring!(s) end function string_with_env(env, xs...) @@ -164,7 +164,7 @@ function string_with_env(env, xs...) for x in xs print(env_io, x) end - unsafe_takestring!(s) + takestring!(s) end """ @@ -777,7 +777,7 @@ function unindent(str::AbstractString, indent::Int; tabwidth=8) print(buf, ' ') end end - unsafe_takestring!(buf) + takestring!(buf) end function String(a::AbstractVector{Char}) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index ad903ecfb793d..b2882b49d89b3 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -689,7 +689,7 @@ function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Boo end c0 = c end - return unsafe_takestring!(b) + return takestring!(b) end # TODO: improve performance characteristics, room for a ~10x improvement. diff --git a/base/strings/util.jl b/base/strings/util.jl index 5848d80d2cbd1..2d530f5e2124f 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1260,5 +1260,5 @@ function Base.rest(s::AbstractString, st...) for c in Iterators.rest(s, st...) print(io, c) end - return unsafe_takestring!(io) + return takestring!(io) end diff --git a/base/task.jl b/base/task.jl index f7439418f2762..61cc103c70081 100644 --- a/base/task.jl +++ b/base/task.jl @@ -95,7 +95,7 @@ function show_task_exception(io::IO, t::Task; indent = true) else show_exception_stack(IOContext(b, io), stack) end - str = unsafe_takestring!(b) + str = takestring!(b) if indent str = replace(str, "\n" => "\n ") end diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl index 958bd8ce061fc..d55421c52ff55 100644 --- a/stdlib/Base64/src/encode.jl +++ b/stdlib/Base64/src/encode.jl @@ -211,6 +211,6 @@ function base64encode(f::Function, args...; context=nothing) f(IOContext(b, context), args...) end close(b) - return unsafe_takestring!(s) + return takestring!(s) end base64encode(args...; context=nothing) = base64encode(write, args...; context=context) diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index 6394351110656..cd8158780bd6d 100644 --- a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -141,7 +141,7 @@ function readuntil(stream::IO, delimiter; newlines = false, match = nothing) while !eof(stream) if startswith(stream, delimiter) if count == 0 - return unsafe_takestring!(buffer) + return takestring!(buffer) else count -= 1 write(buffer, delimiter) From d7ff27611ca6472e8af6a64be492fd9266ffdc1f Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 9 Sep 2024 13:50:02 +0200 Subject: [PATCH 33/35] Fix error when rebasing --- base/io.jl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/base/io.jl b/base/io.jl index 529c5c5fd17b4..efda0fbcf6fae 100644 --- a/base/io.jl +++ b/base/io.jl @@ -616,13 +616,7 @@ Logan """ readline(filename::AbstractString; keep::Bool=false) = open(io -> readline(io; keep), filename) -<<<<<<< HEAD -readline(s::IO=stdin; keep::Bool=false) = unsafe_takestring!(copyline(IOBuffer(sizehint=16), s; keep)) -||||||| parent of 8a7667fc90 (Use unsafe version of takestring more rarely) -readline(s::IO=stdin; keep::Bool=false) = unsafe_takestring!(copyline(IOBuffer(sizehint=70), s; keep)) -======= -readline(s::IO=stdin; keep::Bool=false) = takestring!(copyline(IOBuffer(sizehint=70), s; keep)) ->>>>>>> 8a7667fc90 (Use unsafe version of takestring more rarely) +readline(s::IO=stdin; keep::Bool=false) = takestring!(copyline(IOBuffer(sizehint=16), s; keep)) """ copyline(out::IO, io::IO=stdin; keep::Bool=false) From 462fcae7127ee1afae40e56bc5292175f351bb23 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 10 Sep 2024 09:58:59 +0200 Subject: [PATCH 34/35] Address some reviewer comments --- base/io.jl | 2 +- base/strings/string.jl | 2 +- stdlib/Base64/src/Base64.jl | 2 +- stdlib/Markdown/src/Markdown.jl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/io.jl b/base/io.jl index efda0fbcf6fae..9e0308cffd653 100644 --- a/base/io.jl +++ b/base/io.jl @@ -543,7 +543,7 @@ julia> rm("my_file.txt") ``` """ readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) -readuntil(stream::IO, delim::UInt8; kw...) = takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) +readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep) diff --git a/base/strings/string.jl b/base/strings/string.jl index 270716f4e37a3..04ecdab089ce0 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -99,7 +99,7 @@ takestring!(v::Vector{UInt8}) = String(v) "Create a string re-using the memory, if possible. Mutating the memory after calling this function is undefined behaviour." function unsafe_takestring(m::Memory{UInt8}) - ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) + isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) end """ diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index 311ff68ce3c9a..f1fef096888ed 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -8,7 +8,7 @@ a method to represent binary data using text, common on the web. """ module Base64 -using Base: require_one_based_indexing, unsafe_takestring! +using Base: require_one_based_indexing export Base64EncodePipe, diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl index 2503c1c01049b..b9ff56297fe51 100644 --- a/stdlib/Markdown/src/Markdown.jl +++ b/stdlib/Markdown/src/Markdown.jl @@ -9,7 +9,7 @@ literals `md"..."` and `doc"..."`. """ module Markdown -import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany, unsafe_takestring! +import Base: AnnotatedString, AnnotatedIOBuffer, show, ==, with_output_color, mapany using Base64: stringmime using StyledStrings: StyledStrings, Face, addface!, @styled_str, styled From 35fe11c92ac6a243572cbe5ffd204cd51a991ece Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 4 Oct 2024 09:28:47 +0200 Subject: [PATCH 35/35] Address review: Do not unsafe_take memory the user may hold externally --- base/iobuffer.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index d715b4e45b2bb..6e3e1ea30b671 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -538,6 +538,14 @@ function takestring!(io::IOBuffer) # we can return an empty string without interacting with the buffer at all. io.reinit && return "" + # If the buffer is not writable, it may hold external memory the user has a + # reference to. We can't use unsafe_takestring! because the GC would take control + # of the memory and could deallocate it while the user still has a reference to it. + if !io.writable + nbytes = filesize(io) + return copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) + end + s = unsafe_takestring!(io) # Restore the buffer to a usable state, making it no longer undefined behaviour to