diff --git a/NEWS.md b/NEWS.md index c12cc3c64300c..37edaa843a17e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,6 +103,7 @@ New library features * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) * `Lockable` is now exported ([#54595]) * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) +* `takestring!(v)` creates a `String` from `v` (a `Vector{UInt8}` or `IOBuffer`), truncating `v` and reusing its memory if possible ([#54372]) Standard library changes ------------------------ diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl index 7d936a1688aba..53799e980462b 100644 --- a/base/compiler/ssair/show.jl +++ b/base/compiler/ssair/show.jl @@ -312,7 +312,7 @@ function compute_ir_line_annotations(code::IRCode) loc_method = string(" "^printing_depth, loc_method) last_stack = stack end - push!(loc_annotations, String(take!(buf))) + push!(loc_annotations, takestring!(buf)) push!(loc_lineno, (lineno != 0 && lineno != last_lineno) ? string(lineno) : "") push!(loc_methods, loc_method) (lineno != 0) && (last_lineno = lineno) diff --git a/base/errorshow.jl b/base/errorshow.jl index a3bf464439d44..1454193a11393 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -305,7 +305,7 @@ function showerror(io::IO, ex::MethodError) iob = IOContext(buf, io) # for type abbreviation as in #49795; some, like `convert(T, x)`, should not abbreviate show_signature_function(iob, Core.Typeof(f)) show_tuple_as_call(iob, :function, arg_types; hasfirst=false, kwargs = isempty(kwargs) ? 
nothing : kwargs) - str = String(take!(buf)) + str = takestring!(buf) str = type_limited_string_from_context(io, str) print(io, str) end @@ -596,7 +596,7 @@ function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) m = parentmodule_before_main(method) modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3) - push!(lines, String(take!(buf))) + push!(lines, takestring!(buf)) push!(line_score, -(right_matches * 2 + (length(arg_types_param) < 2 ? 1 : 0))) end end diff --git a/base/exports.jl b/base/exports.jl index daba9a010a9e6..b1eda2b60c52c 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -638,6 +638,7 @@ export split, string, strip, + takestring!, textwidth, thisind, titlecase, diff --git a/base/filesystem.jl b/base/filesystem.jl index bc1f4942877e8..9a5256f2d6478 100644 --- a/base/filesystem.jl +++ b/base/filesystem.jl @@ -141,7 +141,7 @@ import .Base: bytesavailable, position, read, read!, readavailable, seek, seekend, show, skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error, setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize, - isexecutable, isreadable, iswritable, MutableDenseArrayType + isexecutable, isreadable, iswritable, MutableDenseArrayType, unsafe_takestring! 
import .Base.RefValue diff --git a/base/gmp.jl b/base/gmp.jl index 1eaa20d6baecf..df0d9fee49348 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -11,7 +11,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb, widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit, sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit, - clamp + clamp, unsafe_takestring if Clong == Int32 const ClongMax = Union{Int8, Int16, Int32} @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - String(sv) + unsafe_takestring(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/indices.jl b/base/indices.jl index 455bb0f7656a1..4253755d552a5 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -132,7 +132,7 @@ function throw_promote_shape_mismatch(a::Tuple, b::Union{Nothing,Tuple}, i = not if i ≢ nothing print(msg, ", mismatch at dim ", i) end - throw(DimensionMismatch(String(take!(msg)))) + throw(DimensionMismatch(takestring!(msg))) end function promote_shape(a::Tuple{Int,}, b::Tuple{Int,}) diff --git a/base/int.jl b/base/int.jl index a25b17e2cc958..581a875af4253 100644 --- a/base/int.jl +++ b/base/int.jl @@ -718,7 +718,7 @@ macro big_str(s) is_prev_dot = (c == '.') end print(bf, s[end]) - s = String(take!(bf)) + s = unsafe_takestring!(bf) end n = tryparse(BigInt, s) n === nothing || return n diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 8d46fcffa3ad5..1515c2f763d7e 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -766,7 +766,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -780,7 +780,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end 
neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end # 2-digit decimal characters ("00":"99") @@ -850,7 +850,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -871,7 +871,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -896,7 +896,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -972,7 +972,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return String(str) + return unsafe_takestring(str) end """ diff --git a/base/io.jl b/base/io.jl index 83a215d6359fc..9e0308cffd653 100644 --- a/base/io.jl +++ b/base/io.jl @@ -277,13 +277,13 @@ julia> io = IOBuffer(); julia> write(io, "JuliaLang is a GitHub organization.", " It has many members.") 56 -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization. It has many members." julia> write(io, "Sometimes those members") + write(io, " write documentation.") 44 -julia> String(take!(io)) +julia> takestring!(io) "Sometimes those members write documentation." ``` User-defined plain-data types without `write` methods can be written when wrapped in a `Ref`: @@ -544,7 +544,7 @@ julia> rm("my_file.txt") """ readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) -readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) 
= String(_unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = takestring!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)) readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep) @@ -566,10 +566,10 @@ Similar to [`readuntil`](@ref), which returns a `String`; in contrast, ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); -julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", 'L'))) +julia> takestring!(copyuntil(IOBuffer(), "my_file.txt", 'L')) "Julia" -julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true))) +julia> takestring!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true)) "JuliaLang is a GitHub organization." julia> rm("my_file.txt") @@ -616,8 +616,7 @@ Logan """ readline(filename::AbstractString; keep::Bool=false) = open(io -> readline(io; keep), filename) -readline(s::IO=stdin; keep::Bool=false) = - String(_unsafe_take!(copyline(IOBuffer(sizehint=16), s; keep))) +readline(s::IO=stdin; keep::Bool=false) = takestring!(copyline(IOBuffer(sizehint=16), s; keep)) """ copyline(out::IO, io::IO=stdin; keep::Bool=false) @@ -642,10 +641,10 @@ See also [`copyuntil`](@ref) for reading until more general delimiters. ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); -julia> String(take!(copyline(IOBuffer(), "my_file.txt"))) +julia> takestring!(copyline(IOBuffer(), "my_file.txt")) "JuliaLang is a GitHub organization." 
-julia> String(take!(copyline(IOBuffer(), "my_file.txt", keep=true))) +julia> takestring!(copyline(IOBuffer(), "my_file.txt", keep=true)) "JuliaLang is a GitHub organization.\\n" julia> rm("my_file.txt") @@ -1291,7 +1290,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) empty!(chunks) # will cause next iteration to terminate seekend(r.itr.stream) # reposition to end of stream for isdone - s = String(_unsafe_take!(buf)) + s = unsafe_takestring!(buf) else # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline] if ichunk == jchunk # common case: current and previous newline in same chunk @@ -1308,7 +1307,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) end write(buf, view(chunks[jchunk], 1:jnewline)) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) - s = String(_unsafe_take!(buf)) + s = unsafe_takestring!(buf) # overwrite obsolete chunks (ichunk+1:jchunk) i = jchunk diff --git a/base/iobuffer.jl b/base/iobuffer.jl index c0c2731eec08b..6e3e1ea30b671 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -66,7 +66,7 @@ julia> io = IOBuffer(); julia> write(io, "JuliaLang is a GitHub organization.", " It has many members.") 56 -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization. It has many members." julia> io = IOBuffer(b"JuliaLang is a GitHub organization.") @@ -84,7 +84,7 @@ IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=fa julia> write(io, "JuliaLang is a GitHub organization.") 34 -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization" julia> length(read(IOBuffer(b"data", read=true, truncate=false))) @@ -489,6 +489,84 @@ function take!(io::IOBuffer) return data end +"Internal method. This method can be faster than takestring!, because it does not +reset the buffer to a usable state, and it does not check for io.reinit. 
+Using the buffer after calling unsafe_takestring! may cause undefined behaviour. +This function is meant to be used when the buffer is only used as a temporary +string builder, which is discarded after the string is built." +function unsafe_takestring!(io::IOBuffer) + start = io.seekable ? io.offset + 1 : io.ptr + nbytes = io.size - start + 1 + iszero(nbytes) && return "" + # The C function can only copy from the start of the memory. + # Fortunately, in most cases, the offset will be zero. + return if isone(start) + ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, nbytes) + else + mem = StringMemory(nbytes) + unsafe_copyto!(mem, 1, io.data, start, nbytes) + unsafe_takestring(mem) + end +end + +""" + takestring!(io::IOBuffer) -> String + +Return the content of `io` as a `String`. A writable buffer is reset to its initial +state; a read-only buffer is left untouched and its content is copied. +This is preferred over calling `String(take!(io))` to create a string from +an `IOBuffer`. + +# Examples +```jldoctest +julia> io = IOBuffer(); + +julia> write(io, [0x61, 0x62, 0x63]); + +julia> s = takestring!(io) +"abc" + +julia> isempty(take!(io)) # io is now empty +true +``` + +!!! compat "Julia 1.12" + This function requires at least Julia 1.12. +""" +function takestring!(io::IOBuffer) + # If the buffer has been used up and needs to be replaced, there are no bytes, and + # we can return an empty string without interacting with the buffer at all. + io.reinit && return "" + + # If the buffer is not writable, it may hold external memory the user has a + # reference to. We can't use unsafe_takestring! because the GC would take control + # of the memory and could deallocate it while the user still has a reference to it. + if !io.writable + nbytes = filesize(io) + return String(copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes)) + end + + s = unsafe_takestring!(io) + + # Restore the buffer to a usable state, making it no longer undefined behaviour to + # use the buffer after the `unsafe_takestring!` call. 
+ # Read-only buffers returned a copy above and are never reinitialized, so only + # writable buffers reach this point. The `io.writable` check below is therefore + # always true; it is kept so the reset stays correct if the early return changes. + if io.writable + io.reinit = true + io.mark = -1 + io.ptr = 1 + io.size = 0 + io.offset = 0 + end + s +end + +# Fallback methods +unsafe_takestring!(io::GenericIOBuffer) = takestring!(io) +takestring!(io::GenericIOBuffer) = String(take!(io)) + """ _unsafe_take!(io::IOBuffer) diff --git a/base/iostream.jl b/base/iostream.jl index 762f881cfbecb..767ef8ba882c2 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -98,7 +98,7 @@ julia> write(io, "JuliaLang is a GitHub organization.") julia> truncate(io, 15) IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=15, maxsize=Inf, ptr=16, mark=-1) -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a " julia> io = IOBuffer(); @@ -107,7 +107,7 @@ julia> write(io, "JuliaLang is a GitHub organization."); julia> truncate(io, 40); -julia> String(take!(io)) +julia> takestring!(io) "JuliaLang is a GitHub organization.\\0\\0\\0\\0\\0" ``` """ @@ -456,7 +456,7 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) end readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = isascii(delim) ? readuntil_string(s, delim % UInt8, keep) : - String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep))) + takestring!(copyuntil(IOBuffer(sizehint=70), s, delim; keep)) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 
0 : 2) diff --git a/base/logging/ConsoleLogger.jl b/base/logging/ConsoleLogger.jl index c4596dd86c3f5..c0a98822b52d4 100644 --- a/base/logging/ConsoleLogger.jl +++ b/base/logging/ConsoleLogger.jl @@ -141,7 +141,7 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module for (key, val) in kwargs key === :maxlog && continue showvalue(valio, val) - vallines = split(String(take!(valbuf)), '\n') + vallines = split(takestring!(valbuf), '\n') if length(vallines) == 1 push!(msglines, (indent=2, msg=SubString("$key = $(vallines[1])"))) else diff --git a/base/pkgid.jl b/base/pkgid.jl index 8c776d79a69cb..9878c0b0a8a40 100644 --- a/base/pkgid.jl +++ b/base/pkgid.jl @@ -32,7 +32,7 @@ function binpack(pkg::PkgId) uuid = pkg.uuid write(io, uuid === nothing ? UInt128(0) : UInt128(uuid)) write(io, pkg.name) - return String(take!(io)) + return unsafe_takestring!(io) end function binunpack(s::String) diff --git a/base/show.jl b/base/show.jl index 0a2976e7ebe42..d39fb687f5dd8 100644 --- a/base/show.jl +++ b/base/show.jl @@ -375,12 +375,12 @@ julia> io = IOBuffer(); julia> printstyled(IOContext(io, :color => true), "string", color=:red) -julia> String(take!(io)) +julia> takestring!(io) "\\e[31mstring\\e[39m" julia> printstyled(io, "string", color=:red) -julia> String(take!(io)) +julia> takestring!(io) "string" ``` @@ -2609,7 +2609,7 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type; end print_within_stacktrace(io, ")", bold=true) show_method_params(io, tv) - str = String(take!(buf)) + str = takestring!(buf) str = type_limited_string_from_context(out, str) print(out, str) nothing @@ -2718,7 +2718,7 @@ function type_depth_limit(str::String, n::Int; maxdepth = nothing) end prev = di end - return String(take!(output)) + return unsafe_takestring!(output) end function print_type_bicolor(io, type; kwargs...) 
@@ -3150,7 +3150,7 @@ summary(io::IO, x) = print(io, typeof(x)) function summary(x) io = IOBuffer() summary(io, x) - String(take!(io)) + takestring!(io) end ## `summary` for AbstractArrays diff --git a/base/stat.jl b/base/stat.jl index 506b5644dccbc..48967bbae0035 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -301,7 +301,7 @@ function filemode_string(mode) end complete && write(str, "-") end - return String(take!(str)) + return unsafe_takestring!(str) end """ diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index be4c6887d4a6d..5bb32abde2b9a 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -274,7 +274,7 @@ function annotatedstring(xs...) print(s, x) end end - str = String(take!(buf)) + str = takestring!(buf) AnnotatedString(str, annotations) end @@ -457,7 +457,7 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin stop_offset = last(offsets[findlast(<=(stop) ∘ first, offsets)::Int]) push!(annots, ((start + start_offset):(stop + stop_offset), value)) end - AnnotatedString(String(take!(outstr)), annots) + AnnotatedString(takestring!(outstr), annots) end ## AnnotatedIOBuffer diff --git a/base/strings/basic.jl b/base/strings/basic.jl index bf11199143c1e..927d86b79474a 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -681,7 +681,7 @@ function filter(f, s::AbstractString) for c in s f(c) && write(out, c) end - String(_unsafe_take!(out)) + takestring!(out) end ## string first and last ## diff --git a/base/strings/io.jl b/base/strings/io.jl index acbd945c8e137..9b278122f5bc9 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -25,7 +25,7 @@ julia> io = IOBuffer(); julia> print(io, "Hello", ' ', :World!) -julia> String(take!(io)) +julia> takestring!(io) "Hello World!" 
``` """ @@ -68,7 +68,7 @@ julia> io = IOBuffer(); julia> println(io, "Hello", ',', " world.") -julia> String(take!(io)) +julia> takestring!(io) "Hello, world.\\n" ``` """ @@ -113,7 +113,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0) else f(s, args...) end - String(_unsafe_take!(s)) + takestring!(s) end function _str_sizehint(x) @@ -147,7 +147,7 @@ function print_to_string(xs...) for x in xs print(s, x) end - String(_unsafe_take!(s)) + takestring!(s) end function string_with_env(env, xs...) @@ -164,7 +164,7 @@ function string_with_env(env, xs...) for x in xs print(env_io, x) end - String(_unsafe_take!(s)) + takestring!(s) end """ @@ -298,10 +298,10 @@ Create a read-only `IOBuffer` on the data underlying the given string. ```jldoctest julia> io = IOBuffer("Haho"); -julia> String(take!(io)) +julia> takestring!(io) "Haho" -julia> String(take!(io)) +julia> takestring!(io) "Haho" ``` """ @@ -777,7 +777,7 @@ function unindent(str::AbstractString, indent::Int; tabwidth=8) print(buf, ' ') end end - String(take!(buf)) + takestring!(buf) end function String(a::AbstractVector{Char}) diff --git a/base/strings/string.jl b/base/strings/string.jl index 90d6e5b26ccd3..04ecdab089ce0 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -61,14 +61,9 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. 
""" -String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v)) -function String(v::Memory{UInt8}) - len = length(v) - len == 0 && return "" - return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) -end +String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v)) + function String(v::Vector{UInt8}) - #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) len = length(v) len == 0 && return "" ref = v.ref @@ -83,6 +78,30 @@ function String(v::Vector{UInt8}) return str end +""" + takestring!(x) -> String + +Create a string from the content of `x`, emptying `x`. + +# Examples +```jldoctest +julia> v = [0x61, 0x62, 0x63]; + +julia> s = takestring!(v) +"abc" + +julia> isempty(v) +true +``` +""" +takestring!(v::Vector{UInt8}) = String(v) + +"Create a string re-using the memory, if possible. +Mutating the memory after calling this function is undefined behaviour." +function unsafe_takestring(m::Memory{UInt8}) + isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) +end + """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index ad047514c85a6..b2882b49d89b3 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -6,7 +6,7 @@ module Unicode import Base: show, ==, hash, string, Symbol, isless, length, eltype, convert, isvalid, ismalformed, isoverlong, iterate, AnnotatedString, AnnotatedChar, annotated_chartransform, - @assume_effects, annotations + @assume_effects, annotations, unsafe_takestring! # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff @@ -689,7 +689,7 @@ function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Boo end c0 = c end - return String(take!(b)) + return takestring!(b) end # TODO: improve performance characteristics, room for a ~10x improvement. 
diff --git a/base/strings/util.jl b/base/strings/util.jl index 0ba76e1c76fa0..2d530f5e2124f 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1028,7 +1028,7 @@ function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N return String(str) end out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) - return String(take!(_replace_finish(out, str, count, e1, patterns, replaces, rs))) + return takestring!(_replace_finish(out, str, count, e1, patterns, replaces, rs)) end """ @@ -1214,7 +1214,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return String(b) + return unsafe_takestring(b) end function bytes2hex(io::IO, itr) @@ -1260,5 +1260,5 @@ function Base.rest(s::AbstractString, st...) for c in Iterators.rest(s, st...) print(io, c) end - return String(take!(io)) + return takestring!(io) end diff --git a/base/task.jl b/base/task.jl index 6cb1ff785eeee..61cc103c70081 100644 --- a/base/task.jl +++ b/base/task.jl @@ -95,7 +95,7 @@ function show_task_exception(io::IO, t::Task; indent = true) else show_exception_stack(IOContext(b, io), stack) end - str = String(take!(b)) + str = takestring!(b) if indent str = replace(str, "\n" => "\n ") end diff --git a/base/toml_parser.jl b/base/toml_parser.jl index 4d07cfed05d8a..7dac4af022eca 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -315,7 +315,7 @@ function point_to_line(str::AbstractString, a::Int, b::Int, context) c == '\n' && break print(io1, c) end - return String(take!(io1.io)), String(take!(io2.io)) + return takestring!(io1.io), takestring!(io2.io) end function Base.showerror(io::IO, err::ParserError) diff --git a/base/util.jl b/base/util.jl index 95d62c4a16e1d..e606e092ef959 100644 --- a/base/util.jl +++ b/base/util.jl @@ -77,7 +77,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} iscolor = get(io, :color, false)::Bool try f(IOContext(buf, io), args...) 
finally - str = String(take!(buf)) + str = takestring!(buf) if !iscolor print(io, str) else @@ -109,7 +109,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} isempty(line) && continue print(buf, enable_ansi, line, disable_ansi) end - print(io, String(take!(buf))) + print(io, takestring!(buf)) end end end diff --git a/base/uuid.jl b/base/uuid.jl index 9b2da3c6409db..217c0b55adf7d 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -96,7 +96,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return String(a) + return unsafe_takestring(a) end end diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index b7d16ffc7d487..b2ba92fc54981 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -34,6 +34,7 @@ Base.SubstitutionString Base.@s_str Base.@raw_str Base.@b_str +Base.takestring! Base.Docs.@html_str Base.Docs.@text_str Base.isvalid(::Any) diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md index 777afaa56348d..fb67dfd17c3e2 100644 --- a/doc/src/devdocs/functions.md +++ b/doc/src/devdocs/functions.md @@ -117,7 +117,7 @@ function lines(words) n += length(w)+1 end end - String(take!(io)) + takestring!(io) end import Markdown [string(n) for n in names(Core;all=true) diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md index 2c69aabbda192..edec633245021 100644 --- a/doc/src/manual/getting-started.md +++ b/doc/src/manual/getting-started.md @@ -13,7 +13,7 @@ known as a read-eval-print loop or "REPL") by double-clicking the Julia executab using REPL io = IOBuffer() REPL.banner(io) -banner = String(take!(io)) +banner = takestring!(io) import Markdown Markdown.parse("```\n\$ julia\n\n$(banner)\njulia> 1 + 2\n3\n\njulia> ans\n3\n```") ``` diff --git a/src/genericmemory.c b/src/genericmemory.c index ea52fca66ba48..4a64b571ac367 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -193,7 
+193,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_ } int how = jl_genericmemory_how(m); size_t mlength = m->length; - m->length = 0; if (how != 0) { jl_value_t *o = jl_genericmemory_data_owner_field(m); jl_genericmemory_data_owner_field(m) = NULL; @@ -208,8 +207,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_ JL_GC_PUSH1(&o); jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len); JL_GC_POP(); - if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon - jl_gc_count_freed(mlength); return str; } // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl index 588b49aa28d97..d55421c52ff55 100644 --- a/stdlib/Base64/src/encode.jl +++ b/stdlib/Base64/src/encode.jl @@ -24,7 +24,7 @@ julia> write(iob64_encode, "Hello!") julia> close(iob64_encode); -julia> str = String(take!(io)) +julia> str = takestring!(io) "SGVsbG8h" julia> String(base64decode(str)) @@ -211,6 +211,6 @@ function base64encode(f::Function, args...; context=nothing) f(IOContext(b, context), args...) 
end close(b) - return String(take!(s)) + return takestring!(s) end base64encode(args...; context=nothing) = base64encode(write, args...; context=context) diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 0c987ad01c828..27fc7231bcd2a 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -25,7 +25,7 @@ import Base: @handle_as, wait, close, eventloop, notify_error, IOError, _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct, preserve_handle, unpreserve_handle, isreadable, iswritable, isopen, - |, getproperty, propertynames + |, getproperty, propertynames, unsafe_takestring import Base.Filesystem.StatStruct if Sys.iswindows() import Base.WindowsRawSocket diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 4c821a3d897e4..3a8b9430f4f68 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -5,7 +5,8 @@ export mkpidlock, trymkpidlock using Base: IOError, UV_EEXIST, UV_ESRCH, - Process + Process, + unsafe_takestring using Base.Filesystem: File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL, @@ -285,7 +286,7 @@ function _rand_filename(len::Int=4) # modified from Base.Libc for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] end - return String(slug) + return unsafe_takestring(slug) end function tryrmopenfile(path::String) diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index 835988ddf149f..e7d10266677c6 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -147,7 +147,7 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) if verbose cpuio = IOBuffer() # print cpu_summary with correct alignment Sys.cpu_summary(cpuio) - for (i, line) in 
enumerate(split(chomp(String(take!(cpuio))), "\n")) + for (i, line) in enumerate(split(chomp(takestring!(cpuio)), "\n")) prefix = i == 1 ? " CPU: " : " " println(io, prefix, line) end diff --git a/stdlib/LibGit2/src/utils.jl b/stdlib/LibGit2/src/utils.jl index f62663a6ea4ca..a1edce7ab403a 100644 --- a/stdlib/LibGit2/src/utils.jl +++ b/stdlib/LibGit2/src/utils.jl @@ -162,7 +162,7 @@ function git_url(; end seekstart(io) - return String(take!(io)) + return takestring!(io) end function credential_identifier(scheme::AbstractString, host::AbstractString) diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl index 1b5cc1a752bcb..d68db483f999d 100644 --- a/stdlib/Markdown/src/Common/block.jl +++ b/stdlib/Markdown/src/Common/block.jl @@ -114,7 +114,7 @@ function indentcode(stream::IO, block::MD) break end end - code = String(take!(buffer)) + code = takestring!(buffer) !isempty(code) && (push!(block, Code(rstrip(code))); return true) return false end @@ -178,7 +178,7 @@ function blockquote(stream::IO, block::MD) end empty && return false - md = String(take!(buffer)) + md = takestring!(buffer) push!(block, BlockQuote(parse(md, flavor = config(block)).content)) return true end @@ -236,7 +236,7 @@ function admonition(stream::IO, block::MD) end end # Parse the nested block as markdown and create a new Admonition block. 
- nested = parse(String(take!(buffer)), flavor = config(block)) + nested = parse(takestring!(buffer), flavor = config(block)) push!(block, Admonition(category, title, nested.content)) return true end @@ -326,7 +326,7 @@ function list(stream::IO, block::MD) return true end end -pushitem!(list, buffer) = push!(list.items, parse(String(take!(buffer))).content) +pushitem!(list, buffer) = push!(list.items, parse(takestring!(buffer)).content) # –––––––––––––– # HorizontalRule diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl index 61807d267511d..f53aaaa00e2aa 100644 --- a/stdlib/Markdown/src/GitHub/GitHub.jl +++ b/stdlib/Markdown/src/GitHub/GitHub.jl @@ -21,9 +21,9 @@ function fencedcode(stream::IO, block::MD) if startswith(stream, string(ch) ^ n) if !startswith(stream, string(ch)) if flavor == "math" - push!(block, LaTeX(String(take!(buffer)) |> chomp)) + push!(block, LaTeX(takestring!(buffer) |> chomp)) else - push!(block, Code(flavor, String(take!(buffer)) |> chomp)) + push!(block, Code(flavor, takestring!(buffer) |> chomp)) end return true else diff --git a/stdlib/Markdown/src/parse/parse.jl b/stdlib/Markdown/src/parse/parse.jl index 389099b2984f6..371d52cfaf53f 100644 --- a/stdlib/Markdown/src/parse/parse.jl +++ b/stdlib/Markdown/src/parse/parse.jl @@ -65,7 +65,7 @@ function parseinline(stream::IO, md::MD, config::Config) char = peek(stream, Char) if haskey(config.inner, char) && (inner = parseinline(stream, md, config.inner[char])) !== nothing - c = String(take!(buffer)) + c = takestring!(buffer) !isempty(c) && push!(content, c) buffer = IOBuffer() push!(content, inner) @@ -73,7 +73,7 @@ function parseinline(stream::IO, md::MD, config::Config) write(buffer, read(stream, Char)) end end - c = String(take!(buffer)) + c = takestring!(buffer) !isempty(c) && push!(content, c) return content end diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index aabfcbb3ddc62..cd8158780bd6d 100644 --- 
a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -141,7 +141,7 @@ function readuntil(stream::IO, delimiter; newlines = false, match = nothing) while !eof(stream) if startswith(stream, delimiter) if count == 0 - return String(take!(buffer)) + return takestring!(buffer) else count -= 1 write(buffer, delimiter) @@ -187,7 +187,7 @@ function parse_inline_wrapper(stream::IO, delimiter::AbstractString; rep = false if !(char in whitespace || char == '\n' || char in delimiter) && startswith(stream, delimiter^n) trailing = 0 while startswith(stream, delimiter); trailing += 1; end - trailing == 0 && return String(take!(buffer)) + trailing == 0 && return takestring!(buffer) write(buffer, delimiter ^ (n + trailing)) end end diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index 5af03e0df9b6d..464fdab5a8337 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -166,7 +166,7 @@ region_active(s::PromptState) = s.region_active region_active(s::ModeState) = :off -input_string(s::PromptState) = String(take!(copy(s.input_buffer))) +input_string(s::PromptState) = takestring!(copy(s.input_buffer)) input_string_newlines(s::PromptState) = count(c->(c == '\n'), input_string(s)) function input_string_newlines_aftercursor(s::PromptState) @@ -1443,7 +1443,7 @@ function edit_input(s, f = (filename, line, column) -> InteractiveUtils.edit(fil end buf = buffer(s) pos = position(buf) - str = String(take!(buf)) + str = takestring!(buf) lines = readlines(IOBuffer(str); keep=true) # Compute line @@ -1672,7 +1672,7 @@ function normalize_key(key::Union{String,SubString{String}}) write(buf, c) end end - return String(take!(buf)) + return takestring!(buf) end function normalize_keys(keymap::Union{Dict{Char,Any},AnyDict}) @@ -2012,7 +2012,7 @@ function history_set_backward(s::SearchState, backward::Bool) nothing end -input_string(s::SearchState) = String(take!(copy(s.query_buffer))) +input_string(s::SearchState) = 
takestring!(copy(s.query_buffer)) function reset_state(s::SearchState) if s.query_buffer.size != 0 @@ -2100,7 +2100,7 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal, return ias end -input_string(s::PrefixSearchState) = String(take!(copy(s.response_buffer))) +input_string(s::PrefixSearchState) = takestring!(copy(s.response_buffer)) write_prompt(terminal, s::PrefixSearchState, color::Bool) = write_prompt(terminal, s.histprompt.parent_prompt, color) prompt_string(s::PrefixSearchState) = prompt_string(s.histprompt.parent_prompt.prompt) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index ddf2f55d0b9f7..7e84b83509041 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -880,7 +880,7 @@ function hist_from_file(hp::REPLHistoryProvider, path::String) end function add_history(hist::REPLHistoryProvider, s::PromptState) - str = rstrip(String(take!(copy(s.input_buffer)))) + str = rstrip(takestring!(copy(s.input_buffer))) isempty(strip(str)) && return mode = mode_idx(hist, LineEdit.mode(s)) !isempty(hist.history) && @@ -1007,7 +1007,7 @@ function history_move_prefix(s::LineEdit.PrefixSearchState, prefix::AbstractString, backwards::Bool, cur_idx::Int = hist.cur_idx) - cur_response = String(take!(copy(LineEdit.buffer(s)))) + cur_response = takestring!(copy(LineEdit.buffer(s))) # when searching forward, start at last_idx if !backwards && hist.last_idx > 0 cur_idx = hist.last_idx @@ -1049,7 +1049,7 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo qpos = position(query_buffer) qpos > 0 || return true searchdata = beforecursor(query_buffer) - response_str = String(take!(copy(response_buffer))) + response_str = takestring!(copy(response_buffer)) # Alright, first try to see if the current match still works a = position(response_buffer) + 1 # position is zero-indexed @@ -1106,7 +1106,7 @@ end LineEdit.reset_state(hist::REPLHistoryProvider) = history_reset_state(hist) function 
return_callback(s) - ast = Base.parse_input_line(String(take!(copy(LineEdit.buffer(s)))), depwarn=false) + ast = Base.parse_input_line(takestring!(copy(LineEdit.buffer(s))), depwarn=false) return !(isa(ast, Expr) && ast.head === :incomplete) end @@ -1668,7 +1668,7 @@ let matchend = Dict("\"" => r"\"", "\"\"\"" => r"\"\"\"", "'" => r"'", pos = nextind(code, last(j)) end print(buf, SubString(code, pos, lastindex(code))) - return String(take!(buf)) + return takestring!(buf) end end diff --git a/test/iobuffer.jl b/test/iobuffer.jl index b5b34a2dbed8c..843cc72b6b5a8 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -54,6 +54,26 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size) @test_throws ArgumentError seek(io, 0) end +@testset "takestring!" begin + buf = IOBuffer() + write(buf, "abcø") + s = takestring!(buf) + @test isempty(takestring!(buf)) + @test s == "abcø" + write(buf, "xyz") + @test takestring!(buf) == "xyz" + buf = IOBuffer() + + # Test with a nonzero offset in the buffer + v = rand(UInt8, 8) + for i in 1:8 + pushfirst!(v, rand(UInt8)) + end + buf = IOBuffer(v) + s = String(copy(v)) + @test takestring!(buf) == s +end + @testset "Read/write readonly IOBuffer" begin io = IOBuffer("hamster\nguinea pig\nturtle") @test position(io) == 0 diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 874607f3c1b20..f081d87a32d3e 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -49,6 +49,24 @@ using Random end end +@testset "takestring!" begin + v = [0x61, 0x62, 0x63] + old_mem = v.ref.mem + @test takestring!(v) == "abc" + @test isempty(v) + @test v.ref.mem !== old_mem # memory is changed + for v in [ + UInt8[], + [0x01, 0x02, 0x03], + collect(codeunits("æøå")) + ] + cp = copy(v) + s = takestring!(v) + @test isempty(v) + @test codeunits(s) == cp + end +end + @testset "{starts,ends}with" begin @test startswith("abcd", 'a') @test startswith('a')("abcd")