diff --git a/base/strings/string.jl b/base/strings/string.jl index 17d06fc464cc2..50b95f8cbf9f3 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -159,9 +159,7 @@ end ## checking UTF-8 & ACSII validity ## -byte_string_classify(data::Vector{UInt8}) = - ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), data, length(data)) -byte_string_classify(s::String) = +byte_string_classify(s::Union{String,Vector{UInt8}}) = ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) # 0: neither valid ASCII nor UTF-8 # 1: valid ASCII diff --git a/base/strings/substring.jl b/base/strings/substring.jl index b18d63c9e1388..546fd6ea6e0dc 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -82,6 +82,12 @@ function isvalid(s::SubString, i::Integer) @inbounds return ib && isvalid(s.string, s.offset + i) end +byte_string_classify(s::SubString{String}) = + ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) + +isvalid(::Type{String}, s::SubString{String}) = byte_string_classify(s) ≠ 0 +isvalid(s::SubString{String}) = isvalid(String, s) + thisind(s::SubString{String}, i::Int) = _thisind_str(s, i) nextind(s::SubString{String}, i::Int) = _nextind_str(s, i) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index e0c932a28f437..e5bd5e85f982a 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -12,13 +12,16 @@ import Base: show, ==, hash, string, Symbol, isless, length, eltype, isvalid(value) -> Bool Returns `true` if the given value is valid for its type, which currently can be either -`AbstractChar` or `String`. +`AbstractChar`, `String`, or `SubString{String}`. # Examples ```jldoctest julia> isvalid(Char(0xd800)) false +julia> isvalid(SubString(String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80]),1,2)) +false + julia> isvalid(Char(0xd799)) true ``` @@ -30,13 +33,16 @@ isvalid(value) Returns `true` if the given value is valid for that type. Types currently can be either `AbstractChar` or `String`. 
Values for `AbstractChar` can be of type `AbstractChar` or [`UInt32`](@ref). -Values for `String` can be of that type, or `Vector{UInt8}`. +Values for `String` can be of that type, `Vector{UInt8}`, or `SubString{String}`. # Examples ```jldoctest julia> isvalid(Char, 0xd800) false +julia> isvalid(String, SubString("thisisvalid",1,5)) +true + julia> isvalid(Char, 0xd799) true ``` diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 150bbcfe48a4e..af17c4dee095a 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -523,7 +523,8 @@ end end # Check seven-byte sequences, should be invalid @test isvalid(String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]) == false - + @test isvalid(lstrip("blablabla")) == true + @test isvalid(SubString(String(UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]), 1,2)) == false # invalid Chars @test isvalid('a') @test isvalid('柒')