Skip to content

Commit

Permalink
Fixes #24214::isvalid(Substring{String}) (#30397)
Browse files Browse the repository at this point in the history
  • Loading branch information
raghav9-97 authored and ViralBShah committed Dec 17, 2018
1 parent 8b35e84 commit 046755c
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
4 changes: 1 addition & 3 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,7 @@ end

## checking UTF-8 & ASCII validity ##

byte_string_classify(data::Vector{UInt8}) =
ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), data, length(data))
byte_string_classify(s::String) =
byte_string_classify(s::Union{String,Vector{UInt8}}) =
ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s))
# 0: neither valid ASCII nor UTF-8
# 1: valid ASCII
Expand Down
6 changes: 6 additions & 0 deletions base/strings/substring.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ function isvalid(s::SubString, i::Integer)
@inbounds return ib && isvalid(s.string, s.offset + i)
end

# Classify the bytes of the substring `s` using the C routine `u8_isvalid`.
# `s` is handed to `ccall` as a `Ptr{UInt8}` together with `sizeof(s)` as the byte count.
# A result of 0 means the data is neither valid ASCII nor valid UTF-8.
byte_string_classify(s::SubString{String}) =
    ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s))

# A substring holds valid `String` data exactly when its classification is nonzero.
isvalid(::Type{String}, s::SubString{String}) = byte_string_classify(s) != 0

# One-argument form: validate the substring as `String` data.
isvalid(s::SubString{String}) = isvalid(String, s)

# Index navigation for `SubString{String}`: both methods forward their arguments
# unchanged to the `_thisind_str` / `_nextind_str` helpers (defined outside this excerpt).
thisind(s::SubString{String}, i::Int) = _thisind_str(s, i)
nextind(s::SubString{String}, i::Int) = _nextind_str(s, i)

Expand Down
10 changes: 8 additions & 2 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@ import Base: show, ==, hash, string, Symbol, isless, length, eltype,
isvalid(value) -> Bool
Returns `true` if the given value is valid for its type, which currently can be either
`AbstractChar` or `String`.
`AbstractChar` or `String` or `SubString{String}`.
# Examples
```jldoctest
julia> isvalid(Char(0xd800))
false
julia> isvalid(SubString(String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80]),1,2))
false
julia> isvalid(Char(0xd799))
true
```
Expand All @@ -30,13 +33,16 @@ isvalid(value)
Returns `true` if the given value is valid for that type. Types currently can
be either `AbstractChar` or `String`. Values for `AbstractChar` can be of type `AbstractChar` or [`UInt32`](@ref).
Values for `String` can be of that type, or `Vector{UInt8}`.
Values for `String` can be of that type, `Vector{UInt8}`, or `SubString{String}`.
# Examples
```jldoctest
julia> isvalid(Char, 0xd800)
false
julia> isvalid(String, SubString("thisisvalid",1,5))
true
julia> isvalid(Char, 0xd799)
true
```
Expand Down
3 changes: 2 additions & 1 deletion test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ end
end
# Check seven-byte sequences, should be invalid
@test isvalid(String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]) == false

@test isvalid(lstrip("blablabla")) == true
@test isvalid(SubString(String(UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]), 1,2)) == false
# invalid Chars
@test isvalid('a')
@test isvalid('')
Expand Down

0 comments on commit 046755c

Please sign in to comment.