From 0158b8c2d15703f6b0e65df86f107effa42f8935 Mon Sep 17 00:00:00 2001 From: Viktor Dukhovni Date: Mon, 9 Oct 2023 19:15:44 -0400 Subject: [PATCH] New API to convert directly between Lazy and Short --- Changelog.md | 5 +++ Data/ByteString/Short.hs | 2 + Data/ByteString/Short/Internal.hs | 71 ++++++++++++++++++++++++++++++- bytestring.cabal | 2 +- tests/Properties.hs | 3 ++ 5 files changed, 80 insertions(+), 3 deletions(-) diff --git a/Changelog.md b/Changelog.md index 69e4fbb24..f9283da21 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,8 @@ +[0.12.1.0] - TBD + +* API additions: + * Data.ByteString.Short now provides `lazyToShort` and `lazyFromShort`. + [0.12.0.2] — August 2023 * Bug fixes: diff --git a/Data/ByteString/Short.hs b/Data/ByteString/Short.hs index e1339e970..5069ed134 100644 --- a/Data/ByteString/Short.hs +++ b/Data/ByteString/Short.hs @@ -74,6 +74,8 @@ module Data.ByteString.Short ( unpack, fromShort, toShort, + lazyFromShort, + lazyToShort, -- * Basic interface snoc, diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 7aa826e3e..9448b718f 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -47,6 +47,8 @@ module Data.ByteString.Short.Internal ( unpack, fromShort, toShort, + lazyFromShort, + lazyToShort, -- * Basic interface snoc, @@ -214,6 +216,8 @@ import GHC.Exts , byteArrayContents# , unsafeCoerce# , copyMutableByteArray# + , resizeMutableByteArray# + , shrinkMutableByteArray# #if MIN_VERSION_base(4,10,0) , isByteArrayPinned# , isTrue# @@ -249,11 +253,11 @@ import GHC.Stack.Types import GHC.Word import Prelude ( Eq(..), Ord(..), Ordering(..), Read(..), Show(..) - , ($), ($!), error, (++), (.), (||) + , ($), ($!), error, fail, (++), (.), (||) , String, userError , Bool(..), (&&), otherwise , (+), (-), fromIntegral - , (*) + , (*), quot , (^) , (<$>) , return @@ -263,6 +267,7 @@ import Prelude ) import qualified Data.ByteString.Internal.Type as BS +import qualified Data.ByteString.Lazy.Internal as LBS import qualified Data.List as List import qualified GHC.Exts @@ -482,6 +487,56 @@ toShortIO (BS fptr len) = do touchForeignPtr fptr ShortByteString <$> stToIO (unsafeFreezeByteArray mba) +-- | A simple wrapper around 'fromShort' that wraps the strict 'ByteString' as +-- a one-chunk lazy 'LBS.ByteString'. +lazyFromShort :: ShortByteString -> LBS.ByteString +lazyFromShort = LBS.fromStrict . fromShort + +-- | /O(n)/. Convert a lazy 'LBS.ByteString' into a 'ShortByteString'. +-- +-- This makes a copy, so does not retain the input string. Naturally, best +-- used only with sufficiently short lazy ByteStrings. +-- +lazyToShort :: LBS.ByteString -> ShortByteString +lazyToShort LBS.Empty = empty +lazyToShort (LBS.Chunk c@(BS _ len) cs) = + unsafeDupablePerformIO $ do + mba0 <- stToIO (newByteArray len) + mba <- lazyToShortIO mba0 0 len c cs + ShortByteString <$> stToIO (unsafeFreezeByteArray mba) + +-- | Append first and any subsequent chunks of lazy bytestring to the provided +-- buffer, resizing it if required. The return value is the final buffer. +-- Arithmetic is done with care, to avoid undetected integer addition +-- wrap-around. While nobody should be copying multi-gigabyte lazy bytestrings +-- to "short" bytestrings, we should error out if possible, rather than corrupt +-- memory.. +lazyToShortIO :: MutableByteArray RealWorld -- ^ result buffer + -> Int -- ^ space already used + -> Int -- ^ total space allocated + -> ByteString -- ^ initial strict chunk + -> LBS.ByteString -- ^ remaining chunks + -> IO (MutableByteArray RealWorld) -- ^ possibly resized result +lazyToShortIO mba used size c@(BS fptr len) cs + -- Safe against overflow since used <= size. + | len <= size - used + = do let ptr = unsafeForeignPtrToPtr fptr + stToIO (copyAddrToByteArray ptr mba used len) + touchForeignPtr fptr + case cs of + LBS.Empty + -> stToIO (shrinkByteArray mba $ used + len) >> pure mba + LBS.Chunk c' cs' + -> lazyToShortIO mba (used + len) size c' cs' + -- Care to detect possible 'Int' arithmetic overflow. + | used < used + len + , let newsize = max (used + len) (size + size `quot` 2) + , newsize >= size + = do mba' <- stToIO (resizeByteArray mba newsize) + lazyToShortIO mba' used newsize c cs + | otherwise + = fail "Input lazy bytestring is too long" + -- | /O(n)/. Convert a 'ShortByteString' into a 'ByteString'. -- fromShort :: ShortByteString -> ByteString @@ -1614,6 +1669,18 @@ unsafeFreezeByteArray (MutableByteArray mba#) = ST $ \s -> case unsafeFreezeByteArray# mba# s of (# s', ba# #) -> (# s', ByteArray ba# #) +resizeByteArray :: MutableByteArray s -> Int -> ST s (MutableByteArray s) +resizeByteArray (MutableByteArray mba#) new@(I# new#) = + assert (new >= 0) $ + ST $ \s -> case resizeMutableByteArray# mba# new# s of + (# s', mba'# #) -> (# s', MutableByteArray mba'# #) + +shrinkByteArray :: MutableByteArray s -> Int -> ST s () +shrinkByteArray (MutableByteArray mba#) new@(I# new#) = do + assert (new >= 0) $ + ST $ \s -> case shrinkMutableByteArray# mba# new# s of + s' -> (# s', () #) + writeWord8Array :: MutableByteArray s -> Int -> Word8 -> ST s () writeWord8Array (MutableByteArray mba#) (I# i#) (W8# w#) = ST $ \s -> case writeWord8Array# mba# i# w# s of diff --git a/bytestring.cabal b/bytestring.cabal index 69066eeea..1b9568ea2 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -1,5 +1,5 @@ Name: bytestring -Version: 0.12.0.2 +Version: 0.12.1.0 Synopsis: Fast, compact, strict and lazy byte strings with a list interface Description: An efficient compact, immutable byte string type (both strict and lazy) diff --git a/tests/Properties.hs b/tests/Properties.hs index 09bb4baca..344d156a1 100644 --- a/tests/Properties.hs +++ b/tests/Properties.hs @@ -536,6 +536,8 @@ prop_short_pack_unpack xs = (Short.unpack . Short.pack) xs == xs prop_short_toShort_fromShort bs = (Short.fromShort . Short.toShort) bs == bs +prop_short_lazyToShort_fromShort lbs = + (Short.lazyFromShort . Short.lazyToShort) lbs == lbs prop_short_toShort_unpack bs = (Short.unpack . Short.toShort) bs == P.unpack bs @@ -602,6 +604,7 @@ prop_short_pinned (NonNegative (I# len#)) = runST $ ST $ \s -> short_tests = [ testProperty "pack/unpack" prop_short_pack_unpack , testProperty "toShort/fromShort" prop_short_toShort_fromShort + , testProperty "lazyToShort/fromShort" prop_short_lazyToShort_fromShort , testProperty "toShort/unpack" prop_short_toShort_unpack , testProperty "pack/fromShort" prop_short_pack_fromShort , testProperty "empty" prop_short_empty