Skip to content

Commit

Permalink
Merge pull request #178 from sergesku/parseSharedStrings-richText-sup…
Browse files Browse the repository at this point in the history
…port

Correctly parse shared strings in the streaming implementation
  • Loading branch information
qrilka authored Jun 6, 2024
2 parents 61ab9f4 + c489d9d commit 87f74bb
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 4 deletions.
12 changes: 8 additions & 4 deletions src/Codec/Xlsx/Parser/Stream.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ module Codec.Xlsx.Parser.Stream
, WorkbookInfo(..)
, SheetInfo(..)
, wiSheets
, getOrParseSharedStringss
, getWorkbookInfo
, CellRow
, readSheet
Expand Down Expand Up @@ -181,6 +182,8 @@ makeLenses 'MkSheetState
-- | State for parsing shared strings
data SharedStringsState = MkSharedStringsState
{ _ss_string :: TB.Builder -- ^ String we are parsing
-- TODO: At the moment SharedStrings can be used only to create CellText values.
-- We should add support for CellRich values.
, _ss_list :: DL.DList Text -- ^ list of shared strings
} deriving stock (Generic, Show)
makeLenses 'MkSharedStringsState
Expand Down Expand Up @@ -256,10 +259,11 @@ parseSharedStrings
)
=> HexpatEvent -> m (Maybe Text)
parseSharedStrings = \case
StartElement "t" _ -> Nothing <$ (ss_string .= mempty)
EndElement "t" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string
CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt)
_ -> pure Nothing
-- TODO: Add parsing of text styles to further create CellRich values.
StartElement "si" _ -> Nothing <$ (ss_string .= mempty)
EndElement "si" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string
CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt)
_ -> pure Nothing

-- | Run a series of actions on an Xlsx file
runXlsxM :: MonadIO m => FilePath -> XlsxM a -> m a
Expand Down
92 changes: 92 additions & 0 deletions test/StreamTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ tests = testGroup
#else

import Control.Exception
import Codec.Archive.Zip as Zip
import Codec.Xlsx
import Codec.Xlsx.Parser.Stream
import Conduit ((.|))
Expand All @@ -31,10 +32,12 @@ import Data.Set.Lens
import qualified Data.ByteString.Lazy as LB
import qualified Data.ByteString as BS
import Data.Map (Map)
import qualified Data.Conduit.Combinators as C
import qualified Data.Map as M
import qualified Data.IntMap.Strict as IM
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Vector as V
import Diff
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.HUnit (testCase)
Expand Down Expand Up @@ -66,6 +69,11 @@ tests =
, testProperty "Set of input texts is as value set length" sharedStringInputTextsIsSameAsValueSetLength
],

testGroup "Reader/shared strings"
[ testCase "Can parse RichText values" richCellTextIsParsed
],


testGroup "Reader/Writer"
[ testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbook
, testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbookRow
Expand Down Expand Up @@ -234,4 +242,88 @@ untypedCellsAreParsedAsFloats = do
]
expected @==? (_ri_cell_row . _si_row <$> items)


richCellTextIsParsed :: IO ()
richCellTextIsParsed = do
BS.writeFile "testinput.xlsx" (toBs richWorkbook)
runXlsxM "testinput.xlsx" $ do
sharedStrings <- getOrParseSharedStringss
let result = Set.fromList $ V.toList sharedStrings
liftIO $ expected @==? result

where
expected :: Set.Set Text
expected = Set.fromList
[ textA1
, firstClauseB1 <> secondClauseB1
, firstClauseB2 <> secondClauseB2
]

textA1 = "Text at A1"
firstClauseB1 = "First clause at B1;"
firstClauseB2 = "First clause at B2;"
secondClauseB1 = "Second clause at B1"
secondClauseB2 = "Second clause at B2"

richWorkbook :: Xlsx
richWorkbook = def & atSheet "Sheet1" ?~ toWs
[ ((RowIndex 1, ColumnIndex 1), cellValue ?~ CellText textA1 $ def)
, ((RowIndex 2, ColumnIndex 1), cellValue ?~ cellRich firstClauseB1 secondClauseB1 $ def)
, ((RowIndex 2, ColumnIndex 2), cellValue ?~ cellRich firstClauseB2 secondClauseB2 $ def)
]

cellRich :: Text -> Text -> CellValue
cellRich firstClause secondClause = CellRich
[ RichTextRun
{ _richTextRunProperties = Just RunProperties
{ _runPropertiesBold = Nothing
, _runPropertiesCharset = Just 1
, _runPropertiesColor = Just Color
{ _colorAutomatic = Nothing
, _colorARGB = Nothing
, _colorTheme = Just 1
, _colorTint = Nothing
}
, _runPropertiesCondense = Nothing
, _runPropertiesExtend = Nothing
, _runPropertiesFontFamily = Just FontFamilySwiss
, _runPropertiesItalic = Nothing
, _runPropertiesOutline = Nothing
, _runPropertiesFont = Just "Aptos Narrow"
, _runPropertiesScheme = Nothing
, _runPropertiesShadow = Nothing
, _runPropertiesStrikeThrough = Nothing
, _runPropertiesSize = Just 11.0
, _runPropertiesUnderline = Nothing
, _runPropertiesVertAlign = Nothing
}
, _richTextRunText = firstClause
}
, RichTextRun
{ _richTextRunProperties = Just RunProperties
{ _runPropertiesBold = Just True
, _runPropertiesCharset = Just 1
, _runPropertiesColor = Just Color
{ _colorAutomatic = Nothing
, _colorARGB = Just "FFFF0000"
, _colorTheme = Nothing
, _colorTint = Nothing
}
, _runPropertiesCondense = Nothing
, _runPropertiesExtend = Nothing
, _runPropertiesFontFamily = Just FontFamilySwiss
, _runPropertiesItalic = Nothing
, _runPropertiesOutline = Nothing
, _runPropertiesFont = Just "Arial"
, _runPropertiesScheme = Nothing
, _runPropertiesShadow = Nothing
, _runPropertiesStrikeThrough = Nothing
, _runPropertiesSize = Just 8.0
, _runPropertiesUnderline = Nothing
, _runPropertiesVertAlign = Nothing
}
, _richTextRunText = secondClause
}
]

#endif
1 change: 1 addition & 0 deletions xlsx.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ test-suite data-test
, conduit
, filepath
, deepseq
, zip
if flag(microlens)
Build-depends: microlens >= 0.4 && < 0.5
, microlens-mtl
Expand Down

0 comments on commit 87f74bb

Please sign in to comment.