diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index cfc9362a2c..edbc1ec3b7 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -14,7 +14,7 @@ jobs: LC_ALL: C.UTF-8 BUILD: ${{ matrix.build }} # For updating see: https://downloads.haskell.org/~ghcup/ - GHCUP_VERSION: 0.1.20.0 + GHCUP_VERSION: 0.1.50.2 DOCSPEC_URL: https://github.com/phadej/cabal-extras/releases/download/cabal-docspec-0.0.0.20210111/cabal-docspec-0.0.0.20210111.xz DOCSPEC_OPTIONS: "--timeout 60 --check-properties --property-variables xs" GHCUP_GHC_OPTIONS: ${{ matrix.ghcup_ghc_options }} @@ -68,7 +68,7 @@ jobs: matrix: # The order is important to optimize fail-fast. name: - - 9.10.1-Werror + - 9.10.3-Werror # - 9.8.1-docspec # - 8.10.7-coverage @@ -99,14 +99,14 @@ jobs: ghc_version: 9.12.1 runner: ubuntu-latest build: cabal - cabal_build_options: "--flag limit-build-mem" + cabal_build_options: "--flag limit-build-mem --flag fusion-plugin" cabal_version: 3.12.1.0 disable_sdist_build: "y" cabal_project: cabal.project ignore_error: false # Note: use linux for warning build for convenient dev testing - - name: 9.10.1-Werror - ghc_version: 9.10.1 + - name: 9.10.3-Werror + ghc_version: 9.10.3 runner: ubuntu-latest build: cabal cabal_build_options: "--flag limit-build-mem" @@ -114,8 +114,8 @@ jobs: disable_sdist_build: "y" cabal_project: cabal.project.Werror ignore_error: false - - name: 9.10.1-macos - ghc_version: 9.10.1 + - name: 9.10.3-macos + ghc_version: 9.10.3 runner: macos-latest build: cabal cabal_build_options: "--flag limit-build-mem" @@ -123,8 +123,8 @@ jobs: disable_sdist_build: "y" cabal_project: cabal.project ignore_error: false - - name: 9.10.1-fusion-inspection - ghc_version: 9.10.1 + - name: 9.10.3-fusion-inspection + ghc_version: 9.10.3 runner: ubuntu-latest build: cabal cabal_version: 3.12.1.0 diff --git a/.github/workflows/regression-check.yml b/.github/workflows/regression-check.yml index 3c9cf24c79..01c006ad7d 100644 --- 
a/.github/workflows/regression-check.yml +++ b/.github/workflows/regression-check.yml @@ -33,6 +33,8 @@ jobs: Data.ParserK.Chunked Data.ParserK.Chunked.Generic Data.RingArray + Data.Scanl + Data.Scanl.Concurrent Data.Scanl.Window Data.Serialize Data.Stream @@ -40,10 +42,12 @@ jobs: Data.Stream.ConcurrentEager Data.Stream.ConcurrentInterleaved Data.Stream.ConcurrentOrdered + Data.Stream.Prelude Data.StreamK:6 Data.Unbox Data.Unbox.Derive.TH Data.Unfold + Data.Unfold.Prelude FileSystem.DirIO FileSystem.Handle Unicode.Parser @@ -67,10 +71,10 @@ jobs: - name: Download ghc run: | - GHCUP_VER=0.1.18.0 + GHCUP_VER=0.1.50.2 curl -sL -o ./ghcup https://downloads.haskell.org/~ghcup/$GHCUP_VER/x86_64-linux-ghcup-$GHCUP_VER chmod +x ./ghcup - GHCVER=9.6.6 + GHCVER=9.10.3 ./ghcup install ghc $GHCVER ./ghcup set ghc $GHCVER cabal update @@ -135,7 +139,7 @@ jobs: - name: Run benchmarks and append run: | - ./bench-runner --package-name streamly-benchmarks --package-version 0.0.0 --targets "$CI_BENCHMARKS" --raw --append + ./bench-runner --package-name streamly-benchmarks --package-version 0.0.0 --cabal-build-options "-j1" --targets "$CI_BENCHMARKS" --raw --append # ----------------------------------------------------------------- # -- Compare diff --git a/.hlint.ignore b/.hlint.ignore index 462511808f..4209b68b9a 100644 --- a/.hlint.ignore +++ b/.hlint.ignore @@ -1,6 +1,6 @@ core/src/Streamly/Internal/Data/StreamK/Type.hs core/src/Streamly/Internal/Data/Pipe/Type.hs -core/src/Streamly/Internal/Unicode/Stream.hs +core/src/Streamly/Internal/Unicode/Encode.hs src/Streamly/Internal/Data/SmallArray/Type.hs src/Streamly/Internal/Data/Stream/IsStream/Eliminate.hs src/Streamly/Internal/Data/Stream/Serial.hs @@ -22,19 +22,9 @@ test/Streamly/Test/Unicode/Stream.hs test/Streamly/Test/Data/Unbox.hs benchmark/lib/Streamly/Benchmark/Common.hs benchmark/lib/Streamly/Benchmark/Common/Handle.hs -benchmark/lib/Streamly/Benchmark/Prelude.hs benchmark/NanoBenchmarks.hs 
benchmark/Streamly/Benchmark/Data/Array.hs -benchmark/Streamly/Benchmark/Data/Parser.hs benchmark/Streamly/Benchmark/Data/ParserK.hs benchmark/Streamly/Benchmark/Data/Stream/StreamK.hs benchmark/Streamly/Benchmark/Data/Unfold.hs benchmark/Streamly/Benchmark/FileSystem/Handle.hs -benchmark/Streamly/Benchmark/Prelude/Async.hs -benchmark/Streamly/Benchmark/Prelude/Concurrent.hs -benchmark/Streamly/Benchmark/Prelude/Merge.hs -benchmark/Streamly/Benchmark/Prelude/Parallel.hs -benchmark/Streamly/Benchmark/Prelude/Rate.hs -benchmark/Streamly/Benchmark/Prelude/WAsync.hs -benchmark/Streamly/Benchmark/Prelude/ZipAsync.hs -benchmark/Streamly/Benchmark/Prelude/ZipSerial.hs diff --git a/.packcheck.ignore b/.packcheck.ignore index 4a45873f1d..66b8033047 100644 --- a/.packcheck.ignore +++ b/.packcheck.ignore @@ -15,6 +15,7 @@ benchmark/bench-runner/flake.lock benchmark/bench-runner/flake.nix benchmark/bench-runner/sources.nix bin/ghc.sh +bin/ghc-make.sh bin/repl bin/run-ci.sh bin/run-repl-quick.sh diff --git a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs index 31677349ab..2390eeb25a 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs @@ -201,6 +201,7 @@ inspect $ 'copyChunksSplitInterposeSuffix `hasNoType` ''Step copyChunksSplitInterpose :: Handle -> Handle -> IO () copyChunksSplitInterpose inh outh = Stream.fold (Handle.write outh) + -- XXX requires @-fspec-constr-recursive=12@. -- XXX this is not correct word splitting combinator $ Array.concatSepBy 32 . Array.compactSepByByte_ 32 $ Handle.readChunks inh diff --git a/benchmark/Streamly/Benchmark/Data/Fold.hs b/benchmark/Streamly/Benchmark/Data/Fold.hs index 8f3a0d9f7c..fb25832b6e 100644 --- a/benchmark/Streamly/Benchmark/Data/Fold.hs +++ b/benchmark/Streamly/Benchmark/Data/Fold.hs @@ -355,6 +355,8 @@ o_1_space_reduce_read_split env = -- | Infix split on a character sequence. 
splitOnSeqUtf8 :: String -> Handle -> IO Int splitOnSeqUtf8 str inh = + -- XXX requires @-fspec-constr-recursive=12@. Maybe due to + -- decodeUtf8. Stream.fold Fold.length $ Stream.foldManyPost (Fold.takeEndBySeq_ (Array.fromList str) Fold.drain) $ Unicode.decodeUtf8Chunks diff --git a/benchmark/Streamly/Benchmark/Data/Parser.hs b/benchmark/Streamly/Benchmark/Data/Parser.hs index 6200a83def..1a677c5d82 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser.hs @@ -1,4 +1,3 @@ - #undef FUSION_CHECK #ifdef FUSION_CHECK {-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} @@ -16,808 +15,86 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -Wno-orphans #-} -module Streamly.Benchmark.Data.Parser +module Main ( - benchmarks - , sourceUnfoldrM + main ) where -import Control.Applicative ((<|>)) -import Control.DeepSeq (NFData(..)) -import Data.Function ((&)) -import Data.Functor (($>)) -import Data.Monoid (Sum(..)) -import GHC.Magic (inline) -import GHC.Magic (noinline) -import System.IO (Handle) -import System.Random (randomRIO) -import Streamly.Internal.Data.Fold (Fold(..)) -import Streamly.Internal.Data.Parser - (ParseError(..), Parser(..), Initial(..), Step(..), Final(..)) -import Streamly.Internal.Data.Stream (Stream) import Prelude hiding (any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile, span) -import qualified Control.Applicative as AP -import qualified Data.Foldable as F -import qualified Data.Traversable as TR -import qualified Streamly.FileSystem.Handle as Handle import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.Fold as Fold -import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Data.Stream as Stream -import qualified Streamly.Internal.Data.Producer as Producer -import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common import 
Streamly.Benchmark.Common.Handle +import Streamly.Benchmark.Data.Parser.Alternative as Alternative +import Streamly.Benchmark.Data.Parser.Applicative as Applicative +import Streamly.Benchmark.Data.Parser.Monad as Monad +import Streamly.Benchmark.Data.Parser.Sequence as Sequence +import Streamly.Benchmark.Data.Parser.Producer as Producer +import Streamly.Benchmark.Data.Parser.Interleave as Interleave +import Streamly.Benchmark.Data.Parser.Groups as Groups + +moduleName :: String +moduleName = "Data.Parser" + +benchmarkList :: + Int + -> BenchEnv + -> [Array.Array Int] + -> [(SpaceComplexity, Benchmark)] +benchmarkList value env arrays = + Alternative.benchmarks value + ++ Applicative.benchmarks value + ++ Monad.benchmarks value + ++ Sequence.benchmarks value + ++ Sequence.benchmarksFileIO env + ++ Producer.benchmarks value arrays + ++ Groups.benchmarks value + ++ Interleave.benchmarks value ------------------------------------------------------------------------------- --- Utilities +-- Driver ------------------------------------------------------------------------------- --- XXX these can be moved to the common module - -{-# INLINE sourceUnfoldrM #-} -sourceUnfoldrM :: Monad m => Int -> Int -> Stream m Int -sourceUnfoldrM value n = Stream.unfoldrM step n - where - step cnt = - if cnt > n + value - then return Nothing - else return (Just (cnt, cnt + 1)) +main :: IO () +main = do +#ifndef FUSION_CHECK + env <- mkHandleBenchEnv + runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env) --- | Generates something like this: { { \{ \{ } }. The stream consists of --- three parts, the first part is contains a sequence of `{`. The second part --- contains a sequence pf escaped values `\{`. The third part contains a --- sequence of `}`. 
-{-# INLINE sourceEscapedFrames #-} -sourceEscapedFrames :: - Monad m - => Int - -> Int - -> Stream m Char -sourceEscapedFrames value n = Stream.unfoldrM step n where - bs = '\\' - cbOpen = '{' - cbClose = '}' - value1 = value `div` 4 - - step cnt - | cnt > 4 * value1 = return Nothing - | cnt <= value1 = return $ Just (cbOpen, cnt + 1) - | cnt > 3 * value1 = return $ Just (cbClose, cnt + 1) - | otherwise = - return - $ Just - $ if (cnt - value1) `mod` 2 == 1 - then (bs, cnt + 1) - else (cbOpen, cnt + 1) - -{-# INLINE benchIOSrc #-} -benchIOSrc - :: NFData b - => (Int -> Int -> Stream IO a) - -> Int - -> String - -> (Stream IO a -> IO b) - -> Benchmark -benchIOSrc src value name f = - bench name $ nfIO $ randomRIO (1,1) >>= f . src value - --- | Takes a fold method, and uses it with a default source. -{-# INLINE benchIOSink #-} -benchIOSink - :: NFData b - => Int -> String -> (Stream IO Int -> IO b) -> Benchmark -benchIOSink value name f = - bench name $ nfIO $ randomRIO (1,1) >>= f . sourceUnfoldrM value - -------------------------------------------------------------------------------- --- Parsers -------------------------------------------------------------------------------- - -{-# INLINE takeBetween #-} -takeBetween :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeBetween value = Stream.parse (PR.takeBetween 0 value Fold.drain) - -{-# INLINE takeEQ #-} -takeEQ :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeEQ value = Stream.parse (PR.takeEQ value Fold.drain) - -{-# INLINE takeGE #-} -takeGE :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeGE value = Stream.parse (PR.takeGE value Fold.drain) - -{-# INLINE dropWhile #-} -dropWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -dropWhile value = Stream.parse (PR.dropWhile (<= value)) - -{-# INLINE takeBeginBy #-} -takeBeginBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -takeBeginBy value stream = do - let stream2 = value 
`Stream.cons` stream - Stream.parse (PR.takeBeginBy (== value) Fold.drain) stream2 - -takeFramedByEsc_ :: Monad m => Int -> Stream m Char -> m (Either ParseError ()) -takeFramedByEsc_ _ = Stream.parse parser - - where - - isEsc = (== '\\') - isBegin = (== '{') - isEnd = (== '}') - - parser = PR.takeFramedByEsc_ isEsc isBegin isEnd Fold.drain - -{-# INLINE listEqBy #-} -listEqBy :: Int -> Stream IO Int -> IO (Either ParseError [Int]) -listEqBy len = Stream.parse (PR.listEqBy (==) [1 .. len]) - -{-# INLINE streamEqBy #-} -streamEqBy :: Int -> Stream IO Int -> IO (Either ParseError ()) -streamEqBy len = Stream.parse (PR.streamEqBy (==) (Stream.enumerateFromTo 1 len)) - -{-# INLINE takeWhile #-} -takeWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -takeWhile value = Stream.parse (PR.takeWhile (<= value) Fold.drain) - -takeWhileP :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -takeWhileP value = - Stream.parse (PR.takeWhileP (<= value) (PR.takeWhile (<= value - 1) Fold.drain)) - -{-# INLINE takeP #-} -takeP :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeP value = Stream.parse (PR.takeP value (PR.fromFold Fold.drain)) - -{-# INLINE groupBy #-} -groupBy :: Monad m => Stream m Int -> m (Either ParseError ()) -groupBy = Stream.parse (PR.groupBy (<=) Fold.drain) - -{-# INLINE groupByRolling #-} -groupByRolling :: Monad m => Stream m Int -> m (Either ParseError ()) -groupByRolling = Stream.parse (PR.groupByRolling (<=) Fold.drain) - -{-# INLINE wordBy #-} -wordBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -wordBy value = Stream.parse (PR.wordBy (>= value) Fold.drain) - -{-# INLINE sepByWords #-} -sepByWords :: Monad m => Stream m Int -> m (Either ParseError ()) -sepByWords = Stream.parse (wrds even Fold.drain) - where - wrds p = PR.sepBy (PR.takeWhile (not . 
p) Fold.drain) (PR.dropWhile p) - -{-# INLINE sepByAllWords #-} -sepByAllWords :: Monad m => Stream m Int -> m (Either ParseError ()) -sepByAllWords = Stream.parse (wrds even Fold.drain) - where - wrds p = PR.sepByAll (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) - --- Returning a list to compare with the sepBy1 in ParserK -{-# INLINE sepBy1 #-} -sepBy1 :: Monad m => Stream m Int -> m (Either ParseError [Int]) -sepBy1 xs = do - Stream.parse (PR.sepBy1 (PR.satisfy odd) (PR.satisfy even) Fold.toList) xs - -{-# INLINE sepByWords1 #-} -sepByWords1 :: Monad m => Stream m Int -> m (Either ParseError ()) -sepByWords1 = Stream.parse (wrds even Fold.drain) - where - wrds p = PR.sepBy1 (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) - -{-# INLINE deintercalate #-} -deintercalate :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -deintercalate _ = Stream.parse (partition even) - - where - - partition p = - PR.deintercalate - (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain - -{-# INLINE deintercalate1 #-} -deintercalate1 :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -deintercalate1 _ = Stream.parse (partition even) - - where - - partition p = - PR.deintercalate1 - (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain - -{-# INLINE deintercalateAll #-} -deintercalateAll :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -deintercalateAll _ = Stream.parse (partition even) - - where - - partition p = - PR.deintercalateAll - (PR.takeWhile (not . 
p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain - -{-# INLINE manyWordByEven #-} -manyWordByEven :: Monad m => Stream m Int -> m (Either ParseError ()) -manyWordByEven = Stream.parse (PR.many (PR.wordBy even Fold.drain) Fold.drain) - -{-# INLINE many #-} -many :: Monad m => Stream m Int -> m (Either ParseError Int) -many = Stream.parse (PR.many (PR.satisfy (> 0)) Fold.length) - -{-# INLINE manyAlt #-} -manyAlt :: Monad m => Stream m Int -> m Int -manyAlt xs = do - x <- Stream.parse (AP.many (PR.satisfy (> 0))) xs - return $ Prelude.length x - -{-# INLINE some #-} -some :: Monad m => Stream m Int -> m (Either ParseError Int) -some = Stream.parse (PR.some (PR.satisfy (> 0)) Fold.length) - -{-# INLINE someAlt #-} -someAlt :: Monad m => Stream m Int -> m Int -someAlt xs = do - x <- Stream.parse (AP.some (PR.satisfy (> 0))) xs - return $ Prelude.length x - -{-# INLINE manyTill #-} -manyTill :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -manyTill value = - Stream.parse (PR.manyTill (PR.satisfy (> 0)) (PR.satisfy (== value)) Fold.length) - -{-# INLINE splitAp2 #-} -splitAp2 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ((), ())) -splitAp2 value = - Stream.parse - ((,) - <$> PR.dropWhile (<= (value `div` 2)) - <*> PR.dropWhile (<= value) - ) - -{-# INLINE splitAp4 #-} -splitAp4 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitAp4 value = - Stream.parse - ( (\() () () () -> ()) - <$> PR.dropWhile (<= (value * 1 `div` 4)) - <*> PR.dropWhile (<= (value * 2 `div` 4)) - <*> PR.dropWhile (<= (value * 3 `div` 4)) - <*> PR.dropWhile (<= value) - ) - -{-# INLINE splitAp8 #-} -splitAp8 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitAp8 value = - Stream.parse - ( (\() () () () () () () () -> ()) - <$> PR.dropWhile (<= (value * 1 `div` 8)) - <*> PR.dropWhile (<= (value * 2 `div` 8)) - <*> PR.dropWhile (<= (value * 3 `div` 8)) - <*> PR.dropWhile (<= (value * 4 `div` 8)) - <*> PR.dropWhile (<= (value * 5 `div` 
8)) - <*> PR.dropWhile (<= (value * 6 `div` 8)) - <*> PR.dropWhile (<= (value * 7 `div` 8)) - <*> PR.dropWhile (<= value) - ) - -{-# INLINE splitApBefore #-} -splitApBefore :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitApBefore value = - Stream.parse - ( PR.dropWhile (<= (value `div` 2)) - *> PR.dropWhile (<= value) - ) - -{-# INLINE splitApAfter #-} -splitApAfter :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitApAfter value = - Stream.parse - ( PR.dropWhile (<= (value `div` 2)) - <* PR.dropWhile (<= value) - ) - -{-# INLINE splitWith2 #-} -splitWith2 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ((), ())) -splitWith2 value = - Stream.parse - (PR.splitWith (,) - (PR.dropWhile (<= (value `div` 2))) - (PR.dropWhile (<= value)) - ) - -{-# INLINE split_ #-} -split_ :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -split_ value = - Stream.parse - (PR.split_ - (PR.dropWhile (<= (value `div` 2))) - (PR.dropWhile (<= value)) - ) - --- XXX dropWhile with applicative does not fuse --- PR.dropWhile (<= (value * 1 `div` 4)) *> PR.die "alt" -{-# INLINE takeWhileFail #-} -takeWhileFail :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeWhileFail predicate (Fold fstep finitial _ ffinal) = - Parser step initial extract - - where - - initial = do - res <- finitial - return $ case res of - Fold.Partial s -> IPartial s - Fold.Done b -> IDone b - - step s a = - if predicate a - then do - fres <- fstep s a - return - $ case fres of - Fold.Partial s1 -> SPartial 1 s1 - Fold.Done b -> SDone 1 b - else return $ SError "fail" - - extract s = fmap (FDone 0) (ffinal s) - -{-# INLINE alt2 #-} -alt2 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt2 value = - Stream.parse - (PR.alt - (takeWhileFail (<= (value `div` 2)) Fold.drain) - (PR.dropWhile (<= value)) - ) - -{-# INLINE alt4 #-} -alt4 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt4 value = - Stream.parse - ( 
takeWhileFail (<= (value * 1 `div` 4)) Fold.drain - <|> takeWhileFail (<= (value * 2 `div` 4)) Fold.drain - <|> takeWhileFail (<= (value * 3 `div` 4)) Fold.drain - <|> PR.dropWhile (<= value) - ) - -{-# INLINE alt8 #-} -alt8 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt8 value = - Stream.parse - ( takeWhileFail (<= (value * 1 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 2 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 3 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 4 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 5 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 6 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 7 `div` 8)) Fold.drain - <|> PR.dropWhile (<= value) - ) - -{-# INLINE alt16 #-} -alt16 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt16 value = - Stream.parse - ( takeWhileFail (<= (value * 1 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 2 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 3 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 4 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 5 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 6 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 8 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 9 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 10 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 11 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 12 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 13 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 14 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 15 `div` 16)) Fold.drain - <|> PR.dropWhile (<= value) - ) - -{-# INLINE altSmall #-} -altSmall :: Monad m - => Int -> Stream m Int -> m () -altSmall value = - Stream.fold Fold.drain . 
- Stream.parseMany - (PR.alt - (PR.satisfy (>= value) *> PR.die "alt") - (PR.satisfy (<= value)) - ) - -{-# INLINE monad #-} -monad :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad value = - Stream.parse - $ do - PR.dropWhile (<= (value `div` 2)) - PR.dropWhile (<= value) - -{-# INLINE monad4 #-} -monad4 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad4 value = - Stream.parse $ do - PR.dropWhile (<= (value `div` 4)) - PR.dropWhile (<= (value `div` 2)) - PR.dropWhile (<= (value * 3 `div` 4)) - PR.dropWhile (<= value) - -{-# INLINE monad8 #-} -monad8 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad8 value = - Stream.parse $ do - PR.dropWhile (<= (value * 1 `div` 8)) - PR.dropWhile (<= (value * 2 `div` 8)) - PR.dropWhile (<= (value * 3 `div` 8)) - PR.dropWhile (<= (value * 4 `div` 8)) - PR.dropWhile (<= (value * 5 `div` 8)) - PR.dropWhile (<= (value * 6 `div` 8)) - PR.dropWhile (<= (value * 7 `div` 8)) - PR.dropWhile (<= value) - -{-# INLINE monad16 #-} -monad16 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad16 value = - Stream.parse $ do - PR.dropWhile (<= (value * 1 `div` 16)) - PR.dropWhile (<= (value * 2 `div` 16)) - PR.dropWhile (<= (value * 3 `div` 16)) - PR.dropWhile (<= (value * 4 `div` 16)) - PR.dropWhile (<= (value * 5 `div` 16)) - PR.dropWhile (<= (value * 6 `div` 16)) - PR.dropWhile (<= (value * 7 `div` 16)) - PR.dropWhile (<= (value * 8 `div` 16)) - PR.dropWhile (<= (value * 9 `div` 16)) - PR.dropWhile (<= (value * 10 `div` 16)) - PR.dropWhile (<= (value * 11 `div` 16)) - PR.dropWhile (<= (value * 12 `div` 16)) - PR.dropWhile (<= (value * 13 `div` 16)) - PR.dropWhile (<= (value * 14 `div` 16)) - PR.dropWhile (<= (value * 15 `div` 16)) - PR.dropWhile (<= value) - -{-# INLINE takeEndBy_ #-} -takeEndBy_ :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -takeEndBy_ value = Stream.parse (PR.takeEndBy_ (>= value) (PR.fromFold Fold.drain)) - -{- -{-# INLINE 
teeAllAny #-} -teeAllAny :: Monad m - => Int -> Stream m Int -> m ((), ()) -teeAllAny value = - Stream.parse - (PR.teeWith (,) - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) - -{-# INLINE teeFstAllAny #-} -teeFstAllAny :: Monad m - => Int -> Stream m Int -> m ((), ()) -teeFstAllAny value = - Stream.parse - (PR.teeWithFst (,) - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) - -{-# INLINE shortestAllAny #-} -shortestAllAny :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -shortestAllAny value = - Stream.parse - (PR.shortest - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) - -{-# INLINE longestAllAny #-} -longestAllAny :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -longestAllAny value = - Stream.parse - (PR.longest - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) --} - -------------------------------------------------------------------------------- --- Spanning -------------------------------------------------------------------------------- - -{-# INLINE span #-} -span :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) -span value = Stream.parse (PR.span (<= (value `div` 2)) Fold.drain Fold.drain) - -{-# INLINE spanBy #-} -spanBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) -spanBy value = - Stream.parse (PR.spanBy (\_ i -> i <= (value `div` 2)) Fold.drain Fold.drain) - -{-# INLINE spanByRolling #-} -spanByRolling :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) -spanByRolling value = - Stream.parse (PR.spanByRolling (\_ i -> i <= value `div` 2) Fold.drain Fold.drain) - -parseManyChunksOfSum :: Int -> Handle -> IO Int -parseManyChunksOfSum n inh = - Stream.fold Fold.length - $ Stream.parseMany - (PR.fromFold $ Fold.take n Fold.sum) - (Stream.unfold Handle.reader inh) - -------------------------------------------------------------------------------- --- Parsers in which -fspec-constr-recursive=16 is problematic 
-------------------------------------------------------------------------------- - --- XXX -fspec-constr-recursive=16 makes GHC go beserk when compiling these. --- We need to fix GHC so that we can have better control over that option or do --- not have to rely on it. --- -{-# INLINE lookAhead #-} -lookAhead :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -lookAhead value = - Stream.parse (PR.lookAhead (PR.takeWhile (<= value) Fold.drain) $> ()) - --- XXX The timing of this increased 3x after the stepify extract changes. -{-# INLINE sequenceA_ #-} -sequenceA_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sequenceA_ value = - Stream.parse (F.sequenceA_ $ replicate value (PR.satisfy (> 0))) - --- quadratic complexity -{-# INLINE sequenceA #-} -sequenceA :: Monad m => Int -> Stream m Int -> m Int -sequenceA value xs = do - x <- Stream.parse (TR.sequenceA (replicate value (PR.satisfy (> 0)))) xs - return $ length x - --- quadratic complexity -{-# INLINE sequence #-} -sequence :: Monad m => Int -> Stream m Int -> m Int -sequence value xs = do - x <- Stream.parse (TR.sequence (replicate value (PR.satisfy (> 0)))) xs - return $ length x - -{-# INLINE sequence_ #-} -sequence_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sequence_ value xs = - Stream.parse (foldr f (return ()) (replicate value (PR.takeBetween 0 1 Fold.drain))) xs - - where - - {-# INLINE f #-} - f m k = m >>= (\_ -> k) - --- choice using the "Alternative" instance with direct style parser type has --- quadratic performance complexity. 
--- -{-# INLINE choiceAsum #-} -choiceAsum :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -choiceAsum value = - Stream.parse (F.asum (replicate value (PR.satisfy (< 0))) - AP.<|> PR.satisfy (> 0)) - -{- -{-# INLINE choice #-} -choice :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -choice value = - Stream.parse - (PR.choice (replicate value (PR.satisfy (< 0))) AP.<|> PR.satisfy (> 0)) --} - -------------------------------------------------------------------------------- --- Parsing with unfolds -------------------------------------------------------------------------------- - -{-# INLINE parseManyUnfoldArrays #-} -parseManyUnfoldArrays :: Int -> [Array.Array Int] -> IO () -parseManyUnfoldArrays count arrays = do - let src = Producer.source (Just (Producer.OuterLoop arrays)) - let parser = PR.fromFold (Fold.take count Fold.drain) - let readSrc = - Producer.producer - $ Producer.concat Producer.fromList Array.producer - let streamParser = - Producer.simplify (Producer.parseMany parser readSrc) - Stream.fold Fold.drain $ Stream.unfold streamParser src - -------------------------------------------------------------------------------- --- Stream transformation -------------------------------------------------------------------------------- - -{-# INLINE parseMany #-} -parseMany :: Monad m => Int -> Stream m Int -> m () -parseMany n = - Stream.fold Fold.drain - . fmap getSum - . Stream.catRights . Stream.parseMany (PR.fromFold $ Fold.take n Fold.mconcat) - . fmap Sum - -{-# INLINE parseManyGroupBy #-} -parseManyGroupBy :: Monad m => (Int -> Int -> Bool) -> Stream m Int -> m () -parseManyGroupBy cmp = - Stream.fold Fold.drain . Stream.parseMany (PR.groupBy cmp Fold.drain) - -{-# INLINE parseManyGroupsRolling #-} -parseManyGroupsRolling :: Monad m => Bool -> Stream m Int -> m () -parseManyGroupsRolling b = - Stream.fold Fold.drain - . 
Stream.parseMany (PR.groupByRolling (\_ _ -> b) Fold.drain) - -{-# INLINE parseManyGroupsRollingEither #-} -parseManyGroupsRollingEither :: Monad m => - (Int -> Int -> Bool) -> Int -> m () -parseManyGroupsRollingEither cmp value = do - sourceUnfoldrM value 1 - & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) - & Stream.fold Fold.drain - -{-# INLINE parseManyGroupsRollingEitherAlt #-} -parseManyGroupsRollingEitherAlt :: Monad m => - (Int -> Int -> Bool) -> Int -> m () -parseManyGroupsRollingEitherAlt cmp value = do - sourceUnfoldrM value 1 - -- Make the input unsorted. - & fmap (\x -> if even x then x + 2 else x) - & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) - & Stream.fold Fold.drain - -{-# INLINE parseIterate #-} -parseIterate :: Monad m => Int -> Stream m Int -> m () -parseIterate n = - Stream.fold Fold.drain - . fmap getSum - . Stream.catRights - . Stream.parseIterate - (PR.fromFold . Fold.take n . Fold.sconcat) - (Sum 0) - . fmap Sum - -{-# INLINE concatSequence #-} -concatSequence :: Monad m => Stream m Int -> m (Either ParseError ()) -concatSequence = - Stream.parse $ PR.sequence (Stream.repeat PR.one) Fold.drain - -------------------------------------------------------------------------------- --- Benchmarks -------------------------------------------------------------------------------- - -instance NFData ParseError where - {-# INLINE rnf #-} - rnf (ParseError x) = rnf x - -benchmarks :: Int -> BenchEnv -> [Array.Array Int] -> [(SpaceComplexity, Benchmark)] -benchmarks value env arrays = - [ (SpaceO_1, benchIOSink value "takeBetween" $ takeBetween value) - , (SpaceO_1, benchIOSink value "takeWhile" $ takeWhile value) - , (SpaceO_1, benchIOSink value "takeWhileP" $ takeWhileP value) - , (SpaceO_1, benchIOSink value "takeP" $ takeP value) - , (SpaceO_1, benchIOSink value "dropWhile" $ dropWhile value) - , (SpaceO_1, benchIOSink value "takeBeginBy" $ takeBeginBy value) - , (SpaceO_1, benchIOSink value 
"takeEndBy_" $ takeEndBy_ value) - , (SpaceO_1, benchIOSink value "groupBy" $ groupBy) - , (SpaceO_1, benchIOSink value "groupByRolling" $ groupByRolling) - , (SpaceO_1, benchIOSink value "wordBy" $ wordBy value) - , (SpaceO_1, benchIOSink value "sepBy (words)" sepByWords) - , (SpaceO_1, benchIOSink value "sepByAll (words)" sepByAllWords) - , (SpaceO_1, benchIOSink value "sepBy1 (words)" sepByWords1) - , (SpaceO_1, benchIOSink value "deintercalate" $ deintercalate value) - , (SpaceO_1, benchIOSink value "deintercalate1" $ deintercalate1 value) - , (SpaceO_1, benchIOSink value "deintercalateAll" $ deintercalateAll value) - -- Applicative and Monad - , (SpaceO_1, benchIOSink value "splitAp2" $ splitAp2 value) - , (SpaceO_1, benchIOSink value "splitAp4" $ splitAp4 value) - , (SpaceO_1, benchIOSink value "splitAp8" $ splitAp8 value) - , (SpaceO_1, benchIOSink value "splitApBefore" $ splitApBefore value) - , (SpaceO_1, benchIOSink value "splitApAfter" $ splitApAfter value) - , (SpaceO_1, benchIOSink value "splitWith2" $ splitWith2 value) - , (SpaceO_1, benchIOSink value "span" $ span value) - , (SpaceO_1, benchIOSink value "spanBy" $ spanBy value) - , (SpaceO_1, benchIOSink value "spanByRolling" $ spanByRolling value) - , (SpaceO_1, benchIOSink value "monad2" $ monad value) - , (SpaceO_1, benchIOSink value "monad4" $ monad4 value) - , (SpaceO_1, benchIOSink value "monad8" $ monad8 value) - -- Alternative - , (SpaceO_1, benchIOSink value "alt2parseMany" $ altSmall value) - , (SpaceO_1, benchIOSink value "alt2" $ alt2 value) - , (SpaceO_1, benchIOSink value "alt4" $ alt4 value) - , (SpaceO_1, benchIOSink value "alt8" $ alt8 value) - , (SpaceO_1, benchIOSink value "alt16" $ alt16 value) - , (SpaceO_1, benchIOSink value "many" many) - , (SpaceO_1, benchIOSink value "many (wordBy even)" $ manyWordByEven) - , (SpaceO_1, benchIOSink value "some" some) - , (SpaceO_1, benchIOSink value "manyTill" $ manyTill value) - , (SpaceO_1, benchIOSink value "parseMany" $ parseMany value) - 
, (SpaceO_1, benchIOSink value "parseMany (take 1)" (parseMany 1)) - , (SpaceO_1, benchIOSink value "parseMany (take all)" (parseMany value)) - , (SpaceO_1, benchIOSink value "parseMany (groupBy (<))" (parseManyGroupBy (<))) - , (SpaceO_1, benchIOSink value "parseMany (groupBy (==))" (parseManyGroupBy (==))) - , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (bound groups)" - $ parseManyGroupsRolling False) - , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (1 group)" - $ parseManyGroupsRolling True) - , (SpaceO_1, bench "parseMany groupRollingByEither (Left)" - $ nfIO $ parseManyGroupsRollingEitherLeft) - , (SpaceO_1, bench "parseMany groupRollingByEither (Right)" - $ nfIO $ parseManyGroupsRollingEitherRight) - , (SpaceO_1, bench "parseMany groupRollingByEither (Alternating)" - $ nfIO $ parseManyGroupsRollingEitherAlt1) - , (SpaceO_1, benchIOSink value "parseIterate (take 1)" (parseIterate 1)) - , (SpaceO_1, benchIOSink value "parseIterate (take all)" (parseIterate value)) - , (SpaceO_1, benchIOSink value "concatSequence" concatSequence) - {- - , benchIOSink value "tee" $ teeAllAny value - , benchIOSink value "teeFst" $ teeFstAllAny value - , benchIOSink value "shortest" $ shortestAllAny value - , benchIOSink value "longest" $ longestAllAny value - -} - , (SpaceO_1, benchIOSink value "streamEqBy" (streamEqBy value)) - , (SpaceO_1, mkBench ("parseMany (Fold.take " ++ show (bigSize env) ++ " Fold.sum)") env - $ \inh _ -> noinline parseManyChunksOfSum (bigSize env) inh) - , (SpaceO_1, mkBench "parseMany (Fold.take 1 Fold.sum)" env - $ \inh _ -> inline parseManyChunksOfSum 1 inh) - , (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take all" - $ nfIO $ parseManyUnfoldArrays value arrays) - , (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take 1" - $ nfIO $ parseManyUnfoldArrays 1 arrays) - , (HeapO_n, benchIOSink value "takeEQ" $ takeEQ value) - , (HeapO_n, benchIOSink value "takeGE" $ takeGE value) - - -- lookahead benchmark holds the entire input till 
end - , (HeapO_n, benchIOSink value "lookAhead" $ lookAhead value) - - -- o-n-heap because of backtracking - , (HeapO_n, benchIOSrc sourceEscapedFrames value "takeFramedByEsc_" - $ takeFramedByEsc_ value) - - -- non-linear time complexity (parserD) - , (HeapO_n, benchIOSink value "split_" $ split_ value) - -- XXX Takes lot of space when run on a long stream, why? - , (HeapO_n, benchIOSink value "monad16" $ monad16 value) - - -- These show non-linear time complexity. - -- They accumulate the results in a list. - , (HeapO_n, benchIOSink value "sepBy1" sepBy1) - , (HeapO_n, benchIOSink value "manyAlt" manyAlt) - , (HeapO_n, benchIOSink value "someAlt" someAlt) - , (HeapO_n, benchIOSink value "listEqBy" (listEqBy value)) - , (SpaceO_n, benchIOSink value "sequenceA/100" $ sequenceA (value `div` 100)) - , (SpaceO_n, benchIOSink value "sequenceA_/100" $ sequenceA_ (value `div` 100)) - , (SpaceO_n, benchIOSink value "sequence/100" $ sequence (value `div` 100)) - , (SpaceO_n, benchIOSink value "sequence_/100" $ sequence_ (value `div` 100)) - , (SpaceO_n, benchIOSink value "choice (asum)/100" $ choiceAsum (value `div` 100)) - -- , benchIOSink value "choice/100" $ choice (value `div` 100) - ] - where - - {-# NOINLINE parseManyGroupsRollingEitherLeft #-} - parseManyGroupsRollingEitherLeft = parseManyGroupsRollingEither (<) value - - {-# NOINLINE parseManyGroupsRollingEitherRight #-} - parseManyGroupsRollingEitherRight = parseManyGroupsRollingEither (>) value - - {-# NOINLINE parseManyGroupsRollingEitherAlt1 #-} - parseManyGroupsRollingEitherAlt1 = - parseManyGroupsRollingEitherAlt (>) value + alloc value = Stream.fold Fold.toList $ Array.chunksOf 100 $ streamUnfoldrM value 0 + + allBenchmarks env arrays value = + let allBenches = benchmarkList value env arrays + get x = map snd $ filter ((==) x . 
fst) allBenches + o_1_space = get SpaceO_1 + o_n_heap = get HeapO_n + o_n_space = get SpaceO_n + in + [ bgroup (o_1_space_prefix moduleName) o_1_space + , bgroup (o_n_heap_prefix moduleName) o_n_heap + , bgroup (o_n_space_prefix moduleName) o_n_space + ] +#else + -- Enable FUSION_CHECK macro at the beginning of the file + -- Enable one benchmark below, and run the benchmark + -- Check the .dump-simpl output + let value = 100000 + -- let input = streamUnfoldrM value 1 + -- manyTill value input + -- deintercalate value input + -- deintercalate1 value input + -- deintercalateAll value input + -- sepByWords input + -- sepByAllWords input + -- sepBy1 input + -- sepByWords1 input + takeFramedByEsc_ value (sourceEscapedFrames value 1) + return () +#endif diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Alternative.hs b/benchmark/Streamly/Benchmark/Data/Parser/Alternative.hs new file mode 100644 index 0000000000..0f930308b8 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Alternative.hs @@ -0,0 +1,264 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Alternative +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Alternative + ( + benchmarks + ) where + +import Control.Applicative ((<|>)) +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Fold (Fold(..)) +import Streamly.Internal.Data.Parser + (ParseError(..), Parser(..), Initial(..), Step(..), Final(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Control.Applicative as AP +import qualified Data.Foldable as F +import qualified Streamly.Internal.Data.Fold as Fold +import 
qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common + +------------------------------------------------------------------------------- +-- Parsers +------------------------------------------------------------------------------- + +{-# INLINE manyWordByEven #-} +manyWordByEven :: Monad m => Stream m Int -> m (Either ParseError ()) +manyWordByEven = Stream.parse (PR.many (PR.wordBy even Fold.drain) Fold.drain) + +{-# INLINE many #-} +many :: Monad m => Stream m Int -> m (Either ParseError Int) +many = Stream.parse (PR.many (PR.satisfy (> 0)) Fold.length) + +{-# INLINE some #-} +some :: Monad m => Stream m Int -> m (Either ParseError Int) +some = Stream.parse (PR.some (PR.satisfy (> 0)) Fold.length) + +{-# INLINE manyAlt #-} +manyAlt :: Monad m => Stream m Int -> m Int +manyAlt xs = do + x <- Stream.parse (AP.many (PR.satisfy (> 0))) xs + return $ Prelude.length x + +{-# INLINE someAlt #-} +someAlt :: Monad m => Stream m Int -> m Int +someAlt xs = do + x <- Stream.parse (AP.some (PR.satisfy (> 0))) xs + return $ Prelude.length x + +-- XXX dropWhile with applicative does not fuse +-- PR.dropWhile (<= (value * 1 `div` 4)) *> PR.die "alt" +{-# INLINE takeWhileFail #-} +takeWhileFail :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeWhileFail predicate (Fold fstep finitial _ ffinal) = + Parser step initial extract + + where + + initial = do + res <- finitial + return $ case res of + Fold.Partial s -> IPartial s + Fold.Done b -> IDone b + + step s a = + if predicate a + then do + fres <- fstep s a + return + $ case fres of + Fold.Partial s1 -> SPartial 1 s1 + Fold.Done b -> SDone 1 b + else return $ SError "fail" + + extract s = fmap (FDone 0) (ffinal s) + +{-# INLINE alt2 #-} +alt2 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt2 value = + Stream.parse + (PR.alt + (takeWhileFail (<= (value `div` 2)) Fold.drain) + 
(PR.dropWhile (<= value)) + ) + +{- HLINT ignore "Evaluate"-} +{-# INLINE alt4 #-} +alt4 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt4 value = + Stream.parse + ( takeWhileFail (<= (value * 1 `div` 4)) Fold.drain + <|> takeWhileFail (<= (value * 2 `div` 4)) Fold.drain + <|> takeWhileFail (<= (value * 3 `div` 4)) Fold.drain + <|> PR.dropWhile (<= value) + ) + +{-# INLINE alt8 #-} +alt8 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt8 value = + Stream.parse + ( takeWhileFail (<= (value * 1 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 2 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 3 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 4 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 5 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 6 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 7 `div` 8)) Fold.drain + <|> PR.dropWhile (<= value) + ) + +{-# INLINE alt16 #-} +alt16 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt16 value = + Stream.parse + ( takeWhileFail (<= (value * 1 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 2 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 3 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 4 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 5 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 6 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 8 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 9 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 10 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 11 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 12 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 13 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 14 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 15 `div` 16)) Fold.drain + <|> PR.dropWhile (<= value) + ) + +{-# INLINE altSmall #-} +altSmall :: Monad m + => Int -> Stream m Int -> m () +altSmall value = + 
Stream.fold Fold.drain . + Stream.parseMany + (PR.alt + (PR.satisfy (>= value) *> PR.die "alt") + (PR.satisfy (<= value)) + ) + +{- +{-# INLINE teeAllAny #-} +teeAllAny :: Monad m + => Int -> Stream m Int -> m ((), ()) +teeAllAny value = + Stream.parse + (PR.teeWith (,) + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) + +{-# INLINE teeFstAllAny #-} +teeFstAllAny :: Monad m + => Int -> Stream m Int -> m ((), ()) +teeFstAllAny value = + Stream.parse + (PR.teeWithFst (,) + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) + +{-# INLINE shortestAllAny #-} +shortestAllAny :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +shortestAllAny value = + Stream.parse + (PR.shortest + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) + +{-# INLINE longestAllAny #-} +longestAllAny :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +longestAllAny value = + Stream.parse + (PR.longest + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) +-} + +------------------------------------------------------------------------------- +-- Choice +------------------------------------------------------------------------------- + +-- choice using the "Alternative" instance with direct style parser type has +-- quadratic performance complexity. 
+-- +{-# INLINE choiceAsum #-} +choiceAsum :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) +choiceAsum value = + Stream.parse (F.asum (replicate value (PR.satisfy (< 0))) + AP.<|> PR.satisfy (> 0)) + +{- +{-# INLINE choice #-} +choice :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) +choice value = + Stream.parse + (PR.choice (replicate value (PR.satisfy (< 0))) AP.<|> PR.satisfy (> 0)) +-} + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Alternative + (SpaceO_1, benchIOSink value "alt2parseMany" $ altSmall value) + , (SpaceO_1, benchIOSink value "alt2" $ alt2 value) + , (SpaceO_1, benchIOSink value "alt4" $ alt4 value) + , (SpaceO_1, benchIOSink value "alt8" $ alt8 value) + , (SpaceO_1, benchIOSink value "alt16" $ alt16 value) + + -- O_n as they accumulate the results in a list. + , (HeapO_n, benchIOSink value "manyAlt" manyAlt) + , (HeapO_n, benchIOSink value "someAlt" someAlt) + , (SpaceO_n, benchIOSink value "choice (asum)/100" $ choiceAsum (value `div` 100)) + -- , benchIOSink value "choice/100" $ choice (value `div` 100) + + -- Sequential Repetition + -- XXX requires @-fspec-constr-recursive=12@. 
+ , (SpaceO_1, benchIOSink value "many" many) + , (SpaceO_1, benchIOSink value "many (wordBy even)" manyWordByEven) + , (SpaceO_1, benchIOSink value "some" some) + + {- + , benchIOSink value "tee" $ teeAllAny value + , benchIOSink value "teeFst" $ teeFstAllAny value + , benchIOSink value "shortest" $ shortestAllAny value + , benchIOSink value "longest" $ longestAllAny value + -} + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Applicative.hs b/benchmark/Streamly/Benchmark/Data/Parser/Applicative.hs new file mode 100644 index 0000000000..360f016814 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Applicative.hs @@ -0,0 +1,187 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Applicative +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Applicative + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Data.Foldable as F +import qualified Data.Traversable as TR +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common +import Prelude hiding (sequence, sequence_, sequenceA) + +{-# INLINE splitAp2 #-} +splitAp2 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ((), ())) +splitAp2 value = + Stream.parse + ((,) + <$> PR.dropWhile (<= (value `div` 2)) + <*> PR.dropWhile (<= value) + ) + +{- HLINT ignore "Evaluate"-} +{-# INLINE splitAp4 #-} 
+splitAp4 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitAp4 value = + Stream.parse + ( (\() () () () -> ()) + <$> PR.dropWhile (<= (value * 1 `div` 4)) + <*> PR.dropWhile (<= (value * 2 `div` 4)) + <*> PR.dropWhile (<= (value * 3 `div` 4)) + <*> PR.dropWhile (<= value) + ) + +{-# INLINE splitAp8 #-} +splitAp8 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitAp8 value = + Stream.parse + ( (\() () () () () () () () -> ()) + <$> PR.dropWhile (<= (value * 1 `div` 8)) + <*> PR.dropWhile (<= (value * 2 `div` 8)) + <*> PR.dropWhile (<= (value * 3 `div` 8)) + <*> PR.dropWhile (<= (value * 4 `div` 8)) + <*> PR.dropWhile (<= (value * 5 `div` 8)) + <*> PR.dropWhile (<= (value * 6 `div` 8)) + <*> PR.dropWhile (<= (value * 7 `div` 8)) + <*> PR.dropWhile (<= value) + ) + +{-# INLINE splitApBefore #-} +splitApBefore :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitApBefore value = + Stream.parse + ( PR.dropWhile (<= (value `div` 2)) + *> PR.dropWhile (<= value) + ) + +{-# INLINE splitApAfter #-} +splitApAfter :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitApAfter value = + Stream.parse + ( PR.dropWhile (<= (value `div` 2)) + <* PR.dropWhile (<= value) + ) + +{-# INLINE splitWith2 #-} +splitWith2 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ((), ())) +splitWith2 value = + Stream.parse + (PR.splitWith (,) + (PR.dropWhile (<= (value `div` 2))) + (PR.dropWhile (<= value)) + ) + +{-# INLINE split_ #-} +split_ :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +split_ value = + Stream.parse + (PR.split_ + (PR.dropWhile (<= (value `div` 2))) + (PR.dropWhile (<= value)) + ) + +------------------------------------------------------------------------------- +-- +------------------------------------------------------------------------------- + +-- XXX The timing of this increased 3x after the stepify extract changes. 
+{-# INLINE sequenceA_ #-} +sequenceA_ :: Monad m => Int -> Int -> m (Either ParseError ()) +sequenceA_ value = +{- HLINT ignore "Use replicateM_"-} + Stream.parse (F.sequenceA_ $ replicate value (PR.satisfy (> 0))) + . streamUnfoldrM value + +-- quadratic complexity +{-# INLINE sequenceA #-} +sequenceA :: Monad m => Int -> Int -> m Int +sequenceA value n = do + x <- Stream.parse (TR.sequenceA (replicate value (PR.satisfy (> 0)))) + (streamUnfoldrM value n) + return $ length x + +-- quadratic complexity +{-# INLINE sequence #-} +sequence :: Monad m => Int -> Int -> m Int +sequence value n = do + x <- Stream.parse (TR.sequence (replicate value (PR.satisfy (> 0)))) + (streamUnfoldrM value n) + return $ length x + +{-# INLINE sequence_ #-} +sequence_ :: Monad m => Int -> Int -> m (Either ParseError ()) +sequence_ value n = + Stream.parse (foldr f (return ()) (replicate value (PR.takeBetween 0 1 Fold.drain))) + (streamUnfoldrM value n) + + where + + {-# INLINE f #-} + f m k = m >> k + +{-# INLINE concatSequence #-} +concatSequence :: Monad m => Stream m Int -> m (Either ParseError ()) +concatSequence = + Stream.parse $ PR.sequence (Stream.repeat PR.one) Fold.drain + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Applicative + (SpaceO_1, benchIOSink value "splitAp2" $ splitAp2 value) + , (SpaceO_1, benchIOSink value "splitAp4" $ splitAp4 value) + , (SpaceO_1, benchIOSink value "splitAp8" $ splitAp8 value) + , (SpaceO_1, benchIOSink value "splitApBefore" $ splitApBefore value) + , (SpaceO_1, benchIOSink value "splitApAfter" $ splitApAfter value) + , (SpaceO_1, benchIOSink value "splitWith2" $ splitWith2 value) + -- non-linear time complexity (parserD) + , (HeapO_n, benchIOSink 
value "split_" $ split_ value) + + -- Sequential Collection + -- Accumulate the results in a list. + , (SpaceO_n, benchIOSink1 "sequenceA/100" $ sequenceA (value `div` 100)) + , (SpaceO_n, benchIOSink1 "sequenceA_/100" $ sequenceA_ (value `div` 100)) + , (SpaceO_n, benchIOSink1 "sequence/100" $ sequence (value `div` 100)) + , (SpaceO_n, benchIOSink1 "sequence_/100" $ sequence_ (value `div` 100)) + , (SpaceO_1, benchIOSink value "concatSequence" concatSequence) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Common.hs b/benchmark/Streamly/Benchmark/Data/Parser/Common.hs new file mode 100644 index 0000000000..de6e91c64b --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Common.hs @@ -0,0 +1,45 @@ +-- | +-- Module : Streamly.Benchmark.Data.Parser.Common +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Common + ( + benchIOSink + , benchIOSrc + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Stream (Stream) +import System.Random (randomRIO) + +import Test.Tasty.Bench +import Streamly.Benchmark.Common + +-- XXX use benchIOSink1 instead, this is to be removed +-- | Takes a fold method, and uses it with a default source. +{-# INLINE benchIOSink #-} +benchIOSink + :: NFData b + => Int -> String -> (Stream IO Int -> IO b) -> Benchmark +benchIOSink value name f = + bench name $ nfIO $ randomRIO (1,1) >>= f . streamUnfoldrM value + +-- XXX use benchIOSrc1 instead, this is to be removed +{-# INLINE benchIOSrc #-} +benchIOSrc + :: NFData b + => (Int -> Int -> Stream IO a) + -> Int + -> String + -> (Stream IO a -> IO b) + -> Benchmark +benchIOSrc src value name f = + bench name $ nfIO $ randomRIO (1,1) >>= f . 
src value diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Groups.hs b/benchmark/Streamly/Benchmark/Data/Parser/Groups.hs new file mode 100644 index 0000000000..cb85971476 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Groups.hs @@ -0,0 +1,225 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Groups +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Groups + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Functor (($>)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Internal.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common +import Prelude hiding (takeWhile, dropWhile, span) + +------------------------------------------------------------------------------- +-- Utilities +------------------------------------------------------------------------------- + +-- | Generates something like this: { { \{ \{ } }. The stream consists of +-- three parts, the first part is contains a sequence of `{`. The second part +-- contains a sequence pf escaped values `\{`. The third part contains a +-- sequence of `}`. 
+{-# INLINE sourceEscapedFrames #-} +sourceEscapedFrames :: + Monad m + => Int + -> Int + -> Stream m Char +sourceEscapedFrames value = Stream.unfoldrM step + + where + + bs = '\\' + cbOpen = '{' + cbClose = '}' + value1 = value `div` 4 + + step cnt + | cnt > 4 * value1 = return Nothing + | cnt <= value1 = return $ Just (cbOpen, cnt + 1) + | cnt > 3 * value1 = return $ Just (cbClose, cnt + 1) + | otherwise = + return + $ Just + $ if (cnt - value1) `mod` 2 == 1 + then (bs, cnt + 1) + else (cbOpen, cnt + 1) + +------------------------------------------------------------------------------- +-- Parsers +------------------------------------------------------------------------------- + +{-# INLINE takeBetween #-} +takeBetween :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeBetween value = Stream.parse (PR.takeBetween 0 value Fold.drain) + +{-# INLINE takeEQ #-} +takeEQ :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeEQ value = Stream.parse (PR.takeEQ value Fold.drain) + +{-# INLINE takeGE #-} +takeGE :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeGE value = Stream.parse (PR.takeGE value Fold.drain) + +{-# INLINE dropWhile #-} +dropWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +dropWhile value = Stream.parse (PR.dropWhile (<= value)) + +{-# INLINE takeBeginBy #-} +takeBeginBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +takeBeginBy value stream = do + let stream2 = value `Stream.cons` stream + Stream.parse (PR.takeBeginBy (== value) Fold.drain) stream2 + +takeFramedByEsc_ :: Monad m => Int -> Stream m Char -> m (Either ParseError ()) +takeFramedByEsc_ _ = Stream.parse parser + + where + + isEsc = (== '\\') + isBegin = (== '{') + isEnd = (== '}') + + parser = PR.takeFramedByEsc_ isEsc isBegin isEnd Fold.drain + +{-# INLINE listEqBy #-} +listEqBy :: Int -> Stream IO Int -> IO (Either ParseError [Int]) +listEqBy len = Stream.parse (PR.listEqBy (==) [1 .. 
len]) + +{-# INLINE streamEqBy #-} +streamEqBy :: Int -> Stream IO Int -> IO (Either ParseError ()) +streamEqBy len = Stream.parse (PR.streamEqBy (==) (Stream.enumerateFromTo 1 len)) + +{-# INLINE takeWhile #-} +takeWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +takeWhile value = Stream.parse (PR.takeWhile (<= value) Fold.drain) + +takeWhileP :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +takeWhileP value = + Stream.parse (PR.takeWhileP (<= value) (PR.takeWhile (<= value - 1) Fold.drain)) + +{-# INLINE takeP #-} +takeP :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeP value = Stream.parse (PR.takeP value (PR.fromFold Fold.drain)) + +{-# INLINE groupBy #-} +groupBy :: Monad m => Stream m Int -> m (Either ParseError ()) +groupBy = Stream.parse (PR.groupBy (<=) Fold.drain) + +{-# INLINE groupByRolling #-} +groupByRolling :: Monad m => Stream m Int -> m (Either ParseError ()) +groupByRolling = Stream.parse (PR.groupByRolling (<=) Fold.drain) + +{-# INLINE wordBy #-} +wordBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +wordBy value = Stream.parse (PR.wordBy (>= value) Fold.drain) + +{-# INLINE takeEndBy_ #-} +takeEndBy_ :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +takeEndBy_ value = Stream.parse (PR.takeEndBy_ (>= value) (PR.fromFold Fold.drain)) + +------------------------------------------------------------------------------- +-- Spanning +------------------------------------------------------------------------------- + +{-# INLINE span #-} +span :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) +span value = Stream.parse (PR.span (<= (value `div` 2)) Fold.drain Fold.drain) + +{-# INLINE spanBy #-} +spanBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) +spanBy value = + Stream.parse (PR.spanBy (\_ i -> i <= (value `div` 2)) Fold.drain Fold.drain) + +{-# INLINE spanByRolling #-} +spanByRolling :: Monad m => Int -> Stream m Int -> m (Either 
ParseError ((), ())) +spanByRolling value = + Stream.parse (PR.spanByRolling (\_ i -> i <= value `div` 2) Fold.drain Fold.drain) + +------------------------------------------------------------------------------- +-- +------------------------------------------------------------------------------- + +{-# INLINE lookAhead #-} +lookAhead :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +lookAhead value = + Stream.parse (PR.lookAhead (PR.takeWhile (<= value) Fold.drain) $> ()) + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- lookahead sequence length + -- lookahead benchmark holds the entire input till end + (HeapO_n, benchIOSink value "lookAhead" $ lookAhead value) + + -- take sequence by length + , (SpaceO_1, benchIOSink value "takeBetween" $ takeBetween value) + -- XXX requires @-fspec-constr-recursive=12@. + , (HeapO_n, benchIOSink value "takeEQ" $ takeEQ value) + , (HeapO_n, benchIOSink value "takeGE" $ takeGE value) + , (SpaceO_1, benchIOSink value "takeP" $ takeP value) + + -- Match exact sequence + -- O_n because of the list accumulation + , (HeapO_n, benchIOSink value "listEqBy" (listEqBy value)) + , (SpaceO_1, benchIOSink value "streamEqBy" (streamEqBy value)) + + -- sequence matching a predicate + , (SpaceO_1, benchIOSink value "takeWhile" $ takeWhile value) + -- XXX requires @-fspec-constr-recursive=12@. 
+ , (SpaceO_1, benchIOSink value "takeWhileP" $ takeWhileP value) + , (SpaceO_1, benchIOSink value "dropWhile" $ dropWhile value) + + -- sequence begin/end by known elements + , (SpaceO_1, benchIOSink value "takeEndBy_" $ takeEndBy_ value) + , (SpaceO_1, benchIOSink value "takeBeginBy" $ takeBeginBy value) + -- XXX requires @-fspec-constr-recursive=12@. + , (SpaceO_1, benchIOSink value "wordBy" $ wordBy value) + + -- Group sequence by + , (SpaceO_1, benchIOSink value "groupBy" groupBy) + -- XXX requires @-fspec-constr-recursive=12@. + , (SpaceO_1, benchIOSink value "groupByRolling" groupByRolling) + + -- Framing + -- o-n-heap because of backtracking + , (HeapO_n, benchIOSrc sourceEscapedFrames value "takeFramedByEsc_" + $ takeFramedByEsc_ value) + + -- Spanning + , (SpaceO_1, benchIOSink value "span" $ span value) + , (SpaceO_1, benchIOSink value "spanBy" $ spanBy value) + , (SpaceO_1, benchIOSink value "spanByRolling" $ spanByRolling value) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Interleave.hs b/benchmark/Streamly/Benchmark/Data/Parser/Interleave.hs new file mode 100644 index 0000000000..583a539fc8 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Interleave.hs @@ -0,0 +1,120 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Interleave +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Interleave + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Streamly.Internal.Data.Fold as Fold +import qualified 
Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common + +------------------------------------------------------------------------------- +-- Parsers +------------------------------------------------------------------------------- + +{-# INLINE sepByWords #-} +sepByWords :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByWords = Stream.parse (wrds even Fold.drain) + where + wrds p = PR.sepBy (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) + +{-# INLINE sepByAllWords #-} +sepByAllWords :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByAllWords = Stream.parse (wrds even Fold.drain) + where + wrds p = PR.sepByAll (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) + +-- Returning a list to compare with the sepBy1 in ParserK +{-# INLINE sepBy1 #-} +sepBy1 :: Monad m => Stream m Int -> m (Either ParseError [Int]) +sepBy1 xs = do + Stream.parse (PR.sepBy1 (PR.satisfy odd) (PR.satisfy even) Fold.toList) xs + +{-# INLINE sepByWords1 #-} +sepByWords1 :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByWords1 = Stream.parse (wrds even Fold.drain) + where + wrds p = PR.sepBy1 (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) + +{-# INLINE deintercalate #-} +deintercalate :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +deintercalate _ = Stream.parse (partition even) + + where + + partition p = + PR.deintercalate + (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain + +{-# INLINE deintercalate1 #-} +deintercalate1 :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +deintercalate1 _ = Stream.parse (partition even) + + where + + partition p = + PR.deintercalate1 + (PR.takeWhile (not . 
p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain + +{-# INLINE deintercalateAll #-} +deintercalateAll :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +deintercalateAll _ = Stream.parse (partition even) + + where + + partition p = + PR.deintercalateAll + (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain + +{-# INLINE manyTill #-} +manyTill :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) +manyTill value = + Stream.parse (PR.manyTill (PR.satisfy (> 0)) (PR.satisfy (== value)) Fold.length) + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Interleaved Repetition + (SpaceO_1, benchIOSink value "deintercalate" $ deintercalate value) + , (SpaceO_1, benchIOSink value "deintercalate1" $ deintercalate1 value) + , (SpaceO_1, benchIOSink value "deintercalateAll" $ deintercalateAll value) + + -- Accumulates the results in a list. 
+ , (HeapO_n, benchIOSink value "sepBy1" sepBy1) + , (SpaceO_1, benchIOSink value "sepBy1 (words)" sepByWords1) + , (SpaceO_1, benchIOSink value "sepBy (words)" sepByWords) + , (SpaceO_1, benchIOSink value "sepByAll (words)" sepByAllWords) + , (SpaceO_1, benchIOSink value "manyTill" $ manyTill value) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Monad.hs b/benchmark/Streamly/Benchmark/Data/Parser/Monad.hs new file mode 100644 index 0000000000..4de684e3ca --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Monad.hs @@ -0,0 +1,124 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Monad +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Monad + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Monoid (Sum(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Internal.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common + +{-# INLINE monad #-} +monad :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad value = + Stream.parse + $ do + PR.dropWhile (<= (value `div` 2)) + PR.dropWhile (<= value) + +{-# INLINE monad4 #-} +monad4 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad4 value = + Stream.parse $ do + PR.dropWhile (<= (value `div` 4)) + PR.dropWhile (<= (value `div` 2)) + PR.dropWhile (<= (value * 3 `div` 4)) + PR.dropWhile (<= value) + +{- HLINT 
ignore "Evaluate"-} +{-# INLINE monad8 #-} +monad8 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad8 value = + Stream.parse $ do + PR.dropWhile (<= (value * 1 `div` 8)) + PR.dropWhile (<= (value * 2 `div` 8)) + PR.dropWhile (<= (value * 3 `div` 8)) + PR.dropWhile (<= (value * 4 `div` 8)) + PR.dropWhile (<= (value * 5 `div` 8)) + PR.dropWhile (<= (value * 6 `div` 8)) + PR.dropWhile (<= (value * 7 `div` 8)) + PR.dropWhile (<= value) + +{-# INLINE monad16 #-} +monad16 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad16 value = + Stream.parse $ do + PR.dropWhile (<= (value * 1 `div` 16)) + PR.dropWhile (<= (value * 2 `div` 16)) + PR.dropWhile (<= (value * 3 `div` 16)) + PR.dropWhile (<= (value * 4 `div` 16)) + PR.dropWhile (<= (value * 5 `div` 16)) + PR.dropWhile (<= (value * 6 `div` 16)) + PR.dropWhile (<= (value * 7 `div` 16)) + PR.dropWhile (<= (value * 8 `div` 16)) + PR.dropWhile (<= (value * 9 `div` 16)) + PR.dropWhile (<= (value * 10 `div` 16)) + PR.dropWhile (<= (value * 11 `div` 16)) + PR.dropWhile (<= (value * 12 `div` 16)) + PR.dropWhile (<= (value * 13 `div` 16)) + PR.dropWhile (<= (value * 14 `div` 16)) + PR.dropWhile (<= (value * 15 `div` 16)) + PR.dropWhile (<= value) + +{-# INLINE parseIterate #-} +parseIterate :: Monad m => Int -> Stream m Int -> m () +parseIterate n = + Stream.fold Fold.drain + . fmap getSum + . Stream.catRights + . Stream.parseIterate + (PR.fromFold . Fold.take n . Fold.sconcat) + (Sum 0) + . 
fmap Sum + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Monad + (SpaceO_1, benchIOSink value "monad2" $ monad value) + , (SpaceO_1, benchIOSink value "monad4" $ monad4 value) + , (SpaceO_1, benchIOSink value "monad8" $ monad8 value) + -- XXX Takes lot of space when run on a long stream, why? + , (HeapO_n, benchIOSink value "monad16" $ monad16 value) + + -- parseIterate + , (SpaceO_1, benchIOSink value "parseIterate (take 1)" (parseIterate 1)) + , (SpaceO_1, benchIOSink value "parseIterate (take all)" (parseIterate value)) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Producer.hs b/benchmark/Streamly/Benchmark/Data/Parser/Producer.hs new file mode 100644 index 0000000000..739c275003 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Producer.hs @@ -0,0 +1,67 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Producer +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Producer + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream +import qualified Streamly.Internal.Data.Producer as Producer + +import Test.Tasty.Bench 
hiding (env) +import Streamly.Benchmark.Common + +------------------------------------------------------------------------------- +-- Parsing with unfolds +------------------------------------------------------------------------------- + +{-# INLINE parseManyUnfoldArrays #-} +parseManyUnfoldArrays :: Int -> [Array.Array Int] -> IO () +parseManyUnfoldArrays count arrays = do + let src = Producer.source (Just (Producer.OuterLoop arrays)) + let parser = PR.fromFold (Fold.take count Fold.drain) + let readSrc = + Producer.producer + $ Producer.concat Producer.fromList Array.producer + let streamParser = + Producer.simplify (Producer.parseMany parser readSrc) + Stream.fold Fold.drain $ Stream.unfold streamParser src + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [Array.Array Int] -> [(SpaceComplexity, Benchmark)] +benchmarks value arrays = + [ + -- parseMany Unfolds + (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take all" + $ nfIO $ parseManyUnfoldArrays value arrays) + , (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take 1" + $ nfIO $ parseManyUnfoldArrays 1 arrays) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs b/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs new file mode 100644 index 0000000000..8ccf107f47 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs @@ -0,0 +1,152 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- Compiling parseMany with higher values of spec-constr-recursive hogs a lot +-- of memory and takes too much time. Fusion plugin alleviates the problem +-- though. 
+{-# OPTIONS_GHC -fspec-constr-recursive=10 #-} + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Sequence +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Sequence + ( + benchmarks + , benchmarksFileIO + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Function ((&)) +import Data.Monoid (Sum(..)) +import GHC.Magic (inline) +import GHC.Magic (noinline) +import System.IO (Handle) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) + +import qualified Streamly.FileSystem.Handle as Handle +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Test.Tasty.Bench hiding (env) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle +import Streamly.Benchmark.Data.Parser.Common + +------------------------------------------------------------------------------- +-- Stream transformation +------------------------------------------------------------------------------- + +{-# INLINE parseMany #-} +parseMany :: Monad m => Int -> Stream m Int -> m () +parseMany n = + Stream.fold Fold.drain + . fmap getSum + . Stream.catRights . Stream.parseMany (PR.fromFold $ Fold.take n Fold.mconcat) + . fmap Sum + +{-# INLINE parseManyGroupBy #-} +parseManyGroupBy :: Monad m => (Int -> Int -> Bool) -> Stream m Int -> m () +parseManyGroupBy cmp = + Stream.fold Fold.drain . Stream.parseMany (PR.groupBy cmp Fold.drain) + +{-# INLINE parseManyGroupsRolling #-} +parseManyGroupsRolling :: Monad m => Bool -> Stream m Int -> m () +parseManyGroupsRolling b = + Stream.fold Fold.drain + . 
Stream.parseMany (PR.groupByRolling (\_ _ -> b) Fold.drain) + +{-# INLINE parseManyGroupsRollingEither #-} +parseManyGroupsRollingEither :: Monad m => + (Int -> Int -> Bool) -> Int -> m () +parseManyGroupsRollingEither cmp value = do + streamUnfoldrM value 1 + & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) + & Stream.fold Fold.drain + +{-# INLINE parseManyGroupsRollingEitherAlt #-} +parseManyGroupsRollingEitherAlt :: Monad m => + (Int -> Int -> Bool) -> Int -> m () +parseManyGroupsRollingEitherAlt cmp value = do + streamUnfoldrM value 1 + -- Make the input unsorted. + & fmap (\x -> if even x then x + 2 else x) + & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) + & Stream.fold Fold.drain + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- parseMany + (SpaceO_1, benchIOSink value "parseMany" $ parseMany value) + , (SpaceO_1, benchIOSink value "parseMany (take 1)" (parseMany 1)) + , (SpaceO_1, benchIOSink value "parseMany (take all)" (parseMany value)) + , (SpaceO_1, benchIOSink value "parseMany (groupBy (<))" (parseManyGroupBy (<))) + -- requires -fspec-constr-recursive=10 + , (SpaceO_1, benchIOSink value "parseMany (groupBy (==))" (parseManyGroupBy (==))) + -- requires -fspec-constr-recursive=10 + , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (bound groups)" + $ parseManyGroupsRolling False) + , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (1 group)" + $ parseManyGroupsRolling True) + , (SpaceO_1, bench "parseMany groupRollingByEither (Left)" + $ nfIO parseManyGroupsRollingEitherLeft) + , (SpaceO_1, bench "parseMany groupRollingByEither (Right)" + $ nfIO parseManyGroupsRollingEitherRight) + -- requires 
-fspec-constr-recursive=10 + , (SpaceO_1, bench "parseMany groupRollingByEither (Alternating)" + $ nfIO parseManyGroupsRollingEitherAlt1) + ] + + where + + {-# NOINLINE parseManyGroupsRollingEitherLeft #-} + parseManyGroupsRollingEitherLeft = parseManyGroupsRollingEither (<) value + + {-# NOINLINE parseManyGroupsRollingEitherRight #-} + parseManyGroupsRollingEitherRight = parseManyGroupsRollingEither (>) value + + {-# NOINLINE parseManyGroupsRollingEitherAlt1 #-} + parseManyGroupsRollingEitherAlt1 = + parseManyGroupsRollingEitherAlt (>) value + +------------------------------------------------------------------------------- +-- parseMany with FileIO +------------------------------------------------------------------------------- + +parseManyChunksOfSum :: Int -> Handle -> IO Int +parseManyChunksOfSum n inh = + Stream.fold Fold.length + $ Stream.parseMany + (PR.fromFold $ Fold.take n Fold.sum) + (Stream.unfold Handle.reader inh) + +benchmarksFileIO :: BenchEnv -> [(SpaceComplexity, Benchmark)] +benchmarksFileIO env = + [ + -- parseMany with file IO + (SpaceO_1, mkBench ("parseMany (Fold.take " ++ show (bigSize env) ++ " Fold.sum)") env + $ \inh _ -> noinline parseManyChunksOfSum (bigSize env) inh) + , (SpaceO_1, mkBench "parseMany (Fold.take 1 Fold.sum)" env + $ \inh _ -> inline parseManyChunksOfSum 1 inh) + ] diff --git a/benchmark/Streamly/Benchmark/Data/ParserMain.hs b/benchmark/Streamly/Benchmark/Data/ParserMain.hs deleted file mode 100644 index 611be9667f..0000000000 --- a/benchmark/Streamly/Benchmark/Data/ParserMain.hs +++ /dev/null @@ -1,79 +0,0 @@ -#undef FUSION_CHECK -#ifdef FUSION_CHECK -{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} -#endif - --- | --- Module : Streamly.Benchmark.Data.Parser --- Copyright : (c) 2020 Composewell Technologies --- --- License : BSD-3-Clause --- Maintainer : streamly@composewell.com - -{-# LANGUAGE CPP #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# OPTIONS_GHC -Wno-orphans 
#-} - -module Main - ( - main - ) where - -import Prelude hiding - (any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile, span) - -import qualified Streamly.Internal.Data.Array as Array -import qualified Streamly.Internal.Data.Fold as Fold -import qualified Streamly.Data.Stream as Stream - -import Test.Tasty.Bench hiding (env) -import Streamly.Benchmark.Common -import Streamly.Benchmark.Common.Handle -import Streamly.Benchmark.Data.Parser - -moduleName :: String -moduleName = "Data.Parser" - -------------------------------------------------------------------------------- --- Driver -------------------------------------------------------------------------------- - -main :: IO () -main = do -#ifndef FUSION_CHECK - env <- mkHandleBenchEnv - runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env) - - where - - alloc value = Stream.fold Fold.toList $ Array.chunksOf 100 $ sourceUnfoldrM value 0 - - allBenchmarks env arrays value = - let allBenches = benchmarks value env arrays - get x = map snd $ filter ((==) x . 
fst) allBenches - o_1_space = get SpaceO_1 - o_n_heap = get HeapO_n - o_n_space = get SpaceO_n - in - [ bgroup (o_1_space_prefix moduleName) o_1_space - , bgroup (o_n_heap_prefix moduleName) o_n_heap - , bgroup (o_n_space_prefix moduleName) o_n_space - ] -#else - -- Enable FUSION_CHECK macro at the beginning of the file - -- Enable one benchmark below, and run the benchmark - -- Check the .dump-simpl output - let value = 100000 - -- let input = sourceUnfoldrM value 1 - -- manyTill value input - -- deintercalate value input - -- deintercalate1 value input - -- deintercalateAll value input - -- sepByWords input - -- sepByAllWords input - -- sepBy1 input - -- sepByWords1 input - takeFramedByEsc_ value (sourceEscapedFrames value 1) - return () -#endif diff --git a/benchmark/Streamly/Benchmark/Data/Stream.hs b/benchmark/Streamly/Benchmark/Data/Stream.hs index 120b2de6f6..f59a2515f2 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream.hs @@ -25,6 +25,7 @@ import qualified Stream.Generate as Generation import qualified Stream.Lift as Lift import qualified Stream.Reduce as NestedFold import qualified Stream.Split as Split +import qualified Stream.SplitChunks as SplitChunks import qualified Stream.Transform as Transformation import Streamly.Benchmark.Common @@ -61,6 +62,7 @@ main = do , Elimination.benchmarks moduleName size , Exceptions.benchmarks moduleName env size , Split.benchmarks moduleName env + , SplitChunks.benchmarks moduleName env , Transformation.benchmarks moduleName size , NestedFold.benchmarks moduleName size , Lift.benchmarks moduleName size diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Common.hs b/benchmark/Streamly/Benchmark/Data/Stream/Common.hs index 6ae127ca4b..d441e303a2 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Common.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Common.hs @@ -79,15 +79,12 @@ import Control.DeepSeq (NFData) import Control.Exception (try) import GHC.Exception 
(ErrorCall) import System.Random (randomRIO) +import Streamly.Internal.Data.Stream (Stream) import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Pipe as Pipe import qualified Streamly.Internal.Data.Scanl as Scanl import qualified Streamly.Internal.Data.Scanr as Scanr - - -import Streamly.Internal.Data.Stream (Stream) -import qualified Streamly.Internal.Data.Stream as D import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench @@ -107,7 +104,7 @@ append = Stream.append {-# INLINE append2 #-} append2 :: Monad m => Stream m a -> Stream m a -> Stream m a -append2 = D.append +append2 = Stream.append {-# INLINE drain #-} drain :: Monad m => Stream m a -> m () @@ -201,7 +198,7 @@ benchIO name f = bench name $ nfIO $ randomRIO (1,1) >>= f sourceConcatMapId :: (Monad m) => Int -> Int -> Stream m (Stream m Int) sourceConcatMapId value n = - Stream.fromList $ fmap (D.fromEffect . return) [n..n+value] + Stream.fromList $ fmap (Stream.fromEffect . 
return) [n..n+value] {-# INLINE apDiscardFst #-} apDiscardFst :: MonadAsync m => diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs b/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs index 2f21775fc9..c1bbd62e6d 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs @@ -35,7 +35,6 @@ import qualified Streamly.Internal.Data.Fold as Fold #ifdef INSPECTION import GHC.Types (SPEC(..)) import Test.Inspection -import qualified Streamly.Internal.Data.Stream as D #endif import Stream.Common @@ -581,7 +580,7 @@ eqByPure value n = eqBy' (sourceUnfoldr value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'eqByPure inspect $ 'eqByPure `hasNoType` ''SPEC -inspect $ 'eqByPure `hasNoType` ''D.Step +inspect $ 'eqByPure `hasNoType` ''S.Step #endif {-# INLINE eqInstance #-} @@ -603,7 +602,7 @@ cmpByPure value n = cmpBy' (sourceUnfoldr value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'cmpByPure inspect $ 'cmpByPure `hasNoType` ''SPEC -inspect $ 'cmpByPure `hasNoType` ''D.Step +inspect $ 'cmpByPure `hasNoType` ''S.Step #endif {-# INLINE ordInstance #-} @@ -642,7 +641,7 @@ eqBy value n = eqBy' (sourceUnfoldrM value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'eqBy inspect $ 'eqBy `hasNoType` ''SPEC -inspect $ 'eqBy `hasNoType` ''D.Step +inspect $ 'eqBy `hasNoType` ''S.Step #endif {-# INLINE cmpBy #-} @@ -652,7 +651,7 @@ cmpBy value n = cmpBy' (sourceUnfoldrM value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'cmpBy inspect $ 'cmpBy `hasNoType` ''SPEC -inspect $ 'cmpBy `hasNoType` ''D.Step +inspect $ 'cmpBy `hasNoType` ''S.Step #endif o_1_space_elimination_multi_stream :: Int -> [Benchmark] diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs b/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs index d669bc242d..8af6a8200d 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs @@ -23,20 +23,19 @@ module 
Stream.Expand (benchmarks) where #ifdef INSPECTION import GHC.Types (SPEC(..)) import Test.Inspection - -import qualified Streamly.Internal.Data.Stream as D #endif -import qualified Stream.Common as Common -import qualified Streamly.Internal.Data.Unfold as UF - import Streamly.Data.Stream (Stream) import Streamly.Data.Unfold (Unfold) + +import qualified Stream.Common as Common +import qualified Streamly.Internal.Data.Unfold as UF import qualified Streamly.Internal.Data.Stream as S import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.Data.StreamK as StreamK + import Test.Tasty.Bench import Stream.Common import Streamly.Benchmark.Common @@ -218,7 +217,7 @@ unfoldSched outer inner n = inspect $ hasNoTypeClasses 'unfoldSched -- inspect $ 'unfoldSched `hasNoType` ''SPEC -- inspect $ 'unfoldSched `hasNoType` --- ''D.ConcatUnfoldInterleaveState +-- ''Stream.ConcatUnfoldInterleaveState #endif o_1_space_joining :: Int -> [Benchmark] @@ -328,7 +327,7 @@ unfoldEach outer inner start = drain $ #ifdef INSPECTION inspect $ hasNoTypeClasses 'unfoldEach -inspect $ 'unfoldEach `hasNoType` ''D.UnfoldEachState +inspect $ 'unfoldEach `hasNoType` ''Stream.UnfoldEachState inspect $ 'unfoldEach `hasNoType` ''SPEC #endif diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs b/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs index 4ba9fc1784..ff68396978 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs @@ -17,28 +17,20 @@ module Stream.Generate (benchmarks) where import Control.DeepSeq (NFData(..)) import Control.Monad.IO.Class (MonadIO) import Data.Functor.Identity (Identity(..)) +import Streamly.Internal.Data.Stream (Stream) import Streamly.Internal.Data.Time.Units (AbsTime) import qualified GHC.Exts as GHC import qualified Streamly.Internal.Data.Fold 
as Fold - -import Stream.Common -import Streamly.Internal.Data.Stream (Stream) import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench +import Stream.Common import Streamly.Benchmark.Common import qualified Prelude import Prelude hiding (repeat, replicate, iterate) -------------------------------------------------------------------------------- --- Generation -------------------------------------------------------------------------------- - -toStreamD :: a -> a -toStreamD = id - ------------------------------------------------------------------------------- -- fromList ------------------------------------------------------------------------------- @@ -177,8 +169,8 @@ o_1_space_generation value = , benchIOSrc "fracFromTo" (sourceFracFromTo value) , benchIOSrc "fromList" (sourceFromList value) , benchIOSrc "fromListM" (sourceFromListM value) - , benchPureSrc "IsList.fromList" (toStreamD . sourceIsList value) - , benchPureSrc "IsString.fromString" (toStreamD . sourceIsString value) + , benchPureSrc "IsList.fromList" (sourceIsList value) + , benchPureSrc "IsString.fromString" (sourceIsString value) , benchIOSrc "enumerateFrom" (enumerateFrom value) , benchIOSrc "enumerateFromTo" (enumerateFromTo value) , benchIOSrc "enumerateFromThen" (enumerateFromThen value) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs b/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs index 059e79d762..b034719a83 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs @@ -16,11 +16,10 @@ import Control.Monad.State.Strict (StateT, get, put) import Data.Functor.Identity (Identity) import Stream.Common (sourceUnfoldr, sourceUnfoldrM, benchIOSrc) import System.Random (randomRIO) +import Streamly.Internal.Data.Stream (Stream) import qualified Stream.Common as Common import qualified Streamly.Internal.Data.Fold as Fold - -import Streamly.Internal.Data.Stream (Stream) import qualified 
Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs b/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs index c1a892d06a..debf5b047b 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs @@ -19,12 +19,11 @@ import Control.Monad.IO.Class (MonadIO(..)) import Data.Maybe (isJust) import Data.Monoid (Sum(..)) import GHC.Generics (Generic) +import Streamly.Internal.Data.Stream (Stream) -import qualified Streamly.Internal.Data.Refold.Type as Refold -import qualified Streamly.Internal.Data.Fold as FL import qualified Stream.Common as Common - -import Streamly.Internal.Data.Stream (Stream) +import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Refold.Type as Refold import qualified Streamly.Internal.Data.Stream as S import Test.Tasty.Bench @@ -398,6 +397,7 @@ o_1_space_pipesX4 value = [ bgroup "pipesX4" [ benchIOSink value "mapM" (transformMapM 4) , benchIOSink value "compose" (transformComposeMapM 4) + -- XXX requires @-fspec-constr-recursive=16@. 
, benchIOSink value "tee" (transformTeeMapM 4) #ifdef DEVBUILD -- XXX this take 1 GB memory to compile diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Split.hs b/benchmark/Streamly/Benchmark/Data/Stream/Split.hs index b91805c526..2beee72ea1 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Split.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Split.hs @@ -30,7 +30,6 @@ import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.FileSystem.Handle as Handle -import qualified Streamly.Internal.Unicode.Stream as Unicode import Test.Tasty.Bench hiding (env) import Prelude hiding (last, length) @@ -245,6 +244,8 @@ o_1_space_reduce_read_split env = takeEndBySeq "aaaa" inh , mkBench "takeEndBySeq word abcdefgh" env $ \inh _ -> takeEndBySeq "abcdefgh" inh + + -- XXX takeEndBySeq KR requires @-fspec-constr-recursive=12@. , mkBench "takeEndBySeq KR abcdefghi" env $ \inh _ -> takeEndBySeq "abcdefghi" inh , mkBench "takeEndBySeq KR catcatcatcatcat" env $ \inh _ -> @@ -277,28 +278,8 @@ o_1_space_reduce_read_split env = ] ] --- | Split on a character sequence. 
-splitOnSeqUtf8 :: String -> Handle -> IO Int -splitOnSeqUtf8 str inh = - (Stream.fold Fold.length - $ Stream.splitSepBySeq_ (Array.fromList str) Fold.drain - $ Unicode.decodeUtf8Chunks - $ Handle.readChunks inh) -- >>= print - -o_1_space_reduce_toChunks_split :: BenchEnv -> [Benchmark] -o_1_space_reduce_toChunks_split env = - [ bgroup "FileSplitSeqUtf8" - [ mkBenchSmall "splitOnSeqUtf8 word abcdefgh" - env $ \inh _ -> splitOnSeqUtf8 "abcdefgh" inh - , mkBenchSmall "splitOnSeqUtf8 KR abcdefghijklmnopqrstuvwxyz" - env $ \inh _ -> splitOnSeqUtf8 "abcdefghijklmnopqrstuvwxyz" inh - ] - ] - benchmarks :: String -> BenchEnv -> [Benchmark] benchmarks moduleName env = - [ bgroup (o_1_space_prefix moduleName) $ concat - [ o_1_space_reduce_read_split env - , o_1_space_reduce_toChunks_split env - ] + [ bgroup (o_1_space_prefix moduleName) $ + o_1_space_reduce_read_split env ] diff --git a/benchmark/Streamly/Benchmark/Data/Stream/SplitChunks.hs b/benchmark/Streamly/Benchmark/Data/Stream/SplitChunks.hs new file mode 100644 index 0000000000..1954c4bd33 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Stream/SplitChunks.hs @@ -0,0 +1,72 @@ + +-- | +-- Module : Stream.SplitChunks +-- Copyright : (c) 2019 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC + +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} + +#ifdef __HADDOCK_VERSION__ +#undef INSPECTION +#endif + +#ifdef INSPECTION +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} +#endif + +module Stream.SplitChunks (benchmarks) where + +import System.IO (Handle) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.FileSystem.Handle as Handle +import qualified Streamly.Internal.Unicode.Stream as Unicode + +import Test.Tasty.Bench hiding 
(env) +import Prelude hiding (last, length) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle + +#ifdef INSPECTION +import Streamly.Internal.Data.Stream (Step(..)) + +import qualified Streamly.Internal.Data.MutArray as MutArray +import qualified Streamly.Internal.Data.Unfold as Unfold + +import Test.Inspection +#endif + +------------------------------------------------------------------------------- +-- reduce with splitting transformations +------------------------------------------------------------------------------- + +-- | Split on a character sequence. +splitOnSeqUtf8 :: String -> Handle -> IO Int +splitOnSeqUtf8 str inh = + Stream.fold Fold.length + $ Stream.splitSepBySeq_ (Array.fromList str) Fold.drain + $ Unicode.decodeUtf8Chunks + $ Handle.readChunks inh -- >>= print + +o_1_space_reduce_toChunks_split :: BenchEnv -> [Benchmark] +o_1_space_reduce_toChunks_split env = + [ bgroup "FileSplitSeqUtf8" + [ mkBenchSmall "splitOnSeqUtf8 word abcdefgh" + env $ \inh _ -> splitOnSeqUtf8 "abcdefgh" inh + , mkBenchSmall "splitOnSeqUtf8 KR abcdefghijklmnopqrstuvwxyz" + env $ \inh _ -> splitOnSeqUtf8 "abcdefghijklmnopqrstuvwxyz" inh + ] + ] + +benchmarks :: String -> BenchEnv -> [Benchmark] +benchmarks moduleName env = + [ bgroup (o_1_space_prefix moduleName) $ + o_1_space_reduce_toChunks_split env + ] diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs b/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs index 0bb0ae6b78..03cb2a8230 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs @@ -27,12 +27,11 @@ import Control.Monad.IO.Class (MonadIO(..)) import Streamly.Internal.Data.Stream (Stream) import System.Random (randomRIO) +import qualified Stream.Common as Common import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Scanl as Scanl - -import qualified Stream.Common as Common -import qualified 
Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.Data.Unfold as Unfold import Test.Tasty.Bench import Stream.Common hiding (scanl') @@ -406,6 +405,7 @@ o_1_space_filteringX4 value = , benchIOSink value "drop-all" (dropAll value 4) , benchIOSink value "dropWhile-true" (dropWhileTrue value 4) , benchIOSink value "dropWhileM-true" (dropWhileMTrue value 4) + -- XXX requires @-fspec-constr-recursive=12@. , benchIOSink value "dropWhile-false" @@ -465,7 +465,9 @@ o_1_space_inserting value = o_1_space_insertingX4 :: Int -> [Benchmark] o_1_space_insertingX4 value = [ bgroup "insertingX4" - [ benchIOSink value "intersperse" (intersperse value 4) + [ + -- XXX requires @-fspec-constr-recursive=16@. + benchIOSink value "intersperse" (intersperse value 4) , benchIOSink value "insertBy" (insertBy value 4) ] ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index b03b2cd5f2..a777acc683 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -249,6 +249,7 @@ chunksOf :: Int -> Handle -> IO Int chunksOf n inh = S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) +-- XXX all these require @-fspec-constr-recursive=12@. 
o_1_space_reduce_read_grouped :: BenchEnv -> [Benchmark] o_1_space_reduce_read_grouped env = [ bgroup "reduce/read/chunks" diff --git a/benchmark/Streamly/Benchmark/Unicode/Stream.hs b/benchmark/Streamly/Benchmark/Unicode/Stream.hs index d0329f66ee..d95d39499f 100644 --- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs +++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs @@ -28,7 +28,7 @@ import Streamly.Data.Stream (Stream) import Streamly.Data.Fold (Fold) import Prelude hiding (last, length) import System.IO (Handle) -import Streamly.Internal.System.IO (arrayPayloadSize) +-- import Streamly.Internal.System.IO (arrayPayloadSize) import qualified Streamly.Data.Array as Array import qualified Streamly.Data.Fold as Fold @@ -37,7 +37,7 @@ import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.FileSystem.Handle as Handle import qualified Streamly.Internal.Unicode.Array as UnicodeArr import qualified Streamly.Internal.Unicode.Stream as Unicode -import qualified Streamly.Internal.Data.Array as Array +-- import qualified Streamly.Internal.Data.Array as Array import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common @@ -89,6 +89,7 @@ splitOnSuffix => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b splitOnSuffix predicate f = Stream.foldMany (Fold.takeEndBy_ predicate f) +-- XXX require @-fspec-constr-recursive=12@. {-# NOINLINE linesUnlinesCopy #-} linesUnlinesCopy :: Handle -> Handle -> IO () linesUnlinesCopy inh outh = @@ -108,6 +109,7 @@ linesUnlinesArrayWord8Copy inh outh = $ Stream.unfold Handle.reader inh -- XXX splitSuffixOn requires -funfolding-use-threshold=150 for better fusion +-- XXX require @-fspec-constr-recursive=12@. -- | Lines and unlines {-# NOINLINE linesUnlinesArrayCharCopy #-} linesUnlinesArrayCharCopy :: Handle -> Handle -> IO () @@ -176,6 +178,7 @@ wordsUnwordsCopy inh outh = -- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step #endif +-- XXX require @-fspec-constr-recursive=12@. 
{-# NOINLINE wordsUnwordsCharArrayCopy #-} wordsUnwordsCharArrayCopy :: Handle -> Handle -> IO () wordsUnwordsCharArrayCopy inh outh = @@ -186,6 +189,7 @@ wordsUnwordsCharArrayCopy inh outh = $ Unicode.decodeLatin1 $ Stream.unfold Handle.reader inh +-- XXX all these require @-fspec-constr-recursive=12@. o_1_space_copy_read_group_ungroup :: BenchEnv -> [Benchmark] o_1_space_copy_read_group_ungroup env = [ bgroup "ungroup-group" @@ -258,6 +262,7 @@ _copyStreamUtf8' inh outh = $ Unicode.decodeUtf8' $ Stream.unfold Handle.reader inh +{- -- | Copy file {-# NOINLINE copyStreamUtf16 #-} copyStreamUtf16 :: Handle -> Handle -> IO () @@ -266,8 +271,11 @@ copyStreamUtf16 inh outh = $ fmap Array.unsafeCast $ Array.chunksOf (arrayPayloadSize (16 * 1024)) $ Unicode.encodeUtf16le' $ Unicode.decodeUtf16le + -- XXX we have a commented implementation of mkEvenW8Chunks in + -- streamly-core Unicode module, copy it here to enable this benchmark. $ Array.concat $ fmap Array.unsafeCast $ Unicode.mkEvenW8Chunks $ Handle.readChunks inh + -} #ifdef INSPECTION inspect $ hasNoTypeClasses '_copyStreamUtf8' @@ -310,6 +318,7 @@ _copyStreamUtf8Parser inh outh = (Unicode.parseCharUtf8With Unicode.TransliterateCodingFailure) $ Stream.unfold Handle.reader inh +-- XXX all these require @-fspec-constr-recursive=12@. o_1_space_decode_encode_read :: BenchEnv -> [Benchmark] o_1_space_decode_encode_read env = [ bgroup "decode-encode" @@ -330,8 +339,10 @@ o_1_space_decode_encode_read env = $ \inh outh -> _copyStreamUtf8Parser inh outh , mkBenchSmall "encodeUtf8 . decodeUtf8" env $ \inh outh -> copyStreamUtf8 inh outh + {- , mkBenchSmall "encodeUtf16 . 
decodeUtf16" env $ \inh outh -> copyStreamUtf16 inh outh + -} ] ] diff --git a/benchmark/lib/Streamly/Benchmark/Common.hs b/benchmark/lib/Streamly/Benchmark/Common.hs index 213d5acefd..5c8d4298ba 100644 --- a/benchmark/lib/Streamly/Benchmark/Common.hs +++ b/benchmark/lib/Streamly/Benchmark/Common.hs @@ -18,6 +18,7 @@ module Streamly.Benchmark.Common , runWithCLIOptsEnv , runWithCLIOpts + , streamUnfoldrM , benchIOSink1 , benchPure , benchPureSink1 @@ -54,8 +55,9 @@ import Control.DeepSeq (NFData(..)) import Data.Functor.Identity (Identity, runIdentity) import System.Random (randomRIO) +import Streamly.Internal.Data.Stream (Stream) import qualified Streamly.Internal.Data.Fold as Fold -import qualified Streamly.Internal.Data.Stream as S +import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench @@ -81,6 +83,15 @@ o_n_stack_prefix name = name ++ "/o-n-stack" -- Benchmarking utilities ------------------------------------------------------------------------------- +{-# INLINE streamUnfoldrM #-} +streamUnfoldrM :: Monad m => Int -> Int -> Stream m Int +streamUnfoldrM value n = Stream.unfoldrM step n + where + step cnt = + if cnt > n + value + then return Nothing + else return (Just (cnt, cnt + 1)) + -- XXX once we convert all the functions to use this we can rename this to -- benchIOSink {-# INLINE benchIOSink1 #-} @@ -110,12 +121,12 @@ benchPureSink1 name f = bench name $ nfIO $ randomRIO (1,1) >>= return . runIdentity . f {-# INLINE benchPureSrc #-} -benchPureSrc :: String -> (Int -> S.Stream Identity a) -> Benchmark +benchPureSrc :: String -> (Int -> Stream Identity a) -> Benchmark benchPureSrc name src = benchPure name src (runIdentity . 
drain) where - drain = S.fold Fold.drain + drain = Stream.fold Fold.drain ------------------------------------------------------------------------------- -- String/List generation for read instances diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 27174f978a..b3addc9cc4 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -228,7 +228,7 @@ common bench-options include-dirs: . ghc-options: -rtsopts -with-rtsopts "-t" if flag(limit-build-mem) - ghc-options: +RTS -M400M -RTS + ghc-options: -j1 +RTS -M400M -RTS build-depends: streamly-benchmarks == 0.0.0 -- Some benchmarks are threaded some are not @@ -238,7 +238,7 @@ common bench-options-threaded -- trigger only with these options. ghc-options: -threaded -rtsopts -with-rtsopts "-t -N2" if flag(limit-build-mem) - ghc-options: +RTS -M400M -RTS + ghc-options: -j1 +RTS -M400M -RTS build-depends: streamly-benchmarks == 0.0.0 ------------------------------------------------------------------------------- @@ -256,8 +256,8 @@ benchmark Data.Array buildable: False else buildable: True - if flag(limit-build-mem) - ghc-options: +RTS -M1500M -RTS + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M600M -RTS benchmark Data.Array.Generic import: bench-options @@ -270,8 +270,8 @@ benchmark Data.Array.Generic buildable: False else buildable: True - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M500M -RTS benchmark Data.Array.Stream import: bench-options @@ -293,6 +293,8 @@ benchmark Data.Fold buildable: False else buildable: True + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M500M -RTS benchmark Data.Fold.Prelude import: bench-options @@ -319,8 +321,8 @@ benchmark Data.MutArray import: bench-options type: exitcode-stdio-1.0 main-is: Streamly/Benchmark/Data/MutArray.hs - if flag(limit-build-mem) - ghc-options: +RTS -M1500M 
-RTS + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M500M -RTS if flag(use-streamly-core) buildable: False else @@ -330,19 +332,26 @@ benchmark Data.Parser import: bench-options type: exitcode-stdio-1.0 hs-source-dirs: . - main-is: Streamly/Benchmark/Data/ParserMain.hs + main-is: Streamly/Benchmark/Data/Parser.hs other-modules: - Streamly.Benchmark.Data.Parser + Streamly.Benchmark.Data.Parser.Common + , Streamly.Benchmark.Data.Parser.Alternative + , Streamly.Benchmark.Data.Parser.Applicative + , Streamly.Benchmark.Data.Parser.Monad + , Streamly.Benchmark.Data.Parser.Interleave + , Streamly.Benchmark.Data.Parser.Sequence + , Streamly.Benchmark.Data.Parser.Producer + , Streamly.Benchmark.Data.Parser.Groups if impl(ghcjs) buildable: False else buildable: True - build-depends: exceptions >= 0.8 && < 0.11 + build-depends: exceptions >= 0.8 && < 0.11 if flag(limit-build-mem) - if flag(dev) - ghc-options: +RTS -M3000M -RTS + if flag(dev) || !flag(fusion-plugin) + ghc-options: +RTS -M750M -RTS else - ghc-options: +RTS -M2500M -RTS + ghc-options: +RTS -M500M -RTS benchmark Data.ParserK import: bench-options @@ -384,8 +393,6 @@ benchmark Data.RingArray import: bench-options type: exitcode-stdio-1.0 main-is: Streamly/Benchmark/Data/RingArray.hs - if flag(limit-build-mem) - ghc-options: +RTS -M1500M -RTS benchmark Data.Scanl import: bench-options @@ -421,8 +428,6 @@ benchmark Data.Serialize Streamly.Benchmark.Data.Serialize.TH Streamly.Benchmark.Data.Serialize.RecCompatible Streamly.Benchmark.Data.Serialize.RecNonCompatible - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS benchmark Data.SmallArray import: bench-options @@ -439,20 +444,22 @@ benchmark Data.Stream hs-source-dirs: Streamly/Benchmark/Data main-is: Stream.hs other-modules: - Stream.Generate + Stream.Common Stream.Eliminate - Stream.Transform - Stream.Reduce + Stream.Exceptions Stream.Expand + Stream.Generate Stream.Lift + Stream.Reduce Stream.Split - Stream.Common - 
Stream.Exceptions + -- XXX uses lot of memory + Stream.SplitChunks + Stream.Transform if flag(limit-build-mem) if flag(dev) - ghc-options: +RTS -M3500M -RTS + ghc-options: +RTS -M1000M -RTS else - ghc-options: +RTS -M2500M -RTS + ghc-options: +RTS -M500M -RTS benchmark Data.Stream.Adaptive import: bench-options-threaded @@ -611,33 +618,31 @@ benchmark Data.Unfold.Prelude else buildable: True -benchmark FileSystem.Handle +benchmark FileSystem.DirIO import: bench-options type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/FileSystem - main-is: Handle.hs - other-modules: - Handle.Read - , Handle.ReadWrite - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + main-is: DirIO.hs if flag(use-streamly-core) buildable: False else buildable: True + -- Fix this benchmark for Windows + if os(windows) + buildable: False -benchmark FileSystem.DirIO +benchmark FileSystem.Handle import: bench-options type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/FileSystem - main-is: DirIO.hs + main-is: Handle.hs + other-modules: + Handle.Read + , Handle.ReadWrite if flag(use-streamly-core) buildable: False else buildable: True - -- Fix this benchmark for Windows - if os(windows) - buildable: False benchmark Unicode.Char import: bench-options @@ -666,8 +671,6 @@ benchmark Unicode.Stream type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/Unicode main-is: Stream.hs - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS if flag(use-streamly-core) buildable: False else diff --git a/bin/ghc-make.sh b/bin/ghc-make.sh new file mode 100755 index 0000000000..3357014808 --- /dev/null +++ b/bin/ghc-make.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +curdir=`pwd` +SRC_ROOT=${curdir}/core +BLD_ROOT=${curdir}/build +RSP_FILE=${BLD_ROOT}/ghc-streamly.rsp +CONFIG_FILE=${BLD_ROOT}/src/config.h + +mkdir -p ${BLD_ROOT} +CABAL_MACROS=${BLD_ROOT}/cabal_macros.h + +if test ! 
-f "$CABAL_MACROS" +then + echo "$CABAL_MACROS: not found" + echo "Use cabal to generate cabal_macros.h and place it in build dir." + exit 1 +fi + +# configure creates config.h in a directory reflecting the relative path of +# config.h.in in the source tree. We keep that src path in the include path and +# then all source include paths are translated to include paths relative to the +# build dir. So that all such paths get included. +# + +# -I${BLD_ROOT}/src/doctest +# -I${BLD_ROOT}/src/Streamly/Internal/Data +# -I${BLD_ROOT}/src/Streamly/Internal/Data/Array +# -I${BLD_ROOT}/src/Streamly/Internal/Data/Stream + +if test ! -f "$CONFIG_FILE" +then + cd ${BLD_ROOT} + ${SRC_ROOT}/configure + cd $curdir +fi + +RTS_INCLUDE_DIRS=$(ghc-pkg field rts include-dirs --simple-output) +RTS_INCLUDE_FLAGS="" +for dir in $RTS_INCLUDE_DIRS; do + RTS_INCLUDE_FLAGS="$RTS_INCLUDE_FLAGS -I$dir" +done + +CPP_FLAGS="\ +-I${SRC_ROOT}/src \ +-I${SRC_ROOT}/src/doctest \ +-I${SRC_ROOT}/src/Streamly/Internal/Data \ +-I${SRC_ROOT}/src/Streamly/Internal/Data/Array \ +-I${SRC_ROOT}/src/Streamly/Internal/Data/Stream \ +-I${BLD_ROOT}/src \ +$RTS_INCLUDE_FLAGS" + +# Use cabal build with -v option. Copy paste compiler flags from the +# response file arguments shown in the output. + +if test ! 
-f "$RSP_FILE" +then +cat << EOF > ${RSP_FILE} +--make +-static -dynamic-too -dynosuf dyn_o -dynhisuf dyn_hi +'-package-env=-' +-this-unit-id streamly-core-0.4.0-temp +-outputdir ${BLD_ROOT} -odir ${BLD_ROOT} -hidir ${BLD_ROOT} -hiedir ${BLD_ROOT} -stubdir ${BLD_ROOT} +-i -i${SRC_ROOT}/src -i${BLD_ROOT}/src +-optP-include -optP${CABAL_MACROS} +$CPP_FLAGS +-XHaskell2010 +-XBangPatterns +-XConstraintKinds +-XDeriveDataTypeable +-XDeriveGeneric +-XDeriveTraversable +-XExistentialQuantification +-XFlexibleContexts +-XFlexibleInstances +-XGeneralizedNewtypeDeriving +-XInstanceSigs +-XKindSignatures +-XLambdaCase +-XMultiParamTypeClasses +-XRankNTypes +-XScopedTypeVariables +-XStandaloneDeriving +-XTupleSections +-XTypeApplications +-XTypeOperators +-XCApiFFI +-XCPP +-XDefaultSignatures +-XMagicHash +-XRecordWildCards +-XStandaloneKindSignatures +-XQuantifiedConstraints +-Weverything +-Wno-implicit-prelude +-Wno-missing-deriving-strategies +-Wno-missing-exported-signatures +-Wno-missing-import-lists +-Wno-missing-local-signatures +-Wno-missing-safe-haskell-mode +-Wno-missed-specialisations +-Wno-all-missed-specialisations +-Wno-monomorphism-restriction +-Wno-prepositive-qualified-module +-Wno-unsafe +-Wno-missing-kind-signatures +-Wno-redundant-bang-patterns +-Wno-operator-whitespace +-Wno-missing-role-annotations +-Wno-missing-poly-kind-signatures +-O2 +-fdicts-strict +'-fspec-constr-recursive=16' +'-fmax-worker-args=16' +-Rghc-timing +EOF +fi + +HSC2HS_FLAGS="$CPP_FLAGS -D__GLASGOW_HASKELL__=910 -i${CABAL_MACROS}" + +HSC_FILES=$(find "$SRC_ROOT" -name '*.hsc' -printf '%P\n') +for f in $HSC_FILES; do + src="$SRC_ROOT/$f" + dst="$BLD_ROOT/${f%.hsc}.hs" + + mkdir -p "$(dirname "$dst")" + #echo "src=$src, dst=$dst" + echo "hsc2hs $HSC2HS_FLAGS $src -o $dst" + hsc2hs $HSC2HS_FLAGS "$src" -o "$dst" +done + +# -fplugin Fusion.Plugin \ +# -ddump-to-file \ +# -ddump-simpl \ + +ghc @${RSP_FILE} \ + +RTS -M400M -RTS \ + $* diff --git a/cabal.project b/cabal.project index 
e1fe3bfc99..d0102b2061 100644 --- a/cabal.project +++ b/cabal.project @@ -3,3 +3,6 @@ packages: streamly.cabal , test/streamly-tests.cabal , benchmark/streamly-benchmarks.cabal , bench-test-lib/bench-test-lib.cabal + +-- For debugging heap overflow +jobs: 1 diff --git a/cabal.project.Werror b/cabal.project.Werror index 281c05e992..d01b665f45 100644 --- a/cabal.project.Werror +++ b/cabal.project.Werror @@ -5,6 +5,9 @@ packages: core bench-test-lib +-- For debugging heap overflow +jobs: 1 + package streamly ghc-options: -Werror diff --git a/cabal.project.report b/cabal.project.report index 49517512af..2902b5f710 100644 --- a/cabal.project.report +++ b/cabal.project.report @@ -10,33 +10,28 @@ package bench-report package bench-show flags: +no-charts -source-repository-package - type: git - location: https://github.com/composewell/streamly.git - tag: 8500b6446dae07e763370608625f26eff5c84600 - -source-repository-package - type: git - location: https://github.com/composewell/streamly.git - tag: 8500b6446dae07e763370608625f26eff5c84600 - subdir: core - source-repository-package type: git location: https://github.com/composewell/bench-report.git - tag: d012e03bea201b6af605f4c4a7f2eb4d36452c5f + tag: 26a70093f9da76144fefe9b5305b5f928ef59caf source-repository-package type: git location: https://github.com/composewell/streamly-coreutils.git - tag: 2b62fc242b9645a85124ef8c52c8212d316f24dc - -source-repository-package - type: git - location: https://github.com/composewell/streamly-shell.git - tag: 070ff21fda7aab8ca45b08b330746299a91eb981 - -source-repository-package - type: git - location: https://github.com/composewell/streamly-process.git - tag: 9af2f96971d3365be6f4e75d7ded0d3ae5458fbf + tag: fa180060c7510c89d2767980ca6f7ec7011d04b9 + +--source-repository-package +-- type: git +-- location: https://github.com/composewell/streamly.git +-- tag: 8500b6446dae07e763370608625f26eff5c84600 +-- +--source-repository-package +-- type: git +-- location: 
https://github.com/composewell/streamly.git +-- tag: 8500b6446dae07e763370608625f26eff5c84600 +-- subdir: core + +--source-repository-package +-- type: git +-- location: https://github.com/composewell/streamly-process.git +-- tag: 9af2f96971d3365be6f4e75d7ded0d3ae5458fbf diff --git a/core/src/Streamly/Internal/Data/MutArray.hs b/core/src/Streamly/Internal/Data/MutArray.hs index 1d29de4ee2..95961ec0dd 100644 --- a/core/src/Streamly/Internal/Data/MutArray.hs +++ b/core/src/Streamly/Internal/Data/MutArray.hs @@ -74,7 +74,7 @@ import Streamly.Internal.Data.Fold.Type (Fold) import qualified Streamly.Internal.Data.IORef as IORef import qualified Streamly.Internal.Data.RingArray as RingArray import qualified Streamly.Internal.Data.Serialize.Type as Serialize -import qualified Streamly.Internal.Data.Stream.Nesting as Stream +import qualified Streamly.Internal.Data.Stream.Parse as Stream import qualified Streamly.Internal.Data.Stream.Type as Stream import qualified Streamly.Internal.Data.Fold.Type as Fold -- import qualified Streamly.Internal.Data.Stream.Transform as Stream diff --git a/core/src/Streamly/Internal/Data/Stream.hs b/core/src/Streamly/Internal/Data/Stream.hs index 812bb4d1c0..4569111e3e 100644 --- a/core/src/Streamly/Internal/Data/Stream.hs +++ b/core/src/Streamly/Internal/Data/Stream.hs @@ -23,6 +23,7 @@ module Streamly.Internal.Data.Stream , module Streamly.Internal.Data.Stream.Lift , module Streamly.Internal.Data.Stream.Transformer , module Streamly.Internal.Data.Stream.Nesting + , module Streamly.Internal.Data.Stream.Parse , module Streamly.Internal.Data.Stream.Transform , module Streamly.Internal.Data.Stream.Top , module Streamly.Internal.Data.Stream.Container @@ -36,6 +37,7 @@ import Streamly.Internal.Data.Stream.Exception import Streamly.Internal.Data.Stream.Lift import Streamly.Internal.Data.Stream.Transformer import Streamly.Internal.Data.Stream.Nesting +import Streamly.Internal.Data.Stream.Parse import Streamly.Internal.Data.Stream.Transform import 
Streamly.Internal.Data.Stream.Top import Streamly.Internal.Data.Stream.Container diff --git a/core/src/Streamly/Internal/Data/Stream/Eliminate.hs b/core/src/Streamly/Internal/Data/Stream/Eliminate.hs index 084b4481c9..edfce3b297 100644 --- a/core/src/Streamly/Internal/Data/Stream/Eliminate.hs +++ b/core/src/Streamly/Internal/Data/Stream/Eliminate.hs @@ -83,7 +83,7 @@ import qualified Streamly.Internal.Data.Array.Type as Array import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.ParserDrivers as Drivers -import qualified Streamly.Internal.Data.Stream.Nesting as Nesting +import qualified Streamly.Internal.Data.Stream.Parse as Nesting import qualified Streamly.Internal.Data.Stream.Transform as StreamD import Prelude hiding diff --git a/core/src/Streamly/Internal/Data/Stream/Nesting.hs b/core/src/Streamly/Internal/Data/Stream/Nesting.hs index 30f93dc0da..55f230a15f 100644 --- a/core/src/Streamly/Internal/Data/Stream/Nesting.hs +++ b/core/src/Streamly/Internal/Data/Stream/Nesting.hs @@ -8,31 +8,22 @@ -- Stability : experimental -- Portability : GHC -- --- This module contains transformations involving multiple streams, unfolds or --- folds. There are two types of transformations generational or eliminational. --- Generational transformations are like the "Generate" module but they --- generate a stream by combining streams instead of elements. Eliminational --- transformations are like the "Eliminate" module but they transform a stream --- by eliminating parts of the stream instead of eliminating the whole stream. +-- This module contains unfolding (generational) transformations involving +-- multiple streams, unfolds or folds. There are two types of transformations +-- generational or eliminational. Generational transformations are like the +-- "Generate" module but they generate a stream by combining streams instead of +-- elements. 
Eliminational transformations are like the "Eliminate" module but +-- they transform a stream by eliminating parts of the stream instead of +-- eliminating the whole stream. -- -- These combinators involve transformation, generation, elimination so can be -- classified under any of those. - --- The zipWithM combinator in this module has been adapted from the vector --- package (c) Roman Leshchinskiy. -- -- Flipped versions can be named as: -- mapFor/forEach, concatFor, unfoldStepFor (only step function) -- foreach would be better for streams than mapFor as map could be used for any -- type not just containers with multiple elements. -- --- Flipped versions for folding streams: --- groupsFor :: stream -> fold -> stream (flipped groupsWhile) --- --- Flipped versions for folds: --- foldMany :: outer fold -> inner fold -> fold (original version) --- groupFoldFor :: inner fold -> outer fold -> fold (flipped version) --- groupStepFor :: inner fold -> outer fold step -> fold (flipped version) -- This can be convenient for defining the outer fold step using a lambda. -- module Streamly.Internal.Data.Stream.Nesting @@ -140,62 +131,6 @@ module Streamly.Internal.Data.Stream.Nesting , fairSchedForM , fairSchedFor - -- * Eliminate - -- | Folding and Parsing chunks of streams to eliminate nested streams. - -- Functions generally ending in these shapes: - -- - -- @ - -- f (Fold m a b) -> t m a -> t m b - -- f (Parser a m b) -> t m a -> t m b - -- @ - - -- ** Folding - -- | Apply folds on a stream. - , foldSequence - , foldIterateM - - -- ** Parsing - -- | Parsing is opposite to flattening. 'parseMany' is dual to concatMap or - -- unfoldEach concatMap generates a stream from single values in a - -- stream and flattens, parseMany does the opposite of flattening by - -- splitting the stream and then folds each such split to single value in - -- the output stream. 
- , parseMany - , parseManyPos - , parseSequence - , parseManyTill - , parseIterate - , parseIteratePos - - -- ** Grouping - -- | Group segments of a stream and fold. Special case of parsing. - , groupsWhile - , groupsRollingBy - - -- ** Splitting - -- | A special case of parsing. - , takeEndBySeq - , takeEndBySeq_ - , wordsBy - , splitSepBySeq_ - , splitEndBySeq - , splitEndBySeq_ - , splitOnSuffixSeq -- internal - - , splitBeginBy_ - , splitEndBySeqOneOf - , splitSepBySeqOneOf - - -- * Transform (Nested Containers) - -- | Opposite to compact in ArrayStream - , splitInnerBy -- XXX innerSplitOn - , splitInnerBySuffix -- XXX innerSplitOnSuffix - - -- * Reduce By Streams - , dropPrefix - , dropInfix - , dropSuffix - -- * Deprecated , interpose , interposeM @@ -210,10 +145,6 @@ module Streamly.Internal.Data.Stream.Nesting , interleaveMin , interleaveFst , interleaveFstSuffix - , parseManyD - , parseIterateD - , groupsBy - , splitOnSeq ) where @@ -221,38 +152,19 @@ where #include "inline.hs" #include "ArrayMacros.h" -import Control.Exception (assert) -import Control.Monad.IO.Class (MonadIO(..)) -import Data.Bits (shiftR, shiftL, (.|.), (.&.)) -import Data.Proxy (Proxy(..)) -import Data.Word (Word32) import Fusion.Plugin.Types (Fuse(..)) -import GHC.Types (SPEC(..)) - -import Streamly.Internal.Data.Array.Type (Array(..)) import Streamly.Internal.Data.Fold.Type (Fold(..)) -import Streamly.Internal.Data.MutArray.Type (MutArray(..)) -import Streamly.Internal.Data.Parser (ParseError(..), ParseErrorPos) -import Streamly.Internal.Data.RingArray (RingArray(..)) import Streamly.Internal.Data.SVar.Type (adaptState) -import Streamly.Internal.Data.Unbox (Unbox(..)) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) -import qualified Streamly.Internal.Data.Array.Type as A -import qualified Streamly.Internal.Data.MutArray.Type as MutArray -import qualified Streamly.Internal.Data.Fold as FL -import qualified Streamly.Internal.Data.Parser as PR -import qualified 
Streamly.Internal.Data.Parser as PRD -import qualified Streamly.Internal.Data.ParserDrivers as Drivers -import qualified Streamly.Internal.Data.RingArray as RB import qualified Streamly.Internal.Data.Stream.Generate as Stream import qualified Streamly.Internal.Data.Unfold.Type as Unfold import Streamly.Internal.Data.Stream.Transform (intersperse, intersperseEndByM) -import Streamly.Internal.Data.Stream.Type hiding (splitAt) +import Streamly.Internal.Data.Stream.Type -import Prelude hiding (concatMap, mapM, zipWith, splitAt) +import Prelude hiding (concatMap, zipWith) #include "DocTestDataStream.hs" @@ -1925,2057 +1837,3 @@ unfoldEachSepBySeq seed unf str = unfoldEach unf $ intersperse seed str intercalate :: Monad m => Unfold m b c -> b -> Stream m b -> Stream m c intercalate u x = unfoldEachSepBySeq x u - ------------------------------------------------------------------------------- --- Folding ------------------------------------------------------------------------------- - --- | Apply a stream of folds to an input stream and emit the results in the --- output stream. --- --- /Unimplemented/ --- -{-# INLINE foldSequence #-} -foldSequence - :: -- Monad m => - Stream m (Fold m a b) - -> Stream m a - -> Stream m b -foldSequence _f _m = undefined - -{-# ANN type FIterState Fuse #-} -data FIterState s f m a b - = FIterInit s f - | forall fs. FIterStream s (fs -> a -> m (FL.Step fs b)) fs (fs -> m b) - (fs -> m b) - | FIterYield b (FIterState s f m a b) - | FIterStop - --- | Iterate a fold generator on a stream. The initial value @b@ is used to --- generate the first fold, the fold is applied on the stream and the result of --- the fold is used to generate the next fold and so on. 
--- --- Usage: --- --- >>> import Data.Monoid (Sum(..)) --- >>> f x = return (Fold.take 2 (Fold.sconcat x)) --- >>> s = fmap Sum $ Stream.fromList [1..10] --- >>> Stream.fold Fold.toList $ fmap getSum $ Stream.foldIterateM f (pure 0) s --- [3,10,21,36,55,55] --- --- This is the streaming equivalent of monad like sequenced application of --- folds where next fold is dependent on the previous fold. --- --- /Pre-release/ --- -{-# INLINE_NORMAL foldIterateM #-} -foldIterateM :: - Monad m => (b -> m (FL.Fold m a b)) -> m b -> Stream m a -> Stream m b -foldIterateM func seed0 (Stream step state) = - Stream stepOuter (FIterInit state seed0) - - where - - {-# INLINE iterStep #-} - iterStep from st fstep extract final = do - res <- from - return - $ Skip - $ case res of - FL.Partial fs -> FIterStream st fstep fs extract final - FL.Done fb -> FIterYield fb $ FIterInit st (return fb) - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (FIterInit st seed) = do - (FL.Fold fstep initial extract final) <- seed >>= func - iterStep initial st fstep extract final - stepOuter gst (FIterStream st fstep fs extract final) = do - r <- step (adaptState gst) st - case r of - Yield x s -> do - iterStep (fstep fs x) s fstep extract final - Skip s -> return $ Skip $ FIterStream s fstep fs extract final - Stop -> do - b <- final fs - return $ Skip $ FIterYield b FIterStop - stepOuter _ (FIterYield a next) = return $ Yield a next - stepOuter _ FIterStop = return Stop - ------------------------------------------------------------------------------- --- Parsing ------------------------------------------------------------------------------- - --- | Apply a 'Parser' repeatedly on a stream and emit the parsed values in the --- output stream. 
--- --- Usage: --- --- >>> s = Stream.fromList [1..10] --- >>> parser = Parser.takeBetween 0 2 Fold.sum --- >>> Stream.toList $ Stream.parseMany parser s --- [Right 3,Right 7,Right 11,Right 15,Right 19] --- --- This is the streaming equivalent of the 'Streamly.Data.Parser.many' parse --- combinator. --- --- Known Issues: When the parser fails there is no way to get the remaining --- stream. --- -{-# INLINE parseMany #-} -parseMany - :: Monad m - => PRD.Parser a m b - -> Stream m a - -> Stream m (Either ParseError b) -parseMany = Drivers.parseMany - --- | Like 'parseMany' but includes stream position information in the error --- messages. --- -{-# INLINE parseManyPos #-} -parseManyPos - :: Monad m - => PRD.Parser a m b - -> Stream m a - -> Stream m (Either ParseErrorPos b) -parseManyPos = Drivers.parseManyPos - -{-# DEPRECATED parseManyD "Please use parseMany instead." #-} -{-# INLINE parseManyD #-} -parseManyD - :: Monad m - => PR.Parser a m b - -> Stream m a - -> Stream m (Either ParseError b) -parseManyD = parseMany - --- | Apply a stream of parsers to an input stream and emit the results in the --- output stream. --- --- /Unimplemented/ --- -{-# INLINE parseSequence #-} -parseSequence - :: -- Monad m => - Stream m (PR.Parser a m b) - -> Stream m a - -> Stream m b -parseSequence _f _m = undefined - --- XXX Change the parser arguments' order - --- | @parseManyTill collect test stream@ tries the parser @test@ on the input, --- if @test@ fails it backtracks and tries @collect@, after @collect@ succeeds --- @test@ is tried again and so on. The parser stops when @test@ succeeds. The --- output of @test@ is discarded and the output of @collect@ is emitted in the --- output stream. The parser fails if @collect@ fails. --- --- /Unimplemented/ --- -{-# INLINE parseManyTill #-} -parseManyTill :: - -- MonadThrow m => - PR.Parser a m b - -> PR.Parser a m x - -> Stream m a - -> Stream m b -parseManyTill = undefined - --- | Iterate a parser generating function on a stream. 
The initial value @b@ is --- used to generate the first parser, the parser is applied on the stream and --- the result is used to generate the next parser and so on. --- --- Example: --- --- >>> import Data.Monoid (Sum(..)) --- >>> s = Stream.fromList [1..10] --- >>> Stream.toList $ fmap getSum $ Stream.catRights $ Stream.parseIterate (\b -> Parser.takeBetween 0 2 (Fold.sconcat b)) (Sum 0) $ fmap Sum s --- [3,10,21,36,55,55] --- --- This is the streaming equivalent of monad like sequenced application of --- parsers where next parser is dependent on the previous parser. --- --- /Pre-release/ --- -{-# INLINE parseIterate #-} -parseIterate - :: Monad m - => (b -> PRD.Parser a m b) - -> b - -> Stream m a - -> Stream m (Either ParseError b) -parseIterate = Drivers.parseIterate - --- | Like 'parseIterate' but includes stream position information in the error --- messages. --- -{-# INLINE parseIteratePos #-} -parseIteratePos - :: Monad m - => (b -> PRD.Parser a m b) - -> b - -> Stream m a - -> Stream m (Either ParseErrorPos b) -parseIteratePos = Drivers.parseIteratePos - -{-# DEPRECATED parseIterateD "Please use parseIterate instead." #-} -{-# INLINE parseIterateD #-} -parseIterateD - :: Monad m - => (b -> PR.Parser a m b) - -> b - -> Stream m a - -> Stream m (Either ParseError b) -parseIterateD = parseIterate - ------------------------------------------------------------------------------- --- Grouping ------------------------------------------------------------------------------- - -data GroupByState st fs a b - = GroupingInit st - | GroupingDo st !fs - | GroupingInitWith st !a - | GroupingDoWith st !fs !a - | GroupingYield !b (GroupByState st fs a b) - | GroupingDone - --- | Keep collecting items in a group as long as the comparison function --- returns true. The comparison function is @cmp old new@ where @old@ is the --- first item in the group and @new@ is the incoming item being tested for --- membership of the group. 
The collected items are folded by the supplied --- fold. --- --- Definition: --- --- >>> groupsWhile cmp f = Stream.parseMany (Parser.groupBy cmp f) -{-# INLINE_NORMAL groupsWhile #-} -groupsWhile :: Monad m - => (a -> a -> Bool) - -> Fold m a b - -> Stream m a - -> Stream m b -{- -groupsWhile eq fld = parseMany (PRD.groupBy eq fld) --} -groupsWhile cmp (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter (GroupingInit state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (GroupingInit st) = do - -- XXX Note that if the stream stops without yielding a single element - -- in the group we discard the "initial" effect. - res <- initial - return - $ case res of - FL.Partial s -> Skip $ GroupingDo st s - FL.Done b -> Yield b $ GroupingInit st - stepOuter gst (GroupingDo st fs) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - Skip s -> return $ Skip $ GroupingDo s fs - Stop -> final fs >> return Stop - - where - - go !_ prev stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC prev s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - else do - r <- final acc - return $ Yield r (GroupingInitWith s x) - Skip s -> go SPEC prev s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingInitWith st x) = do - res <- initial - return - $ case res of - FL.Partial s -> Skip $ GroupingDoWith st s x - FL.Done b -> Yield b $ GroupingInitWith st x - stepOuter gst (GroupingDoWith st fs prev) = do - res <- fstep fs prev - case res of - FL.Partial fs1 -> go SPEC st fs1 - FL.Done b -> return $ Yield b (GroupingInit st) - - where - - -- XXX code duplicated from the previous equation - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp 
prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - else do - r <- final acc - return $ Yield r (GroupingInitWith s x) - Skip s -> go SPEC s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingYield _ _) = error "groupsWhile: Unreachable" - stepOuter _ GroupingDone = return Stop - --- | The argument order of the comparison function in `groupsWhile` is --- different than that of `groupsBy`. --- --- In `groupsBy` the comparison function takes the next element as the first --- argument and the previous element as the second argument. In `groupsWhile` --- the first argument is the previous element and second argument is the next --- element. -{-# DEPRECATED groupsBy "Please use groupsWhile instead. Please note the change in the argument order of the comparison function." #-} -{-# INLINE_NORMAL groupsBy #-} -groupsBy :: Monad m - => (a -> a -> Bool) - -> Fold m a b - -> Stream m a - -> Stream m b -groupsBy cmp = groupsWhile (flip cmp) - --- | --- --- Definition: --- --- >>> groupsRollingBy cmp f = Stream.parseMany (Parser.groupByRolling cmp f) --- -{-# INLINE_NORMAL groupsRollingBy #-} -groupsRollingBy :: Monad m - => (a -> a -> Bool) - -> Fold m a b - -> Stream m a - -> Stream m b -{- -groupsRollingBy eq fld = parseMany (PRD.groupByRolling eq fld) --} -groupsRollingBy cmp (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter (GroupingInit state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (GroupingInit st) = do - -- XXX Note that if the stream stops without yielding a single element - -- in the group we discard the "initial" effect. 
- res <- initial - return - $ case res of - FL.Partial fs -> Skip $ GroupingDo st fs - FL.Done fb -> Yield fb $ GroupingInit st - stepOuter gst (GroupingDo st fs) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done fb -> return $ Yield fb (GroupingInit s) - Skip s -> return $ Skip $ GroupingDo s fs - Stop -> final fs >> return Stop - - where - - go !_ prev stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - else do - r <- final acc - return $ Yield r (GroupingInitWith s x) - Skip s -> go SPEC prev s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingInitWith st x) = do - res <- initial - return - $ case res of - FL.Partial s -> Skip $ GroupingDoWith st s x - FL.Done b -> Yield b $ GroupingInitWith st x - stepOuter gst (GroupingDoWith st fs previous) = do - res <- fstep fs previous - case res of - FL.Partial s -> go SPEC previous st s - FL.Done b -> return $ Yield b (GroupingInit st) - - where - - -- XXX GHC: groupsWhile has one less parameter in this go loop and it - -- fuses. However, groupsRollingBy does not fuse, removing the prev - -- parameter makes it fuse. Something needs to be fixed in GHC. The - -- workaround for this is noted in the comments below. - go !_ prev !stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done b -> return $ Yield b (GroupingInit st) - else do - {- - r <- final acc - return $ Yield r (GroupingInitWith s x) - -} - -- The code above does not let groupBy fuse. We use the - -- alternative code below instead. 
Instead of jumping - -- to GroupingInitWith state, we unroll the code of - -- GroupingInitWith state here to help GHC with stream - -- fusion. - result <- initial - r <- final acc - return - $ Yield r - $ case result of - FL.Partial fsi -> GroupingDoWith s fsi x - FL.Done b -> GroupingYield b (GroupingInit s) - Skip s -> go SPEC prev s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingYield r next) = return $ Yield r next - stepOuter _ GroupingDone = return Stop - ------------------------------------------------------------------------------- --- Splitting - by a predicate ------------------------------------------------------------------------------- - -data WordsByState st fs b - = WordsByInit st - | WordsByDo st !fs - | WordsByDone - | WordsByYield !b (WordsByState st fs b) - --- | Split the stream after stripping leading, trailing, and repeated --- separators determined by the predicate supplied. The tokens after splitting --- are collected by the supplied fold. In other words, the tokens are parsed in --- the same way as words are parsed from whitespace separated text. --- --- >>> f x = Stream.toList $ Stream.wordsBy (== '.') Fold.toList $ Stream.fromList x --- >>> f "a.b" --- ["a","b"] --- >>> f "a..b" --- ["a","b"] --- >>> f ".a..b." 
--- ["a","b"] --- -{-# INLINE_NORMAL wordsBy #-} -wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -wordsBy predicate (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter (WordsByInit state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (WordsByInit st) = do - res <- initial - return - $ case res of - FL.Partial s -> Skip $ WordsByDo st s - FL.Done b -> Yield b (WordsByInit st) - - stepOuter gst (WordsByDo st fs) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - if predicate x - then do - resi <- initial - return - $ case resi of - FL.Partial fs1 -> Skip $ WordsByDo s fs1 - FL.Done b -> Yield b (WordsByInit s) - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC s fs1 - FL.Done b -> return $ Yield b (WordsByInit s) - Skip s -> return $ Skip $ WordsByDo s fs - Stop -> final fs >> return Stop - - where - - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if predicate x - then do - {- - r <- final acc - return $ Yield r (WordsByInit s) - -} - -- The above code does not fuse well. Need to check why - -- GHC is not able to simplify it well. Using the code - -- below, instead of jumping through the WordsByInit - -- state always, we directly go to WordsByDo state in - -- the common case of Partial. 
- resi <- initial - r <- final acc - return - $ Yield r - $ case resi of - FL.Partial fs1 -> WordsByDo s fs1 - FL.Done b -> WordsByYield b (WordsByInit s) - else do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC s fs1 - FL.Done b -> return $ Yield b (WordsByInit s) - Skip s -> go SPEC s acc - Stop -> do - r <- final acc - return $ Yield r WordsByDone - - stepOuter _ WordsByDone = return Stop - - stepOuter _ (WordsByYield b next) = return $ Yield b next - ------------------------------------------------------------------------------- --- Splitting on a sequence ------------------------------------------------------------------------------- - --- String search algorithms: --- http://www-igm.univ-mlv.fr/~lecroq/string/index.html - --- XXX Can GHC find a way to modularise this? Can we write different cases --- i.e.g single element, word hash, karp-rabin as different functions and then --- be able to combine them into a single state machine? - -{-# ANN type TakeEndBySeqState Fuse #-} -data TakeEndBySeqState mba rb rh ck w s b x = - TakeEndBySeqInit - | TakeEndBySeqYield !b (TakeEndBySeqState mba rb rh ck w s b x) - | TakeEndBySeqDone - - | TakeEndBySeqSingle s x - - | TakeEndBySeqWordInit !Int !w s - | TakeEndBySeqWordLoop !w s - | TakeEndBySeqWordDone !Int !w - - | TakeEndBySeqKRInit s mba - | TakeEndBySeqKRInit1 s mba !Int - | TakeEndBySeqKRLoop s mba !rh !ck - | TakeEndBySeqKRCheck s mba !rh - | TakeEndBySeqKRDone !Int rb - --- | If the pattern is empty the output stream is empty. -{-# INLINE_NORMAL takeEndBySeqWith #-} -takeEndBySeqWith - :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) - => Bool - -> Array a - -> Stream m a - -> Stream m a -takeEndBySeqWith withSep patArr (Stream step state) = - Stream stepOuter TakeEndBySeqInit - - where - - patLen = A.length patArr - patBytes = A.byteLength patArr - maxIndex = patLen - 1 - maxOffset = patBytes - SIZE_OF(a) - elemBits = SIZE_OF(a) * 8 - - -- For word pattern case - wordMask :: Word - wordMask = (1 `shiftL` (elemBits * patLen)) - 1 - - elemMask :: Word - elemMask = (1 `shiftL` elemBits) - 1 - - wordPat :: Word - wordPat = wordMask .&. A.foldl' addToWord 0 patArr - - addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - -- For Rabin-Karp search - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - skip = return . Skip - - {-# INLINE yield #-} - yield x !s = skip $ TakeEndBySeqYield x s - - {-# INLINE_LATE stepOuter #-} - stepOuter _ TakeEndBySeqInit = do - -- XXX When we statically specify the method compiler is able to - -- simplify the code better and removes the handling of other states. - -- When it is determined dynamically, the code is less efficient. For - -- example, the single element search degrades by 80% if the handling - -- of other cases is present. We need to investigate this further but - -- until then we can guide the compiler statically where we can. If we - -- want to use single element search statically then we can use - -- takeEndBy instead. - -- - -- XXX Is there a way for GHC to statically determine patLen when we - -- use an array created from a static string as pattern e.g. "\n". 
- case () of - _ | patLen == 0 -> return Stop - | patLen == 1 -> do - pat <- liftIO $ A.unsafeGetIndexIO 0 patArr - return $ Skip $ TakeEndBySeqSingle state pat - | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - return $ Skip $ TakeEndBySeqWordInit 0 0 state - | otherwise -> do - (MutArray mba _ _ _) :: MutArray a <- - liftIO $ MutArray.emptyOf patLen - skip $ TakeEndBySeqKRInit state mba - - --------------------- - -- Single yield point - --------------------- - - stepOuter _ (TakeEndBySeqYield x next) = return $ Yield x next - - ----------------- - -- Done - ----------------- - - stepOuter _ TakeEndBySeqDone = return Stop - - ----------------- - -- Single Pattern - ----------------- - - stepOuter gst (TakeEndBySeqSingle st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> - if pat /= x - then yield x (TakeEndBySeqSingle s pat) - else do - if withSep - then yield x TakeEndBySeqDone - else return Stop - Skip s -> skip $ TakeEndBySeqSingle s pat - Stop -> return Stop - - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - - -- Note: Karp-Rabin is roughly 15% slower than word hash for a 2 element - -- pattern. This may be useful for common cases like splitting lines using - -- "\r\n". - stepOuter _ (TakeEndBySeqWordDone 0 _) = do - return Stop - stepOuter _ (TakeEndBySeqWordDone n wrd) = do - let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) - in yield - (toEnum $ fromIntegral old) - (TakeEndBySeqWordDone (n - 1) wrd) - - -- XXX If we remove this init state for perf experiment the time taken - -- reduces to half, there may be some optimization opportunity here. - stepOuter gst (TakeEndBySeqWordInit idx wrd st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd1 = addToWord wrd x - next - | idx /= maxIndex = - TakeEndBySeqWordInit (idx + 1) wrd1 s - | wrd1 .&. 
wordMask /= wordPat = - TakeEndBySeqWordLoop wrd1 s - | otherwise = TakeEndBySeqDone - if withSep - then yield x next - else skip next - Skip s -> skip $ TakeEndBySeqWordInit idx wrd s - Stop -> - if withSep - then return Stop - else skip $ TakeEndBySeqWordDone idx wrd - - stepOuter gst (TakeEndBySeqWordLoop wrd st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - -- XXX Never use a lazy expression as state, that causes issues - -- in simplification because the state argument of Yield is - -- lazy, maybe we can make that strict. - let wrd1 = addToWord wrd x - old = (wordMask .&. wrd) - `shiftR` (elemBits * (patLen - 1)) - !y = - if withSep - then x - else toEnum $ fromIntegral old - -- Note: changing the nesting order of if and yield makes a - -- difference in performance. - if wrd1 .&. wordMask /= wordPat - then yield y (TakeEndBySeqWordLoop wrd1 s) - else yield y TakeEndBySeqDone - Skip s -> skip $ TakeEndBySeqWordLoop wrd s - Stop -> - if withSep - then return Stop - else skip $ TakeEndBySeqWordDone patLen wrd - - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - - stepOuter gst (TakeEndBySeqKRInit st0 mba) = do - res <- step (adaptState gst) st0 - case res of - Yield x s -> do - liftIO $ pokeAt 0 mba x - if withSep - then yield x (TakeEndBySeqKRInit1 s mba (SIZE_OF(a))) - else skip $ TakeEndBySeqKRInit1 s mba (SIZE_OF(a)) - Skip s -> skip $ TakeEndBySeqKRInit s mba - Stop -> return Stop - - stepOuter gst (TakeEndBySeqKRInit1 st mba offset) = do - res <- step (adaptState gst) st - let arr :: Array a = Array - { arrContents = mba - , arrStart = 0 - , arrEnd = patBytes - } - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - let next = - if offset /= maxOffset - then TakeEndBySeqKRInit1 s mba (offset + SIZE_OF(a)) - else - let ringHash = A.foldl' addCksum 0 arr - in if ringHash == patHash - then TakeEndBySeqKRCheck s mba 0 - else TakeEndBySeqKRLoop s mba 0 ringHash - if withSep 
- then yield x next - else skip next - Skip s -> skip $ TakeEndBySeqKRInit1 s mba offset - Stop -> do - if withSep - then return Stop - else do - let rb = RingArray - { ringContents = mba - , ringSize = offset - , ringHead = 0 - } - in skip $ TakeEndBySeqKRDone offset rb - - stepOuter gst (TakeEndBySeqKRLoop st mba rh cksum) = do - res <- step (adaptState gst) st - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - case res of - Yield x s -> do - (rb1, old) <- liftIO (RB.replace rb x) - let cksum1 = deltaCksum cksum old x - let rh1 = ringHead rb1 - next = - if cksum1 /= patHash - then TakeEndBySeqKRLoop s mba rh1 cksum1 - else TakeEndBySeqKRCheck s mba rh1 - if withSep - then yield x next - else yield old next - Skip s -> skip $ TakeEndBySeqKRLoop s mba rh cksum - Stop -> do - if withSep - then return Stop - else skip $ TakeEndBySeqKRDone patBytes rb - - stepOuter _ (TakeEndBySeqKRCheck st mba rh) = do - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - matches <- liftIO $ RB.eqArray rb patArr - if matches - then return Stop - else skip $ TakeEndBySeqKRLoop st mba rh patHash - - stepOuter _ (TakeEndBySeqKRDone 0 _) = return Stop - stepOuter _ (TakeEndBySeqKRDone len rb) = do - assert (len >= 0) (return ()) - old <- RB.unsafeGetHead rb - let rb1 = RB.moveForward rb - yield old $ TakeEndBySeqKRDone (len - SIZE_OF(a)) rb1 - --- | Take the stream until the supplied sequence is encountered. Take the --- sequence as well and stop. --- --- Usage: --- --- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq (Array.fromList pat) $ Stream.fromList xs --- >>> f "fgh" "abcdefghijk" --- "abcdefgh" --- >>> f "lmn" "abcdefghijk" --- "abcdefghijk" --- >>> f "" "abcdefghijk" --- "" --- -{-# INLINE takeEndBySeq #-} -takeEndBySeq - :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Stream m a - -> Stream m a -takeEndBySeq = takeEndBySeqWith True - --- | Take the stream until the supplied sequence is encountered. Do not take --- the sequence. --- --- Usage: --- --- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq_ (Array.fromList pat) $ Stream.fromList xs --- >>> f "fgh" "abcdefghijk" --- "abcde" --- >>> f "lmn" "abcdefghijk" --- "abcdefghijk" --- >>> f "" "abcdefghijk" --- "" --- -{-# INLINE takeEndBySeq_ #-} -takeEndBySeq_ - :: forall m a. (MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Stream m a - -> Stream m a -takeEndBySeq_ = takeEndBySeqWith False - -{- --- TODO can we unify the splitting operations using a splitting configuration --- like in the split package. --- -data SplitStyle = Infix | Suffix | Prefix deriving (Eq, Show) -data SplitOptions = SplitOptions - { style :: SplitStyle - , withSep :: Bool -- ^ keep the separators in output - -- , compact :: Bool -- ^ treat multiple consecutive separators as one - -- , trimHead :: Bool -- ^ drop blank at head - -- , trimTail :: Bool -- ^ drop blank at tail - } --} - --- XXX using "fs" as the last arg in Constructors may simplify the code a bit, --- because we can use the constructor directly without having to create "jump" --- functions. 
-{-# ANN type SplitOnSeqState Fuse #-} -data SplitOnSeqState mba rb rh ck w fs s b x = - SplitOnSeqInit - | SplitOnSeqYield b (SplitOnSeqState mba rb rh ck w fs s b x) - | SplitOnSeqDone - - | SplitOnSeqEmpty !fs s - - | SplitOnSeqSingle0 !fs s x - | SplitOnSeqSingle !fs s x - - | SplitOnSeqWordInit0 !fs s - | SplitOnSeqWordInit Int Word !fs s - | SplitOnSeqWordLoop !w s !fs - | SplitOnSeqWordDone Int !fs !w - - | SplitOnSeqKRInit0 Int !fs s mba - | SplitOnSeqKRInit Int !fs s mba - | SplitOnSeqKRLoop fs s mba !rh !ck - | SplitOnSeqKRCheck fs s mba !rh - | SplitOnSeqKRDone Int !fs rb - - | SplitOnSeqReinit (fs -> SplitOnSeqState mba rb rh ck w fs s b x) - --- XXX Need to fix empty stream split behavior - --- | Like 'splitSepBy_' but splits the stream on a sequence of elements rather than --- a single element. Parses a sequence of tokens separated by an infixed --- separator e.g. @a;b;c@ is parsed as @a@, @b@, @c@. If the pattern is empty --- then each element is a match, thus the fold is finalized on each element. --- --- >>> splitSepBy p xs = Stream.fold Fold.toList $ Stream.splitSepBySeq_ (Array.fromList p) Fold.toList (Stream.fromList xs) --- --- >>> splitSepBy "" "" --- [] --- --- >>> splitSepBy "" "a...b" --- ["a",".",".",".","b"] --- --- >>> splitSepBy ".." "" --- [] --- --- >>> splitSepBy ".." "a...b" --- ["a",".b"] --- --- >>> splitSepBy ".." "abc" --- ["abc"] --- --- >>> splitSepBy ".." ".." --- ["",""] --- --- >>> splitSepBy "." ".a" --- ["","a"] --- --- >>> splitSepBy "." "a." --- ["a",""] --- --- Uses Rabin-Karp algorithm for substring search. --- -{-# INLINE_NORMAL splitSepBySeq_ #-} -splitSepBySeq_, splitOnSeq - :: forall m a b. 
(MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter SplitOnSeqInit - - where - - patLen = A.length patArr - patBytes = A.byteLength patArr - maxIndex = patLen - 1 - maxOffset = patBytes - SIZE_OF(a) - elemBits = SIZE_OF(a) * 8 - - -- For word pattern case - wordMask :: Word - wordMask = (1 `shiftL` (elemBits * patLen)) - 1 - - elemMask :: Word - elemMask = (1 `shiftL` elemBits) - 1 - - wordPat :: Word - wordPat = wordMask .&. A.foldl' addToWord 0 patArr - - addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - -- For Rabin-Karp search - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - skip = return . Skip - - nextAfterInit nextGen stepRes = - case stepRes of - FL.Partial s -> nextGen s - FL.Done b -> SplitOnSeqYield b (SplitOnSeqReinit nextGen) - - {-# INLINE yieldReinit #-} - yieldReinit nextGen fs = - initial >>= skip . SplitOnSeqYield fs . 
nextAfterInit nextGen - - {-# INLINE_LATE stepOuter #-} - stepOuter _ SplitOnSeqInit = do - res <- initial - case res of - FL.Partial acc - | patLen == 0 -> - return $ Skip $ SplitOnSeqEmpty acc state - | patLen == 1 -> do - pat <- liftIO $ A.unsafeGetIndexIO 0 patArr - return $ Skip $ SplitOnSeqSingle0 acc state pat - | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - return $ Skip $ SplitOnSeqWordInit0 acc state - | otherwise -> do - (MutArray mba _ _ _) :: MutArray a <- - liftIO $ MutArray.emptyOf patLen - skip $ SplitOnSeqKRInit0 0 acc state mba - FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit - - stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next - - --------------------------- - -- Checkpoint - --------------------------- - - stepOuter _ (SplitOnSeqReinit nextGen) = - initial >>= skip . nextAfterInit nextGen - - --------------------------- - -- Empty pattern - --------------------------- - - stepOuter gst (SplitOnSeqEmpty acc st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - r <- fstep acc x - b1 <- - case r of - FL.Partial acc1 -> final acc1 - FL.Done b -> return b - let jump c = SplitOnSeqEmpty c s - in yieldReinit jump b1 - Skip s -> skip (SplitOnSeqEmpty acc s) - Stop -> final acc >> return Stop - - ----------------- - -- Done - ----------------- - - stepOuter _ SplitOnSeqDone = return Stop - - ----------------- - -- Single Pattern - ----------------- - - stepOuter gst (SplitOnSeqSingle0 fs st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - -- XXX This code block is duplicated in SplitOnSeqSingle state - let jump c = SplitOnSeqSingle c s pat - if pat == x - then final fs >>= yieldReinit jump - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> - pure $ Skip $ SplitOnSeqSingle fs1 s pat - FL.Done b -> yieldReinit jump b - Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat - Stop -> final fs >> pure Stop - - stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do - go SPEC fs0 
st0 - - where - - -- The local loop increases allocations by 6% but improves CPU - -- performance by 14%. - go !_ !fs !st = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSeqSingle c s pat - if pat == x - then final fs >>= yieldReinit jump - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC fs1 s - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC fs s - Stop -> do - r <- final fs - return $ Skip $ SplitOnSeqYield r SplitOnSeqDone - - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - - -- Note: We fill the matching buffer before we emit anything, in case it - -- matches and we have to drop it. Though we could be more eager in - -- emitting as soon as we know that the pattern cannot match. But still the - -- worst case will remain the same, in case a match is going to happen we - -- will have to delay until the very end. - - stepOuter _ (SplitOnSeqWordDone 0 fs _) = do - r <- final fs - skip $ SplitOnSeqYield r SplitOnSeqDone - stepOuter _ (SplitOnSeqWordDone n fs wrd) = do - let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) - r <- fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> skip $ SplitOnSeqWordDone (n - 1) fs1 wrd - FL.Done b -> do - let jump c = SplitOnSeqWordDone (n - 1) c wrd - yieldReinit jump b - - stepOuter gst (SplitOnSeqWordInit0 fs st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> - let wrd1 = addToWord 0 x - in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s - Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s - Stop -> final fs >> pure Stop - - stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) = - go SPEC idx0 wrd0 st0 - - where - - {-# INLINE go #-} - go !_ !idx !wrd !st = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd1 = addToWord wrd x - if idx == maxIndex - then do - if wrd1 .&. 
wordMask == wordPat - then do - let jump c = SplitOnSeqWordInit 0 0 c s - final fs >>= yieldReinit jump - else skip $ SplitOnSeqWordLoop wrd1 s fs - else go SPEC (idx + 1) wrd1 s - Skip s -> go SPEC idx wrd s - Stop -> do - if idx /= 0 - then skip $ SplitOnSeqWordDone idx fs wrd - else do - r <- final fs - skip $ SplitOnSeqYield r SplitOnSeqDone - - stepOuter gst (SplitOnSeqWordLoop wrd0 st0 fs0) = - go SPEC wrd0 st0 fs0 - - where - - -- This loop does not affect allocations but it improves the CPU - -- performance signifcantly compared to looping using state. - {-# INLINE go #-} - go !_ !wrd !st !fs = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSeqWordInit 0 0 c s - wrd1 = addToWord wrd x - old = (wordMask .&. wrd) - `shiftR` (elemBits * (patLen - 1)) - r <- fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> do - if wrd1 .&. wordMask == wordPat - then final fs1 >>= yieldReinit jump - else go SPEC wrd1 s fs1 - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC wrd s fs - Stop -> skip $ SplitOnSeqWordDone patLen fs wrd - - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - - -- XXX Document this pattern for writing efficient code. Loop around only - -- required elements in the recursive loop, build the structures being - -- manipulated locally e.g. we are passing only mba, here and build an - -- array using patLen and arrStart from the surrounding context. 
- - stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba - Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba - Stop -> final fs >> pure Stop - - stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - if offset == maxOffset - then do - let arr :: Array a = Array - { arrContents = mba - , arrStart = 0 - , arrEnd = patBytes - } - let ringHash = A.foldl' addCksum 0 arr - if ringHash == patHash && A.byteEq arr patArr - then skip $ SplitOnSeqKRCheck fs s mba 0 - else skip $ SplitOnSeqKRLoop fs s mba 0 ringHash - else skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba - Skip s -> skip $ SplitOnSeqKRInit offset fs s mba - Stop -> do - let rb = RingArray - { ringContents = mba - , ringSize = offset - , ringHead = 0 - } - skip $ SplitOnSeqKRDone offset fs rb - - -- XXX The recursive "go" is more efficient than the state based recursion - -- code commented out below. Perhaps its more efficient because of - -- factoring out "mba" outside the loop. - -- - stepOuter gst (SplitOnSeqKRLoop fs0 st0 mba rh0 cksum0) = - go SPEC fs0 st0 rh0 cksum0 - - where - - go !_ !fs !st !rh !cksum = do - res <- step (adaptState gst) st - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - case res of - Yield x s -> do - (rb1, old) <- liftIO (RB.replace rb x) - r <- fstep fs old - case r of - FL.Partial fs1 -> do - let cksum1 = deltaCksum cksum old x - let rh1 = ringHead rb1 - if cksum1 == patHash - then skip $ SplitOnSeqKRCheck fs1 s mba rh1 - else go SPEC fs1 s rh1 cksum1 - FL.Done b -> do - -- XXX the old code looks wrong as we are resetting - -- the ring head but the ring still has old - -- elements as we are not resetting the size. 
- let jump c = SplitOnSeqKRInit 0 c s mba - yieldReinit jump b - Skip s -> go SPEC fs s rh cksum - Stop -> skip $ SplitOnSeqKRDone patBytes fs rb - - -- XXX The following code is 5 times slower compared to the recursive loop - -- based code above. Need to investigate why. One possibility is that the - -- go loop above does not thread around the ring buffer (rb). This code may - -- be causing the state to bloat and getting allocated on each iteration. - -- We can check the cmm/asm code to confirm. If so a good GHC solution to - -- such problem is needed. One way to avoid this could be to use unboxed - -- mutable state? - {- - stepOuter gst (SplitOnSeqKRLoop fs st rb rh cksum) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum1 = deltaCksum cksum old x - fs1 <- fstep fs old - if (cksum1 == patHash) - then do - r <- done fs1 - skip $ SplitOnSeqYield r $ SplitOnSeqKRInit 0 s rb rh - else do - rh1 <- liftIO (RB.unsafeInsert rb rh x) - skip $ SplitOnSeqKRLoop fs1 s rb rh1 cksum1 - Skip s -> skip $ SplitOnSeqKRLoop fs s rb rh cksum - Stop -> skip $ SplitOnSeqKRDone patLen fs rb rh - -} - - stepOuter _ (SplitOnSeqKRCheck fs st mba rh) = do - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - res <- liftIO $ RB.eqArray rb patArr - if res - then do - r <- final fs - let jump c = SplitOnSeqKRInit 0 c st mba - yieldReinit jump r - else skip $ SplitOnSeqKRLoop fs st mba rh patHash - - stepOuter _ (SplitOnSeqKRDone 0 fs _) = do - r <- final fs - skip $ SplitOnSeqYield r SplitOnSeqDone - stepOuter _ (SplitOnSeqKRDone len fs rb) = do - assert (len >= 0) (return ()) - old <- RB.unsafeGetHead rb - let rb1 = RB.moveForward rb - r <- fstep fs old - case r of - FL.Partial fs1 -> skip $ SplitOnSeqKRDone (len - SIZE_OF(a)) fs1 rb1 - FL.Done b -> do - let jump c = SplitOnSeqKRDone (len - SIZE_OF(a)) c rb1 - yieldReinit jump b - -RENAME(splitOnSeq,splitSepBySeq_) - -{-# ANN type 
SplitOnSuffixSeqState Fuse #-} -data SplitOnSuffixSeqState mba rb rh ck w fs s b x = - SplitOnSuffixSeqInit - | SplitOnSuffixSeqYield b (SplitOnSuffixSeqState mba rb rh ck w fs s b x) - | SplitOnSuffixSeqDone - - | SplitOnSuffixSeqEmpty !fs s - - | SplitOnSuffixSeqSingleInit !fs s x - | SplitOnSuffixSeqSingle !fs s x - - | SplitOnSuffixSeqWordInit !fs s - | SplitOnSuffixSeqWordLoop !w s !fs - | SplitOnSuffixSeqWordDone Int !fs !w - - | SplitOnSuffixSeqKRInit !fs s mba - | SplitOnSuffixSeqKRInit1 !fs s mba - | SplitOnSuffixSeqKRLoop fs s mba !rh !ck - | SplitOnSuffixSeqKRCheck fs s mba !rh - | SplitOnSuffixSeqKRDone Int !fs rb - - | SplitOnSuffixSeqReinit - (fs -> SplitOnSuffixSeqState mba rb rh ck w fs s b x) - --- | @splitOnSuffixSeq withSep pat fld input@ splits the input using @pat@ as a --- suffixed separator, the resulting split segments are fed to the fold @fld@. --- If @withSep@ is True then the separator sequence is also suffixed with the --- split segments. --- --- /Internal/ -{-# INLINE_NORMAL splitOnSuffixSeq #-} -splitOnSuffixSeq - :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) - => Bool - -> Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitOnSuffixSeq withSep patArr (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter SplitOnSuffixSeqInit - - where - - patLen = A.length patArr - patBytes = A.byteLength patArr - maxIndex = patLen - 1 - maxOffset = patBytes - SIZE_OF(a) - elemBits = SIZE_OF(a) * 8 - - -- For word pattern case - wordMask :: Word - wordMask = (1 `shiftL` (elemBits * patLen)) - 1 - - elemMask :: Word - elemMask = (1 `shiftL` elemBits) - 1 - - wordPat :: Word - wordPat = wordMask .&. A.foldl' addToWord 0 patArr - - addToWord wd a = (wd `shiftL` elemBits) .|. 
fromIntegral (fromEnum a) - - nextAfterInit nextGen stepRes = - case stepRes of - FL.Partial s -> nextGen s - FL.Done b -> - SplitOnSuffixSeqYield b (SplitOnSuffixSeqReinit nextGen) - - {-# INLINE yieldReinit #-} - yieldReinit nextGen fs = - initial >>= skip . SplitOnSuffixSeqYield fs . nextAfterInit nextGen - - -- For single element pattern case - {-# INLINE processYieldSingle #-} - processYieldSingle pat x s fs = do - let jump c = SplitOnSuffixSeqSingleInit c s pat - if pat == x - then do - r <- if withSep then fstep fs x else return $ FL.Partial fs - b1 <- - case r of - FL.Partial fs1 -> final fs1 - FL.Done b -> return b - yieldReinit jump b1 - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> skip $ SplitOnSuffixSeqSingle fs1 s pat - FL.Done b -> yieldReinit jump b - - -- For Rabin-Karp search - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - skip = return . Skip - - {-# INLINE_LATE stepOuter #-} - stepOuter _ SplitOnSuffixSeqInit = do - res <- initial - case res of - FL.Partial fs - | patLen == 0 -> - skip $ SplitOnSuffixSeqEmpty fs state - | patLen == 1 -> do - pat <- liftIO $ A.unsafeGetIndexIO 0 patArr - skip $ SplitOnSuffixSeqSingleInit fs state pat - | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - skip $ SplitOnSuffixSeqWordInit fs state - | otherwise -> do - (MutArray mba _ _ _) :: MutArray a <- - liftIO $ MutArray.emptyOf patLen - skip $ SplitOnSuffixSeqKRInit fs state mba - FL.Done fb -> skip $ SplitOnSuffixSeqYield fb SplitOnSuffixSeqInit - - stepOuter _ (SplitOnSuffixSeqYield x next) = return $ Yield x next - - --------------------------- - -- Reinit - --------------------------- - - stepOuter _ (SplitOnSuffixSeqReinit nextGen) = - initial >>= skip . 
nextAfterInit nextGen - - --------------------------- - -- Empty pattern - --------------------------- - - stepOuter gst (SplitOnSuffixSeqEmpty acc st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSuffixSeqEmpty c s - r <- fstep acc x - b1 <- - case r of - FL.Partial fs -> final fs - FL.Done b -> return b - yieldReinit jump b1 - Skip s -> skip (SplitOnSuffixSeqEmpty acc s) - Stop -> final acc >> return Stop - - ----------------- - -- Done - ----------------- - - stepOuter _ SplitOnSuffixSeqDone = return Stop - - ----------------- - -- Single Pattern - ----------------- - - stepOuter gst (SplitOnSuffixSeqSingleInit fs st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> processYieldSingle pat x s fs - Skip s -> skip $ SplitOnSuffixSeqSingleInit fs s pat - Stop -> final fs >> return Stop - - stepOuter gst (SplitOnSuffixSeqSingle fs st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> processYieldSingle pat x s fs - Skip s -> skip $ SplitOnSuffixSeqSingle fs s pat - Stop -> do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - - stepOuter _ (SplitOnSuffixSeqWordDone 0 fs _) = do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - stepOuter _ (SplitOnSuffixSeqWordDone n fs wrd) = do - let old = elemMask .&. 
(wrd `shiftR` (elemBits * (n - 1))) - r <- fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> skip $ SplitOnSuffixSeqWordDone (n - 1) fs1 wrd - FL.Done b -> do - let jump c = SplitOnSuffixSeqWordDone (n - 1) c wrd - yieldReinit jump b - - stepOuter gst (SplitOnSuffixSeqWordInit fs0 st0) = do - res <- step (adaptState gst) st0 - case res of - Yield x s -> do - let wrd = addToWord 0 x - r <- if withSep then fstep fs0 x else return $ FL.Partial fs0 - case r of - FL.Partial fs1 -> go SPEC 1 wrd s fs1 - FL.Done b -> do - let jump c = SplitOnSuffixSeqWordInit c s - yieldReinit jump b - Skip s -> skip (SplitOnSuffixSeqWordInit fs0 s) - Stop -> final fs0 >> return Stop - - where - - {-# INLINE go #-} - go !_ !idx !wrd !st !fs = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSuffixSeqWordInit c s - let wrd1 = addToWord wrd x - r <- if withSep then fstep fs x else return $ FL.Partial fs - case r of - FL.Partial fs1 - | idx /= maxIndex -> - go SPEC (idx + 1) wrd1 s fs1 - | wrd1 .&. wordMask /= wordPat -> - skip $ SplitOnSuffixSeqWordLoop wrd1 s fs1 - | otherwise -> - final fs1 >>= yieldReinit jump - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC idx wrd s fs - Stop -> - if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else skip $ SplitOnSuffixSeqWordDone idx fs wrd - - stepOuter gst (SplitOnSuffixSeqWordLoop wrd0 st0 fs0) = - go SPEC wrd0 st0 fs0 - - where - - {-# INLINE go #-} - go !_ !wrd !st !fs = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSuffixSeqWordInit c s - wrd1 = addToWord wrd x - old = (wordMask .&. wrd) - `shiftR` (elemBits * (patLen - 1)) - r <- - if withSep - then fstep fs x - else fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> - if wrd1 .&. 
wordMask == wordPat - then final fs1 >>= yieldReinit jump - else go SPEC wrd1 s fs1 - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC wrd s fs - Stop -> - if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else skip $ SplitOnSuffixSeqWordDone patLen fs wrd - - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - - stepOuter gst (SplitOnSuffixSeqKRInit fs st0 mba) = do - res <- step (adaptState gst) st0 - case res of - Yield x s -> do - liftIO $ pokeAt 0 mba x - r <- if withSep then fstep fs x else return $ FL.Partial fs - case r of - FL.Partial fs1 -> - skip $ SplitOnSuffixSeqKRInit1 fs1 s mba - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRInit c s mba - yieldReinit jump b - Skip s -> skip $ SplitOnSuffixSeqKRInit fs s mba - Stop -> final fs >> return Stop - - stepOuter gst (SplitOnSuffixSeqKRInit1 fs0 st0 mba) = do - go SPEC (SIZE_OF(a)) st0 fs0 - - where - - go !_ !offset st !fs = do - res <- step (adaptState gst) st - let arr :: Array a = Array - { arrContents = mba - , arrStart = 0 - , arrEnd = patBytes - } - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - r <- if withSep then fstep fs x else return $ FL.Partial fs - let ringHash = A.foldl' addCksum 0 arr - case r of - FL.Partial fs1 - | offset /= maxOffset -> - go SPEC (offset + SIZE_OF(a)) s fs1 - | ringHash == patHash -> - skip $ SplitOnSuffixSeqKRCheck fs1 s mba 0 - | otherwise -> - skip $ SplitOnSuffixSeqKRLoop - fs1 s mba 0 ringHash - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRInit c s mba - yieldReinit jump b - Skip s -> go SPEC offset s fs - Stop -> do - -- do not issue a blank segment when we end at pattern - if offset == maxOffset && A.byteEq arr patArr - then final fs >> return Stop - else if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else do - let rb = RingArray - { ringContents = mba - , ringSize = offset - , ringHead = 0 - } - in 
skip $ SplitOnSuffixSeqKRDone offset fs rb - - stepOuter gst (SplitOnSuffixSeqKRLoop fs0 st0 mba rh0 cksum0) = - go SPEC fs0 st0 rh0 cksum0 - - where - - go !_ !fs !st !rh !cksum = do - res <- step (adaptState gst) st - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - case res of - Yield x s -> do - (rb1, old) <- liftIO (RB.replace rb x) - let cksum1 = deltaCksum cksum old x - let rh1 = ringHead rb1 - r <- if withSep then fstep fs x else fstep fs old - case r of - FL.Partial fs1 -> - if cksum1 /= patHash - then go SPEC fs1 s rh1 cksum1 - else skip $ SplitOnSuffixSeqKRCheck fs1 s mba rh1 - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRInit c s mba - yieldReinit jump b - Skip s -> go SPEC fs s rh cksum - Stop -> do - if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else skip $ SplitOnSuffixSeqKRDone patBytes fs rb - - stepOuter _ (SplitOnSuffixSeqKRCheck fs st mba rh) = do - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - matches <- liftIO $ RB.eqArray rb patArr - if matches - then do - r <- final fs - let jump c = SplitOnSuffixSeqKRInit c st mba - yieldReinit jump r - else skip $ SplitOnSuffixSeqKRLoop fs st mba rh patHash - - stepOuter _ (SplitOnSuffixSeqKRDone 0 fs _) = do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - stepOuter _ (SplitOnSuffixSeqKRDone len fs rb) = do - assert (len >= 0) (return ()) - old <- RB.unsafeGetHead rb - let rb1 = RB.moveForward rb - r <- fstep fs old - case r of - FL.Partial fs1 -> - skip $ SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) fs1 rb1 - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) c rb1 - yieldReinit jump b - --- | Parses a sequence of tokens suffixed by a separator e.g. @a;b;c;@ is --- parsed as @a;@, @b;@, @c;@. If the pattern is empty the input stream is --- returned as it is. 
--- --- Equivalent to the following: --- --- >>> splitEndBySeq pat f = Stream.foldMany (Fold.takeEndBySeq pat f) --- --- Usage: --- --- >>> f p = Stream.splitEndBySeq (Array.fromList p) Fold.toList --- >>> splitEndBy p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) --- --- >>> splitEndBy "" "" --- [] --- --- >>> splitEndBy "" "a...b" --- ["a",".",".",".","b"] --- --- >>> splitEndBy ".." "" --- [] --- --- --- >>> splitEndBy ".." "a...b" --- ["a..",".b"] --- --- --- >>> splitEndBy ".." "abc" --- ["abc"] --- --- >>> splitEndBy ".." ".." --- [".."] --- --- >>> splitEndBy "." ".a" --- [".","a"] --- --- >>> splitEndBy "." "a." --- ["a."] --- --- Uses Rabin-Karp algorithm for substring search. --- -{-# INLINE_NORMAL splitEndBySeq #-} -splitEndBySeq - :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitEndBySeq = splitOnSuffixSeq True - --- | Like 'splitEndBySeq' but drops the separators and returns only the tokens. --- --- Equivalent to the following: --- --- >>> splitEndBySeq_ pat f = Stream.foldMany (Fold.takeEndBySeq_ pat f) --- --- Usage: --- --- >>> f p = Stream.splitEndBySeq_ (Array.fromList p) Fold.toList --- >>> splitEndBy_ p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) --- --- >>> splitEndBy_ "" "" --- [] --- --- >>> splitEndBy_ "" "a...b" --- ["a",".",".",".","b"] --- --- >>> splitEndBy_ ".." "" --- [] --- --- >>> splitEndBy_ ".." "a...b" --- ["a",".b"] --- --- >>> splitEndBy_ ".." "abc" --- ["abc"] --- --- >>> splitEndBy_ ".." ".." --- [""] --- --- >>> splitEndBy_ "." ".a" --- ["","a"] --- --- >>> splitEndBy_ "." "a." --- ["a"] --- --- Uses Rabin-Karp algorithm for substring search. --- -{-# INLINE_NORMAL splitEndBySeq_ #-} -splitEndBySeq_ - :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitEndBySeq_ = splitOnSuffixSeq False - --- Implement this as a fold or a parser instead. 
--- This can be implemented easily using Rabin Karp - --- | Split post any one of the given patterns. --- --- /Unimplemented/ -{-# INLINE splitEndBySeqOneOf #-} -splitEndBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => - [Array a] -> Fold m a b -> Stream m a -> Stream m b -splitEndBySeqOneOf _subseq _f _m = undefined - --- | Split on a prefixed separator element, dropping the separator. The --- supplied 'Fold' is applied on the split segments. --- --- @ --- > splitOnPrefix' p xs = Stream.toList $ Stream.splitOnPrefix p (Fold.toList) (Stream.fromList xs) --- > splitOnPrefix' (== '.') ".a.b" --- ["a","b"] --- @ --- --- An empty stream results in an empty output stream: --- @ --- > splitOnPrefix' (== '.') "" --- [] --- @ --- --- An empty segment consisting of only a prefix is folded to the default output --- of the fold: --- --- @ --- > splitOnPrefix' (== '.') "." --- [""] --- --- > splitOnPrefix' (== '.') ".a.b." --- ["a","b",""] --- --- > splitOnPrefix' (== '.') ".a..b" --- ["a","","b"] --- --- @ --- --- A prefix is optional at the beginning of the stream: --- --- @ --- > splitOnPrefix' (== '.') "a" --- ["a"] --- --- > splitOnPrefix' (== '.') "a.b" --- ["a","b"] --- @ --- --- 'splitOnPrefix' is an inverse of 'intercalatePrefix' with a single element: --- --- > Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList . Stream.splitOnPrefix (== '.') Fold.toList === id --- --- Assuming the input stream does not contain the separator: --- --- > Stream.splitOnPrefix (== '.') Fold.toList . 
Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList === id --- --- /Unimplemented/ -{-# INLINE splitBeginBy_ #-} -splitBeginBy_ :: -- (MonadCatch m) => - (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitBeginBy_ _predicate _f = undefined - -- parseMany (Parser.sliceBeginBy predicate f) - --- Int list examples for splitOn: --- --- >>> splitList [] [1,2,3,3,4] --- > [[1],[2],[3],[3],[4]] --- --- >>> splitList [5] [1,2,3,3,4] --- > [[1,2,3,3,4]] --- --- >>> splitList [1] [1,2,3,3,4] --- > [[],[2,3,3,4]] --- --- >>> splitList [4] [1,2,3,3,4] --- > [[1,2,3,3],[]] --- --- >>> splitList [2] [1,2,3,3,4] --- > [[1],[3,3,4]] --- --- >>> splitList [3] [1,2,3,3,4] --- > [[1,2],[],[4]] --- --- >>> splitList [3,3] [1,2,3,3,4] --- > [[1,2],[4]] --- --- >>> splitList [1,2,3,3,4] [1,2,3,3,4] --- > [[],[]] - --- This can be implemented easily using Rabin Karp --- | Split on any one of the given patterns. --- --- /Unimplemented/ --- -{-# INLINE splitSepBySeqOneOf #-} -splitSepBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => - [Array a] -> Fold m a b -> Stream m a -> Stream m b -splitSepBySeqOneOf _subseq _f _m = - undefined -- D.fromStreamD $ D.splitOnAny f subseq (D.toStreamD m) - ------------------------------------------------------------------------------- --- Nested Container Transformation ------------------------------------------------------------------------------- - -{-# ANN type SplitState Fuse #-} -data SplitState s arr - = SplitInitial s - | SplitBuffering s arr - | SplitSplitting s arr - | SplitYielding arr (SplitState s arr) - | SplitFinishing - --- XXX An alternative approach would be to use a partial fold (Fold m a b) to --- split using a splitBy like combinator. The Fold would consume upto the --- separator and return any leftover which can then be fed to the next fold. --- --- We can revisit this once we have partial folds/parsers. --- --- | Performs infix separator style splitting. 
-{-# INLINE_NORMAL splitInnerBy #-} -splitInnerBy - :: Monad m - => (f a -> m (f a, Maybe (f a))) -- splitter - -> (f a -> f a -> m (f a)) -- joiner - -> Stream m (f a) - -> Stream m (f a) -splitInnerBy splitter joiner (Stream step1 state1) = - Stream step (SplitInitial state1) - - where - - {-# INLINE_LATE step #-} - step gst (SplitInitial st) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - return $ case mx2 of - Nothing -> Skip (SplitBuffering s x1) - Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitInitial s) - Stop -> return Stop - - step gst (SplitBuffering st buf) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - buf' <- joiner buf x1 - return $ case mx2 of - Nothing -> Skip (SplitBuffering s buf') - Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitBuffering s buf) - Stop -> return $ Skip (SplitYielding buf SplitFinishing) - - step _ (SplitSplitting st buf) = do - (x1, mx2) <- splitter buf - return $ case mx2 of - Nothing -> Skip $ SplitBuffering st x1 - Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) - - step _ (SplitYielding x next) = return $ Yield x next - step _ SplitFinishing = return Stop - --- | Performs infix separator style splitting. -{-# INLINE_NORMAL splitInnerBySuffix #-} -splitInnerBySuffix - :: Monad m - => (f a -> Bool) -- isEmpty? 
- -> (f a -> m (f a, Maybe (f a))) -- splitter - -> (f a -> f a -> m (f a)) -- joiner - -> Stream m (f a) - -> Stream m (f a) -splitInnerBySuffix isEmpty splitter joiner (Stream step1 state1) = - Stream step (SplitInitial state1) - - where - - {-# INLINE_LATE step #-} - step gst (SplitInitial st) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - return $ case mx2 of - Nothing -> Skip (SplitBuffering s x1) - Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitInitial s) - Stop -> return Stop - - step gst (SplitBuffering st buf) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - buf' <- joiner buf x1 - return $ case mx2 of - Nothing -> Skip (SplitBuffering s buf') - Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitBuffering s buf) - Stop -> - return $ - if isEmpty buf - then Stop - else Skip (SplitYielding buf SplitFinishing) - - step _ (SplitSplitting st buf) = do - (x1, mx2) <- splitter buf - return $ case mx2 of - Nothing -> Skip $ SplitBuffering st x1 - Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) - - step _ (SplitYielding x next) = return $ Yield x next - step _ SplitFinishing = return Stop - ------------------------------------------------------------------------------- --- Trimming ------------------------------------------------------------------------------- - --- | Drop prefix from the input stream if present. --- --- Space: @O(1)@ --- --- See also stripPrefix. --- --- /Unimplemented/ -{-# INLINE dropPrefix #-} -dropPrefix :: - -- (Monad m, Eq a) => - Stream m a -> Stream m a -> Stream m a -dropPrefix = error "Not implemented yet!" - --- | Drop all matching infix from the input stream if present. Infix stream --- may be consumed multiple times. --- --- Space: @O(n)@ where n is the length of the infix. --- --- See also stripInfix. 
--- --- /Unimplemented/ -{-# INLINE dropInfix #-} -dropInfix :: - -- (Monad m, Eq a) => - Stream m a -> Stream m a -> Stream m a -dropInfix = error "Not implemented yet!" - --- | Drop suffix from the input stream if present. Suffix stream may be --- consumed multiple times. --- --- Space: @O(n)@ where n is the length of the suffix. --- --- See also stripSuffix. --- --- /Unimplemented/ -{-# INLINE dropSuffix #-} -dropSuffix :: - -- (Monad m, Eq a) => - Stream m a -> Stream m a -> Stream m a -dropSuffix = error "Not implemented yet!" diff --git a/core/src/Streamly/Internal/Data/Stream/Parse.hs b/core/src/Streamly/Internal/Data/Stream/Parse.hs new file mode 100644 index 0000000000..3cbfc7384e --- /dev/null +++ b/core/src/Streamly/Internal/Data/Stream/Parse.hs @@ -0,0 +1,2179 @@ +{-# LANGUAGE CPP #-} +-- | +-- Module : Streamly.Internal.Data.Stream.Parse +-- Copyright : (c) 2018 Composewell Technologies +-- (c) Roman Leshchinskiy 2008-2010 +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- This module contains folding (eliminating) transformations involving +-- multiple streams. folds. There are two types of transformations generational +-- or eliminational. Eliminational transformations are like the "Eliminate" +-- module but they transform a stream by eliminating parts of the stream +-- instead of eliminating the whole stream. +-- +-- These combinators involve transformation, generation, elimination so can be +-- classified under any of those. + +-- Flipped versions can be named as: +-- groupsFor :: stream -> fold -> stream (flipped groupsWhile) +-- +-- Flipped versions for folds: +-- foldMany :: outer fold -> inner fold -> fold (original version) +-- groupFoldFor :: inner fold -> outer fold -> fold (flipped version) +-- groupStepFor :: inner fold -> outer fold step -> fold (flipped version) +-- This can be convenient for defining the outer fold step using a lambda. 
+-- +module Streamly.Internal.Data.Stream.Parse + ( + -- * Eliminate + -- | Folding and Parsing chunks of streams to eliminate nested streams. + -- Functions generally ending in these shapes: + -- + -- @ + -- f (Fold m a b) -> t m a -> t m b + -- f (Parser a m b) -> t m a -> t m b + -- @ + + -- ** Folding + -- | Apply folds on a stream. + foldSequence + , foldIterateM + + -- ** Parsing + -- | Parsing is opposite to flattening. 'parseMany' is dual to concatMap or + -- unfoldEach concatMap generates a stream from single values in a + -- stream and flattens, parseMany does the opposite of flattening by + -- splitting the stream and then folds each such split to single value in + -- the output stream. + , parseMany + , parseManyPos + , parseSequence + , parseManyTill + , parseIterate + , parseIteratePos + + -- ** Grouping + -- | Group segments of a stream and fold. Special case of parsing. + , groupsWhile + , groupsRollingBy + + -- ** Splitting + -- | A special case of parsing. + , takeEndBySeq + , takeEndBySeq_ + , wordsBy + , splitSepBySeq_ + , splitEndBySeq + , splitEndBySeq_ + , splitOnSuffixSeq -- internal + + , splitBeginBy_ + , splitEndBySeqOneOf + , splitSepBySeqOneOf + + -- * Transform (Nested Containers) + -- | Opposite to compact in ArrayStream + , splitInnerBy -- XXX innerSplitOn + , splitInnerBySuffix -- XXX innerSplitOnSuffix + + -- * Reduce By Streams + , dropPrefix + , dropInfix + , dropSuffix + + -- * Deprecated + , parseManyD + , parseIterateD + , groupsBy + , splitOnSeq + ) +where + +#include "deprecation.h" +#include "inline.hs" +#include "ArrayMacros.h" + +import Control.Exception (assert) +import Control.Monad.IO.Class (MonadIO(..)) +import Data.Bits (shiftR, shiftL, (.|.), (.&.)) +import Data.Proxy (Proxy(..)) +import Data.Word (Word32) +import Fusion.Plugin.Types (Fuse(..)) +import GHC.Types (SPEC(..)) + +import Streamly.Internal.Data.Array.Type (Array(..)) +import Streamly.Internal.Data.Fold.Type (Fold(..)) +import 
Streamly.Internal.Data.MutArray.Type (MutArray(..)) +import Streamly.Internal.Data.Parser (ParseError(..), ParseErrorPos) +import Streamly.Internal.Data.RingArray (RingArray(..)) +import Streamly.Internal.Data.SVar.Type (adaptState) +import Streamly.Internal.Data.Unbox (Unbox(..)) + +import qualified Streamly.Internal.Data.Array.Type as A +import qualified Streamly.Internal.Data.MutArray.Type as MutArray +import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Internal.Data.Parser as PRD +import qualified Streamly.Internal.Data.ParserDrivers as Drivers +import qualified Streamly.Internal.Data.RingArray as RB + +import Streamly.Internal.Data.Stream.Type + +#include "DocTestDataStream.hs" + +------------------------------------------------------------------------------ +-- Folding +------------------------------------------------------------------------------ + +-- | Apply a stream of folds to an input stream and emit the results in the +-- output stream. +-- +-- /Unimplemented/ +-- +{-# INLINE foldSequence #-} +foldSequence + :: -- Monad m => + Stream m (Fold m a b) + -> Stream m a + -> Stream m b +foldSequence _f _m = undefined + +{-# ANN type FIterState Fuse #-} +data FIterState s f m a b + = FIterInit s f + | forall fs. FIterStream s (fs -> a -> m (FL.Step fs b)) fs (fs -> m b) + (fs -> m b) + | FIterYield b (FIterState s f m a b) + | FIterStop + +-- | Iterate a fold generator on a stream. The initial value @b@ is used to +-- generate the first fold, the fold is applied on the stream and the result of +-- the fold is used to generate the next fold and so on. 
+-- +-- Usage: +-- +-- >>> import Data.Monoid (Sum(..)) +-- >>> f x = return (Fold.take 2 (Fold.sconcat x)) +-- >>> s = fmap Sum $ Stream.fromList [1..10] +-- >>> Stream.fold Fold.toList $ fmap getSum $ Stream.foldIterateM f (pure 0) s +-- [3,10,21,36,55,55] +-- +-- This is the streaming equivalent of monad like sequenced application of +-- folds where next fold is dependent on the previous fold. +-- +-- /Pre-release/ +-- +{-# INLINE_NORMAL foldIterateM #-} +foldIterateM :: + Monad m => (b -> m (FL.Fold m a b)) -> m b -> Stream m a -> Stream m b +foldIterateM func seed0 (Stream step state) = + Stream stepOuter (FIterInit state seed0) + + where + + {-# INLINE iterStep #-} + iterStep from st fstep extract final = do + res <- from + return + $ Skip + $ case res of + FL.Partial fs -> FIterStream st fstep fs extract final + FL.Done fb -> FIterYield fb $ FIterInit st (return fb) + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (FIterInit st seed) = do + (FL.Fold fstep initial extract final) <- seed >>= func + iterStep initial st fstep extract final + stepOuter gst (FIterStream st fstep fs extract final) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + iterStep (fstep fs x) s fstep extract final + Skip s -> return $ Skip $ FIterStream s fstep fs extract final + Stop -> do + b <- final fs + return $ Skip $ FIterYield b FIterStop + stepOuter _ (FIterYield a next) = return $ Yield a next + stepOuter _ FIterStop = return Stop + +------------------------------------------------------------------------------ +-- Parsing +------------------------------------------------------------------------------ + +-- | Apply a 'Parser' repeatedly on a stream and emit the parsed values in the +-- output stream. 
+-- +-- Usage: +-- +-- >>> s = Stream.fromList [1..10] +-- >>> parser = Parser.takeBetween 0 2 Fold.sum +-- >>> Stream.toList $ Stream.parseMany parser s +-- [Right 3,Right 7,Right 11,Right 15,Right 19] +-- +-- This is the streaming equivalent of the 'Streamly.Data.Parser.many' parse +-- combinator. +-- +-- Known Issues: When the parser fails there is no way to get the remaining +-- stream. +-- +{-# INLINE parseMany #-} +parseMany + :: Monad m + => PRD.Parser a m b + -> Stream m a + -> Stream m (Either ParseError b) +parseMany = Drivers.parseMany + +-- | Like 'parseMany' but includes stream position information in the error +-- messages. +-- +{-# INLINE parseManyPos #-} +parseManyPos + :: Monad m + => PRD.Parser a m b + -> Stream m a + -> Stream m (Either ParseErrorPos b) +parseManyPos = Drivers.parseManyPos + +{-# DEPRECATED parseManyD "Please use parseMany instead." #-} +{-# INLINE parseManyD #-} +parseManyD + :: Monad m + => PR.Parser a m b + -> Stream m a + -> Stream m (Either ParseError b) +parseManyD = parseMany + +-- | Apply a stream of parsers to an input stream and emit the results in the +-- output stream. +-- +-- /Unimplemented/ +-- +{-# INLINE parseSequence #-} +parseSequence + :: -- Monad m => + Stream m (PR.Parser a m b) + -> Stream m a + -> Stream m b +parseSequence _f _m = undefined + +-- XXX Change the parser arguments' order + +-- | @parseManyTill collect test stream@ tries the parser @test@ on the input, +-- if @test@ fails it backtracks and tries @collect@, after @collect@ succeeds +-- @test@ is tried again and so on. The parser stops when @test@ succeeds. The +-- output of @test@ is discarded and the output of @collect@ is emitted in the +-- output stream. The parser fails if @collect@ fails. +-- +-- /Unimplemented/ +-- +{-# INLINE parseManyTill #-} +parseManyTill :: + -- MonadThrow m => + PR.Parser a m b + -> PR.Parser a m x + -> Stream m a + -> Stream m b +parseManyTill = undefined + +-- | Iterate a parser generating function on a stream. 
The initial value @b@ is +-- used to generate the first parser, the parser is applied on the stream and +-- the result is used to generate the next parser and so on. +-- +-- Example: +-- +-- >>> import Data.Monoid (Sum(..)) +-- >>> s = Stream.fromList [1..10] +-- >>> Stream.toList $ fmap getSum $ Stream.catRights $ Stream.parseIterate (\b -> Parser.takeBetween 0 2 (Fold.sconcat b)) (Sum 0) $ fmap Sum s +-- [3,10,21,36,55,55] +-- +-- This is the streaming equivalent of monad like sequenced application of +-- parsers where next parser is dependent on the previous parser. +-- +-- /Pre-release/ +-- +{-# INLINE parseIterate #-} +parseIterate + :: Monad m + => (b -> PRD.Parser a m b) + -> b + -> Stream m a + -> Stream m (Either ParseError b) +parseIterate = Drivers.parseIterate + +-- | Like 'parseIterate' but includes stream position information in the error +-- messages. +-- +{-# INLINE parseIteratePos #-} +parseIteratePos + :: Monad m + => (b -> PRD.Parser a m b) + -> b + -> Stream m a + -> Stream m (Either ParseErrorPos b) +parseIteratePos = Drivers.parseIteratePos + +{-# DEPRECATED parseIterateD "Please use parseIterate instead." #-} +{-# INLINE parseIterateD #-} +parseIterateD + :: Monad m + => (b -> PR.Parser a m b) + -> b + -> Stream m a + -> Stream m (Either ParseError b) +parseIterateD = parseIterate + +------------------------------------------------------------------------------ +-- Grouping +------------------------------------------------------------------------------ + +data GroupByState st fs a b + = GroupingInit st + | GroupingDo st !fs + | GroupingInitWith st !a + | GroupingDoWith st !fs !a + | GroupingYield !b (GroupByState st fs a b) + | GroupingDone + +-- | Keep collecting items in a group as long as the comparison function +-- returns true. The comparison function is @cmp old new@ where @old@ is the +-- first item in the group and @new@ is the incoming item being tested for +-- membership of the group. 
The collected items are folded by the supplied +-- fold. +-- +-- Definition: +-- +-- >>> groupsWhile cmp f = Stream.parseMany (Parser.groupBy cmp f) +{-# INLINE_NORMAL groupsWhile #-} +groupsWhile :: Monad m + => (a -> a -> Bool) + -> Fold m a b + -> Stream m a + -> Stream m b +{- +groupsWhile eq fld = parseMany (PRD.groupBy eq fld) +-} +groupsWhile cmp (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter (GroupingInit state) + + where + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (GroupingInit st) = do + -- XXX Note that if the stream stops without yielding a single element + -- in the group we discard the "initial" effect. + res <- initial + return + $ case res of + FL.Partial s -> Skip $ GroupingDo st s + FL.Done b -> Yield b $ GroupingInit st + stepOuter gst (GroupingDo st fs) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + Skip s -> return $ Skip $ GroupingDo s fs + Stop -> final fs >> return Stop + + where + + go !_ prev stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC prev s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + else do + r <- final acc + return $ Yield r (GroupingInitWith s x) + Skip s -> go SPEC prev s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingInitWith st x) = do + res <- initial + return + $ case res of + FL.Partial s -> Skip $ GroupingDoWith st s x + FL.Done b -> Yield b $ GroupingInitWith st x + stepOuter gst (GroupingDoWith st fs prev) = do + res <- fstep fs prev + case res of + FL.Partial fs1 -> go SPEC st fs1 + FL.Done b -> return $ Yield b (GroupingInit st) + + where + + -- XXX code duplicated from the previous equation + go !_ stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp 
prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + else do + r <- final acc + return $ Yield r (GroupingInitWith s x) + Skip s -> go SPEC s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingYield _ _) = error "groupsWhile: Unreachable" + stepOuter _ GroupingDone = return Stop + +-- | The argument order of the comparison function in `groupsWhile` is +-- different than that of `groupsBy`. +-- +-- In `groupsBy` the comparison function takes the next element as the first +-- argument and the previous element as the second argument. In `groupsWhile` +-- the first argument is the previous element and second argument is the next +-- element. +{-# DEPRECATED groupsBy "Please use groupsWhile instead. Please note the change in the argument order of the comparison function." #-} +{-# INLINE_NORMAL groupsBy #-} +groupsBy :: Monad m + => (a -> a -> Bool) + -> Fold m a b + -> Stream m a + -> Stream m b +groupsBy cmp = groupsWhile (flip cmp) + +-- | +-- +-- Definition: +-- +-- >>> groupsRollingBy cmp f = Stream.parseMany (Parser.groupByRolling cmp f) +-- +{-# INLINE_NORMAL groupsRollingBy #-} +groupsRollingBy :: Monad m + => (a -> a -> Bool) + -> Fold m a b + -> Stream m a + -> Stream m b +{- +groupsRollingBy eq fld = parseMany (PRD.groupByRolling eq fld) +-} +groupsRollingBy cmp (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter (GroupingInit state) + + where + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (GroupingInit st) = do + -- XXX Note that if the stream stops without yielding a single element + -- in the group we discard the "initial" effect. 
+ res <- initial + return + $ case res of + FL.Partial fs -> Skip $ GroupingDo st fs + FL.Done fb -> Yield fb $ GroupingInit st + stepOuter gst (GroupingDo st fs) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done fb -> return $ Yield fb (GroupingInit s) + Skip s -> return $ Skip $ GroupingDo s fs + Stop -> final fs >> return Stop + + where + + go !_ prev stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + else do + r <- final acc + return $ Yield r (GroupingInitWith s x) + Skip s -> go SPEC prev s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingInitWith st x) = do + res <- initial + return + $ case res of + FL.Partial s -> Skip $ GroupingDoWith st s x + FL.Done b -> Yield b $ GroupingInitWith st x + stepOuter gst (GroupingDoWith st fs previous) = do + res <- fstep fs previous + case res of + FL.Partial s -> go SPEC previous st s + FL.Done b -> return $ Yield b (GroupingInit st) + + where + + -- XXX GHC: groupsWhile has one less parameter in this go loop and it + -- fuses. However, groupsRollingBy does not fuse, removing the prev + -- parameter makes it fuse. Something needs to be fixed in GHC. The + -- workaround for this is noted in the comments below. + go !_ prev !stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done b -> return $ Yield b (GroupingInit st) + else do + {- + r <- final acc + return $ Yield r (GroupingInitWith s x) + -} + -- The code above does not let groupBy fuse. We use the + -- alternative code below instead. 
Instead of jumping + -- to GroupingInitWith state, we unroll the code of + -- GroupingInitWith state here to help GHC with stream + -- fusion. + result <- initial + r <- final acc + return + $ Yield r + $ case result of + FL.Partial fsi -> GroupingDoWith s fsi x + FL.Done b -> GroupingYield b (GroupingInit s) + Skip s -> go SPEC prev s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingYield r next) = return $ Yield r next + stepOuter _ GroupingDone = return Stop + +------------------------------------------------------------------------------ +-- Splitting - by a predicate +------------------------------------------------------------------------------ + +data WordsByState st fs b + = WordsByInit st + | WordsByDo st !fs + | WordsByDone + | WordsByYield !b (WordsByState st fs b) + +-- | Split the stream after stripping leading, trailing, and repeated +-- separators determined by the predicate supplied. The tokens after splitting +-- are collected by the supplied fold. In other words, the tokens are parsed in +-- the same way as words are parsed from whitespace separated text. +-- +-- >>> f x = Stream.toList $ Stream.wordsBy (== '.') Fold.toList $ Stream.fromList x +-- >>> f "a.b" +-- ["a","b"] +-- >>> f "a..b" +-- ["a","b"] +-- >>> f ".a..b." 
+-- ["a","b"] +-- +{-# INLINE_NORMAL wordsBy #-} +wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b +wordsBy predicate (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter (WordsByInit state) + + where + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (WordsByInit st) = do + res <- initial + return + $ case res of + FL.Partial s -> Skip $ WordsByDo st s + FL.Done b -> Yield b (WordsByInit st) + + stepOuter gst (WordsByDo st fs) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + if predicate x + then do + resi <- initial + return + $ case resi of + FL.Partial fs1 -> Skip $ WordsByDo s fs1 + FL.Done b -> Yield b (WordsByInit s) + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC s fs1 + FL.Done b -> return $ Yield b (WordsByInit s) + Skip s -> return $ Skip $ WordsByDo s fs + Stop -> final fs >> return Stop + + where + + go !_ stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if predicate x + then do + {- + r <- final acc + return $ Yield r (WordsByInit s) + -} + -- The above code does not fuse well. Need to check why + -- GHC is not able to simplify it well. Using the code + -- below, instead of jumping through the WordsByInit + -- state always, we directly go to WordsByDo state in + -- the common case of Partial. 
+ resi <- initial + r <- final acc + return + $ Yield r + $ case resi of + FL.Partial fs1 -> WordsByDo s fs1 + FL.Done b -> WordsByYield b (WordsByInit s) + else do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC s fs1 + FL.Done b -> return $ Yield b (WordsByInit s) + Skip s -> go SPEC s acc + Stop -> do + r <- final acc + return $ Yield r WordsByDone + + stepOuter _ WordsByDone = return Stop + + stepOuter _ (WordsByYield b next) = return $ Yield b next + +------------------------------------------------------------------------------ +-- Splitting on a sequence +------------------------------------------------------------------------------ + +-- String search algorithms: +-- http://www-igm.univ-mlv.fr/~lecroq/string/index.html + +-- XXX Can GHC find a way to modularise this? Can we write different cases +-- i.e.g single element, word hash, karp-rabin as different functions and then +-- be able to combine them into a single state machine? + +{-# ANN type TakeEndBySeqState Fuse #-} +data TakeEndBySeqState mba rb rh ck w s b x = + TakeEndBySeqInit + | TakeEndBySeqYield !b (TakeEndBySeqState mba rb rh ck w s b x) + | TakeEndBySeqDone + + | TakeEndBySeqSingle s x + + | TakeEndBySeqWordInit !Int !w s + | TakeEndBySeqWordLoop !w s + | TakeEndBySeqWordDone !Int !w + + | TakeEndBySeqKRInit s mba + | TakeEndBySeqKRInit1 s mba !Int + | TakeEndBySeqKRLoop s mba !rh !ck + | TakeEndBySeqKRCheck s mba !rh + | TakeEndBySeqKRDone !Int rb + +-- | If the pattern is empty the output stream is empty. +{-# INLINE_NORMAL takeEndBySeqWith #-} +takeEndBySeqWith + :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) + => Bool + -> Array a + -> Stream m a + -> Stream m a +takeEndBySeqWith withSep patArr (Stream step state) = + Stream stepOuter TakeEndBySeqInit + + where + + patLen = A.length patArr + patBytes = A.byteLength patArr + maxIndex = patLen - 1 + maxOffset = patBytes - SIZE_OF(a) + elemBits = SIZE_OF(a) * 8 + + -- For word pattern case + wordMask :: Word + wordMask = (1 `shiftL` (elemBits * patLen)) - 1 + + elemMask :: Word + elemMask = (1 `shiftL` elemBits) - 1 + + wordPat :: Word + wordPat = wordMask .&. A.foldl' addToWord 0 patArr + + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + + -- For Rabin-Karp search + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + + skip = return . Skip + + {-# INLINE yield #-} + yield x !s = skip $ TakeEndBySeqYield x s + + {-# INLINE_LATE stepOuter #-} + stepOuter _ TakeEndBySeqInit = do + -- XXX When we statically specify the method compiler is able to + -- simplify the code better and removes the handling of other states. + -- When it is determined dynamically, the code is less efficient. For + -- example, the single element search degrades by 80% if the handling + -- of other cases is present. We need to investigate this further but + -- until then we can guide the compiler statically where we can. If we + -- want to use single element search statically then we can use + -- takeEndBy instead. + -- + -- XXX Is there a way for GHC to statically determine patLen when we + -- use an array created from a static string as pattern e.g. "\n". 
+ case () of + _ | patLen == 0 -> return Stop + | patLen == 1 -> do + pat <- liftIO $ A.unsafeGetIndexIO 0 patArr + return $ Skip $ TakeEndBySeqSingle state pat + | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> + return $ Skip $ TakeEndBySeqWordInit 0 0 state + | otherwise -> do + (MutArray mba _ _ _) :: MutArray a <- + liftIO $ MutArray.emptyOf patLen + skip $ TakeEndBySeqKRInit state mba + + --------------------- + -- Single yield point + --------------------- + + stepOuter _ (TakeEndBySeqYield x next) = return $ Yield x next + + ----------------- + -- Done + ----------------- + + stepOuter _ TakeEndBySeqDone = return Stop + + ----------------- + -- Single Pattern + ----------------- + + stepOuter gst (TakeEndBySeqSingle st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> + if pat /= x + then yield x (TakeEndBySeqSingle s pat) + else do + if withSep + then yield x TakeEndBySeqDone + else return Stop + Skip s -> skip $ TakeEndBySeqSingle s pat + Stop -> return Stop + + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + + -- Note: Karp-Rabin is roughly 15% slower than word hash for a 2 element + -- pattern. This may be useful for common cases like splitting lines using + -- "\r\n". + stepOuter _ (TakeEndBySeqWordDone 0 _) = do + return Stop + stepOuter _ (TakeEndBySeqWordDone n wrd) = do + let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) + in yield + (toEnum $ fromIntegral old) + (TakeEndBySeqWordDone (n - 1) wrd) + + -- XXX If we remove this init state for perf experiment the time taken + -- reduces to half, there may be some optimization opportunity here. + stepOuter gst (TakeEndBySeqWordInit idx wrd st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let wrd1 = addToWord wrd x + next + | idx /= maxIndex = + TakeEndBySeqWordInit (idx + 1) wrd1 s + | wrd1 .&. 
wordMask /= wordPat = + TakeEndBySeqWordLoop wrd1 s + | otherwise = TakeEndBySeqDone + if withSep + then yield x next + else skip next + Skip s -> skip $ TakeEndBySeqWordInit idx wrd s + Stop -> + if withSep + then return Stop + else skip $ TakeEndBySeqWordDone idx wrd + + stepOuter gst (TakeEndBySeqWordLoop wrd st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + -- XXX Never use a lazy expression as state, that causes issues + -- in simplification because the state argument of Yield is + -- lazy, maybe we can make that strict. + let wrd1 = addToWord wrd x + old = (wordMask .&. wrd) + `shiftR` (elemBits * (patLen - 1)) + !y = + if withSep + then x + else toEnum $ fromIntegral old + -- Note: changing the nesting order of if and yield makes a + -- difference in performance. + if wrd1 .&. wordMask /= wordPat + then yield y (TakeEndBySeqWordLoop wrd1 s) + else yield y TakeEndBySeqDone + Skip s -> skip $ TakeEndBySeqWordLoop wrd s + Stop -> + if withSep + then return Stop + else skip $ TakeEndBySeqWordDone patLen wrd + + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + + stepOuter gst (TakeEndBySeqKRInit st0 mba) = do + res <- step (adaptState gst) st0 + case res of + Yield x s -> do + liftIO $ pokeAt 0 mba x + if withSep + then yield x (TakeEndBySeqKRInit1 s mba (SIZE_OF(a))) + else skip $ TakeEndBySeqKRInit1 s mba (SIZE_OF(a)) + Skip s -> skip $ TakeEndBySeqKRInit s mba + Stop -> return Stop + + stepOuter gst (TakeEndBySeqKRInit1 st mba offset) = do + res <- step (adaptState gst) st + let arr :: Array a = Array + { arrContents = mba + , arrStart = 0 + , arrEnd = patBytes + } + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + let next = + if offset /= maxOffset + then TakeEndBySeqKRInit1 s mba (offset + SIZE_OF(a)) + else + let ringHash = A.foldl' addCksum 0 arr + in if ringHash == patHash + then TakeEndBySeqKRCheck s mba 0 + else TakeEndBySeqKRLoop s mba 0 ringHash + if withSep 
+ then yield x next + else skip next + Skip s -> skip $ TakeEndBySeqKRInit1 s mba offset + Stop -> do + if withSep + then return Stop + else do + let rb = RingArray + { ringContents = mba + , ringSize = offset + , ringHead = 0 + } + in skip $ TakeEndBySeqKRDone offset rb + + stepOuter gst (TakeEndBySeqKRLoop st mba rh cksum) = do + res <- step (adaptState gst) st + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + case res of + Yield x s -> do + (rb1, old) <- liftIO (RB.replace rb x) + let cksum1 = deltaCksum cksum old x + let rh1 = ringHead rb1 + next = + if cksum1 /= patHash + then TakeEndBySeqKRLoop s mba rh1 cksum1 + else TakeEndBySeqKRCheck s mba rh1 + if withSep + then yield x next + else yield old next + Skip s -> skip $ TakeEndBySeqKRLoop s mba rh cksum + Stop -> do + if withSep + then return Stop + else skip $ TakeEndBySeqKRDone patBytes rb + + stepOuter _ (TakeEndBySeqKRCheck st mba rh) = do + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + matches <- liftIO $ RB.eqArray rb patArr + if matches + then return Stop + else skip $ TakeEndBySeqKRLoop st mba rh patHash + + stepOuter _ (TakeEndBySeqKRDone 0 _) = return Stop + stepOuter _ (TakeEndBySeqKRDone len rb) = do + assert (len >= 0) (return ()) + old <- RB.unsafeGetHead rb + let rb1 = RB.moveForward rb + yield old $ TakeEndBySeqKRDone (len - SIZE_OF(a)) rb1 + +-- | Take the stream until the supplied sequence is encountered. Take the +-- sequence as well and stop. +-- +-- Usage: +-- +-- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq (Array.fromList pat) $ Stream.fromList xs +-- >>> f "fgh" "abcdefghijk" +-- "abcdefgh" +-- >>> f "lmn" "abcdefghijk" +-- "abcdefghijk" +-- >>> f "" "abcdefghijk" +-- "" +-- +{-# INLINE takeEndBySeq #-} +takeEndBySeq + :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Stream m a + -> Stream m a +takeEndBySeq = takeEndBySeqWith True + +-- | Take the stream until the supplied sequence is encountered. Do not take +-- the sequence. +-- +-- Usage: +-- +-- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq_ (Array.fromList pat) $ Stream.fromList xs +-- >>> f "fgh" "abcdefghijk" +-- "abcde" +-- >>> f "lmn" "abcdefghijk" +-- "abcdefghijk" +-- >>> f "" "abcdefghijk" +-- "" +-- +{-# INLINE takeEndBySeq_ #-} +takeEndBySeq_ + :: forall m a. (MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Stream m a + -> Stream m a +takeEndBySeq_ = takeEndBySeqWith False + +{- +-- TODO can we unify the splitting operations using a splitting configuration +-- like in the split package. +-- +data SplitStyle = Infix | Suffix | Prefix deriving (Eq, Show) +data SplitOptions = SplitOptions + { style :: SplitStyle + , withSep :: Bool -- ^ keep the separators in output + -- , compact :: Bool -- ^ treat multiple consecutive separators as one + -- , trimHead :: Bool -- ^ drop blank at head + -- , trimTail :: Bool -- ^ drop blank at tail + } +-} + +-- XXX using "fs" as the last arg in Constructors may simplify the code a bit, +-- because we can use the constructor directly without having to create "jump" +-- functions. 
+{-# ANN type SplitOnSeqState Fuse #-} +data SplitOnSeqState mba rb rh ck w fs s b x = + SplitOnSeqInit + | SplitOnSeqYield b (SplitOnSeqState mba rb rh ck w fs s b x) + | SplitOnSeqDone + + | SplitOnSeqEmpty !fs s + + | SplitOnSeqSingle0 !fs s x + | SplitOnSeqSingle !fs s x + + | SplitOnSeqWordInit0 !fs s + | SplitOnSeqWordInit Int Word !fs s + | SplitOnSeqWordLoop !w s !fs + | SplitOnSeqWordDone Int !fs !w + + | SplitOnSeqKRInit0 Int !fs s mba + | SplitOnSeqKRInit Int !fs s mba + | SplitOnSeqKRLoop fs s mba !rh !ck + | SplitOnSeqKRCheck fs s mba !rh + | SplitOnSeqKRDone Int !fs rb + + | SplitOnSeqReinit (fs -> SplitOnSeqState mba rb rh ck w fs s b x) + +-- XXX Need to fix empty stream split behavior + +-- | Like 'splitSepBy_' but splits the stream on a sequence of elements rather than +-- a single element. Parses a sequence of tokens separated by an infixed +-- separator e.g. @a;b;c@ is parsed as @a@, @b@, @c@. If the pattern is empty +-- then each element is a match, thus the fold is finalized on each element. +-- +-- >>> splitSepBy p xs = Stream.fold Fold.toList $ Stream.splitSepBySeq_ (Array.fromList p) Fold.toList (Stream.fromList xs) +-- +-- >>> splitSepBy "" "" +-- [] +-- +-- >>> splitSepBy "" "a...b" +-- ["a",".",".",".","b"] +-- +-- >>> splitSepBy ".." "" +-- [] +-- +-- >>> splitSepBy ".." "a...b" +-- ["a",".b"] +-- +-- >>> splitSepBy ".." "abc" +-- ["abc"] +-- +-- >>> splitSepBy ".." ".." +-- ["",""] +-- +-- >>> splitSepBy "." ".a" +-- ["","a"] +-- +-- >>> splitSepBy "." "a." +-- ["a",""] +-- +-- Uses Rabin-Karp algorithm for substring search. +-- +{-# INLINE_NORMAL splitSepBySeq_ #-} +splitSepBySeq_, splitOnSeq + :: forall m a b. 
(MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter SplitOnSeqInit + + where + + patLen = A.length patArr + patBytes = A.byteLength patArr + maxIndex = patLen - 1 + maxOffset = patBytes - SIZE_OF(a) + elemBits = SIZE_OF(a) * 8 + + -- For word pattern case + wordMask :: Word + wordMask = (1 `shiftL` (elemBits * patLen)) - 1 + + elemMask :: Word + elemMask = (1 `shiftL` elemBits) - 1 + + wordPat :: Word + wordPat = wordMask .&. A.foldl' addToWord 0 patArr + + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + + -- For Rabin-Karp search + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + + skip = return . Skip + + nextAfterInit nextGen stepRes = + case stepRes of + FL.Partial s -> nextGen s + FL.Done b -> SplitOnSeqYield b (SplitOnSeqReinit nextGen) + + {-# INLINE yieldReinit #-} + yieldReinit nextGen fs = + initial >>= skip . SplitOnSeqYield fs . 
nextAfterInit nextGen + + {-# INLINE_LATE stepOuter #-} + stepOuter _ SplitOnSeqInit = do + res <- initial + case res of + FL.Partial acc + | patLen == 0 -> + return $ Skip $ SplitOnSeqEmpty acc state + | patLen == 1 -> do + pat <- liftIO $ A.unsafeGetIndexIO 0 patArr + return $ Skip $ SplitOnSeqSingle0 acc state pat + | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> + return $ Skip $ SplitOnSeqWordInit0 acc state + | otherwise -> do + (MutArray mba _ _ _) :: MutArray a <- + liftIO $ MutArray.emptyOf patLen + skip $ SplitOnSeqKRInit0 0 acc state mba + FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit + + stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next + + --------------------------- + -- Checkpoint + --------------------------- + + stepOuter _ (SplitOnSeqReinit nextGen) = + initial >>= skip . nextAfterInit nextGen + + --------------------------- + -- Empty pattern + --------------------------- + + stepOuter gst (SplitOnSeqEmpty acc st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + r <- fstep acc x + b1 <- + case r of + FL.Partial acc1 -> final acc1 + FL.Done b -> return b + let jump c = SplitOnSeqEmpty c s + in yieldReinit jump b1 + Skip s -> skip (SplitOnSeqEmpty acc s) + Stop -> final acc >> return Stop + + ----------------- + -- Done + ----------------- + + stepOuter _ SplitOnSeqDone = return Stop + + ----------------- + -- Single Pattern + ----------------- + + stepOuter gst (SplitOnSeqSingle0 fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + -- XXX This code block is duplicated in SplitOnSeqSingle state + let jump c = SplitOnSeqSingle c s pat + if pat == x + then final fs >>= yieldReinit jump + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> + pure $ Skip $ SplitOnSeqSingle fs1 s pat + FL.Done b -> yieldReinit jump b + Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do + go SPEC fs0 
st0 + + where + + -- The local loop increases allocations by 6% but improves CPU + -- performance by 14%. + go !_ !fs !st = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSeqSingle c s pat + if pat == x + then final fs >>= yieldReinit jump + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC fs1 s + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC fs s + Stop -> do + r <- final fs + return $ Skip $ SplitOnSeqYield r SplitOnSeqDone + + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + + -- Note: We fill the matching buffer before we emit anything, in case it + -- matches and we have to drop it. Though we could be more eager in + -- emitting as soon as we know that the pattern cannot match. But still the + -- worst case will remain the same, in case a match is going to happen we + -- will have to delay until the very end. + + stepOuter _ (SplitOnSeqWordDone 0 fs _) = do + r <- final fs + skip $ SplitOnSeqYield r SplitOnSeqDone + stepOuter _ (SplitOnSeqWordDone n fs wrd) = do + let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) + r <- fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> skip $ SplitOnSeqWordDone (n - 1) fs1 wrd + FL.Done b -> do + let jump c = SplitOnSeqWordDone (n - 1) c wrd + yieldReinit jump b + + stepOuter gst (SplitOnSeqWordInit0 fs st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> + let wrd1 = addToWord 0 x + in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s + Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) = + go SPEC idx0 wrd0 st0 + + where + + {-# INLINE go #-} + go !_ !idx !wrd !st = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let wrd1 = addToWord wrd x + if idx == maxIndex + then do + if wrd1 .&. 
wordMask == wordPat + then do + let jump c = SplitOnSeqWordInit 0 0 c s + final fs >>= yieldReinit jump + else skip $ SplitOnSeqWordLoop wrd1 s fs + else go SPEC (idx + 1) wrd1 s + Skip s -> go SPEC idx wrd s + Stop -> do + if idx /= 0 + then skip $ SplitOnSeqWordDone idx fs wrd + else do + r <- final fs + skip $ SplitOnSeqYield r SplitOnSeqDone + + stepOuter gst (SplitOnSeqWordLoop wrd0 st0 fs0) = + go SPEC wrd0 st0 fs0 + + where + + -- This loop does not affect allocations but it improves the CPU + -- performance signifcantly compared to looping using state. + {-# INLINE go #-} + go !_ !wrd !st !fs = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSeqWordInit 0 0 c s + wrd1 = addToWord wrd x + old = (wordMask .&. wrd) + `shiftR` (elemBits * (patLen - 1)) + r <- fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> do + if wrd1 .&. wordMask == wordPat + then final fs1 >>= yieldReinit jump + else go SPEC wrd1 s fs1 + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC wrd s fs + Stop -> skip $ SplitOnSeqWordDone patLen fs wrd + + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + + -- XXX Document this pattern for writing efficient code. Loop around only + -- required elements in the recursive loop, build the structures being + -- manipulated locally e.g. we are passing only mba, here and build an + -- array using patLen and arrStart from the surrounding context. 
+ + stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba + Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + if offset == maxOffset + then do + let arr :: Array a = Array + { arrContents = mba + , arrStart = 0 + , arrEnd = patBytes + } + let ringHash = A.foldl' addCksum 0 arr + if ringHash == patHash && A.byteEq arr patArr + then skip $ SplitOnSeqKRCheck fs s mba 0 + else skip $ SplitOnSeqKRLoop fs s mba 0 ringHash + else skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba + Skip s -> skip $ SplitOnSeqKRInit offset fs s mba + Stop -> do + let rb = RingArray + { ringContents = mba + , ringSize = offset + , ringHead = 0 + } + skip $ SplitOnSeqKRDone offset fs rb + + -- XXX The recursive "go" is more efficient than the state based recursion + -- code commented out below. Perhaps its more efficient because of + -- factoring out "mba" outside the loop. + -- + stepOuter gst (SplitOnSeqKRLoop fs0 st0 mba rh0 cksum0) = + go SPEC fs0 st0 rh0 cksum0 + + where + + go !_ !fs !st !rh !cksum = do + res <- step (adaptState gst) st + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + case res of + Yield x s -> do + (rb1, old) <- liftIO (RB.replace rb x) + r <- fstep fs old + case r of + FL.Partial fs1 -> do + let cksum1 = deltaCksum cksum old x + let rh1 = ringHead rb1 + if cksum1 == patHash + then skip $ SplitOnSeqKRCheck fs1 s mba rh1 + else go SPEC fs1 s rh1 cksum1 + FL.Done b -> do + -- XXX the old code looks wrong as we are resetting + -- the ring head but the ring still has old + -- elements as we are not resetting the size. 
+ let jump c = SplitOnSeqKRInit 0 c s mba + yieldReinit jump b + Skip s -> go SPEC fs s rh cksum + Stop -> skip $ SplitOnSeqKRDone patBytes fs rb + + -- XXX The following code is 5 times slower compared to the recursive loop + -- based code above. Need to investigate why. One possibility is that the + -- go loop above does not thread around the ring buffer (rb). This code may + -- be causing the state to bloat and getting allocated on each iteration. + -- We can check the cmm/asm code to confirm. If so a good GHC solution to + -- such problem is needed. One way to avoid this could be to use unboxed + -- mutable state? + {- + stepOuter gst (SplitOnSeqKRLoop fs st rb rh cksum) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum1 = deltaCksum cksum old x + fs1 <- fstep fs old + if (cksum1 == patHash) + then do + r <- done fs1 + skip $ SplitOnSeqYield r $ SplitOnSeqKRInit 0 s rb rh + else do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + skip $ SplitOnSeqKRLoop fs1 s rb rh1 cksum1 + Skip s -> skip $ SplitOnSeqKRLoop fs s rb rh cksum + Stop -> skip $ SplitOnSeqKRDone patLen fs rb rh + -} + + stepOuter _ (SplitOnSeqKRCheck fs st mba rh) = do + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + res <- liftIO $ RB.eqArray rb patArr + if res + then do + r <- final fs + let jump c = SplitOnSeqKRInit 0 c st mba + yieldReinit jump r + else skip $ SplitOnSeqKRLoop fs st mba rh patHash + + stepOuter _ (SplitOnSeqKRDone 0 fs _) = do + r <- final fs + skip $ SplitOnSeqYield r SplitOnSeqDone + stepOuter _ (SplitOnSeqKRDone len fs rb) = do + assert (len >= 0) (return ()) + old <- RB.unsafeGetHead rb + let rb1 = RB.moveForward rb + r <- fstep fs old + case r of + FL.Partial fs1 -> skip $ SplitOnSeqKRDone (len - SIZE_OF(a)) fs1 rb1 + FL.Done b -> do + let jump c = SplitOnSeqKRDone (len - SIZE_OF(a)) c rb1 + yieldReinit jump b + +RENAME(splitOnSeq,splitSepBySeq_) + +{-# ANN type 
SplitOnSuffixSeqState Fuse #-} +data SplitOnSuffixSeqState mba rb rh ck w fs s b x = + SplitOnSuffixSeqInit + | SplitOnSuffixSeqYield b (SplitOnSuffixSeqState mba rb rh ck w fs s b x) + | SplitOnSuffixSeqDone + + | SplitOnSuffixSeqEmpty !fs s + + | SplitOnSuffixSeqSingleInit !fs s x + | SplitOnSuffixSeqSingle !fs s x + + | SplitOnSuffixSeqWordInit !fs s + | SplitOnSuffixSeqWordLoop !w s !fs + | SplitOnSuffixSeqWordDone Int !fs !w + + | SplitOnSuffixSeqKRInit !fs s mba + | SplitOnSuffixSeqKRInit1 !fs s mba + | SplitOnSuffixSeqKRLoop fs s mba !rh !ck + | SplitOnSuffixSeqKRCheck fs s mba !rh + | SplitOnSuffixSeqKRDone Int !fs rb + + | SplitOnSuffixSeqReinit + (fs -> SplitOnSuffixSeqState mba rb rh ck w fs s b x) + +-- | @splitOnSuffixSeq withSep pat fld input@ splits the input using @pat@ as a +-- suffixed separator, the resulting split segments are fed to the fold @fld@. +-- If @withSep@ is True then the separator sequence is also suffixed with the +-- split segments. +-- +-- /Internal/ +{-# INLINE_NORMAL splitOnSuffixSeq #-} +splitOnSuffixSeq + :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) + => Bool + -> Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitOnSuffixSeq withSep patArr (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter SplitOnSuffixSeqInit + + where + + patLen = A.length patArr + patBytes = A.byteLength patArr + maxIndex = patLen - 1 + maxOffset = patBytes - SIZE_OF(a) + elemBits = SIZE_OF(a) * 8 + + -- For word pattern case + wordMask :: Word + wordMask = (1 `shiftL` (elemBits * patLen)) - 1 + + elemMask :: Word + elemMask = (1 `shiftL` elemBits) - 1 + + wordPat :: Word + wordPat = wordMask .&. A.foldl' addToWord 0 patArr + + addToWord wd a = (wd `shiftL` elemBits) .|. 
fromIntegral (fromEnum a) + + nextAfterInit nextGen stepRes = + case stepRes of + FL.Partial s -> nextGen s + FL.Done b -> + SplitOnSuffixSeqYield b (SplitOnSuffixSeqReinit nextGen) + + {-# INLINE yieldReinit #-} + yieldReinit nextGen fs = + initial >>= skip . SplitOnSuffixSeqYield fs . nextAfterInit nextGen + + -- For single element pattern case + {-# INLINE processYieldSingle #-} + processYieldSingle pat x s fs = do + let jump c = SplitOnSuffixSeqSingleInit c s pat + if pat == x + then do + r <- if withSep then fstep fs x else return $ FL.Partial fs + b1 <- + case r of + FL.Partial fs1 -> final fs1 + FL.Done b -> return b + yieldReinit jump b1 + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> skip $ SplitOnSuffixSeqSingle fs1 s pat + FL.Done b -> yieldReinit jump b + + -- For Rabin-Karp search + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + + skip = return . Skip + + {-# INLINE_LATE stepOuter #-} + stepOuter _ SplitOnSuffixSeqInit = do + res <- initial + case res of + FL.Partial fs + | patLen == 0 -> + skip $ SplitOnSuffixSeqEmpty fs state + | patLen == 1 -> do + pat <- liftIO $ A.unsafeGetIndexIO 0 patArr + skip $ SplitOnSuffixSeqSingleInit fs state pat + | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> + skip $ SplitOnSuffixSeqWordInit fs state + | otherwise -> do + (MutArray mba _ _ _) :: MutArray a <- + liftIO $ MutArray.emptyOf patLen + skip $ SplitOnSuffixSeqKRInit fs state mba + FL.Done fb -> skip $ SplitOnSuffixSeqYield fb SplitOnSuffixSeqInit + + stepOuter _ (SplitOnSuffixSeqYield x next) = return $ Yield x next + + --------------------------- + -- Reinit + --------------------------- + + stepOuter _ (SplitOnSuffixSeqReinit nextGen) = + initial >>= skip . 
nextAfterInit nextGen + + --------------------------- + -- Empty pattern + --------------------------- + + stepOuter gst (SplitOnSuffixSeqEmpty acc st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSuffixSeqEmpty c s + r <- fstep acc x + b1 <- + case r of + FL.Partial fs -> final fs + FL.Done b -> return b + yieldReinit jump b1 + Skip s -> skip (SplitOnSuffixSeqEmpty acc s) + Stop -> final acc >> return Stop + + ----------------- + -- Done + ----------------- + + stepOuter _ SplitOnSuffixSeqDone = return Stop + + ----------------- + -- Single Pattern + ----------------- + + stepOuter gst (SplitOnSuffixSeqSingleInit fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> processYieldSingle pat x s fs + Skip s -> skip $ SplitOnSuffixSeqSingleInit fs s pat + Stop -> final fs >> return Stop + + stepOuter gst (SplitOnSuffixSeqSingle fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> processYieldSingle pat x s fs + Skip s -> skip $ SplitOnSuffixSeqSingle fs s pat + Stop -> do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + + stepOuter _ (SplitOnSuffixSeqWordDone 0 fs _) = do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + stepOuter _ (SplitOnSuffixSeqWordDone n fs wrd) = do + let old = elemMask .&. 
(wrd `shiftR` (elemBits * (n - 1))) + r <- fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> skip $ SplitOnSuffixSeqWordDone (n - 1) fs1 wrd + FL.Done b -> do + let jump c = SplitOnSuffixSeqWordDone (n - 1) c wrd + yieldReinit jump b + + stepOuter gst (SplitOnSuffixSeqWordInit fs0 st0) = do + res <- step (adaptState gst) st0 + case res of + Yield x s -> do + let wrd = addToWord 0 x + r <- if withSep then fstep fs0 x else return $ FL.Partial fs0 + case r of + FL.Partial fs1 -> go SPEC 1 wrd s fs1 + FL.Done b -> do + let jump c = SplitOnSuffixSeqWordInit c s + yieldReinit jump b + Skip s -> skip (SplitOnSuffixSeqWordInit fs0 s) + Stop -> final fs0 >> return Stop + + where + + {-# INLINE go #-} + go !_ !idx !wrd !st !fs = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSuffixSeqWordInit c s + let wrd1 = addToWord wrd x + r <- if withSep then fstep fs x else return $ FL.Partial fs + case r of + FL.Partial fs1 + | idx /= maxIndex -> + go SPEC (idx + 1) wrd1 s fs1 + | wrd1 .&. wordMask /= wordPat -> + skip $ SplitOnSuffixSeqWordLoop wrd1 s fs1 + | otherwise -> + final fs1 >>= yieldReinit jump + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC idx wrd s fs + Stop -> + if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else skip $ SplitOnSuffixSeqWordDone idx fs wrd + + stepOuter gst (SplitOnSuffixSeqWordLoop wrd0 st0 fs0) = + go SPEC wrd0 st0 fs0 + + where + + {-# INLINE go #-} + go !_ !wrd !st !fs = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSuffixSeqWordInit c s + wrd1 = addToWord wrd x + old = (wordMask .&. wrd) + `shiftR` (elemBits * (patLen - 1)) + r <- + if withSep + then fstep fs x + else fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> + if wrd1 .&. 
wordMask == wordPat + then final fs1 >>= yieldReinit jump + else go SPEC wrd1 s fs1 + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC wrd s fs + Stop -> + if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else skip $ SplitOnSuffixSeqWordDone patLen fs wrd + + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + + stepOuter gst (SplitOnSuffixSeqKRInit fs st0 mba) = do + res <- step (adaptState gst) st0 + case res of + Yield x s -> do + liftIO $ pokeAt 0 mba x + r <- if withSep then fstep fs x else return $ FL.Partial fs + case r of + FL.Partial fs1 -> + skip $ SplitOnSuffixSeqKRInit1 fs1 s mba + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRInit c s mba + yieldReinit jump b + Skip s -> skip $ SplitOnSuffixSeqKRInit fs s mba + Stop -> final fs >> return Stop + + stepOuter gst (SplitOnSuffixSeqKRInit1 fs0 st0 mba) = do + go SPEC (SIZE_OF(a)) st0 fs0 + + where + + go !_ !offset st !fs = do + res <- step (adaptState gst) st + let arr :: Array a = Array + { arrContents = mba + , arrStart = 0 + , arrEnd = patBytes + } + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + r <- if withSep then fstep fs x else return $ FL.Partial fs + let ringHash = A.foldl' addCksum 0 arr + case r of + FL.Partial fs1 + | offset /= maxOffset -> + go SPEC (offset + SIZE_OF(a)) s fs1 + | ringHash == patHash -> + skip $ SplitOnSuffixSeqKRCheck fs1 s mba 0 + | otherwise -> + skip $ SplitOnSuffixSeqKRLoop + fs1 s mba 0 ringHash + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRInit c s mba + yieldReinit jump b + Skip s -> go SPEC offset s fs + Stop -> do + -- do not issue a blank segment when we end at pattern + if offset == maxOffset && A.byteEq arr patArr + then final fs >> return Stop + else if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else do + let rb = RingArray + { ringContents = mba + , ringSize = offset + , ringHead = 0 + } + in 
skip $ SplitOnSuffixSeqKRDone offset fs rb + + stepOuter gst (SplitOnSuffixSeqKRLoop fs0 st0 mba rh0 cksum0) = + go SPEC fs0 st0 rh0 cksum0 + + where + + go !_ !fs !st !rh !cksum = do + res <- step (adaptState gst) st + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + case res of + Yield x s -> do + (rb1, old) <- liftIO (RB.replace rb x) + let cksum1 = deltaCksum cksum old x + let rh1 = ringHead rb1 + r <- if withSep then fstep fs x else fstep fs old + case r of + FL.Partial fs1 -> + if cksum1 /= patHash + then go SPEC fs1 s rh1 cksum1 + else skip $ SplitOnSuffixSeqKRCheck fs1 s mba rh1 + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRInit c s mba + yieldReinit jump b + Skip s -> go SPEC fs s rh cksum + Stop -> do + if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else skip $ SplitOnSuffixSeqKRDone patBytes fs rb + + stepOuter _ (SplitOnSuffixSeqKRCheck fs st mba rh) = do + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + matches <- liftIO $ RB.eqArray rb patArr + if matches + then do + r <- final fs + let jump c = SplitOnSuffixSeqKRInit c st mba + yieldReinit jump r + else skip $ SplitOnSuffixSeqKRLoop fs st mba rh patHash + + stepOuter _ (SplitOnSuffixSeqKRDone 0 fs _) = do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + stepOuter _ (SplitOnSuffixSeqKRDone len fs rb) = do + assert (len >= 0) (return ()) + old <- RB.unsafeGetHead rb + let rb1 = RB.moveForward rb + r <- fstep fs old + case r of + FL.Partial fs1 -> + skip $ SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) fs1 rb1 + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) c rb1 + yieldReinit jump b + +-- | Parses a sequence of tokens suffixed by a separator e.g. @a;b;c;@ is +-- parsed as @a;@, @b;@, @c;@. If the pattern is empty the input stream is +-- returned as it is. 
+-- +-- Equivalent to the following: +-- +-- >>> splitEndBySeq pat f = Stream.foldMany (Fold.takeEndBySeq pat f) +-- +-- Usage: +-- +-- >>> f p = Stream.splitEndBySeq (Array.fromList p) Fold.toList +-- >>> splitEndBy p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) +-- +-- >>> splitEndBy "" "" +-- [] +-- +-- >>> splitEndBy "" "a...b" +-- ["a",".",".",".","b"] +-- +-- >>> splitEndBy ".." "" +-- [] +-- +-- +-- >>> splitEndBy ".." "a...b" +-- ["a..",".b"] +-- +-- +-- >>> splitEndBy ".." "abc" +-- ["abc"] +-- +-- >>> splitEndBy ".." ".." +-- [".."] +-- +-- >>> splitEndBy "." ".a" +-- [".","a"] +-- +-- >>> splitEndBy "." "a." +-- ["a."] +-- +-- Uses Rabin-Karp algorithm for substring search. +-- +{-# INLINE_NORMAL splitEndBySeq #-} +splitEndBySeq + :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitEndBySeq = splitOnSuffixSeq True + +-- | Like 'splitEndBySeq' but drops the separators and returns only the tokens. +-- +-- Equivalent to the following: +-- +-- >>> splitEndBySeq_ pat f = Stream.foldMany (Fold.takeEndBySeq_ pat f) +-- +-- Usage: +-- +-- >>> f p = Stream.splitEndBySeq_ (Array.fromList p) Fold.toList +-- >>> splitEndBy_ p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) +-- +-- >>> splitEndBy_ "" "" +-- [] +-- +-- >>> splitEndBy_ "" "a...b" +-- ["a",".",".",".","b"] +-- +-- >>> splitEndBy_ ".." "" +-- [] +-- +-- >>> splitEndBy_ ".." "a...b" +-- ["a",".b"] +-- +-- >>> splitEndBy_ ".." "abc" +-- ["abc"] +-- +-- >>> splitEndBy_ ".." ".." +-- [""] +-- +-- >>> splitEndBy_ "." ".a" +-- ["","a"] +-- +-- >>> splitEndBy_ "." "a." +-- ["a"] +-- +-- Uses Rabin-Karp algorithm for substring search. +-- +{-# INLINE_NORMAL splitEndBySeq_ #-} +splitEndBySeq_ + :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitEndBySeq_ = splitOnSuffixSeq False + +-- Implement this as a fold or a parser instead. 
+-- This can be implemented easily using Rabin Karp + +-- | Split post any one of the given patterns. +-- +-- /Unimplemented/ +{-# INLINE splitEndBySeqOneOf #-} +splitEndBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => + [Array a] -> Fold m a b -> Stream m a -> Stream m b +splitEndBySeqOneOf _subseq _f _m = undefined + +-- | Split on a prefixed separator element, dropping the separator. The +-- supplied 'Fold' is applied on the split segments. +-- +-- @ +-- > splitOnPrefix' p xs = Stream.toList $ Stream.splitOnPrefix p (Fold.toList) (Stream.fromList xs) +-- > splitOnPrefix' (== '.') ".a.b" +-- ["a","b"] +-- @ +-- +-- An empty stream results in an empty output stream: +-- @ +-- > splitOnPrefix' (== '.') "" +-- [] +-- @ +-- +-- An empty segment consisting of only a prefix is folded to the default output +-- of the fold: +-- +-- @ +-- > splitOnPrefix' (== '.') "." +-- [""] +-- +-- > splitOnPrefix' (== '.') ".a.b." +-- ["a","b",""] +-- +-- > splitOnPrefix' (== '.') ".a..b" +-- ["a","","b"] +-- +-- @ +-- +-- A prefix is optional at the beginning of the stream: +-- +-- @ +-- > splitOnPrefix' (== '.') "a" +-- ["a"] +-- +-- > splitOnPrefix' (== '.') "a.b" +-- ["a","b"] +-- @ +-- +-- 'splitOnPrefix' is an inverse of 'intercalatePrefix' with a single element: +-- +-- > Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList . Stream.splitOnPrefix (== '.') Fold.toList === id +-- +-- Assuming the input stream does not contain the separator: +-- +-- > Stream.splitOnPrefix (== '.') Fold.toList . 
Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList === id +-- +-- /Unimplemented/ +{-# INLINE splitBeginBy_ #-} +splitBeginBy_ :: -- (MonadCatch m) => + (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b +splitBeginBy_ _predicate _f = undefined + -- parseMany (Parser.sliceBeginBy predicate f) + +-- Int list examples for splitOn: +-- +-- >>> splitList [] [1,2,3,3,4] +-- > [[1],[2],[3],[3],[4]] +-- +-- >>> splitList [5] [1,2,3,3,4] +-- > [[1,2,3,3,4]] +-- +-- >>> splitList [1] [1,2,3,3,4] +-- > [[],[2,3,3,4]] +-- +-- >>> splitList [4] [1,2,3,3,4] +-- > [[1,2,3,3],[]] +-- +-- >>> splitList [2] [1,2,3,3,4] +-- > [[1],[3,3,4]] +-- +-- >>> splitList [3] [1,2,3,3,4] +-- > [[1,2],[],[4]] +-- +-- >>> splitList [3,3] [1,2,3,3,4] +-- > [[1,2],[4]] +-- +-- >>> splitList [1,2,3,3,4] [1,2,3,3,4] +-- > [[],[]] + +-- This can be implemented easily using Rabin Karp +-- | Split on any one of the given patterns. +-- +-- /Unimplemented/ +-- +{-# INLINE splitSepBySeqOneOf #-} +splitSepBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => + [Array a] -> Fold m a b -> Stream m a -> Stream m b +splitSepBySeqOneOf _subseq _f _m = + undefined -- D.fromStreamD $ D.splitOnAny f subseq (D.toStreamD m) + +------------------------------------------------------------------------------ +-- Nested Container Transformation +------------------------------------------------------------------------------ + +{-# ANN type SplitState Fuse #-} +data SplitState s arr + = SplitInitial s + | SplitBuffering s arr + | SplitSplitting s arr + | SplitYielding arr (SplitState s arr) + | SplitFinishing + +-- XXX An alternative approach would be to use a partial fold (Fold m a b) to +-- split using a splitBy like combinator. The Fold would consume upto the +-- separator and return any leftover which can then be fed to the next fold. +-- +-- We can revisit this once we have partial folds/parsers. +-- +-- | Performs infix separator style splitting. 
+{-# INLINE_NORMAL splitInnerBy #-} +splitInnerBy + :: Monad m + => (f a -> m (f a, Maybe (f a))) -- splitter + -> (f a -> f a -> m (f a)) -- joiner + -> Stream m (f a) + -> Stream m (f a) +splitInnerBy splitter joiner (Stream step1 state1) = + Stream step (SplitInitial state1) + + where + + {-# INLINE_LATE step #-} + step gst (SplitInitial st) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + return $ case mx2 of + Nothing -> Skip (SplitBuffering s x1) + Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitInitial s) + Stop -> return Stop + + step gst (SplitBuffering st buf) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + buf' <- joiner buf x1 + return $ case mx2 of + Nothing -> Skip (SplitBuffering s buf') + Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitBuffering s buf) + Stop -> return $ Skip (SplitYielding buf SplitFinishing) + + step _ (SplitSplitting st buf) = do + (x1, mx2) <- splitter buf + return $ case mx2 of + Nothing -> Skip $ SplitBuffering st x1 + Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) + + step _ (SplitYielding x next) = return $ Yield x next + step _ SplitFinishing = return Stop + +-- | Performs infix separator style splitting. +{-# INLINE_NORMAL splitInnerBySuffix #-} +splitInnerBySuffix + :: Monad m + => (f a -> Bool) -- isEmpty? 
+ -> (f a -> m (f a, Maybe (f a))) -- splitter + -> (f a -> f a -> m (f a)) -- joiner + -> Stream m (f a) + -> Stream m (f a) +splitInnerBySuffix isEmpty splitter joiner (Stream step1 state1) = + Stream step (SplitInitial state1) + + where + + {-# INLINE_LATE step #-} + step gst (SplitInitial st) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + return $ case mx2 of + Nothing -> Skip (SplitBuffering s x1) + Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitInitial s) + Stop -> return Stop + + step gst (SplitBuffering st buf) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + buf' <- joiner buf x1 + return $ case mx2 of + Nothing -> Skip (SplitBuffering s buf') + Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitBuffering s buf) + Stop -> + return $ + if isEmpty buf + then Stop + else Skip (SplitYielding buf SplitFinishing) + + step _ (SplitSplitting st buf) = do + (x1, mx2) <- splitter buf + return $ case mx2 of + Nothing -> Skip $ SplitBuffering st x1 + Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) + + step _ (SplitYielding x next) = return $ Yield x next + step _ SplitFinishing = return Stop + +------------------------------------------------------------------------------ +-- Trimming +------------------------------------------------------------------------------ + +-- | Drop prefix from the input stream if present. +-- +-- Space: @O(1)@ +-- +-- See also stripPrefix. +-- +-- /Unimplemented/ +{-# INLINE dropPrefix #-} +dropPrefix :: + -- (Monad m, Eq a) => + Stream m a -> Stream m a -> Stream m a +dropPrefix = error "Not implemented yet!" + +-- | Drop all matching infix from the input stream if present. Infix stream +-- may be consumed multiple times. +-- +-- Space: @O(n)@ where n is the length of the infix. +-- +-- See also stripInfix. 
+-- +-- /Unimplemented/ +{-# INLINE dropInfix #-} +dropInfix :: + -- (Monad m, Eq a) => + Stream m a -> Stream m a -> Stream m a +dropInfix = error "Not implemented yet!" + +-- | Drop suffix from the input stream if present. Suffix stream may be +-- consumed multiple times. +-- +-- Space: @O(n)@ where n is the length of the suffix. +-- +-- See also stripSuffix. +-- +-- /Unimplemented/ +{-# INLINE dropSuffix #-} +dropSuffix :: + -- (Monad m, Eq a) => + Stream m a -> Stream m a -> Stream m a +dropSuffix = error "Not implemented yet!" diff --git a/core/src/Streamly/Internal/Unicode/Decode.hs b/core/src/Streamly/Internal/Unicode/Decode.hs new file mode 100644 index 0000000000..b9a0ea856b --- /dev/null +++ b/core/src/Streamly/Internal/Unicode/Decode.hs @@ -0,0 +1,979 @@ +{-# LANGUAGE CPP #-} +-- | +-- Module : Streamly.Internal.Unicode.Decode +-- Copyright : (c) 2018 Composewell Technologies +-- (c) Bjoern Hoehrmann 2008-2009 +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- + +module Streamly.Internal.Unicode.Decode + ( + -- * Setup + -- | To execute the code examples provided in this module in ghci, please + -- run the following commands first. + -- + -- $setup + + -- XXX Use to/from instead of encode/decode for more compact naming. + + -- * Construction (Decoding) + decodeLatin1 + + -- ** UTF-8 Byte Stream Decoding + , CodingFailureMode(..) + , writeCharUtf8' + , parseCharUtf8With + , decodeUtf8 + , decodeUtf8' + , decodeUtf8_ + + -- ** UTF-16 Byte Stream Decoding + , decodeUtf16le' + , decodeUtf16le + + -- ** Resumable UTF-8 Byte Stream Decoding + , DecodeError(..) + , DecodeState + , CodePoint + , decodeUtf8Either + , resumeDecodeUtf8Either + + -- ** UTF-8 Array Stream Decoding + , decodeUtf8Chunks + , decodeUtf8Chunks' + , decodeUtf8Chunks_ + -- , fromUtf8ChunksEndByLn + + -- * StreamD UTF8 Encoding / Decoding transformations. 
+ , decodeUtf8D + , decodeUtf8D' + , decodeUtf8D_ + , decodeUtf8EitherD + , resumeDecodeUtf8EitherD + + -- * Decoding String Literals + , fromStr# + + -- * Word16 Utilities + -- , mkEvenW8Chunks + , swapByteOrder + + -- * Deprecations + , decodeUtf8Lax + ) +where + + +#include "inline.hs" + +-- MachDeps.h includes ghcautoconf.h that defines WORDS_BIGENDIAN for big endian +-- systems. +#include "MachDeps.h" + +import Control.Monad (void) +import Control.Monad.IO.Class (MonadIO, liftIO) +import Data.Bits (shiftR, shiftL, (.|.), (.&.)) +import Data.Word (Word8, Word16) +import Foreign.Marshal.Alloc (mallocBytes) +import Foreign.Storable (Storable(..)) +#ifndef __GHCJS__ +import Fusion.Plugin.Types (Fuse(..)) +#endif +import GHC.Base (assert, unsafeChr) +import GHC.Exts (Addr#) +import GHC.Ptr (Ptr (..), plusPtr) +import System.IO.Unsafe (unsafePerformIO) +import Streamly.Internal.Data.Array.Type (Array(..)) +import Streamly.Internal.Data.MutByteArray.Type (MutByteArray) +import Streamly.Internal.Data.Parser (Parser) +import Streamly.Internal.Data.Stream (Stream) +import Streamly.Internal.Data.Stream (Step (..)) +import Streamly.Internal.Data.SVar.Type (adaptState) +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) +import Streamly.Internal.Data.Unbox (Unbox(peekAt)) +import Streamly.Internal.System.IO (unsafeInlineIO) + +import qualified Streamly.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as Parser (Parser) +import qualified Streamly.Internal.Data.Parser as ParserD +import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.Data.Stream as D + +import Prelude hiding (lines, words, unlines, unwords) + +#include "DocTestUnicodeStream.hs" + +------------------------------------------------------------------------------- +-- Latin1 decoding +------------------------------------------------------------------------------- + +-- | Decode a stream of bytes to Unicode characters by mapping each byte to a +-- 
corresponding Unicode 'Char' in 0-255 range. +-- +{-# INLINE decodeLatin1 #-} +decodeLatin1 :: Monad m => Stream m Word8 -> Stream m Char +decodeLatin1 = fmap (unsafeChr . fromIntegral) + +------------------------------------------------------------------------------- +-- UTF-8 decoding +------------------------------------------------------------------------------- + +-- Int helps in cheaper conversion from Int to Char +type CodePoint = Int +type DecodeState = Word8 + +-- We can divide the errors in three general categories: +-- * A non-starter was encountered in a begin state +-- * A starter was encountered without completing a codepoint +-- * The last codepoint was not complete (input underflow) +-- +-- Need to separate resumable and non-resumable error. In case of non-resumable +-- error we can also provide the failing byte. In case of resumable error the +-- state can be opaque. +-- +data DecodeError = DecodeError !DecodeState !CodePoint deriving Show + +-- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + +-- XXX Use names decodeSuccess = 0, decodeFailure = 12 + +decodeTable :: [Word8] +decodeTable = [ + -- The first part of the table maps bytes to character classes that + -- to reduce the size of the transition table and create bitmasks. + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + -- The second part is a transition table that maps a combination + -- of a state of the automaton and a character class to a state. 
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12 + ] + +{-# INLINE utf8dLength #-} +utf8dLength :: Int +utf8dLength = length decodeTable + +-- | We do not want to garbage collect this and free the memory, we want to +-- keep this persistent. We don't know how to do that with GHC without having a +-- reference in some global structure. So we use a hack, use mallocBytes so +-- that the GC has no way to free it. +{-# NOINLINE utf8d #-} +utf8d :: Ptr Word8 +utf8d = unsafePerformIO $ do + let size = utf8dLength + p <- liftIO $ mallocBytes size + void $ D.fold + (Fold.foldlM' (\b a -> poke b a >> return (b `plusPtr` 1)) (return p)) + (D.fromList decodeTable) + return p + +-- | Return element at the specified index without checking the bounds. +-- and without touching the foreign ptr. +{-# INLINE_NORMAL unsafePeekElemOff #-} +unsafePeekElemOff :: forall a. Storable a => Ptr a -> Int -> a +unsafePeekElemOff p i = + let !x = unsafeInlineIO $ peekElemOff p i + in x + +-- XXX We can use a fromPtr stream to implement it. +{-# INLINE showMemory #-} +showMemory :: + forall a. (Show a, Storable a) => Ptr a -> Ptr a -> String +showMemory cur end + | cur < end = + let cur1 = cur `plusPtr` sizeOf (undefined :: a) + in show (unsafeInlineIO $ peek cur) ++ " " ++ showMemory cur1 end +showMemory _ _ = "" + +-- decode is split into two separate cases to avoid branching instructions. +-- From the higher level flow we already know which case we are in so we can +-- call the appropriate decode function. 
+-- +-- When the state is 0 +{-# INLINE decode0 #-} +decode0 :: Ptr Word8 -> Word8 -> Tuple' DecodeState CodePoint +decode0 table byte = + let !t = table `unsafePeekElemOff` fromIntegral byte + !codep' = (0xff `shiftR` fromIntegral t) .&. fromIntegral byte + !state' = table `unsafePeekElemOff` (256 + fromIntegral t) + in assert ((byte > 0x7f || error showByte) + && (state' /= 0 || error (showByte ++ showTable))) + (Tuple' state' codep') + + where + + utf8tableEnd = table `plusPtr` 364 + showByte = "Streamly: decode0: byte: " ++ show byte + showTable = " table: " ++ showMemory table utf8tableEnd + +-- When the state is not 0 +{-# INLINE decode1 #-} +decode1 + :: Ptr Word8 + -> DecodeState + -> CodePoint + -> Word8 + -> Tuple' DecodeState CodePoint +decode1 table state codep byte = + -- Remember codep is Int type! + -- Can it be unsafe to convert the resulting Int to Char? + let !t = table `unsafePeekElemOff` fromIntegral byte + !codep' = (fromIntegral byte .&. 0x3f) .|. (codep `shiftL` 6) + !state' = table `unsafePeekElemOff` + (256 + fromIntegral state + fromIntegral t) + in assert (codep' <= 0x10FFFF + || error (showByte ++ showState state codep)) + (Tuple' state' codep') + where + + utf8tableEnd = table `plusPtr` 364 + showByte = "Streamly: decode1: byte: " ++ show byte + showState st cp = + " state: " ++ show st ++ + " codepoint: " ++ show cp ++ + " table: " ++ showMemory table utf8tableEnd + +------------------------------------------------------------------------------- +-- Resumable UTF-8 decoding +------------------------------------------------------------------------------- + +-- Strangely, GHCJS hangs linking template-haskell with this +#ifndef __GHCJS__ +{-# ANN type UTF8DecodeState Fuse #-} +#endif +data UTF8DecodeState s a + = UTF8DecodeInit s + | UTF8DecodeInit1 s Word8 + | UTF8DecodeFirst s Word8 + | UTF8Decoding s !DecodeState !CodePoint + | YieldAndContinue a (UTF8DecodeState s a) + | Done + +{- HLINT ignore "Use if" -} +{-# INLINE_NORMAL 
resumeDecodeUtf8EitherD #-} +resumeDecodeUtf8EitherD + :: Monad m + => DecodeState + -> CodePoint + -> D.Stream m Word8 + -> D.Stream m (Either DecodeError Char) +resumeDecodeUtf8EitherD dst codep (D.Stream step state) = + let stt = + if dst == 0 + then UTF8DecodeInit state + else UTF8Decoding state dst codep + in D.Stream (step' utf8d) stt + where + {-# INLINE_LATE step' #-} + step' _ gst (UTF8DecodeInit st) = do + r <- step (adaptState gst) st + return $ case r of + Yield x s -> Skip (UTF8DecodeInit1 s x) + Skip s -> Skip (UTF8DecodeInit s) + Stop -> Skip Done + + step' _ _ (UTF8DecodeInit1 st x) = do + -- Note: It is important to use a ">" instead of a "<=" test + -- here for GHC to generate code layout for default branch + -- prediction for the common case. This is fragile and might + -- change with the compiler versions, we need a more reliable + -- "likely" primitive to control branch predication. + case x > 0x7f of + False -> + return $ Skip $ YieldAndContinue + (Right $ unsafeChr (fromIntegral x)) + (UTF8DecodeInit st) + -- Using a separate state here generates a jump to a + -- separate code block in the core which seems to perform + -- slightly better for the non-ascii case. + True -> return $ Skip $ UTF8DecodeFirst st x + + -- XXX should we merge it with UTF8DecodeInit1? + step' table _ (UTF8DecodeFirst st x) = do + let (Tuple' sv cp) = decode0 table x + return $ + case sv of + 12 -> + Skip $ YieldAndContinue (Left $ DecodeError 0 (fromIntegral x)) + (UTF8DecodeInit st) + 0 -> error "unreachable state" + _ -> Skip (UTF8Decoding st sv cp) + + -- We recover by trying the new byte x a starter of a new codepoint. + -- XXX on error need to report the next byte "x" as well. 
+ -- XXX need to use the same recovery in array decoding routine as well + step' table gst (UTF8Decoding st statePtr codepointPtr) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + return $ + case sv of + 0 -> Skip $ YieldAndContinue (Right $ unsafeChr cp) + (UTF8DecodeInit s) + 12 -> + Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) + (UTF8DecodeInit1 s x) + _ -> Skip (UTF8Decoding s sv cp) + Skip s -> return $ Skip (UTF8Decoding s statePtr codepointPtr) + Stop -> return $ Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) Done + + step' _ _ (YieldAndContinue c s) = return $ Yield c s + step' _ _ Done = return Stop + +-- XXX We can use just one API, and define InitState = 0 and InitCodePoint = 0 +-- to use as starting state. +-- +{-# INLINE_NORMAL decodeUtf8EitherD #-} +decodeUtf8EitherD :: Monad m + => D.Stream m Word8 -> D.Stream m (Either DecodeError Char) +decodeUtf8EitherD = resumeDecodeUtf8EitherD 0 0 + +-- | +-- +-- /Pre-release/ +{-# INLINE decodeUtf8Either #-} +decodeUtf8Either :: Monad m + => Stream m Word8 -> Stream m (Either DecodeError Char) +decodeUtf8Either = decodeUtf8EitherD + +-- | +-- +-- /Pre-release/ +{-# INLINE resumeDecodeUtf8Either #-} +resumeDecodeUtf8Either + :: Monad m + => DecodeState + -> CodePoint + -> Stream m Word8 + -> Stream m (Either DecodeError Char) +resumeDecodeUtf8Either = resumeDecodeUtf8EitherD + +------------------------------------------------------------------------------- +-- One shot decoding +------------------------------------------------------------------------------- + +data CodingFailureMode + = TransliterateCodingFailure + | ErrorOnCodingFailure + | DropOnCodingFailure + deriving (Show) + +{-# INLINE replacementChar #-} +replacementChar :: Char +replacementChar = '\xFFFD' + +data UTF8CharDecodeState a + = UTF8CharDecodeInit + | UTF8CharDecoding !DecodeState !CodePoint + +{-# INLINE 
parseCharUtf8WithD #-} +parseCharUtf8WithD :: + Monad m => CodingFailureMode -> ParserD.Parser Word8 m Char +parseCharUtf8WithD cfm = ParserD.Parser (step' utf8d) initial extract + + where + + prefix = "Streamly.Internal.Data.Stream.parseCharUtf8WithD:" + + {-# INLINE initial #-} + initial = return $ ParserD.IPartial UTF8CharDecodeInit + + handleError err souldBackTrack = + case cfm of + ErrorOnCodingFailure -> ParserD.SError err + TransliterateCodingFailure -> + case souldBackTrack of + True -> ParserD.SDone 0 replacementChar + False -> ParserD.SDone 1 replacementChar + DropOnCodingFailure -> + case souldBackTrack of + True -> ParserD.SContinue 0 UTF8CharDecodeInit + False -> ParserD.SContinue 1 UTF8CharDecodeInit + + {-# INLINE step' #-} + step' table UTF8CharDecodeInit x = + -- Note: It is important to use a ">" instead of a "<=" test + -- here for GHC to generate code layout for default branch + -- prediction for the common case. This is fragile and might + -- change with the compiler versions, we need a more reliable + -- "likely" primitive to control branch predication. 
+ return $ case x > 0x7f of + False -> ParserD.SDone 1 $ unsafeChr $ fromIntegral x + True -> + let (Tuple' sv cp) = decode0 table x + in case sv of + 12 -> + let msg = prefix + ++ "Invalid first UTF8 byte" ++ show x + in handleError msg False + 0 -> error $ prefix ++ "unreachable state" + _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) + + step' table (UTF8CharDecoding statePtr codepointPtr) x = return $ + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + in case sv of + 0 -> ParserD.SDone 1 $ unsafeChr cp + 12 -> + let msg = prefix + ++ "Invalid subsequent UTF8 byte" + ++ show x + ++ "in state" + ++ show statePtr + ++ "accumulated value" + ++ show codepointPtr + in handleError msg True + _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) + + {-# INLINE extract #-} + extract UTF8CharDecodeInit = error $ prefix ++ "Not enough input" + extract (UTF8CharDecoding _ _) = + case cfm of + ErrorOnCodingFailure -> + return $ ParserD.FError $ prefix ++ "Not enough input" + TransliterateCodingFailure -> + return (ParserD.FDone 0 replacementChar) + -- XXX We shouldn't error out here. There is no way to represent an + -- empty parser result unless we return a "Maybe" type. + DropOnCodingFailure -> error $ prefix ++ "Not enough input" + +-- XXX This should ideally accept a "CodingFailureMode" and perform appropriate +-- error handling. This isn't possible now as "TransliterateCodingFailure"'s +-- workflow requires backtracking 1 element. This can be revisited once "Fold" +-- supports backtracking. +{-# INLINE writeCharUtf8' #-} +writeCharUtf8' :: Monad m => Parser Word8 m Char +writeCharUtf8' = parseCharUtf8WithD ErrorOnCodingFailure + +-- XXX The initial idea was to have "parseCharUtf8" and offload the error +-- handling to another parser. 
So, say we had "parseCharUtf8'", +-- +-- >>> parseCharUtf8Smart = parseCharUtf8' <|> Parser.fromPure replacementChar +-- +-- But unfortunately parseCharUtf8Smart used in conjunction with "parseMany" - +-- that is "parseMany parseCharUtf8Smart" on a stream causes the heap to +-- overflow. Even a heap size of 500 MB was not sufficient. +-- +-- This needs to be investigated further. +{-# INLINE parseCharUtf8With #-} +parseCharUtf8With :: + Monad m => CodingFailureMode -> Parser.Parser Word8 m Char +parseCharUtf8With = parseCharUtf8WithD + +-- XXX write it as a parser and use parseMany to decode a stream, need to check +-- if that preserves the same performance. Or we can use a resumable parser +-- that parses a chunk at a time. +-- +-- XXX Implement this in terms of decodeUtf8Either. Need to make sure that +-- decodeUtf8Either preserves the performance characteristics. +-- +-- XXX Currently this requires @-fspec-constr-recursive=16@ for best perf but +-- it takes too much memory and compile time. Reimplement to avoid that. 
+{-# INLINE_NORMAL decodeUtf8WithD #-} +decodeUtf8WithD :: Monad m + => CodingFailureMode -> D.Stream m Word8 -> D.Stream m Char +decodeUtf8WithD cfm (D.Stream step state) = + D.Stream (step' utf8d) (UTF8DecodeInit state) + + where + + prefix = "Streamly.Internal.Data.Stream.decodeUtf8With: " + + {-# INLINE handleError #-} + handleError e s = + case cfm of + ErrorOnCodingFailure -> error e + TransliterateCodingFailure -> YieldAndContinue replacementChar s + DropOnCodingFailure -> s + + {-# INLINE handleUnderflow #-} + handleUnderflow = + case cfm of + ErrorOnCodingFailure -> error $ prefix ++ "Not enough input" + TransliterateCodingFailure -> YieldAndContinue replacementChar Done + DropOnCodingFailure -> Done + + {-# INLINE_LATE step' #-} + step' _ gst (UTF8DecodeInit st) = do + r <- step (adaptState gst) st + return $ case r of + Yield x s -> Skip (UTF8DecodeInit1 s x) + Skip s -> Skip (UTF8DecodeInit s) + Stop -> Skip Done + + step' _ _ (UTF8DecodeInit1 st x) = do + -- Note: It is important to use a ">" instead of a "<=" test + -- here for GHC to generate code layout for default branch + -- prediction for the common case. This is fragile and might + -- change with the compiler versions, we need a more reliable + -- "likely" primitive to control branch predication. + case x > 0x7f of + False -> + return $ Skip $ YieldAndContinue + (unsafeChr (fromIntegral x)) + (UTF8DecodeInit st) + -- Using a separate state here generates a jump to a + -- separate code block in the core which seems to perform + -- slightly better for the non-ascii case. + True -> return $ Skip $ UTF8DecodeFirst st x + + -- XXX should we merge it with UTF8DecodeInit1? 
+ step' table _ (UTF8DecodeFirst st x) = do + let (Tuple' sv cp) = decode0 table x + return $ + case sv of + 12 -> + let msg = prefix ++ "Invalid first UTF8 byte " ++ show x + in Skip $ handleError msg (UTF8DecodeInit st) + 0 -> error "unreachable state" + _ -> Skip (UTF8Decoding st sv cp) + + -- We recover by trying the new byte x as a starter of a new codepoint. + -- XXX need to use the same recovery in array decoding routine as well + step' table gst (UTF8Decoding st statePtr codepointPtr) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + return $ case sv of + 0 -> Skip $ YieldAndContinue + (unsafeChr cp) (UTF8DecodeInit s) + 12 -> + let msg = prefix + ++ "Invalid subsequent UTF8 byte " + ++ show x + ++ " in state " + ++ show statePtr + ++ " accumulated value " + ++ show codepointPtr + in Skip $ handleError msg (UTF8DecodeInit1 s x) + _ -> Skip (UTF8Decoding s sv cp) + Skip s -> return $ + Skip (UTF8Decoding s statePtr codepointPtr) + Stop -> return $ Skip handleUnderflow + + step' _ _ (YieldAndContinue c s) = return $ Yield c s + step' _ _ Done = return Stop + +{-# INLINE decodeUtf8D #-} +decodeUtf8D :: Monad m => D.Stream m Word8 -> D.Stream m Char +decodeUtf8D = decodeUtf8WithD TransliterateCodingFailure + +-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. +-- Any invalid codepoint encountered is replaced with the unicode replacement +-- character. +-- +{-# INLINE decodeUtf8 #-} +decodeUtf8 :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8 = decodeUtf8D + +{-# INLINE decodeUtf8D' #-} +decodeUtf8D' :: Monad m => D.Stream m Word8 -> D.Stream m Char +decodeUtf8D' = decodeUtf8WithD ErrorOnCodingFailure + +-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. +-- The function throws an error if an invalid codepoint is encountered. 
+-- +{-# INLINE decodeUtf8' #-} +decodeUtf8' :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8' = decodeUtf8D' + +{-# INLINE decodeUtf8D_ #-} +decodeUtf8D_ :: Monad m => D.Stream m Word8 -> D.Stream m Char +decodeUtf8D_ = decodeUtf8WithD DropOnCodingFailure + +-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. +-- Any invalid codepoint encountered is dropped. +-- +{-# INLINE decodeUtf8_ #-} +decodeUtf8_ :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8_ = decodeUtf8D_ + +-- | Same as 'decodeUtf8' +-- +{-# DEPRECATED decodeUtf8Lax "Please use 'decodeUtf8' instead" #-} +{-# INLINE decodeUtf8Lax #-} +decodeUtf8Lax :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8Lax = decodeUtf8 + +------------------------------------------------------------------------------- +-- Decoding Utf16 +------------------------------------------------------------------------------- + +{- +data MkEvenW8ChunksState s w8 arr + = MECSInit s + | MECSBuffer w8 s + | MECSYieldAndInit arr s + | MECSYieldAndBuffer arr w8 s + +-- | Ensure chunks of even length. This can be used before casting the arrays to +-- Word16. Use this API when interacting with external data. +-- +-- The chunks are split and merged accordingly to create arrays of even length. +-- If the sum of length of all the arrays in the stream is odd then the trailing +-- byte of the last array is dropped. +-- +{-# INLINE_NORMAL mkEvenW8Chunks #-} +mkEvenW8Chunks :: Monad m => Stream m (Array Word8) -> Stream m (Array Word8) +mkEvenW8Chunks (D.Stream step state) = D.Stream step1 (MECSInit state) + + where + + {-# INLINE_LATE step1 #-} + step1 gst (MECSInit st) = do + r <- step (adaptState gst) st + return $ + case r of + Yield arr st1 -> + let len = Array.length arr + in if (len .&. 
1) == 1 + then let arr1 = Array.unsafeSliceOffLen 0 (len - 1) arr + remElem = Array.unsafeGetIndex (len - 1) arr + in Yield arr1 (MECSBuffer remElem st1) + else Yield arr (MECSInit st1) + Skip s -> Skip (MECSInit s) + Stop -> Stop + step1 gst (MECSBuffer remElem st) = do + r <- step (adaptState gst) st + return $ + case r of + Yield arr st1 | Array.length arr == 0 -> + Skip (MECSBuffer remElem st1) + Yield arr st1 | Array.length arr == 1 -> + let fstElem = Array.unsafeGetIndex 0 arr + w16 = Array.fromList [remElem, fstElem] + in Yield w16 (MECSInit st1) + Yield arr st1 -> + let len = Array.length arr + in if (len .&. 1) == 1 + then let arr1 = Array.unsafeSliceOffLen 1 (len - 1) arr + fstElem = Array.unsafeGetIndex 0 arr + w16 = Array.fromList [remElem, fstElem] + in Yield w16 (MECSYieldAndInit arr1 st1) + else let arr1 = Array.unsafeSliceOffLen 1 (len - 2) arr + fstElem = Array.unsafeGetIndex 0 arr + lstElem = Array.unsafeGetIndex (len - 1) arr + w16 = Array.fromList [remElem, fstElem] + in Yield w16 + (MECSYieldAndBuffer arr1 lstElem st1) + Skip s -> Skip (MECSBuffer remElem s) + Stop -> Stop -- Here the last Word8 is lost + step1 _ (MECSYieldAndInit arr st) = + pure $ Yield arr (MECSInit st) + step1 _ (MECSYieldAndBuffer arr lastElem st) = + pure $ Yield arr (MECSBuffer lastElem st) +-} + +-- | Swap the byte order of Word16 +-- +-- > swapByteOrder 0xABCD == 0xCDAB +-- > swapByteOrder . swapByteOrder == id +{-# INLINE swapByteOrder #-} +swapByteOrder :: Word16 -> Word16 +swapByteOrder w = (w `shiftL` 8) .|. 
(w `shiftR` 8) + +{-# INLINE utf16LowSurrogate #-} +utf16LowSurrogate :: Word16 +utf16LowSurrogate = 0xDC00 + +{-# INLINE utf16HighSurrogate #-} +utf16HighSurrogate :: Word16 +utf16HighSurrogate = 0xD800 + +data DecodeUtf16WithState w c s + = U16NoSurrogate s + | U16HighSurrogate w s + | U16D + | U16YAndC c (DecodeUtf16WithState w c s) + +{-# INLINE_NORMAL decodeUtf16With #-} +decodeUtf16With :: + Monad m + => CodingFailureMode + -> D.Stream m Word16 + -> D.Stream m Char +decodeUtf16With cfm (D.Stream step state) = + D.Stream step1 (U16NoSurrogate state) + + where + + prefix = "Streamly.Internal.Unicode.Stream.decodeUtf16With: " + + {-# INLINE combineSurrogates #-} + combineSurrogates hi lo = + let first10 = fromIntegral (hi - utf16HighSurrogate) `shiftL` 10 + second10 = fromIntegral (lo - utf16LowSurrogate) + in unsafeChr (0x10000 + (first10 .|. second10)) + + {-# INLINE transliterateOrError #-} + transliterateOrError e s = + case cfm of + ErrorOnCodingFailure -> error e + TransliterateCodingFailure -> U16YAndC replacementChar s + DropOnCodingFailure -> s + + {-# INLINE inputUnderflow #-} + inputUnderflow = + case cfm of + ErrorOnCodingFailure -> error $ prefix ++ "Input Underflow" + TransliterateCodingFailure -> U16YAndC replacementChar U16D + DropOnCodingFailure -> U16D + + {-# INLINE_LATE step1 #-} + step1 gst (U16NoSurrogate st) = do + r <- step (adaptState gst) st + pure $ + case r of + Yield x st1 + | x < 0xD800 || x > 0xDFFF -> + Yield (unsafeChr (fromIntegral x)) (U16NoSurrogate st1) + | x >= 0xD800 && x <= 0xDBFF -> + Skip (U16HighSurrogate x st1) + | otherwise -> + let msg = prefix + ++ "Invalid first UTF16 word " ++ show x + in Skip $ + transliterateOrError msg (U16NoSurrogate st1) + Skip st1 -> Skip (U16NoSurrogate st1) + Stop -> Stop + step1 gst (U16HighSurrogate hi st) = do + r <- step (adaptState gst) st + pure $ + case r of + Yield x st1 + | x >= 0xDC00 && x <= 0xDFFF -> + Yield (combineSurrogates hi x) (U16NoSurrogate st1) + | otherwise -> + let 
msg = prefix + ++ "Invalid subsequent UTF16 word " ++ show x + ++ " in state " ++ show hi + in Skip $ + transliterateOrError msg (U16NoSurrogate st1) + Skip st1 -> Skip (U16HighSurrogate hi st1) + Stop -> Skip inputUnderflow + step1 _ (U16YAndC x st) = pure $ Yield x st + step1 _ U16D = pure Stop + +{-# INLINE decodeUtf16' #-} +decodeUtf16' :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16' = decodeUtf16With ErrorOnCodingFailure + +{-# INLINE decodeUtf16 #-} +decodeUtf16 :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16 = decodeUtf16With TransliterateCodingFailure + +-- | Similar to 'decodeUtf16le' but throws an error if an invalid codepoint is +-- encountered. +-- +{-# INLINE decodeUtf16le' #-} +decodeUtf16le' :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16le' = + decodeUtf16' +#ifdef WORDS_BIGENDIAN + . fmap swapByteOrder +#endif + +-- | Decode a UTF-16 encoded stream to a stream of Unicode characters. Any +-- invalid codepoint encountered is replaced with the unicode replacement +-- character. +-- +-- The Word16s are expected to be in the little-endian byte order. +-- +{-# INLINE decodeUtf16le #-} +decodeUtf16le :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16le = + decodeUtf16 +#ifdef WORDS_BIGENDIAN + . 
fmap swapByteOrder +#endif + +------------------------------------------------------------------------------- +-- Decoding Array Streams +------------------------------------------------------------------------------- + +#ifndef __GHCJS__ +{-# ANN type FlattenState Fuse #-} +#endif +data FlattenState s + = OuterLoop s !(Maybe (DecodeState, CodePoint)) + | InnerLoopDecodeInit s MutByteArray !Int !Int + | InnerLoopDecodeFirst s MutByteArray !Int !Int Word8 + | InnerLoopDecoding s MutByteArray !Int !Int + !DecodeState !CodePoint + | YAndC !Char (FlattenState s) -- These constructors can be + -- encoded in the UTF8DecodeState + -- type, I prefer to keep these + -- flat even though that means + -- coming up with new names + | D + +-- The normal decodeUtf8 above should fuse with flattenArrays +-- to create this exact code but it doesn't for some reason, as of now this +-- remains the fastest way I could figure out to decodeUtf8. +-- +-- XXX Add Proper error messages +{-# INLINE_NORMAL decodeUtf8ArraysWithD #-} +decodeUtf8ArraysWithD :: + MonadIO m + => CodingFailureMode + -> D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8ArraysWithD cfm (D.Stream step state) = + D.Stream (step' utf8d) (OuterLoop state Nothing) + where + {-# INLINE transliterateOrError #-} + transliterateOrError e s = + case cfm of + ErrorOnCodingFailure -> error e + TransliterateCodingFailure -> YAndC replacementChar s + DropOnCodingFailure -> s + {-# INLINE inputUnderflow #-} + inputUnderflow = + case cfm of + ErrorOnCodingFailure -> + error $ + show "Streamly.Internal.Data.Stream." 
+ ++ "decodeUtf8ArraysWith: Input Underflow" + TransliterateCodingFailure -> YAndC replacementChar D + DropOnCodingFailure -> D + {-# INLINE_LATE step' #-} + step' _ gst (OuterLoop st Nothing) = do + r <- step (adaptState gst) st + return $ + case r of + Yield Array {..} s -> + Skip (InnerLoopDecodeInit s arrContents arrStart arrEnd) + Skip s -> Skip (OuterLoop s Nothing) + Stop -> Skip D + step' _ gst (OuterLoop st dst@(Just (ds, cp))) = do + r <- step (adaptState gst) st + return $ + case r of + Yield Array {..} s -> + Skip (InnerLoopDecoding s arrContents arrStart arrEnd ds cp) + Skip s -> Skip (OuterLoop s dst) + Stop -> Skip inputUnderflow + step' _ _ (InnerLoopDecodeInit st _ p end) + | p == end = do + return $ Skip $ OuterLoop st Nothing + step' _ _ (InnerLoopDecodeInit st contents p end) = do + x <- liftIO $ peekAt p contents + -- Note: It is important to use a ">" instead of a "<=" test here for + -- GHC to generate code layout for default branch prediction for the + -- common case. This is fragile and might change with the compiler + -- versions, we need a more reliable "likely" primitive to control + -- branch predication. + case x > 0x7f of + False -> + return $ Skip $ YAndC + (unsafeChr (fromIntegral x)) + (InnerLoopDecodeInit st contents (p + 1) end) + -- Using a separate state here generates a jump to a separate code + -- block in the core which seems to perform slightly better for the + -- non-ascii case. + True -> return $ Skip $ InnerLoopDecodeFirst st contents p end x + + step' table _ (InnerLoopDecodeFirst st contents p end x) = do + let (Tuple' sv cp) = decode0 table x + return $ + case sv of + 12 -> + Skip $ + transliterateOrError + ( + "Streamly.Internal.Data.Stream." 
+ ++ "decodeUtf8ArraysWith: Invalid UTF8" + ++ " codepoint encountered" + ) + (InnerLoopDecodeInit st contents (p + 1) end) + 0 -> error "unreachable state" + _ -> Skip (InnerLoopDecoding st contents (p + 1) end sv cp) + step' _ _ (InnerLoopDecoding st _ p end sv cp) + | p == end = return $ Skip $ OuterLoop st (Just (sv, cp)) + step' table _ (InnerLoopDecoding st contents p end statePtr codepointPtr) = do + x <- liftIO $ peekAt p contents + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + return $ + case sv of + 0 -> + Skip $ + YAndC + (unsafeChr cp) + (InnerLoopDecodeInit st contents (p + 1) end) + 12 -> + Skip $ + transliterateOrError + ( + "Streamly.Internal.Data.Stream." + ++ "decodeUtf8ArraysWith: Invalid UTF8" + ++ " codepoint encountered" + ) + (InnerLoopDecodeInit st contents (p + 1) end) + _ -> + Skip + (InnerLoopDecoding st contents (p + 1) end sv cp) + step' _ _ (YAndC c s) = return $ Yield c s + step' _ _ D = return Stop + +-- | Like 'decodeUtf8' but for a chunked stream. It may be slightly faster than +-- flattening the stream and then decoding with 'decodeUtf8'. +{-# INLINE decodeUtf8Chunks #-} +decodeUtf8Chunks :: + MonadIO m + => D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8Chunks = decodeUtf8ArraysWithD TransliterateCodingFailure + +-- | Like 'decodeUtf8\'' but for a chunked stream. It may be slightly faster +-- than flattening the stream and then decoding with 'decodeUtf8\''. +{-# INLINE decodeUtf8Chunks' #-} +decodeUtf8Chunks' :: + MonadIO m + => D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8Chunks' = decodeUtf8ArraysWithD ErrorOnCodingFailure + +-- | Like 'decodeUtf8_' but for a chunked stream. It may be slightly faster +-- than flattening the stream and then decoding with 'decodeUtf8_'. 
+{-# INLINE decodeUtf8Chunks_ #-} +decodeUtf8Chunks_ :: + MonadIO m + => D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8Chunks_ = decodeUtf8ArraysWithD DropOnCodingFailure + +------------------------------------------------------------------------------- +-- Decoding string literals +------------------------------------------------------------------------------- + +-- XXX decodeCString# + +-- | Read UTF-8 encoded bytes as chars from an 'Addr#' until a 0 byte is +-- encountered, the 0 byte is not included in the stream. +-- +-- /Unsafe:/ The caller is responsible for safe addressing. +-- +-- Note that this is completely safe when reading from Haskell string +-- literals because they are guaranteed to be NULL terminated: +-- +-- >>> Stream.fold Fold.toList (Unicode.fromStr# "Haskell"#) +-- "Haskell" +-- +{-# INLINE fromStr# #-} +fromStr# :: MonadIO m => Addr# -> Stream m Char +fromStr# addr = decodeUtf8 $ Stream.fromCString# addr diff --git a/core/src/Streamly/Internal/Unicode/Encode.hs b/core/src/Streamly/Internal/Unicode/Encode.hs new file mode 100644 index 0000000000..df33732d7f --- /dev/null +++ b/core/src/Streamly/Internal/Unicode/Encode.hs @@ -0,0 +1,396 @@ +{-# LANGUAGE CPP #-} +-- | +-- Module : Streamly.Internal.Unicode.Encode +-- Copyright : (c) 2018 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC + +module Streamly.Internal.Unicode.Encode + ( + -- * Setup + -- | To execute the code examples provided in this module in ghci, please + -- run the following commands first. + -- + -- $setup + + -- XXX Use to/from instead of encode/decode for more compact naming. 
+ + -- * Elimination (Encoding) + -- ** Latin1 Encoding to Byte Stream + encodeLatin1 + , encodeLatin1' + , encodeLatin1_ + + -- ** UTF-8 Encoding to Byte Stream + , readCharUtf8' + , readCharUtf8 + , readCharUtf8_ + , encodeUtf8 + , encodeUtf8' + , encodeUtf8_ + , encodeStrings + + -- ** UTF-8 Encoding to Chunk Stream + -- , toUtf8Chunks + -- , toUtf8Chunks' + -- , toUtf8Chunks_ + -- , toUtf8ChunksEndByLn + + -- , toPinnedUtf8Chunks + -- , toPinnedUtf8Chunks' + -- , toPinnedUtf8Chunks_ + -- , toPinnedUtf8ChunksEndByLn + + -- ** UTF-16 Encoding to Byte Stream + , encodeUtf16le' + , encodeUtf16le + + -- * StreamD UTF8 Encoding / Decoding transformations. + , encodeUtf8D + , encodeUtf8D' + , encodeUtf8D_ + + -- * Word16 Utilities + -- , swapByteOrder + + -- * Deprecations + , encodeLatin1Lax + , encodeUtf8Lax + ) +where + +#include "inline.hs" + +-- MachDeps.h includes ghcautoconf.h that defines WORDS_BIGENDIAN for big endian +-- systems. +#include "MachDeps.h" + +import Control.Monad.IO.Class (MonadIO) +import Data.Bits (shiftR, shiftL, (.|.), (.&.)) +import Data.Char (chr, ord) +import Data.Word (Word8, Word16) +import GHC.Base (assert) +import GHC.IO.Encoding.Failure (isSurrogate) +import Streamly.Internal.Data.Array.Type (Array(..)) +import Streamly.Internal.Data.Stream (Stream) +import Streamly.Internal.Data.Stream (Step (..)) +import Streamly.Internal.Data.Unfold.Type (Unfold(..)) + +import qualified Streamly.Data.Unfold as Unfold +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.Data.Stream as D + +#include "DocTestUnicodeStream.hs" + +------------------------------------------------------------------------------- +-- Latin1 encoding +------------------------------------------------------------------------------- + +-- | Encode a stream of Unicode characters to bytes by mapping each character +-- to a byte in 0-255 range. 
Throws an error if the input stream contains +-- characters beyond 255. +-- +{-# INLINE encodeLatin1' #-} +encodeLatin1' :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1' = fmap convert + where + convert c = + let codepoint = ord c + in if codepoint > 255 + then error $ "Streamly.Unicode.encodeLatin1 invalid " ++ + "input char codepoint " ++ show codepoint + else fromIntegral codepoint + +-- XXX Should we instead replace the invalid chars by NUL or whitespace or some +-- other control char? That may affect the perf a bit but may be a better +-- behavior. +-- +-- | Like 'encodeLatin1'' but silently maps input codepoints beyond 255 to +-- arbitrary Latin1 chars in 0-255 range. No error or exception is thrown when +-- such mapping occurs. +-- +{-# INLINE encodeLatin1 #-} +encodeLatin1 :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1 = fmap (fromIntegral . ord) + +-- | Like 'encodeLatin1' but drops the input characters beyond 255. +-- +{-# INLINE encodeLatin1_ #-} +encodeLatin1_ :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1_ = fmap (fromIntegral . ord) . Stream.filter (<= chr 255) + +-- | Same as 'encodeLatin1' +-- +{-# DEPRECATED encodeLatin1Lax "Please use 'encodeLatin1' instead" #-} +{-# INLINE encodeLatin1Lax #-} +encodeLatin1Lax :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1Lax = encodeLatin1 + +------------------------------------------------------------------------------- +-- UTF-8 decoding +------------------------------------------------------------------------------- + +-- Int helps in cheaper conversion from Int to Char +type CodePoint = Int +type DecodeState = Word8 + +-- We can divide the errors in three general categories: +-- * A non-starter was encountered in a begin state +-- * A starter was encountered without completing a codepoint +-- * The last codepoint was not complete (input underflow) +-- +-- Need to separate resumable and non-resumable error. 
In case of non-resumable +-- error we can also provide the failing byte. In case of resumable error the +-- state can be opaque. +-- +data DecodeError = DecodeError !DecodeState !CodePoint deriving Show + +------------------------------------------------------------------------------- +-- One shot decoding +------------------------------------------------------------------------------- + +data CodingFailureMode + = TransliterateCodingFailure + | ErrorOnCodingFailure + | DropOnCodingFailure + deriving (Show) + +-- XXX this is defined in both Encode/Decode modules. + +-- | Swap the byte order of Word16 +-- +-- > swapByteOrder 0xABCD == 0xCDAB +-- > swapByteOrder . swapByteOrder == id +{-# INLINE _swapByteOrder #-} +_swapByteOrder :: Word16 -> Word16 +_swapByteOrder w = (w `shiftL` 8) .|. (w `shiftR` 8) + +------------------------------------------------------------------------------- +-- Encoding Unicode (UTF-8) Characters +------------------------------------------------------------------------------- + +data WList a = WCons !a !(WList a) | WNil + +-- UTF-8 primitives, Lifted from GHC.IO.Encoding.UTF8. + +{-# INLINE ord2 #-} +ord2 :: Char -> (WList Word8) +ord2 c = assert (n >= 0x80 && n <= 0x07ff) (WCons x1 (WCons x2 WNil)) + where + n = ord c + x1 = fromIntegral $ (n `shiftR` 6) + 0xC0 + x2 = fromIntegral $ (n .&. 0x3F) + 0x80 + +{-# INLINE ord3 #-} +ord3 :: Char -> (WList Word8) +ord3 c = assert (n >= 0x0800 && n <= 0xffff) (WCons x1 (WCons x2 (WCons x3 WNil))) + where + n = ord c + x1 = fromIntegral $ (n `shiftR` 12) + 0xE0 + x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 + x3 = fromIntegral $ (n .&. 0x3F) + 0x80 + +{-# INLINE ord4 #-} +ord4 :: Char -> (WList Word8) +ord4 c = assert (n >= 0x10000) (WCons x1 (WCons x2 (WCons x3 (WCons x4 WNil)))) + where + n = ord c + x1 = fromIntegral $ (n `shiftR` 18) + 0xF0 + x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80 + x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 + x4 = fromIntegral $ (n .&. 
0x3F) + 0x80 + +{-# INLINE_NORMAL readCharUtf8With #-} +readCharUtf8With :: Monad m => (WList Word8) -> Unfold m Char Word8 +readCharUtf8With surr = Unfold step inject + + where + + inject c = + return $ case ord c of + x | x <= 0x7F -> fromIntegral x `WCons` WNil + | x <= 0x7FF -> ord2 c + | x <= 0xFFFF -> if isSurrogate c then surr else ord3 c + | otherwise -> ord4 c + + {-# INLINE_LATE step #-} + step WNil = return Stop + step (WCons x xs) = return $ Yield x xs + +{-# INLINE_NORMAL readCharUtf8' #-} +readCharUtf8' :: Monad m => Unfold m Char Word8 +readCharUtf8' = + readCharUtf8With $ + error "Streamly.Internal.Unicode.readCharUtf8': Encountered a surrogate" + +-- More yield points improve performance, but I am not sure if they can cause +-- too much code bloat or some trouble with fusion. So keeping only two yield +-- points for now, one for the ascii chars (fast path) and one for all other +-- paths (slow path). +{-# INLINE_NORMAL encodeUtf8D' #-} +encodeUtf8D' :: Monad m => D.Stream m Char -> D.Stream m Word8 +encodeUtf8D' = D.unfoldEach readCharUtf8' + +-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. When +-- any invalid character (U+D800-U+D8FF) is encountered in the input stream the +-- function errors out. +-- +{-# INLINE encodeUtf8' #-} +encodeUtf8' :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8' = encodeUtf8D' + +{-# INLINE_NORMAL readCharUtf8 #-} +readCharUtf8 :: Monad m => Unfold m Char Word8 +readCharUtf8 = readCharUtf8With $ WCons 239 (WCons 191 (WCons 189 WNil)) + +-- | See section "3.9 Unicode Encoding Forms" in +-- https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf +-- +{-# INLINE_NORMAL encodeUtf8D #-} +encodeUtf8D :: Monad m => D.Stream m Char -> D.Stream m Word8 +encodeUtf8D = D.unfoldEach readCharUtf8 + +-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. 
Any +-- Invalid characters (U+D800-U+D8FF) in the input stream are replaced by the +-- Unicode replacement character U+FFFD. +-- +{-# INLINE encodeUtf8 #-} +encodeUtf8 :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8 = encodeUtf8D + +{-# INLINE_NORMAL readCharUtf8_ #-} +readCharUtf8_ :: Monad m => Unfold m Char Word8 +readCharUtf8_ = readCharUtf8With WNil + +{-# INLINE_NORMAL encodeUtf8D_ #-} +encodeUtf8D_ :: Monad m => D.Stream m Char -> D.Stream m Word8 +encodeUtf8D_ = D.unfoldEach readCharUtf8_ + +-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any +-- Invalid characters (U+D800-U+D8FF) in the input stream are dropped. +-- +{-# INLINE encodeUtf8_ #-} +encodeUtf8_ :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8_ = encodeUtf8D_ + +-- | Same as 'encodeUtf8' +-- +{-# DEPRECATED encodeUtf8Lax "Please use 'encodeUtf8' instead" #-} +{-# INLINE encodeUtf8Lax #-} +encodeUtf8Lax :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8Lax = encodeUtf8 + +------------------------------------------------------------------------------- +-- Encoding to Utf16 +------------------------------------------------------------------------------- + +{-# INLINE utf16LowSurrogate #-} +utf16LowSurrogate :: Word16 +utf16LowSurrogate = 0xDC00 + +{-# INLINE utf16HighSurrogate #-} +utf16HighSurrogate :: Word16 +utf16HighSurrogate = 0xD800 + +{-# INLINE_NORMAL readCharUtf16With #-} +readCharUtf16With :: Monad m => WList Word16 -> Unfold m Char Word16 +readCharUtf16With invalidReplacement = Unfold step inject + + where + + inject c = + return $ case ord c of + x | x < 0xD800 -> fromIntegral x `WCons` WNil + | x > 0xDFFF && x <= 0xFFFF -> fromIntegral x `WCons` WNil + | x >= 0x10000 && x <= 0x10FFFF -> + let u = x - 0x10000 -- 20 bits + h = utf16HighSurrogate + + fromIntegral (u `shiftR` 10) -- 10 bits + l = utf16LowSurrogate + + fromIntegral (u .&. 
0x3FF) -- 10 bits + in WCons h $ WCons l WNil + | otherwise -> invalidReplacement + + {-# INLINE_LATE step #-} + step WNil = return Stop + step (WCons x xs) = return $ Yield x xs + +{-# INLINE encodeUtf16' #-} +encodeUtf16' :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16' = D.unfoldEach (readCharUtf16With errString) + where + errString = + error + $ "Streamly.Internal.Unicode.encodeUtf16': Encountered an \ + invalid character" + +{-# INLINE encodeUtf16 #-} +encodeUtf16 :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16 = D.unfoldEach (readCharUtf16With WNil) + +-- | Similar to 'encodeUtf16le' but throws an error if any invalid character is +-- encountered. +-- +{-# INLINE encodeUtf16le' #-} +encodeUtf16le' :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16le' = +#ifdef WORDS_BIGENDIAN + fmap _swapByteOrder . +#endif + encodeUtf16' + +-- | Encode a stream of Unicode characters to a UTF-16 encoded stream. Any +-- invalid characters in the input stream are replaced by the Unicode +-- replacement character U+FFFD. +-- +-- The resulting Word16s are encoded in little-endian byte order. +-- +{-# INLINE encodeUtf16le #-} +encodeUtf16le :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16le = +#ifdef WORDS_BIGENDIAN + fmap _swapByteOrder . +#endif + encodeUtf16 + +------------------------------------------------------------------------------- +-- Encode streams of containers +------------------------------------------------------------------------------- + +-- | Encode a container to @Array Word8@ provided an unfold to covert it to a +-- Char stream and an encoding function. +-- +-- /Internal/ +{-# INLINE encodeObject #-} +encodeObject :: MonadIO m => + (Stream m Char -> Stream m Word8) + -> Unfold m a Char + -> a + -> m (Array Word8) +encodeObject encode u = Stream.fold Array.create . encode . Stream.unfold u + +-- | Encode a stream of container objects using the supplied encoding scheme. +-- Each object is encoded as an @Array Word8@. 
+-- +-- /Internal/ +{-# INLINE encodeObjects #-} +encodeObjects :: MonadIO m => + (Stream m Char -> Stream m Word8) + -> Unfold m a Char + -> Stream m a + -> Stream m (Array Word8) +encodeObjects encode u = Stream.mapM (encodeObject encode u) + +-- | Encode a stream of 'String' using the supplied encoding scheme. Each +-- string is encoded as an @Array Word8@. +-- +{-# INLINE encodeStrings #-} +encodeStrings :: MonadIO m => + (Stream m Char -> Stream m Word8) + -> Stream m String + -> Stream m (Array Word8) +encodeStrings encode = encodeObjects encode Unfold.fromList diff --git a/core/src/Streamly/Internal/Unicode/Stream.hs b/core/src/Streamly/Internal/Unicode/Stream.hs index 04ca5718ec..a444b93184 100644 --- a/core/src/Streamly/Internal/Unicode/Stream.hs +++ b/core/src/Streamly/Internal/Unicode/Stream.hs @@ -2,7 +2,6 @@ -- | -- Module : Streamly.Internal.Unicode.Stream -- Copyright : (c) 2018 Composewell Technologies --- (c) Bjoern Hoehrmann 2008-2009 -- -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -18,65 +17,9 @@ module Streamly.Internal.Unicode.Stream -- -- $setup - -- XXX Use to/from instead of encode/decode for more compact naming. + module Streamly.Internal.Unicode.Encode + , module Streamly.Internal.Unicode.Decode - -- * Construction (Decoding) - decodeLatin1 - - -- ** UTF-8 Byte Stream Decoding - , CodingFailureMode(..) - , writeCharUtf8' - , parseCharUtf8With - , decodeUtf8 - , decodeUtf8' - , decodeUtf8_ - - -- ** UTF-16 Byte Stream Decoding - , decodeUtf16le' - , decodeUtf16le - - -- ** Resumable UTF-8 Byte Stream Decoding - , DecodeError(..) 
- , DecodeState - , CodePoint - , decodeUtf8Either - , resumeDecodeUtf8Either - - -- ** UTF-8 Array Stream Decoding - , decodeUtf8Chunks - , decodeUtf8Chunks' - , decodeUtf8Chunks_ - -- , fromUtf8ChunksEndByLn - - -- * Elimination (Encoding) - -- ** Latin1 Encoding to Byte Stream - , encodeLatin1 - , encodeLatin1' - , encodeLatin1_ - - -- ** UTF-8 Encoding to Byte Stream - , readCharUtf8' - , readCharUtf8 - , readCharUtf8_ - , encodeUtf8 - , encodeUtf8' - , encodeUtf8_ - , encodeStrings - - -- ** UTF-8 Encoding to Chunk Stream - -- , toUtf8Chunks - -- , toUtf8Chunks' - -- , toUtf8Chunks_ - -- , toUtf8ChunksEndByLn - - -- , toPinnedUtf8Chunks - -- , toPinnedUtf8Chunks' - -- , toPinnedUtf8Chunks_ - -- , toPinnedUtf8ChunksEndByLn - - -- ** UTF-16 Encoding to Byte Stream - , encodeUtf16le' - , encodeUtf16le {- -- * Operations on character strings , strip -- (dropAround isSpace) @@ -89,1206 +32,30 @@ module Streamly.Internal.Unicode.Stream , words -- foldWords , unlines -- unfoldLines , unwords -- unfoldWords - - -- * StreamD UTF8 Encoding / Decoding transformations. - , decodeUtf8D - , decodeUtf8D' - , decodeUtf8D_ - , encodeUtf8D - , encodeUtf8D' - , encodeUtf8D_ - , decodeUtf8EitherD - , resumeDecodeUtf8EitherD - - -- * Decoding String Literals - , fromStr# - - -- * Word16 Utilities - , mkEvenW8Chunks - , swapByteOrder - - -- * Deprecations - , decodeUtf8Lax - , encodeLatin1Lax - , encodeUtf8Lax ) where #include "inline.hs" --- MachDeps.h includes ghcautoconf.h that defines WORDS_BIGENDIAN for big endian --- systems. 
-#include "MachDeps.h" - -import Control.Monad (void) -import Control.Monad.IO.Class (MonadIO, liftIO) -import Data.Bits (shiftR, shiftL, (.|.), (.&.)) -import Data.Char (chr, ord) +import Control.Monad.IO.Class (MonadIO) +import Data.Char (ord) #if MIN_VERSION_base(4,17,0) import Data.Char (generalCategory, GeneralCategory(Space)) #endif -import Data.Word (Word8, Word16) -import Foreign.Marshal.Alloc (mallocBytes) -import Foreign.Storable (Storable(..)) -#ifndef __GHCJS__ -import Fusion.Plugin.Types (Fuse(..)) -#endif -import GHC.Base (assert, unsafeChr) -import GHC.Exts (Addr#) -import GHC.IO.Encoding.Failure (isSurrogate) -import GHC.Ptr (Ptr (..), plusPtr) -import System.IO.Unsafe (unsafePerformIO) -import Streamly.Internal.Data.Array.Type (Array(..)) -import Streamly.Internal.Data.MutByteArray.Type (MutByteArray) import Streamly.Internal.Data.Fold (Fold) -import Streamly.Internal.Data.Parser (Parser) import Streamly.Internal.Data.Stream (Stream) -import Streamly.Internal.Data.Stream (Step (..)) -import Streamly.Internal.Data.SVar.Type (adaptState) -import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) -import Streamly.Internal.Data.Unbox (Unbox(peekAt)) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) -import Streamly.Internal.System.IO (unsafeInlineIO) import qualified Streamly.Data.Fold as Fold -import qualified Streamly.Data.Unfold as Unfold -import qualified Streamly.Internal.Data.Array as Array -import qualified Streamly.Internal.Data.Parser as Parser (Parser) -import qualified Streamly.Internal.Data.Parser as ParserD import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.Data.Stream as D import Prelude hiding (lines, words, unlines, unwords) -#include "DocTestUnicodeStream.hs" - -------------------------------------------------------------------------------- --- Latin1 decoding -------------------------------------------------------------------------------- - --- | Decode a stream of bytes to Unicode 
characters by mapping each byte to a --- corresponding Unicode 'Char' in 0-255 range. --- -{-# INLINE decodeLatin1 #-} -decodeLatin1 :: Monad m => Stream m Word8 -> Stream m Char -decodeLatin1 = fmap (unsafeChr . fromIntegral) - -------------------------------------------------------------------------------- --- Latin1 encoding -------------------------------------------------------------------------------- - --- | Encode a stream of Unicode characters to bytes by mapping each character --- to a byte in 0-255 range. Throws an error if the input stream contains --- characters beyond 255. --- -{-# INLINE encodeLatin1' #-} -encodeLatin1' :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1' = fmap convert - where - convert c = - let codepoint = ord c - in if codepoint > 255 - then error $ "Streamly.Unicode.encodeLatin1 invalid " ++ - "input char codepoint " ++ show codepoint - else fromIntegral codepoint - --- XXX Should we instead replace the invalid chars by NUL or whitespace or some --- other control char? That may affect the perf a bit but may be a better --- behavior. --- --- | Like 'encodeLatin1'' but silently maps input codepoints beyond 255 to --- arbitrary Latin1 chars in 0-255 range. No error or exception is thrown when --- such mapping occurs. --- -{-# INLINE encodeLatin1 #-} -encodeLatin1 :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1 = fmap (fromIntegral . ord) - --- | Like 'encodeLatin1' but drops the input characters beyond 255. --- -{-# INLINE encodeLatin1_ #-} -encodeLatin1_ :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1_ = fmap (fromIntegral . ord) . 
Stream.filter (<= chr 255) - --- | Same as 'encodeLatin1' --- -{-# DEPRECATED encodeLatin1Lax "Please use 'encodeLatin1' instead" #-} -{-# INLINE encodeLatin1Lax #-} -encodeLatin1Lax :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1Lax = encodeLatin1 - -------------------------------------------------------------------------------- --- UTF-8 decoding -------------------------------------------------------------------------------- - --- Int helps in cheaper conversion from Int to Char -type CodePoint = Int -type DecodeState = Word8 - --- We can divide the errors in three general categories: --- * A non-starter was encountered in a begin state --- * A starter was encountered without completing a codepoint --- * The last codepoint was not complete (input underflow) --- --- Need to separate resumable and non-resumable error. In case of non-resumable --- error we can also provide the failing byte. In case of resumable error the --- state can be opaque. --- -data DecodeError = DecodeError !DecodeState !CodePoint deriving Show - --- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. - --- XXX Use names decodeSuccess = 0, decodeFailure = 12 - -decodeTable :: [Word8] -decodeTable = [ - -- The first part of the table maps bytes to character classes that - -- to reduce the size of the transition table and create bitmasks. 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - - -- The second part is a transition table that maps a combination - -- of a state of the automaton and a character class to a state. - 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, - 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, - 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, - 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, - 12,36,12,12,12,12,12,12,12,12,12,12 - ] - -{-# INLINE utf8dLength #-} -utf8dLength :: Int -utf8dLength = length decodeTable - --- | We do not want to garbage collect this and free the memory, we want to --- keep this persistent. We don't know how to do that with GHC without having a --- reference in some global structure. So we use a hack, use mallocBytes so --- that the GC has no way to free it. -{-# NOINLINE utf8d #-} -utf8d :: Ptr Word8 -utf8d = unsafePerformIO $ do - let size = utf8dLength - p <- liftIO $ mallocBytes size - void $ D.fold - (Fold.foldlM' (\b a -> poke b a >> return (b `plusPtr` 1)) (return p)) - (D.fromList decodeTable) - return p - --- | Return element at the specified index without checking the bounds. --- and without touching the foreign ptr. -{-# INLINE_NORMAL unsafePeekElemOff #-} -unsafePeekElemOff :: forall a. Storable a => Ptr a -> Int -> a -unsafePeekElemOff p i = - let !x = unsafeInlineIO $ peekElemOff p i - in x - --- XXX We can use a fromPtr stream to implement it. 
-{-# INLINE showMemory #-} -showMemory :: - forall a. (Show a, Storable a) => Ptr a -> Ptr a -> String -showMemory cur end - | cur < end = - let cur1 = cur `plusPtr` sizeOf (undefined :: a) - in show (unsafeInlineIO $ peek cur) ++ " " ++ showMemory cur1 end -showMemory _ _ = "" - --- decode is split into two separate cases to avoid branching instructions. --- From the higher level flow we already know which case we are in so we can --- call the appropriate decode function. --- --- When the state is 0 -{-# INLINE decode0 #-} -decode0 :: Ptr Word8 -> Word8 -> Tuple' DecodeState CodePoint -decode0 table byte = - let !t = table `unsafePeekElemOff` fromIntegral byte - !codep' = (0xff `shiftR` fromIntegral t) .&. fromIntegral byte - !state' = table `unsafePeekElemOff` (256 + fromIntegral t) - in assert ((byte > 0x7f || error showByte) - && (state' /= 0 || error (showByte ++ showTable))) - (Tuple' state' codep') - - where - - utf8tableEnd = table `plusPtr` 364 - showByte = "Streamly: decode0: byte: " ++ show byte - showTable = " table: " ++ showMemory table utf8tableEnd - --- When the state is not 0 -{-# INLINE decode1 #-} -decode1 - :: Ptr Word8 - -> DecodeState - -> CodePoint - -> Word8 - -> Tuple' DecodeState CodePoint -decode1 table state codep byte = - -- Remember codep is Int type! - -- Can it be unsafe to convert the resulting Int to Char? - let !t = table `unsafePeekElemOff` fromIntegral byte - !codep' = (fromIntegral byte .&. 0x3f) .|. 
(codep `shiftL` 6) - !state' = table `unsafePeekElemOff` - (256 + fromIntegral state + fromIntegral t) - in assert (codep' <= 0x10FFFF - || error (showByte ++ showState state codep)) - (Tuple' state' codep') - where - - utf8tableEnd = table `plusPtr` 364 - showByte = "Streamly: decode1: byte: " ++ show byte - showState st cp = - " state: " ++ show st ++ - " codepoint: " ++ show cp ++ - " table: " ++ showMemory table utf8tableEnd - -------------------------------------------------------------------------------- --- Resumable UTF-8 decoding -------------------------------------------------------------------------------- - --- Strangely, GHCJS hangs linking template-haskell with this -#ifndef __GHCJS__ -{-# ANN type UTF8DecodeState Fuse #-} -#endif -data UTF8DecodeState s a - = UTF8DecodeInit s - | UTF8DecodeInit1 s Word8 - | UTF8DecodeFirst s Word8 - | UTF8Decoding s !DecodeState !CodePoint - | YieldAndContinue a (UTF8DecodeState s a) - | Done - -{-# INLINE_NORMAL resumeDecodeUtf8EitherD #-} -resumeDecodeUtf8EitherD - :: Monad m - => DecodeState - -> CodePoint - -> D.Stream m Word8 - -> D.Stream m (Either DecodeError Char) -resumeDecodeUtf8EitherD dst codep (D.Stream step state) = - let stt = - if dst == 0 - then UTF8DecodeInit state - else UTF8Decoding state dst codep - in D.Stream (step' utf8d) stt - where - {-# INLINE_LATE step' #-} - step' _ gst (UTF8DecodeInit st) = do - r <- step (adaptState gst) st - return $ case r of - Yield x s -> Skip (UTF8DecodeInit1 s x) - Skip s -> Skip (UTF8DecodeInit s) - Stop -> Skip Done - - step' _ _ (UTF8DecodeInit1 st x) = do - -- Note: It is important to use a ">" instead of a "<=" test - -- here for GHC to generate code layout for default branch - -- prediction for the common case. This is fragile and might - -- change with the compiler versions, we need a more reliable - -- "likely" primitive to control branch predication. 
- case x > 0x7f of - False -> - return $ Skip $ YieldAndContinue - (Right $ unsafeChr (fromIntegral x)) - (UTF8DecodeInit st) - -- Using a separate state here generates a jump to a - -- separate code block in the core which seems to perform - -- slightly better for the non-ascii case. - True -> return $ Skip $ UTF8DecodeFirst st x - - -- XXX should we merge it with UTF8DecodeInit1? - step' table _ (UTF8DecodeFirst st x) = do - let (Tuple' sv cp) = decode0 table x - return $ - case sv of - 12 -> - Skip $ YieldAndContinue (Left $ DecodeError 0 (fromIntegral x)) - (UTF8DecodeInit st) - 0 -> error "unreachable state" - _ -> Skip (UTF8Decoding st sv cp) - - -- We recover by trying the new byte x a starter of a new codepoint. - -- XXX on error need to report the next byte "x" as well. - -- XXX need to use the same recovery in array decoding routine as well - step' table gst (UTF8Decoding st statePtr codepointPtr) = do - r <- step (adaptState gst) st - case r of - Yield x s -> do - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - return $ - case sv of - 0 -> Skip $ YieldAndContinue (Right $ unsafeChr cp) - (UTF8DecodeInit s) - 12 -> - Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) - (UTF8DecodeInit1 s x) - _ -> Skip (UTF8Decoding s sv cp) - Skip s -> return $ Skip (UTF8Decoding s statePtr codepointPtr) - Stop -> return $ Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) Done - - step' _ _ (YieldAndContinue c s) = return $ Yield c s - step' _ _ Done = return Stop - --- XXX We can use just one API, and define InitState = 0 and InitCodePoint = 0 --- to use as starting state. 
--- -{-# INLINE_NORMAL decodeUtf8EitherD #-} -decodeUtf8EitherD :: Monad m - => D.Stream m Word8 -> D.Stream m (Either DecodeError Char) -decodeUtf8EitherD = resumeDecodeUtf8EitherD 0 0 - --- | --- --- /Pre-release/ -{-# INLINE decodeUtf8Either #-} -decodeUtf8Either :: Monad m - => Stream m Word8 -> Stream m (Either DecodeError Char) -decodeUtf8Either = decodeUtf8EitherD - --- | --- --- /Pre-release/ -{-# INLINE resumeDecodeUtf8Either #-} -resumeDecodeUtf8Either - :: Monad m - => DecodeState - -> CodePoint - -> Stream m Word8 - -> Stream m (Either DecodeError Char) -resumeDecodeUtf8Either = resumeDecodeUtf8EitherD - -------------------------------------------------------------------------------- --- One shot decoding -------------------------------------------------------------------------------- - -data CodingFailureMode - = TransliterateCodingFailure - | ErrorOnCodingFailure - | DropOnCodingFailure - deriving (Show) - -{-# INLINE replacementChar #-} -replacementChar :: Char -replacementChar = '\xFFFD' - -data UTF8CharDecodeState a - = UTF8CharDecodeInit - | UTF8CharDecoding !DecodeState !CodePoint - -{-# INLINE parseCharUtf8WithD #-} -parseCharUtf8WithD :: - Monad m => CodingFailureMode -> ParserD.Parser Word8 m Char -parseCharUtf8WithD cfm = ParserD.Parser (step' utf8d) initial extract - - where - - prefix = "Streamly.Internal.Data.Stream.parseCharUtf8WithD:" - - {-# INLINE initial #-} - initial = return $ ParserD.IPartial UTF8CharDecodeInit - - handleError err souldBackTrack = - case cfm of - ErrorOnCodingFailure -> ParserD.SError err - TransliterateCodingFailure -> - case souldBackTrack of - True -> ParserD.SDone 0 replacementChar - False -> ParserD.SDone 1 replacementChar - DropOnCodingFailure -> - case souldBackTrack of - True -> ParserD.SContinue 0 UTF8CharDecodeInit - False -> ParserD.SContinue 1 UTF8CharDecodeInit - - {-# INLINE step' #-} - step' table UTF8CharDecodeInit x = - -- Note: It is important to use a ">" instead of a "<=" test - -- here for GHC 
to generate code layout for default branch - -- prediction for the common case. This is fragile and might - -- change with the compiler versions, we need a more reliable - -- "likely" primitive to control branch predication. - return $ case x > 0x7f of - False -> ParserD.SDone 1 $ unsafeChr $ fromIntegral x - True -> - let (Tuple' sv cp) = decode0 table x - in case sv of - 12 -> - let msg = prefix - ++ "Invalid first UTF8 byte" ++ show x - in handleError msg False - 0 -> error $ prefix ++ "unreachable state" - _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) - - step' table (UTF8CharDecoding statePtr codepointPtr) x = return $ - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - in case sv of - 0 -> ParserD.SDone 1 $ unsafeChr cp - 12 -> - let msg = prefix - ++ "Invalid subsequent UTF8 byte" - ++ show x - ++ "in state" - ++ show statePtr - ++ "accumulated value" - ++ show codepointPtr - in handleError msg True - _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) - - {-# INLINE extract #-} - extract UTF8CharDecodeInit = error $ prefix ++ "Not enough input" - extract (UTF8CharDecoding _ _) = - case cfm of - ErrorOnCodingFailure -> - return $ ParserD.FError $ prefix ++ "Not enough input" - TransliterateCodingFailure -> - return (ParserD.FDone 0 replacementChar) - -- XXX We shouldn't error out here. There is no way to represent an - -- empty parser result unless we return a "Maybe" type. - DropOnCodingFailure -> error $ prefix ++ "Not enough input" - --- XXX This should ideally accept a "CodingFailureMode" and perform appropriate --- error handling. This isn't possible now as "TransliterateCodingFailure"'s --- workflow requires backtracking 1 element. This can be revisited once "Fold" --- supports backtracking. -{-# INLINE writeCharUtf8' #-} -writeCharUtf8' :: Monad m => Parser Word8 m Char -writeCharUtf8' = parseCharUtf8WithD ErrorOnCodingFailure - --- XXX The initial idea was to have "parseCharUtf8" and offload the error --- handling to another parser. 
So, say we had "parseCharUtf8'", --- --- >>> parseCharUtf8Smart = parseCharUtf8' <|> Parser.fromPure replacementChar --- --- But unfortunately parseCharUtf8Smart used in conjunction with "parseMany" - --- that is "parseMany parseCharUtf8Smart" on a stream causes the heap to --- overflow. Even a heap size of 500 MB was not sufficient. --- --- This needs to be investigated futher. -{-# INLINE parseCharUtf8With #-} -parseCharUtf8With :: - Monad m => CodingFailureMode -> Parser.Parser Word8 m Char -parseCharUtf8With = parseCharUtf8WithD - --- XXX write it as a parser and use parseMany to decode a stream, need to check --- if that preserves the same performance. Or we can use a resumable parser --- that parses a chunk at a time. --- --- XXX Implement this in terms of decodeUtf8Either. Need to make sure that --- decodeUtf8Either preserves the performance characterstics. --- -{-# INLINE_NORMAL decodeUtf8WithD #-} -decodeUtf8WithD :: Monad m - => CodingFailureMode -> D.Stream m Word8 -> D.Stream m Char -decodeUtf8WithD cfm (D.Stream step state) = - D.Stream (step' utf8d) (UTF8DecodeInit state) - - where - - prefix = "Streamly.Internal.Data.Stream.decodeUtf8With: " - - {-# INLINE handleError #-} - handleError e s = - case cfm of - ErrorOnCodingFailure -> error e - TransliterateCodingFailure -> YieldAndContinue replacementChar s - DropOnCodingFailure -> s - - {-# INLINE handleUnderflow #-} - handleUnderflow = - case cfm of - ErrorOnCodingFailure -> error $ prefix ++ "Not enough input" - TransliterateCodingFailure -> YieldAndContinue replacementChar Done - DropOnCodingFailure -> Done - - {-# INLINE_LATE step' #-} - step' _ gst (UTF8DecodeInit st) = do - r <- step (adaptState gst) st - return $ case r of - Yield x s -> Skip (UTF8DecodeInit1 s x) - Skip s -> Skip (UTF8DecodeInit s) - Stop -> Skip Done - - step' _ _ (UTF8DecodeInit1 st x) = do - -- Note: It is important to use a ">" instead of a "<=" test - -- here for GHC to generate code layout for default branch - -- 
prediction for the common case. This is fragile and might - -- change with the compiler versions, we need a more reliable - -- "likely" primitive to control branch predication. - case x > 0x7f of - False -> - return $ Skip $ YieldAndContinue - (unsafeChr (fromIntegral x)) - (UTF8DecodeInit st) - -- Using a separate state here generates a jump to a - -- separate code block in the core which seems to perform - -- slightly better for the non-ascii case. - True -> return $ Skip $ UTF8DecodeFirst st x - - -- XXX should we merge it with UTF8DecodeInit1? - step' table _ (UTF8DecodeFirst st x) = do - let (Tuple' sv cp) = decode0 table x - return $ - case sv of - 12 -> - let msg = prefix ++ "Invalid first UTF8 byte " ++ show x - in Skip $ handleError msg (UTF8DecodeInit st) - 0 -> error "unreachable state" - _ -> Skip (UTF8Decoding st sv cp) - - -- We recover by trying the new byte x as a starter of a new codepoint. - -- XXX need to use the same recovery in array decoding routine as well - step' table gst (UTF8Decoding st statePtr codepointPtr) = do - r <- step (adaptState gst) st - case r of - Yield x s -> do - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - return $ case sv of - 0 -> Skip $ YieldAndContinue - (unsafeChr cp) (UTF8DecodeInit s) - 12 -> - let msg = prefix - ++ "Invalid subsequent UTF8 byte " - ++ show x - ++ " in state " - ++ show statePtr - ++ " accumulated value " - ++ show codepointPtr - in Skip $ handleError msg (UTF8DecodeInit1 s x) - _ -> Skip (UTF8Decoding s sv cp) - Skip s -> return $ - Skip (UTF8Decoding s statePtr codepointPtr) - Stop -> return $ Skip handleUnderflow - - step' _ _ (YieldAndContinue c s) = return $ Yield c s - step' _ _ Done = return Stop - -{-# INLINE decodeUtf8D #-} -decodeUtf8D :: Monad m => D.Stream m Word8 -> D.Stream m Char -decodeUtf8D = decodeUtf8WithD TransliterateCodingFailure - --- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. 
--- Any invalid codepoint encountered is replaced with the unicode replacement --- character. --- -{-# INLINE decodeUtf8 #-} -decodeUtf8 :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8 = decodeUtf8D - -{-# INLINE decodeUtf8D' #-} -decodeUtf8D' :: Monad m => D.Stream m Word8 -> D.Stream m Char -decodeUtf8D' = decodeUtf8WithD ErrorOnCodingFailure - --- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. --- The function throws an error if an invalid codepoint is encountered. --- -{-# INLINE decodeUtf8' #-} -decodeUtf8' :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8' = decodeUtf8D' +import Streamly.Internal.Unicode.Encode +import Streamly.Internal.Unicode.Decode -{-# INLINE decodeUtf8D_ #-} -decodeUtf8D_ :: Monad m => D.Stream m Word8 -> D.Stream m Char -decodeUtf8D_ = decodeUtf8WithD DropOnCodingFailure - --- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. --- Any invalid codepoint encountered is dropped. --- -{-# INLINE decodeUtf8_ #-} -decodeUtf8_ :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8_ = decodeUtf8D_ - --- | Same as 'decodeUtf8' --- -{-# DEPRECATED decodeUtf8Lax "Please use 'decodeUtf8' instead" #-} -{-# INLINE decodeUtf8Lax #-} -decodeUtf8Lax :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8Lax = decodeUtf8 - -------------------------------------------------------------------------------- --- Decoding Utf16 -------------------------------------------------------------------------------- - -data MkEvenW8ChunksState s w8 arr - = MECSInit s - | MECSBuffer w8 s - | MECSYieldAndInit arr s - | MECSYieldAndBuffer arr w8 s - --- | Ensure chunks of even length. This can be used before casting the arrays to --- Word16. Use this API when interacting with external data. --- --- The chunks are split and merged accordingly to create arrays of even length. --- If the sum of length of all the arrays in the stream is odd then the trailing --- byte of the last array is dropped. 
--- -{-# INLINE_NORMAL mkEvenW8Chunks #-} -mkEvenW8Chunks :: Monad m => Stream m (Array Word8) -> Stream m (Array Word8) -mkEvenW8Chunks (D.Stream step state) = D.Stream step1 (MECSInit state) - - where - - {-# INLINE_LATE step1 #-} - step1 gst (MECSInit st) = do - r <- step (adaptState gst) st - return $ - case r of - Yield arr st1 -> - let len = Array.length arr - in if (len .&. 1) == 1 - then let arr1 = Array.unsafeSliceOffLen 0 (len - 1) arr - remElem = Array.unsafeGetIndex (len - 1) arr - in Yield arr1 (MECSBuffer remElem st1) - else Yield arr (MECSInit st1) - Skip s -> Skip (MECSInit s) - Stop -> Stop - step1 gst (MECSBuffer remElem st) = do - r <- step (adaptState gst) st - return $ - case r of - Yield arr st1 | Array.length arr == 0 -> - Skip (MECSBuffer remElem st1) - Yield arr st1 | Array.length arr == 1 -> - let fstElem = Array.unsafeGetIndex 0 arr - w16 = Array.fromList [remElem, fstElem] - in Yield w16 (MECSInit st1) - Yield arr st1 -> - let len = Array.length arr - in if (len .&. 1) == 1 - then let arr1 = Array.unsafeSliceOffLen 1 (len - 1) arr - fstElem = Array.unsafeGetIndex 0 arr - w16 = Array.fromList [remElem, fstElem] - in Yield w16 (MECSYieldAndInit arr1 st1) - else let arr1 = Array.unsafeSliceOffLen 1 (len - 2) arr - fstElem = Array.unsafeGetIndex 0 arr - lstElem = Array.unsafeGetIndex (len - 1) arr - w16 = Array.fromList [remElem, fstElem] - in Yield w16 - (MECSYieldAndBuffer arr1 lstElem st1) - Skip s -> Skip (MECSBuffer remElem s) - Stop -> Stop -- Here the last Word8 is lost - step1 _ (MECSYieldAndInit arr st) = - pure $ Yield arr (MECSInit st) - step1 _ (MECSYieldAndBuffer arr lastElem st) = - pure $ Yield arr (MECSBuffer lastElem st) - --- | Swap the byte order of Word16 --- --- > swapByteOrder 0xABCD == 0xCDAB --- > swapByteOrder . swapByteOrder == id -{-# INLINE swapByteOrder #-} -swapByteOrder :: Word16 -> Word16 -swapByteOrder w = (w `shiftL` 8) .|. 
(w `shiftR` 8) - -data DecodeUtf16WithState w c s - = U16NoSurrogate s - | U16HighSurrogate w s - | U16D - | U16YAndC c (DecodeUtf16WithState w c s) - -{-# INLINE_NORMAL decodeUtf16With #-} -decodeUtf16With :: - Monad m - => CodingFailureMode - -> D.Stream m Word16 - -> D.Stream m Char -decodeUtf16With cfm (D.Stream step state) = - D.Stream step1 (U16NoSurrogate state) - - where - - prefix = "Streamly.Internal.Unicode.Stream.decodeUtf16With: " - - {-# INLINE combineSurrogates #-} - combineSurrogates hi lo = - let first10 = fromIntegral (hi - utf16HighSurrogate) `shiftL` 10 - second10 = fromIntegral (lo - utf16LowSurrogate) - in unsafeChr (0x10000 + (first10 .|. second10)) - - {-# INLINE transliterateOrError #-} - transliterateOrError e s = - case cfm of - ErrorOnCodingFailure -> error e - TransliterateCodingFailure -> U16YAndC replacementChar s - DropOnCodingFailure -> s - - {-# INLINE inputUnderflow #-} - inputUnderflow = - case cfm of - ErrorOnCodingFailure -> error $ prefix ++ "Input Underflow" - TransliterateCodingFailure -> U16YAndC replacementChar U16D - DropOnCodingFailure -> U16D - - {-# INLINE_LATE step1 #-} - step1 gst (U16NoSurrogate st) = do - r <- step (adaptState gst) st - pure $ - case r of - Yield x st1 - | x < 0xD800 || x > 0xDFFF -> - Yield (unsafeChr (fromIntegral x)) (U16NoSurrogate st1) - | x >= 0xD800 && x <= 0xDBFF -> - Skip (U16HighSurrogate x st1) - | otherwise -> - let msg = prefix - ++ "Invalid first UTF16 word " ++ show x - in Skip $ - transliterateOrError msg (U16NoSurrogate st1) - Skip st1 -> Skip (U16NoSurrogate st1) - Stop -> Stop - step1 gst (U16HighSurrogate hi st) = do - r <- step (adaptState gst) st - pure $ - case r of - Yield x st1 - | x >= 0xDC00 && x <= 0xDFFF -> - Yield (combineSurrogates hi x) (U16NoSurrogate st1) - | otherwise -> - let msg = prefix - ++ "Invalid subsequent UTF16 word " ++ show x - ++ " in state " ++ show hi - in Skip $ - transliterateOrError msg (U16NoSurrogate st1) - Skip st1 -> Skip (U16HighSurrogate hi 
st1) - Stop -> Skip inputUnderflow - step1 _ (U16YAndC x st) = pure $ Yield x st - step1 _ U16D = pure Stop - -{-# INLINE decodeUtf16' #-} -decodeUtf16' :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16' = decodeUtf16With ErrorOnCodingFailure - -{-# INLINE decodeUtf16 #-} -decodeUtf16 :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16 = decodeUtf16With TransliterateCodingFailure - --- | Similar to 'decodeUtf16le' but throws an error if an invalid codepoint is --- encountered. --- -{-# INLINE decodeUtf16le' #-} -decodeUtf16le' :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16le' = - decodeUtf16' -#ifdef WORDS_BIGENDIAN - . fmap swapByteOrder -#endif - --- | Decode a UTF-16 encoded stream to a stream of Unicode characters. Any --- invalid codepoint encountered is replaced with the unicode replacement --- character. --- --- The Word16s are expected to be in the little-endian byte order. --- -{-# INLINE decodeUtf16le #-} -decodeUtf16le :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16le = - decodeUtf16 -#ifdef WORDS_BIGENDIAN - . 
fmap swapByteOrder -#endif - -------------------------------------------------------------------------------- --- Decoding Array Streams -------------------------------------------------------------------------------- - -#ifndef __GHCJS__ -{-# ANN type FlattenState Fuse #-} -#endif -data FlattenState s - = OuterLoop s !(Maybe (DecodeState, CodePoint)) - | InnerLoopDecodeInit s MutByteArray !Int !Int - | InnerLoopDecodeFirst s MutByteArray !Int !Int Word8 - | InnerLoopDecoding s MutByteArray !Int !Int - !DecodeState !CodePoint - | YAndC !Char (FlattenState s) -- These constructors can be - -- encoded in the UTF8DecodeState - -- type, I prefer to keep these - -- flat even though that means - -- coming up with new names - | D - --- The normal decodeUtf8 above should fuse with flattenArrays --- to create this exact code but it doesn't for some reason, as of now this --- remains the fastest way I could figure out to decodeUtf8. --- --- XXX Add Proper error messages -{-# INLINE_NORMAL decodeUtf8ArraysWithD #-} -decodeUtf8ArraysWithD :: - MonadIO m - => CodingFailureMode - -> D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8ArraysWithD cfm (D.Stream step state) = - D.Stream (step' utf8d) (OuterLoop state Nothing) - where - {-# INLINE transliterateOrError #-} - transliterateOrError e s = - case cfm of - ErrorOnCodingFailure -> error e - TransliterateCodingFailure -> YAndC replacementChar s - DropOnCodingFailure -> s - {-# INLINE inputUnderflow #-} - inputUnderflow = - case cfm of - ErrorOnCodingFailure -> - error $ - show "Streamly.Internal.Data.Stream." 
- ++ "decodeUtf8ArraysWith: Input Underflow" - TransliterateCodingFailure -> YAndC replacementChar D - DropOnCodingFailure -> D - {-# INLINE_LATE step' #-} - step' _ gst (OuterLoop st Nothing) = do - r <- step (adaptState gst) st - return $ - case r of - Yield Array {..} s -> - Skip (InnerLoopDecodeInit s arrContents arrStart arrEnd) - Skip s -> Skip (OuterLoop s Nothing) - Stop -> Skip D - step' _ gst (OuterLoop st dst@(Just (ds, cp))) = do - r <- step (adaptState gst) st - return $ - case r of - Yield Array {..} s -> - Skip (InnerLoopDecoding s arrContents arrStart arrEnd ds cp) - Skip s -> Skip (OuterLoop s dst) - Stop -> Skip inputUnderflow - step' _ _ (InnerLoopDecodeInit st _ p end) - | p == end = do - return $ Skip $ OuterLoop st Nothing - step' _ _ (InnerLoopDecodeInit st contents p end) = do - x <- liftIO $ peekAt p contents - -- Note: It is important to use a ">" instead of a "<=" test here for - -- GHC to generate code layout for default branch prediction for the - -- common case. This is fragile and might change with the compiler - -- versions, we need a more reliable "likely" primitive to control - -- branch predication. - case x > 0x7f of - False -> - return $ Skip $ YAndC - (unsafeChr (fromIntegral x)) - (InnerLoopDecodeInit st contents (p + 1) end) - -- Using a separate state here generates a jump to a separate code - -- block in the core which seems to perform slightly better for the - -- non-ascii case. - True -> return $ Skip $ InnerLoopDecodeFirst st contents p end x - - step' table _ (InnerLoopDecodeFirst st contents p end x) = do - let (Tuple' sv cp) = decode0 table x - return $ - case sv of - 12 -> - Skip $ - transliterateOrError - ( - "Streamly.Internal.Data.Stream." 
- ++ "decodeUtf8ArraysWith: Invalid UTF8" - ++ " codepoint encountered" - ) - (InnerLoopDecodeInit st contents (p + 1) end) - 0 -> error "unreachable state" - _ -> Skip (InnerLoopDecoding st contents (p + 1) end sv cp) - step' _ _ (InnerLoopDecoding st _ p end sv cp) - | p == end = return $ Skip $ OuterLoop st (Just (sv, cp)) - step' table _ (InnerLoopDecoding st contents p end statePtr codepointPtr) = do - x <- liftIO $ peekAt p contents - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - return $ - case sv of - 0 -> - Skip $ - YAndC - (unsafeChr cp) - (InnerLoopDecodeInit st contents (p + 1) end) - 12 -> - Skip $ - transliterateOrError - ( - "Streamly.Internal.Data.Stream." - ++ "decodeUtf8ArraysWith: Invalid UTF8" - ++ " codepoint encountered" - ) - (InnerLoopDecodeInit st contents (p + 1) end) - _ -> - Skip - (InnerLoopDecoding st contents (p + 1) end sv cp) - step' _ _ (YAndC c s) = return $ Yield c s - step' _ _ D = return Stop - --- | Like 'decodeUtf8' but for a chunked stream. It may be slightly faster than --- flattening the stream and then decoding with 'decodeUtf8'. -{-# INLINE decodeUtf8Chunks #-} -decodeUtf8Chunks :: - MonadIO m - => D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8Chunks = decodeUtf8ArraysWithD TransliterateCodingFailure - --- | Like 'decodeUtf8\'' but for a chunked stream. It may be slightly faster --- than flattening the stream and then decoding with 'decodeUtf8\''. -{-# INLINE decodeUtf8Chunks' #-} -decodeUtf8Chunks' :: - MonadIO m - => D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8Chunks' = decodeUtf8ArraysWithD ErrorOnCodingFailure - --- | Like 'decodeUtf8_' but for a chunked stream. It may be slightly faster --- than flattening the stream and then decoding with 'decodeUtf8_'. 
-{-# INLINE decodeUtf8Chunks_ #-} -decodeUtf8Chunks_ :: - MonadIO m - => D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8Chunks_ = decodeUtf8ArraysWithD DropOnCodingFailure - -------------------------------------------------------------------------------- --- Encoding Unicode (UTF-8) Characters -------------------------------------------------------------------------------- - -data WList a = WCons !a !(WList a) | WNil - --- UTF-8 primitives, Lifted from GHC.IO.Encoding.UTF8. - -{-# INLINE ord2 #-} -ord2 :: Char -> (WList Word8) -ord2 c = assert (n >= 0x80 && n <= 0x07ff) (WCons x1 (WCons x2 WNil)) - where - n = ord c - x1 = fromIntegral $ (n `shiftR` 6) + 0xC0 - x2 = fromIntegral $ (n .&. 0x3F) + 0x80 - -{-# INLINE ord3 #-} -ord3 :: Char -> (WList Word8) -ord3 c = assert (n >= 0x0800 && n <= 0xffff) (WCons x1 (WCons x2 (WCons x3 WNil))) - where - n = ord c - x1 = fromIntegral $ (n `shiftR` 12) + 0xE0 - x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 - x3 = fromIntegral $ (n .&. 0x3F) + 0x80 - -{-# INLINE ord4 #-} -ord4 :: Char -> (WList Word8) -ord4 c = assert (n >= 0x10000) (WCons x1 (WCons x2 (WCons x3 (WCons x4 WNil)))) - where - n = ord c - x1 = fromIntegral $ (n `shiftR` 18) + 0xF0 - x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80 - x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 - x4 = fromIntegral $ (n .&. 
0x3F) + 0x80 - -{-# INLINE_NORMAL readCharUtf8With #-} -readCharUtf8With :: Monad m => (WList Word8) -> Unfold m Char Word8 -readCharUtf8With surr = Unfold step inject - - where - - inject c = - return $ case ord c of - x | x <= 0x7F -> fromIntegral x `WCons` WNil - | x <= 0x7FF -> ord2 c - | x <= 0xFFFF -> if isSurrogate c then surr else ord3 c - | otherwise -> ord4 c - - {-# INLINE_LATE step #-} - step WNil = return Stop - step (WCons x xs) = return $ Yield x xs - -{-# INLINE_NORMAL readCharUtf8' #-} -readCharUtf8' :: Monad m => Unfold m Char Word8 -readCharUtf8' = - readCharUtf8With $ - error "Streamly.Internal.Unicode.readCharUtf8': Encountered a surrogate" - --- More yield points improve performance, but I am not sure if they can cause --- too much code bloat or some trouble with fusion. So keeping only two yield --- points for now, one for the ascii chars (fast path) and one for all other --- paths (slow path). -{-# INLINE_NORMAL encodeUtf8D' #-} -encodeUtf8D' :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D' = D.unfoldEach readCharUtf8' - --- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. When --- any invalid character (U+D800-U+D8FF) is encountered in the input stream the --- function errors out. --- -{-# INLINE encodeUtf8' #-} -encodeUtf8' :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8' = encodeUtf8D' - -{-# INLINE_NORMAL readCharUtf8 #-} -readCharUtf8 :: Monad m => Unfold m Char Word8 -readCharUtf8 = readCharUtf8With $ WCons 239 (WCons 191 (WCons 189 WNil)) - --- | See section "3.9 Unicode Encoding Forms" in --- https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf --- -{-# INLINE_NORMAL encodeUtf8D #-} -encodeUtf8D :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D = D.unfoldEach readCharUtf8 - --- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. 
Any --- Invalid characters (U+D800-U+D8FF) in the input stream are replaced by the --- Unicode replacement character U+FFFD. --- -{-# INLINE encodeUtf8 #-} -encodeUtf8 :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8 = encodeUtf8D - -{-# INLINE_NORMAL readCharUtf8_ #-} -readCharUtf8_ :: Monad m => Unfold m Char Word8 -readCharUtf8_ = readCharUtf8With WNil - -{-# INLINE_NORMAL encodeUtf8D_ #-} -encodeUtf8D_ :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D_ = D.unfoldEach readCharUtf8_ - --- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any --- Invalid characters (U+D800-U+D8FF) in the input stream are dropped. --- -{-# INLINE encodeUtf8_ #-} -encodeUtf8_ :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8_ = encodeUtf8D_ - --- | Same as 'encodeUtf8' --- -{-# DEPRECATED encodeUtf8Lax "Please use 'encodeUtf8' instead" #-} -{-# INLINE encodeUtf8Lax #-} -encodeUtf8Lax :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8Lax = encodeUtf8 - -------------------------------------------------------------------------------- --- Encoding to Utf16 -------------------------------------------------------------------------------- - -{-# INLINE utf16LowSurrogate #-} -utf16LowSurrogate :: Word16 -utf16LowSurrogate = 0xDC00 - -{-# INLINE utf16HighSurrogate #-} -utf16HighSurrogate :: Word16 -utf16HighSurrogate = 0xD800 - -{-# INLINE_NORMAL readCharUtf16With #-} -readCharUtf16With :: Monad m => WList Word16 -> Unfold m Char Word16 -readCharUtf16With invalidReplacement = Unfold step inject - - where - - inject c = - return $ case ord c of - x | x < 0xD800 -> fromIntegral x `WCons` WNil - | x > 0xDFFF && x <= 0xFFFF -> fromIntegral x `WCons` WNil - | x >= 0x10000 && x <= 0x10FFFF -> - let u = x - 0x10000 -- 20 bits - h = utf16HighSurrogate - + fromIntegral (u `shiftR` 10) -- 10 bits - l = utf16LowSurrogate - + fromIntegral (u .&. 
0x3FF) -- 10 bits - in WCons h $ WCons l WNil - | otherwise -> invalidReplacement - - {-# INLINE_LATE step #-} - step WNil = return Stop - step (WCons x xs) = return $ Yield x xs - -{-# INLINE encodeUtf16' #-} -encodeUtf16' :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16' = D.unfoldEach (readCharUtf16With errString) - where - errString = - error - $ "Streamly.Internal.Unicode.encodeUtf16': Encountered an \ - invalid character" - -{-# INLINE encodeUtf16 #-} -encodeUtf16 :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16 = D.unfoldEach (readCharUtf16With WNil) - --- | Similar to 'encodeUtf16le' but throws an error if any invalid character is --- encountered. --- -{-# INLINE encodeUtf16le' #-} -encodeUtf16le' :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16le' = -#ifdef WORDS_BIGENDIAN - fmap swapByteOrder . -#endif - encodeUtf16' - --- | Encode a stream of Unicode characters to a UTF-16 encoded stream. Any --- invalid characters in the input stream are replaced by the Unicode --- replacement character U+FFFD. --- --- The resulting Word16s are encoded in little-endian byte order. --- -{-# INLINE encodeUtf16le #-} -encodeUtf16le :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16le = -#ifdef WORDS_BIGENDIAN - fmap swapByteOrder . -#endif - encodeUtf16 - -------------------------------------------------------------------------------- --- Decoding string literals -------------------------------------------------------------------------------- - --- XXX decodeCString# - --- | Read UTF-8 encoded bytes as chars from an 'Addr#' until a 0 byte is --- encountered, the 0 byte is not included in the stream. --- --- /Unsafe:/ The caller is responsible for safe addressing. 
--- --- Note that this is completely safe when reading from Haskell string --- literals because they are guaranteed to be NULL terminated: --- --- >>> Stream.fold Fold.toList (Unicode.fromStr# "Haskell"#) --- "Haskell" --- -{-# INLINE fromStr# #-} -fromStr# :: MonadIO m => Addr# -> Stream m Char -fromStr# addr = decodeUtf8 $ Stream.fromCString# addr - -------------------------------------------------------------------------------- --- Encode streams of containers -------------------------------------------------------------------------------- - --- | Encode a container to @Array Word8@ provided an unfold to covert it to a --- Char stream and an encoding function. --- --- /Internal/ -{-# INLINE encodeObject #-} -encodeObject :: MonadIO m => - (Stream m Char -> Stream m Word8) - -> Unfold m a Char - -> a - -> m (Array Word8) -encodeObject encode u = Stream.fold Array.create . encode . Stream.unfold u - --- | Encode a stream of container objects using the supplied encoding scheme. --- Each object is encoded as an @Array Word8@. --- --- /Internal/ -{-# INLINE encodeObjects #-} -encodeObjects :: MonadIO m => - (Stream m Char -> Stream m Word8) - -> Unfold m a Char - -> Stream m a - -> Stream m (Array Word8) -encodeObjects encode u = Stream.mapM (encodeObject encode u) - --- | Encode a stream of 'String' using the supplied encoding scheme. Each --- string is encoded as an @Array Word8@. 
--- -{-# INLINE encodeStrings #-} -encodeStrings :: MonadIO m => - (Stream m Char -> Stream m Word8) - -> Stream m String - -> Stream m (Array Word8) -encodeStrings encode = encodeObjects encode Unfold.fromList +#include "DocTestUnicodeStream.hs" {- ------------------------------------------------------------------------------- @@ -1360,7 +127,7 @@ isSpace c -- /Pre-release/ {-# INLINE words #-} words :: Monad m => Fold m Char b -> Stream m Char -> Stream m b -words f = D.wordsBy isSpace f +words = D.wordsBy isSpace -- | Unfold a stream to character streams using the supplied 'Unfold' -- and concat the results suffixing a newline character @\\n@ to each stream. diff --git a/core/src/doctest/DocTestUnicodeStream.hs b/core/src/doctest/DocTestUnicodeStream.hs index 7bde1b55f0..83f4a70eb5 100644 --- a/core/src/doctest/DocTestUnicodeStream.hs +++ b/core/src/doctest/DocTestUnicodeStream.hs @@ -1,12 +1,15 @@ {- $setup >>> :m +>>> import qualified Data.Char as Char >>> import qualified Streamly.Data.Fold as Fold >>> import qualified Streamly.Data.Stream as Stream +>>> import qualified Streamly.Data.Unfold as Unfold >>> import qualified Streamly.Unicode.Stream as Unicode For APIs that have not been released yet. 
>>> :set -XMagicHash +>>> import qualified Streamly.Internal.Data.Stream as Stream >>> import qualified Streamly.Internal.Unicode.Stream as Unicode -} diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index 027aed6943..d0d743ad3f 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -201,7 +201,7 @@ common compile-options -Wall-missed-specialisations if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + ghc-options: -j1 +RTS -M500M -RTS if flag(internal-use-unliftio) cpp-options: -DUSE_UNLIFTIO @@ -515,6 +515,7 @@ library , Streamly.Internal.Data.Stream.Generate , Streamly.Internal.Data.Stream.Lift , Streamly.Internal.Data.Stream.Nesting + , Streamly.Internal.Data.Stream.Parse , Streamly.Internal.Data.Stream.Step , Streamly.Internal.Data.Stream.Top , Streamly.Internal.Data.Stream.Transform @@ -550,6 +551,9 @@ library , Streamly.Internal.FileSystem.Path.Common , Streamly.Internal.FileSystem.DirOptions + , Streamly.Internal.Unicode.Encode + , Streamly.Internal.Unicode.Decode + if flag(internal-dev) exposed-modules: Streamly.Internal.Data.StreamK.Alt diff --git a/docs/User/Project/Changelog.md b/docs/User/Project/Changelog.md index cb23f869b8..f87c8c2215 100644 --- a/docs/User/Project/Changelog.md +++ b/docs/User/Project/Changelog.md @@ -4,6 +4,42 @@ ## Unreleased +* The following deprecated modules have been removed: + * Streamly.Data.Array.Foreign + * Streamly.Data.Fold.Tee + * Streamly.Prelude + +* The following deprecated internal modules have been removed: + * Streamly.Internal.Data.SVar + * Streamly.Internal.Data.Stream.SVar + * Streamly.Internal.Data.Stream.Serial + * Streamly.Internal.Data.Stream.Zip + * Streamly.Internal.Data.Stream.Async + * Streamly.Internal.Data.Stream.Parallel + * Streamly.Internal.Data.Stream.Ahead + * Streamly.Internal.Data.Stream.ZipAsync + * Streamly.Internal.Data.Stream.IsStream + * Streamly.Internal.Data.Unfold.SVar + * Streamly.Internal.Data.Stream.Common + * 
Streamly.Internal.Data.Stream.IsStream.Type + * Streamly.Internal.Data.Stream.IsStream.Generate + * Streamly.Internal.Data.Stream.IsStream.Eliminate + * Streamly.Internal.Data.Stream.IsStream.Transform + * Streamly.Internal.Data.Stream.IsStream.Expand + * Streamly.Internal.Data.Stream.IsStream.Reduce + * Streamly.Internal.Data.Stream.IsStream.Exception + * Streamly.Internal.Data.Stream.IsStream.Lift + * Streamly.Internal.Data.Stream.IsStream.Top + * Streamly.Internal.Data.Stream.IsStream.Combinators + * Streamly.Internal.Data.Stream.IsStream.Common + * Streamly.Internal.Data.Stream.IsStream.Enumeration + * Streamly.Internal.Data.Stream.SVar.Generate + * Streamly.Internal.Data.Stream.SVar.Eliminate + * Streamly.Internal.Data.Fold.SVar + * Streamly.Internal.Data.SVar.Worker + * Streamly.Internal.Data.SVar.Dispatch + * Streamly.Internal.Data.SVar.Pull + ## 0.11.0 See diff --git a/hie.yaml b/hie.yaml index 517d6025d2..085a65e20a 100644 --- a/hie.yaml +++ b/hie.yaml @@ -18,6 +18,8 @@ cradle: cabal: - path: "./benchmark/lib/" component: "lib:streamly-benchmarks" + - path: "./benchmark/NanoBenchmarks.hs" + component: "exe:nano-bench" - path: "./benchmark/Streamly/Benchmark/Data/Array.hs" component: "bench:Data.Array" - path: "./benchmark/Streamly/Benchmark/Data/Array/Generic.hs" @@ -26,7 +28,7 @@ cradle: component: "bench:Data.Array.Stream" - path: "./benchmark/Streamly/Benchmark/Data/Fold.hs" component: "bench:Data.Fold" - - path: "./benchmark/Streamly/Benchmark/Data/Fold/Prelude.hs" + - path: "./benchmark/Streamly/Benchmark/Data/Fold/Prelude.hs" component: "bench:Data.Fold.Prelude" - path: "./benchmark/Streamly/Benchmark/Data/Fold/Window.hs" component: "bench:Data.Fold.Window" @@ -36,44 +38,50 @@ cradle: component: "bench:Data.Parser" - path: "./benchmark/Streamly/Benchmark/Data/ParserK.hs" component: "bench:Data.ParserK" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Common.hs" - component: "bench:Data.Stream" - - path: 
"./benchmark/Streamly/Benchmark/Data/Stream/Concurrent.hs" - component: "bench:Data.Stream.Concurrent" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentCommon.hs" - component: "bench:Data.Stream.Concurrent" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentInterleaved.hs" - component: "bench:Data.Stream.ConcurrentInterleaved" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentEager.hs" - component: "bench:Data.Stream.ConcurrentEager" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentOrdered.hs" - component: "bench:Data.Stream.ConcurrentOrdered" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Exceptions.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Expand.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Generate.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Lift.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Split.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Transform.hs" + - path: "./benchmark/Streamly/Benchmark/Data/ParserK.hs" + component: "bench:Data.ParserK.Chunked" + - path: "./benchmark/Streamly/Benchmark/Data/ParserK.hs" + component: "bench:Data.ParserK.Chunked.Generic" + - path: "./benchmark/Streamly/Benchmark/Data/RingArray.hs" + component: "bench:Data.RingArray" + - path: "./benchmark/Streamly/Benchmark/Data/Scanl.hs" + component: "bench:Data.Scanl" + - path: "./benchmark/Streamly/Benchmark/Data/Scanl/Concurrent.hs" + component: "bench:Data.Scanl.Concurrent" + - path: "./benchmark/Streamly/Benchmark/Data/Scanl/Window.hs" + component: "bench:Data.Scanl.Window" + - path: 
"./benchmark/Streamly/Benchmark/Data/Serialize.hs" + component: "bench:Data.Serialize" + - path: "./benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs" + component: "bench:Data.SmallArray" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/" component: "bench:Data.Stream" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Adaptive.hs" + component: "bench:Data.Stream.Adaptive" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/ConcurrentThreadHeavy.hs" + component: "bench:Data.Stream.ConcurrentThreadHeavy" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Exceptions.hs" + component: "bench:Data.Stream.Prelude" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Exceptions.hs" + component: "bench:Data.Stream.Prelude.Exceptions" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Rate.hs" + component: "bench:Data.Stream.Rate" - path: "./benchmark/Streamly/Benchmark/Data/StreamK.hs" component: "bench:Data.StreamK" - path: "./benchmark/Streamly/Benchmark/Data/StreamK/FromStream.hs" component: "bench:Data.StreamK.FromStream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Exceptions.hs" - component: "bench:Data.Stream.Prelude.Exceptions" + - path: "./benchmark/Streamly/Benchmark/Data/StreamK/StreamKAlt.hs" + component: "bench:Data.StreamK.StreamKAlt" + - path: "./benchmark/Streamly/Benchmark/Data/Serialize.hs" + component: "bench:Data.Unbox" + - path: "./benchmark/Streamly/Benchmark/Data/Serialize.hs" + component: "bench:Data.Unbox.Derive.TH" - path: "./benchmark/Streamly/Benchmark/Data/Unfold.hs" component: "bench:Data.Unfold" - path: "./benchmark/Streamly/Benchmark/Data/Unfold/Prelude1.hs" component: "bench:Data.Unfold.Prelude" + - path: "./benchmark/Streamly/Benchmark/FileSystem/DirIO.hs" + component: "bench:FileSystem.DirIO" - path: "./benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs" component: "bench:FileSystem.Handle" - path: 
"./benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs" @@ -104,6 +112,8 @@ cradle: component: "test:Data.Fold.Window" - path: "./test/Streamly/Test/Data/List.hs" component: "test:Data.List" + - path: "./test/Streamly/Test/Data/List.hs" + component: "test:Data.List.Base" - path: "./test/Streamly/Test/Data/MutArray.hs" component: "test:Data.MutArray" - path: "./test/Streamly/Test/Data/Parser.hs" @@ -112,18 +122,36 @@ cradle: component: "test:Data.Parser" - path: "./test/Streamly/Test/Data/ParserK.hs" component: "test:Data.ParserK" + - path: "./test/Streamly/Test/Data/RingArray.hs" + component: "test:Data.RingArray" + - path: "./test/Streamly/Test/Data/Scanl/Concurrent.hs" + component: "test:Data.Scanl.Concurrent" + - path: "./test/Streamly/Test/Data/Serialize.hs" + component: "test:Data.Serialize" + - path: "./test/Streamly/Test/Data/Serialize.hs" + component: "test:Data.Serialize.ENABLE_constructorTagAsString" + - path: "./test/Streamly/Test/Data/SmallArray.hs" + component: "test:Data.SmallArray" - path: "./test/Streamly/Test/Data/Stream.hs" component: "test:Data.Stream" - - path: "./test/Streamly/Test/Data/Stream/Common.hs" - component: "test:Data.Stream.Concurrent" - path: "./test/Streamly/Test/Data/Stream/Concurrent.hs" component: "test:Data.Stream.Concurrent" - path: "./test/Streamly/Test/Data/Stream/Exception.hs" component: "test:Data.Stream.Exception" + - path: "./test/Streamly/Test/Data/Stream/Rate.hs" + component: "test:Data.Stream.Rate" - path: "./test/Streamly/Test/Data/Stream/Time.hs" component: "test:Data.Stream.Time" + - path: "./test/Streamly/Test/Data/Unbox.hs" + component: "test:Data.Serialize.Derive.TH" - path: "./test/Streamly/Test/Data/Unbox.hs" component: "test:Data.Unbox" + - path: "./test/Streamly/Test/Data/Unbox.hs" + component: "test:Data.Unbox.Derive.Generic" + - path: "./test/Streamly/Test/Data/Unbox.hs" + component: "test:Data.Unbox.Derive.TH" + - path: "./test/Streamly/Test/Data/Unbox/TH.hs" + component: "test:Data.Unbox.TH" - 
path: "./test/Streamly/Test/Data/Unfold.hs" component: "test:Data.Unfold" - path: "./test/Streamly/Test/FileSystem/DirIO.hs" @@ -160,6 +188,8 @@ cradle: component: "test:Prelude.Rate" - path: "./test/Streamly/Test/Prelude/Serial.hs" component: "test:Prelude.Serial" + - path: "./test/Streamly/Test/Prelude/Top.hs" + component: "test:Prelude.Top" - path: "./test/Streamly/Test/Prelude/WAsync.hs" component: "test:Prelude.WAsync" - path: "./test/Streamly/Test/Prelude/WSerial.hs" @@ -168,8 +198,12 @@ cradle: component: "test:Prelude.ZipAsync" - path: "./test/Streamly/Test/Prelude/ZipSerial.hs" component: "test:Prelude.ZipSerial" + - path: "./test/Streamly/Test/Serialize/Serializable.hs" + component: "test:Data.Binary" - path: "./test/Streamly/Test/Serialize/Serializable.hs" component: "test:Serialize.Serializable" + - path: "./test/Streamly/Test/Unicode/Char.hs" + component: "test:Unicode.Char" - path: "./test/Streamly/Test/Unicode/Parser.hs" component: "test:Unicode.Parser" - path: "./test/Streamly/Test/Unicode/Stream.hs" diff --git a/src/Streamly/Internal/Data/Fold/Prelude.hs b/src/Streamly/Internal/Data/Fold/Prelude.hs index 05d5a533f4..f6f9b22334 100644 --- a/src/Streamly/Internal/Data/Fold/Prelude.hs +++ b/src/Streamly/Internal/Data/Fold/Prelude.hs @@ -16,12 +16,9 @@ module Streamly.Internal.Data.Fold.Prelude , module Streamly.Internal.Data.Fold.Concurrent -- * Time , module Streamly.Internal.Data.Fold.Time - -- * Deprecated - , module Streamly.Internal.Data.Fold.SVar ) where import Streamly.Internal.Data.Fold.Channel import Streamly.Internal.Data.Fold.Concurrent -import Streamly.Internal.Data.Fold.SVar import Streamly.Internal.Data.Fold.Time diff --git a/src/Streamly/Internal/Data/Unfold/Prelude.hs b/src/Streamly/Internal/Data/Unfold/Prelude.hs index a2f0ad4d19..02a9bb1d3c 100644 --- a/src/Streamly/Internal/Data/Unfold/Prelude.hs +++ b/src/Streamly/Internal/Data/Unfold/Prelude.hs @@ -11,10 +11,7 @@ module Streamly.Internal.Data.Unfold.Prelude ( module 
Streamly.Internal.Data.Unfold.Exception - -- * Deprecated - , module Streamly.Internal.Data.Unfold.SVar ) where import Streamly.Internal.Data.Unfold.Exception -import Streamly.Internal.Data.Unfold.SVar diff --git a/streamly.cabal b/streamly.cabal index 200d77da96..93bc978258 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -79,6 +79,7 @@ extra-source-files: benchmark/Streamly/Benchmark/Data/Fold/*.hs benchmark/Streamly/Benchmark/Data/Fold/Window.hs benchmark/Streamly/Benchmark/Data/MutArray.hs + benchmark/Streamly/Benchmark/Data/Parser/*.hs benchmark/Streamly/Benchmark/Data/RingArray.hs benchmark/Streamly/Benchmark/Data/Scanl/*.hs benchmark/Streamly/Benchmark/Data/Serialize/*.hs @@ -284,7 +285,7 @@ common compile-options -Wall-missed-specialisations if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + ghc-options: +RTS -M600M -RTS if flag(internal-use-unliftio) cpp-options: -DUSE_UNLIFTIO @@ -430,6 +431,15 @@ library , Streamly.Network.Socket , Streamly.Network.Inet.TCP + if !impl(ghcjs) && flag(internal-dev) + other-modules: + Streamly.Internal.System.IOVec.Type + , Streamly.Internal.System.IOVec + , Streamly.Internal.FileSystem.FDIO + , Streamly.Internal.FileSystem.FD + + if flag(internal-dev) + exposed-modules: Streamly.Internal.Data.SmallArray -- Deprecated , Streamly.Data.Array.Foreign , Streamly.Data.Fold.Tee @@ -448,19 +458,35 @@ library , Streamly.Internal.Data.Stream.IsStream - if !impl(ghcjs) && flag(internal-dev) - other-modules: - Streamly.Internal.System.IOVec.Type - , Streamly.Internal.System.IOVec - , Streamly.Internal.FileSystem.FDIO - , Streamly.Internal.FileSystem.FD - - if flag(internal-dev) - exposed-modules: Streamly.Internal.Data.SmallArray -- Exposed modules show up on hackage irrespective of the flag, so keep -- it hidden. 
- other-modules: Streamly.Data.SmallArray - , Streamly.Internal.Data.SmallArray.Type + other-modules: + Streamly.Data.SmallArray + , Streamly.Internal.Data.SmallArray.Type + -- Deprecated + , Streamly.Internal.Data.Unfold.SVar + , Streamly.Internal.Data.Stream.Common + , Streamly.Internal.Data.Stream.IsStream.Type + , Streamly.Internal.Data.Stream.IsStream.Generate + , Streamly.Internal.Data.Stream.IsStream.Eliminate + , Streamly.Internal.Data.Stream.IsStream.Transform + , Streamly.Internal.Data.Stream.IsStream.Expand + , Streamly.Internal.Data.Stream.IsStream.Reduce + , Streamly.Internal.Data.Stream.IsStream.Exception + , Streamly.Internal.Data.Stream.IsStream.Lift + , Streamly.Internal.Data.Stream.IsStream.Top + , Streamly.Internal.Data.Stream.IsStream.Combinators + , Streamly.Internal.Data.Stream.IsStream.Common + , Streamly.Internal.Data.Stream.IsStream.Enumeration + + , Streamly.Internal.Data.Stream.SVar.Generate + , Streamly.Internal.Data.Stream.SVar.Eliminate + + , Streamly.Internal.Data.Fold.SVar + + , Streamly.Internal.Data.SVar.Worker + , Streamly.Internal.Data.SVar.Dispatch + , Streamly.Internal.Data.SVar.Pull if os(windows) exposed-modules: Streamly.Internal.FileSystem.Event.Windows @@ -499,31 +525,6 @@ library , Streamly.Internal.Data.Scanl.Concurrent , Streamly.Internal.Data.Unfold.Exception - , Streamly.Internal.Data.Unfold.SVar - - -- Deprecated - , Streamly.Internal.Data.Stream.Common - , Streamly.Internal.Data.Stream.IsStream.Type - , Streamly.Internal.Data.Stream.IsStream.Generate - , Streamly.Internal.Data.Stream.IsStream.Eliminate - , Streamly.Internal.Data.Stream.IsStream.Transform - , Streamly.Internal.Data.Stream.IsStream.Expand - , Streamly.Internal.Data.Stream.IsStream.Reduce - , Streamly.Internal.Data.Stream.IsStream.Exception - , Streamly.Internal.Data.Stream.IsStream.Lift - , Streamly.Internal.Data.Stream.IsStream.Top - , Streamly.Internal.Data.Stream.IsStream.Combinators - , Streamly.Internal.Data.Stream.IsStream.Common - , 
Streamly.Internal.Data.Stream.IsStream.Enumeration - - , Streamly.Internal.Data.Stream.SVar.Generate - , Streamly.Internal.Data.Stream.SVar.Eliminate - - , Streamly.Internal.Data.Fold.SVar - - , Streamly.Internal.Data.SVar.Worker - , Streamly.Internal.Data.SVar.Dispatch - , Streamly.Internal.Data.SVar.Pull build-depends: -- Core libraries shipped with ghc, the min and max @@ -535,7 +536,6 @@ library base >= 4.12 && < 4.22 , fusion-plugin-types >= 0.1 && < 0.2 , containers >= 0.6.0 && < 0.8 - , deepseq >= 1.4.4 && < 1.6 , exceptions >= 0.8.0 && < 0.11 , mtl >= 2.2.2 && < 2.4 , transformers >= 0.5.5 && < 0.7 @@ -572,6 +572,7 @@ library if flag(internal-dev) build-depends: primitive >= 0.5.4 && < 0.9 + , deepseq >= 1.4.4 && < 1.6 -- For FileSystem.Event module if os(linux) diff --git a/targets/Targets.hs b/targets/Targets.hs index c4ecc24d25..596420c1e5 100644 --- a/targets/Targets.hs +++ b/targets/Targets.hs @@ -107,6 +107,12 @@ targets = , "noTest" ] ) + , ("Data.Scanl.Concurrent", + [ "infinite_grp" + , "fold_parser_grp" + , "noTest" + ] + ) , ("Data.Scanl.Window", [ "infinite_grp" , "fold_parser_grp" @@ -229,6 +235,13 @@ targets = , "streamly_core_grp" ] ) + , ("Data.Unfold.Prelude", + [ "infinite_grp" + , "serial_stream_grp" + , "noTest" + ] + ) + , ("FileSystem.DirIO", []) , ("FileSystem.Event", [ "noBench" ] @@ -253,7 +266,6 @@ targets = ) #endif , ("FileSystem.Handle", []) - , ("FileSystem.DirIO", []) , ("Network.Inet.TCP", ["noBench"]) , ("Network.Socket", ["noBench"]) , ("Unicode.Char", ["testDevOnly"]) diff --git a/test/Streamly/Test/Unicode/Stream.hs b/test/Streamly/Test/Unicode/Stream.hs index c36de806de..1d61ad49af 100644 --- a/test/Streamly/Test/Unicode/Stream.hs +++ b/test/Streamly/Test/Unicode/Stream.hs @@ -56,8 +56,10 @@ genUnicode = listOf arbitraryUnicodeChar genWord8List :: Gen [Word8] genWord8List = listOf arbitrary +{- genListOfW8List :: Gen [[Word8]] genListOfW8List = listOf (listOf arbitrary) +-} propDecodeEncodeId' :: Property 
propDecodeEncodeId' = @@ -78,6 +80,7 @@ propDecodeEncodeUtf16Id encoder decoder = chrs <- run $ Stream.toList $ decoder wrds assertEq chrs list +{- propMkEvenW8Chunks :: Property propMkEvenW8Chunks = forAll genListOfW8List $ \list -> @@ -93,6 +96,7 @@ propMkEvenW8Chunks = if (odd (length concatedList)) then assertEq concatedList1 (init concatedList) else assertEq concatedList1 concatedList +-} -- XXX need to use invalid characters propDecodeEncodeId :: Property @@ -229,7 +233,7 @@ main = H.hspec (propDecodeEncodeUtf16Id IUS.encodeUtf16le' IUS.decodeUtf16le') prop "decodeUtf16le . encodeUtf16le == id" (propDecodeEncodeUtf16Id IUS.encodeUtf16le' IUS.decodeUtf16le) - prop "mkEvenW8Chunks" propMkEvenW8Chunks + -- prop "mkEvenW8Chunks" propMkEvenW8Chunks H.describe "Latin1 - Encoding / Decoding" $ do prop "ASCII to Latin1" propASCIIToLatin1 diff --git a/test/streamly-tests.cabal b/test/streamly-tests.cabal index 7dbe887a7f..f6052aafee 100644 --- a/test/streamly-tests.cabal +++ b/test/streamly-tests.cabal @@ -189,7 +189,7 @@ library exposed-modules: Streamly.Test.Common Streamly.Test.Parser.Common - if !flag(use-streamly-core) + if !flag(use-streamly-core) && flag(dev) exposed-modules: Streamly.Test.Prelude.Common if flag(limit-build-mem) ghc-options: +RTS -M1500M -RTS @@ -547,6 +547,8 @@ test-suite Prelude ghc-options: -main-is Streamly.Test.Prelude.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Ahead import: test-options @@ -555,6 +557,8 @@ test-suite Prelude.Ahead ghc-options: -main-is Streamly.Test.Prelude.Ahead.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Async import: test-options @@ -563,6 +567,8 @@ test-suite Prelude.Async ghc-options: -main-is Streamly.Test.Prelude.Async.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Concurrent import: test-options @@ -573,6 +579,8 @@ test-suite 
Prelude.Concurrent ghc-options: +RTS -M2000M -RTS if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Fold import: test-options @@ -581,6 +589,8 @@ test-suite Prelude.Fold ghc-options: -main-is Streamly.Test.Prelude.Fold.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Parallel import: test-options @@ -589,6 +599,8 @@ test-suite Prelude.Parallel ghc-options: -main-is Streamly.Test.Prelude.Parallel.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Rate import:always-optimized @@ -611,6 +623,8 @@ test-suite Prelude.Serial ghc-options: +RTS -M1500M -RTS if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Top import: test-options @@ -618,6 +632,8 @@ test-suite Prelude.Top main-is: Streamly/Test/Prelude/Top.hs if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.WAsync import: test-options @@ -626,6 +642,8 @@ test-suite Prelude.WAsync ghc-options: -main-is Streamly.Test.Prelude.WAsync.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.WSerial import: test-options @@ -634,6 +652,8 @@ test-suite Prelude.WSerial ghc-options: -main-is Streamly.Test.Prelude.WSerial.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.ZipAsync import: test-options @@ -644,6 +664,8 @@ test-suite Prelude.ZipAsync ghc-options: +RTS -M750M -RTS if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.ZipSerial import: test-options @@ -652,3 +674,5 @@ test-suite Prelude.ZipSerial ghc-options: -main-is Streamly.Test.Prelude.ZipSerial.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False