diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index cfc9362a2c..edbc1ec3b7 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -14,7 +14,7 @@ jobs: LC_ALL: C.UTF-8 BUILD: ${{ matrix.build }} # For updating see: https://downloads.haskell.org/~ghcup/ - GHCUP_VERSION: 0.1.20.0 + GHCUP_VERSION: 0.1.50.2 DOCSPEC_URL: https://github.com/phadej/cabal-extras/releases/download/cabal-docspec-0.0.0.20210111/cabal-docspec-0.0.0.20210111.xz DOCSPEC_OPTIONS: "--timeout 60 --check-properties --property-variables xs" GHCUP_GHC_OPTIONS: ${{ matrix.ghcup_ghc_options }} @@ -68,7 +68,7 @@ jobs: matrix: # The order is important to optimize fail-fast. name: - - 9.10.1-Werror + - 9.10.3-Werror # - 9.8.1-docspec # - 8.10.7-coverage @@ -99,14 +99,14 @@ jobs: ghc_version: 9.12.1 runner: ubuntu-latest build: cabal - cabal_build_options: "--flag limit-build-mem" + cabal_build_options: "--flag limit-build-mem --flag fusion-plugin" cabal_version: 3.12.1.0 disable_sdist_build: "y" cabal_project: cabal.project ignore_error: false # Note: use linux for warning build for convenient dev testing - - name: 9.10.1-Werror - ghc_version: 9.10.1 + - name: 9.10.3-Werror + ghc_version: 9.10.3 runner: ubuntu-latest build: cabal cabal_build_options: "--flag limit-build-mem" @@ -114,8 +114,8 @@ jobs: disable_sdist_build: "y" cabal_project: cabal.project.Werror ignore_error: false - - name: 9.10.1-macos - ghc_version: 9.10.1 + - name: 9.10.3-macos + ghc_version: 9.10.3 runner: macos-latest build: cabal cabal_build_options: "--flag limit-build-mem" @@ -123,8 +123,8 @@ jobs: disable_sdist_build: "y" cabal_project: cabal.project ignore_error: false - - name: 9.10.1-fusion-inspection - ghc_version: 9.10.1 + - name: 9.10.3-fusion-inspection + ghc_version: 9.10.3 runner: ubuntu-latest build: cabal cabal_version: 3.12.1.0 diff --git a/.github/workflows/regression-check.yml b/.github/workflows/regression-check.yml index 3c9cf24c79..01c006ad7d 100644 --- 
a/.github/workflows/regression-check.yml +++ b/.github/workflows/regression-check.yml @@ -33,6 +33,8 @@ jobs: Data.ParserK.Chunked Data.ParserK.Chunked.Generic Data.RingArray + Data.Scanl + Data.Scanl.Concurrent Data.Scanl.Window Data.Serialize Data.Stream @@ -40,10 +42,12 @@ jobs: Data.Stream.ConcurrentEager Data.Stream.ConcurrentInterleaved Data.Stream.ConcurrentOrdered + Data.Stream.Prelude Data.StreamK:6 Data.Unbox Data.Unbox.Derive.TH Data.Unfold + Data.Unfold.Prelude FileSystem.DirIO FileSystem.Handle Unicode.Parser @@ -67,10 +71,10 @@ jobs: - name: Download ghc run: | - GHCUP_VER=0.1.18.0 + GHCUP_VER=0.1.50.2 curl -sL -o ./ghcup https://downloads.haskell.org/~ghcup/$GHCUP_VER/x86_64-linux-ghcup-$GHCUP_VER chmod +x ./ghcup - GHCVER=9.6.6 + GHCVER=9.10.3 ./ghcup install ghc $GHCVER ./ghcup set ghc $GHCVER cabal update @@ -135,7 +139,7 @@ jobs: - name: Run benchmarks and append run: | - ./bench-runner --package-name streamly-benchmarks --package-version 0.0.0 --targets "$CI_BENCHMARKS" --raw --append + ./bench-runner --package-name streamly-benchmarks --package-version 0.0.0 --cabal-build-options "-j1" --targets "$CI_BENCHMARKS" --raw --append # ----------------------------------------------------------------- # -- Compare diff --git a/.hlint.ignore b/.hlint.ignore index 462511808f..4209b68b9a 100644 --- a/.hlint.ignore +++ b/.hlint.ignore @@ -1,6 +1,6 @@ core/src/Streamly/Internal/Data/StreamK/Type.hs core/src/Streamly/Internal/Data/Pipe/Type.hs -core/src/Streamly/Internal/Unicode/Stream.hs +core/src/Streamly/Internal/Unicode/Encode.hs src/Streamly/Internal/Data/SmallArray/Type.hs src/Streamly/Internal/Data/Stream/IsStream/Eliminate.hs src/Streamly/Internal/Data/Stream/Serial.hs @@ -22,19 +22,9 @@ test/Streamly/Test/Unicode/Stream.hs test/Streamly/Test/Data/Unbox.hs benchmark/lib/Streamly/Benchmark/Common.hs benchmark/lib/Streamly/Benchmark/Common/Handle.hs -benchmark/lib/Streamly/Benchmark/Prelude.hs benchmark/NanoBenchmarks.hs 
benchmark/Streamly/Benchmark/Data/Array.hs -benchmark/Streamly/Benchmark/Data/Parser.hs benchmark/Streamly/Benchmark/Data/ParserK.hs benchmark/Streamly/Benchmark/Data/Stream/StreamK.hs benchmark/Streamly/Benchmark/Data/Unfold.hs benchmark/Streamly/Benchmark/FileSystem/Handle.hs -benchmark/Streamly/Benchmark/Prelude/Async.hs -benchmark/Streamly/Benchmark/Prelude/Concurrent.hs -benchmark/Streamly/Benchmark/Prelude/Merge.hs -benchmark/Streamly/Benchmark/Prelude/Parallel.hs -benchmark/Streamly/Benchmark/Prelude/Rate.hs -benchmark/Streamly/Benchmark/Prelude/WAsync.hs -benchmark/Streamly/Benchmark/Prelude/ZipAsync.hs -benchmark/Streamly/Benchmark/Prelude/ZipSerial.hs diff --git a/.packcheck.ignore b/.packcheck.ignore index 4a45873f1d..66b8033047 100644 --- a/.packcheck.ignore +++ b/.packcheck.ignore @@ -15,6 +15,7 @@ benchmark/bench-runner/flake.lock benchmark/bench-runner/flake.nix benchmark/bench-runner/sources.nix bin/ghc.sh +bin/ghc-make.sh bin/repl bin/run-ci.sh bin/run-repl-quick.sh diff --git a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs index 31677349ab..2390eeb25a 100644 --- a/benchmark/Streamly/Benchmark/Data/Array/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Array/Stream.hs @@ -201,6 +201,7 @@ inspect $ 'copyChunksSplitInterposeSuffix `hasNoType` ''Step copyChunksSplitInterpose :: Handle -> Handle -> IO () copyChunksSplitInterpose inh outh = Stream.fold (Handle.write outh) + -- XXX requires @-fspec-constr-recursive=12@. -- XXX this is not correct word splitting combinator $ Array.concatSepBy 32 . Array.compactSepByByte_ 32 $ Handle.readChunks inh diff --git a/benchmark/Streamly/Benchmark/Data/Fold.hs b/benchmark/Streamly/Benchmark/Data/Fold.hs index 8f3a0d9f7c..fb25832b6e 100644 --- a/benchmark/Streamly/Benchmark/Data/Fold.hs +++ b/benchmark/Streamly/Benchmark/Data/Fold.hs @@ -355,6 +355,8 @@ o_1_space_reduce_read_split env = -- | Infix split on a character sequence. 
splitOnSeqUtf8 :: String -> Handle -> IO Int splitOnSeqUtf8 str inh = + -- XXX requires @-fspec-constr-recursive=12@. Maybe due to + -- decodeUtf8. Stream.fold Fold.length $ Stream.foldManyPost (Fold.takeEndBySeq_ (Array.fromList str) Fold.drain) $ Unicode.decodeUtf8Chunks diff --git a/benchmark/Streamly/Benchmark/Data/Parser.hs b/benchmark/Streamly/Benchmark/Data/Parser.hs index 6200a83def..1a677c5d82 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser.hs @@ -1,4 +1,3 @@ - #undef FUSION_CHECK #ifdef FUSION_CHECK {-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} @@ -16,808 +15,86 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -Wno-orphans #-} -module Streamly.Benchmark.Data.Parser +module Main ( - benchmarks - , sourceUnfoldrM + main ) where -import Control.Applicative ((<|>)) -import Control.DeepSeq (NFData(..)) -import Data.Function ((&)) -import Data.Functor (($>)) -import Data.Monoid (Sum(..)) -import GHC.Magic (inline) -import GHC.Magic (noinline) -import System.IO (Handle) -import System.Random (randomRIO) -import Streamly.Internal.Data.Fold (Fold(..)) -import Streamly.Internal.Data.Parser - (ParseError(..), Parser(..), Initial(..), Step(..), Final(..)) -import Streamly.Internal.Data.Stream (Stream) import Prelude hiding (any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile, span) -import qualified Control.Applicative as AP -import qualified Data.Foldable as F -import qualified Data.Traversable as TR -import qualified Streamly.FileSystem.Handle as Handle import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.Fold as Fold -import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Data.Stream as Stream -import qualified Streamly.Internal.Data.Producer as Producer -import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common import 
Streamly.Benchmark.Common.Handle +import Streamly.Benchmark.Data.Parser.Alternative as Alternative +import Streamly.Benchmark.Data.Parser.Applicative as Applicative +import Streamly.Benchmark.Data.Parser.Monad as Monad +import Streamly.Benchmark.Data.Parser.Sequence as Sequence +import Streamly.Benchmark.Data.Parser.Producer as Producer +import Streamly.Benchmark.Data.Parser.Interleave as Interleave +import Streamly.Benchmark.Data.Parser.Groups as Groups + +moduleName :: String +moduleName = "Data.Parser" + +benchmarkList :: + Int + -> BenchEnv + -> [Array.Array Int] + -> [(SpaceComplexity, Benchmark)] +benchmarkList value env arrays = + Alternative.benchmarks value + ++ Applicative.benchmarks value + ++ Monad.benchmarks value + ++ Sequence.benchmarks value + ++ Sequence.benchmarksFileIO env + ++ Producer.benchmarks value arrays + ++ Groups.benchmarks value + ++ Interleave.benchmarks value ------------------------------------------------------------------------------- --- Utilities +-- Driver ------------------------------------------------------------------------------- --- XXX these can be moved to the common module - -{-# INLINE sourceUnfoldrM #-} -sourceUnfoldrM :: Monad m => Int -> Int -> Stream m Int -sourceUnfoldrM value n = Stream.unfoldrM step n - where - step cnt = - if cnt > n + value - then return Nothing - else return (Just (cnt, cnt + 1)) +main :: IO () +main = do +#ifndef FUSION_CHECK + env <- mkHandleBenchEnv + runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env) --- | Generates something like this: { { \{ \{ } }. The stream consists of --- three parts, the first part is contains a sequence of `{`. The second part --- contains a sequence pf escaped values `\{`. The third part contains a --- sequence of `}`. 
-{-# INLINE sourceEscapedFrames #-} -sourceEscapedFrames :: - Monad m - => Int - -> Int - -> Stream m Char -sourceEscapedFrames value n = Stream.unfoldrM step n where - bs = '\\' - cbOpen = '{' - cbClose = '}' - value1 = value `div` 4 - - step cnt - | cnt > 4 * value1 = return Nothing - | cnt <= value1 = return $ Just (cbOpen, cnt + 1) - | cnt > 3 * value1 = return $ Just (cbClose, cnt + 1) - | otherwise = - return - $ Just - $ if (cnt - value1) `mod` 2 == 1 - then (bs, cnt + 1) - else (cbOpen, cnt + 1) - -{-# INLINE benchIOSrc #-} -benchIOSrc - :: NFData b - => (Int -> Int -> Stream IO a) - -> Int - -> String - -> (Stream IO a -> IO b) - -> Benchmark -benchIOSrc src value name f = - bench name $ nfIO $ randomRIO (1,1) >>= f . src value - --- | Takes a fold method, and uses it with a default source. -{-# INLINE benchIOSink #-} -benchIOSink - :: NFData b - => Int -> String -> (Stream IO Int -> IO b) -> Benchmark -benchIOSink value name f = - bench name $ nfIO $ randomRIO (1,1) >>= f . sourceUnfoldrM value - -------------------------------------------------------------------------------- --- Parsers -------------------------------------------------------------------------------- - -{-# INLINE takeBetween #-} -takeBetween :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeBetween value = Stream.parse (PR.takeBetween 0 value Fold.drain) - -{-# INLINE takeEQ #-} -takeEQ :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeEQ value = Stream.parse (PR.takeEQ value Fold.drain) - -{-# INLINE takeGE #-} -takeGE :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeGE value = Stream.parse (PR.takeGE value Fold.drain) - -{-# INLINE dropWhile #-} -dropWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -dropWhile value = Stream.parse (PR.dropWhile (<= value)) - -{-# INLINE takeBeginBy #-} -takeBeginBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -takeBeginBy value stream = do - let stream2 = value 
`Stream.cons` stream - Stream.parse (PR.takeBeginBy (== value) Fold.drain) stream2 - -takeFramedByEsc_ :: Monad m => Int -> Stream m Char -> m (Either ParseError ()) -takeFramedByEsc_ _ = Stream.parse parser - - where - - isEsc = (== '\\') - isBegin = (== '{') - isEnd = (== '}') - - parser = PR.takeFramedByEsc_ isEsc isBegin isEnd Fold.drain - -{-# INLINE listEqBy #-} -listEqBy :: Int -> Stream IO Int -> IO (Either ParseError [Int]) -listEqBy len = Stream.parse (PR.listEqBy (==) [1 .. len]) - -{-# INLINE streamEqBy #-} -streamEqBy :: Int -> Stream IO Int -> IO (Either ParseError ()) -streamEqBy len = Stream.parse (PR.streamEqBy (==) (Stream.enumerateFromTo 1 len)) - -{-# INLINE takeWhile #-} -takeWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -takeWhile value = Stream.parse (PR.takeWhile (<= value) Fold.drain) - -takeWhileP :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -takeWhileP value = - Stream.parse (PR.takeWhileP (<= value) (PR.takeWhile (<= value - 1) Fold.drain)) - -{-# INLINE takeP #-} -takeP :: Monad m => Int -> Stream m a -> m (Either ParseError ()) -takeP value = Stream.parse (PR.takeP value (PR.fromFold Fold.drain)) - -{-# INLINE groupBy #-} -groupBy :: Monad m => Stream m Int -> m (Either ParseError ()) -groupBy = Stream.parse (PR.groupBy (<=) Fold.drain) - -{-# INLINE groupByRolling #-} -groupByRolling :: Monad m => Stream m Int -> m (Either ParseError ()) -groupByRolling = Stream.parse (PR.groupByRolling (<=) Fold.drain) - -{-# INLINE wordBy #-} -wordBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -wordBy value = Stream.parse (PR.wordBy (>= value) Fold.drain) - -{-# INLINE sepByWords #-} -sepByWords :: Monad m => Stream m Int -> m (Either ParseError ()) -sepByWords = Stream.parse (wrds even Fold.drain) - where - wrds p = PR.sepBy (PR.takeWhile (not . 
p) Fold.drain) (PR.dropWhile p) - -{-# INLINE sepByAllWords #-} -sepByAllWords :: Monad m => Stream m Int -> m (Either ParseError ()) -sepByAllWords = Stream.parse (wrds even Fold.drain) - where - wrds p = PR.sepByAll (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) - --- Returning a list to compare with the sepBy1 in ParserK -{-# INLINE sepBy1 #-} -sepBy1 :: Monad m => Stream m Int -> m (Either ParseError [Int]) -sepBy1 xs = do - Stream.parse (PR.sepBy1 (PR.satisfy odd) (PR.satisfy even) Fold.toList) xs - -{-# INLINE sepByWords1 #-} -sepByWords1 :: Monad m => Stream m Int -> m (Either ParseError ()) -sepByWords1 = Stream.parse (wrds even Fold.drain) - where - wrds p = PR.sepBy1 (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) - -{-# INLINE deintercalate #-} -deintercalate :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -deintercalate _ = Stream.parse (partition even) - - where - - partition p = - PR.deintercalate - (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain - -{-# INLINE deintercalate1 #-} -deintercalate1 :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -deintercalate1 _ = Stream.parse (partition even) - - where - - partition p = - PR.deintercalate1 - (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain - -{-# INLINE deintercalateAll #-} -deintercalateAll :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -deintercalateAll _ = Stream.parse (partition even) - - where - - partition p = - PR.deintercalateAll - (PR.takeWhile (not . 
p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain - -{-# INLINE manyWordByEven #-} -manyWordByEven :: Monad m => Stream m Int -> m (Either ParseError ()) -manyWordByEven = Stream.parse (PR.many (PR.wordBy even Fold.drain) Fold.drain) - -{-# INLINE many #-} -many :: Monad m => Stream m Int -> m (Either ParseError Int) -many = Stream.parse (PR.many (PR.satisfy (> 0)) Fold.length) - -{-# INLINE manyAlt #-} -manyAlt :: Monad m => Stream m Int -> m Int -manyAlt xs = do - x <- Stream.parse (AP.many (PR.satisfy (> 0))) xs - return $ Prelude.length x - -{-# INLINE some #-} -some :: Monad m => Stream m Int -> m (Either ParseError Int) -some = Stream.parse (PR.some (PR.satisfy (> 0)) Fold.length) - -{-# INLINE someAlt #-} -someAlt :: Monad m => Stream m Int -> m Int -someAlt xs = do - x <- Stream.parse (AP.some (PR.satisfy (> 0))) xs - return $ Prelude.length x - -{-# INLINE manyTill #-} -manyTill :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -manyTill value = - Stream.parse (PR.manyTill (PR.satisfy (> 0)) (PR.satisfy (== value)) Fold.length) - -{-# INLINE splitAp2 #-} -splitAp2 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ((), ())) -splitAp2 value = - Stream.parse - ((,) - <$> PR.dropWhile (<= (value `div` 2)) - <*> PR.dropWhile (<= value) - ) - -{-# INLINE splitAp4 #-} -splitAp4 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitAp4 value = - Stream.parse - ( (\() () () () -> ()) - <$> PR.dropWhile (<= (value * 1 `div` 4)) - <*> PR.dropWhile (<= (value * 2 `div` 4)) - <*> PR.dropWhile (<= (value * 3 `div` 4)) - <*> PR.dropWhile (<= value) - ) - -{-# INLINE splitAp8 #-} -splitAp8 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitAp8 value = - Stream.parse - ( (\() () () () () () () () -> ()) - <$> PR.dropWhile (<= (value * 1 `div` 8)) - <*> PR.dropWhile (<= (value * 2 `div` 8)) - <*> PR.dropWhile (<= (value * 3 `div` 8)) - <*> PR.dropWhile (<= (value * 4 `div` 8)) - <*> PR.dropWhile (<= (value * 5 `div` 
8)) - <*> PR.dropWhile (<= (value * 6 `div` 8)) - <*> PR.dropWhile (<= (value * 7 `div` 8)) - <*> PR.dropWhile (<= value) - ) - -{-# INLINE splitApBefore #-} -splitApBefore :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitApBefore value = - Stream.parse - ( PR.dropWhile (<= (value `div` 2)) - *> PR.dropWhile (<= value) - ) - -{-# INLINE splitApAfter #-} -splitApAfter :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -splitApAfter value = - Stream.parse - ( PR.dropWhile (<= (value `div` 2)) - <* PR.dropWhile (<= value) - ) - -{-# INLINE splitWith2 #-} -splitWith2 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ((), ())) -splitWith2 value = - Stream.parse - (PR.splitWith (,) - (PR.dropWhile (<= (value `div` 2))) - (PR.dropWhile (<= value)) - ) - -{-# INLINE split_ #-} -split_ :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -split_ value = - Stream.parse - (PR.split_ - (PR.dropWhile (<= (value `div` 2))) - (PR.dropWhile (<= value)) - ) - --- XXX dropWhile with applicative does not fuse --- PR.dropWhile (<= (value * 1 `div` 4)) *> PR.die "alt" -{-# INLINE takeWhileFail #-} -takeWhileFail :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeWhileFail predicate (Fold fstep finitial _ ffinal) = - Parser step initial extract - - where - - initial = do - res <- finitial - return $ case res of - Fold.Partial s -> IPartial s - Fold.Done b -> IDone b - - step s a = - if predicate a - then do - fres <- fstep s a - return - $ case fres of - Fold.Partial s1 -> SPartial 1 s1 - Fold.Done b -> SDone 1 b - else return $ SError "fail" - - extract s = fmap (FDone 0) (ffinal s) - -{-# INLINE alt2 #-} -alt2 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt2 value = - Stream.parse - (PR.alt - (takeWhileFail (<= (value `div` 2)) Fold.drain) - (PR.dropWhile (<= value)) - ) - -{-# INLINE alt4 #-} -alt4 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt4 value = - Stream.parse - ( 
takeWhileFail (<= (value * 1 `div` 4)) Fold.drain - <|> takeWhileFail (<= (value * 2 `div` 4)) Fold.drain - <|> takeWhileFail (<= (value * 3 `div` 4)) Fold.drain - <|> PR.dropWhile (<= value) - ) - -{-# INLINE alt8 #-} -alt8 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt8 value = - Stream.parse - ( takeWhileFail (<= (value * 1 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 2 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 3 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 4 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 5 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 6 `div` 8)) Fold.drain - <|> takeWhileFail (<= (value * 7 `div` 8)) Fold.drain - <|> PR.dropWhile (<= value) - ) - -{-# INLINE alt16 #-} -alt16 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -alt16 value = - Stream.parse - ( takeWhileFail (<= (value * 1 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 2 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 3 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 4 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 5 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 6 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 8 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 9 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 10 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 11 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 12 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 13 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 14 `div` 16)) Fold.drain - <|> takeWhileFail (<= (value * 15 `div` 16)) Fold.drain - <|> PR.dropWhile (<= value) - ) - -{-# INLINE altSmall #-} -altSmall :: Monad m - => Int -> Stream m Int -> m () -altSmall value = - Stream.fold Fold.drain . 
- Stream.parseMany - (PR.alt - (PR.satisfy (>= value) *> PR.die "alt") - (PR.satisfy (<= value)) - ) - -{-# INLINE monad #-} -monad :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad value = - Stream.parse - $ do - PR.dropWhile (<= (value `div` 2)) - PR.dropWhile (<= value) - -{-# INLINE monad4 #-} -monad4 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad4 value = - Stream.parse $ do - PR.dropWhile (<= (value `div` 4)) - PR.dropWhile (<= (value `div` 2)) - PR.dropWhile (<= (value * 3 `div` 4)) - PR.dropWhile (<= value) - -{-# INLINE monad8 #-} -monad8 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad8 value = - Stream.parse $ do - PR.dropWhile (<= (value * 1 `div` 8)) - PR.dropWhile (<= (value * 2 `div` 8)) - PR.dropWhile (<= (value * 3 `div` 8)) - PR.dropWhile (<= (value * 4 `div` 8)) - PR.dropWhile (<= (value * 5 `div` 8)) - PR.dropWhile (<= (value * 6 `div` 8)) - PR.dropWhile (<= (value * 7 `div` 8)) - PR.dropWhile (<= value) - -{-# INLINE monad16 #-} -monad16 :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -monad16 value = - Stream.parse $ do - PR.dropWhile (<= (value * 1 `div` 16)) - PR.dropWhile (<= (value * 2 `div` 16)) - PR.dropWhile (<= (value * 3 `div` 16)) - PR.dropWhile (<= (value * 4 `div` 16)) - PR.dropWhile (<= (value * 5 `div` 16)) - PR.dropWhile (<= (value * 6 `div` 16)) - PR.dropWhile (<= (value * 7 `div` 16)) - PR.dropWhile (<= (value * 8 `div` 16)) - PR.dropWhile (<= (value * 9 `div` 16)) - PR.dropWhile (<= (value * 10 `div` 16)) - PR.dropWhile (<= (value * 11 `div` 16)) - PR.dropWhile (<= (value * 12 `div` 16)) - PR.dropWhile (<= (value * 13 `div` 16)) - PR.dropWhile (<= (value * 14 `div` 16)) - PR.dropWhile (<= (value * 15 `div` 16)) - PR.dropWhile (<= value) - -{-# INLINE takeEndBy_ #-} -takeEndBy_ :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -takeEndBy_ value = Stream.parse (PR.takeEndBy_ (>= value) (PR.fromFold Fold.drain)) - -{- -{-# INLINE 
teeAllAny #-} -teeAllAny :: Monad m - => Int -> Stream m Int -> m ((), ()) -teeAllAny value = - Stream.parse - (PR.teeWith (,) - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) - -{-# INLINE teeFstAllAny #-} -teeFstAllAny :: Monad m - => Int -> Stream m Int -> m ((), ()) -teeFstAllAny value = - Stream.parse - (PR.teeWithFst (,) - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) - -{-# INLINE shortestAllAny #-} -shortestAllAny :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -shortestAllAny value = - Stream.parse - (PR.shortest - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) - -{-# INLINE longestAllAny #-} -longestAllAny :: Monad m - => Int -> Stream m Int -> m (Either ParseError ()) -longestAllAny value = - Stream.parse - (PR.longest - (PR.dropWhile (<= value)) - (PR.dropWhile (<= value)) - ) --} - -------------------------------------------------------------------------------- --- Spanning -------------------------------------------------------------------------------- - -{-# INLINE span #-} -span :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) -span value = Stream.parse (PR.span (<= (value `div` 2)) Fold.drain Fold.drain) - -{-# INLINE spanBy #-} -spanBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) -spanBy value = - Stream.parse (PR.spanBy (\_ i -> i <= (value `div` 2)) Fold.drain Fold.drain) - -{-# INLINE spanByRolling #-} -spanByRolling :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) -spanByRolling value = - Stream.parse (PR.spanByRolling (\_ i -> i <= value `div` 2) Fold.drain Fold.drain) - -parseManyChunksOfSum :: Int -> Handle -> IO Int -parseManyChunksOfSum n inh = - Stream.fold Fold.length - $ Stream.parseMany - (PR.fromFold $ Fold.take n Fold.sum) - (Stream.unfold Handle.reader inh) - -------------------------------------------------------------------------------- --- Parsers in which -fspec-constr-recursive=16 is problematic 
-------------------------------------------------------------------------------- - --- XXX -fspec-constr-recursive=16 makes GHC go beserk when compiling these. --- We need to fix GHC so that we can have better control over that option or do --- not have to rely on it. --- -{-# INLINE lookAhead #-} -lookAhead :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -lookAhead value = - Stream.parse (PR.lookAhead (PR.takeWhile (<= value) Fold.drain) $> ()) - --- XXX The timing of this increased 3x after the stepify extract changes. -{-# INLINE sequenceA_ #-} -sequenceA_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sequenceA_ value = - Stream.parse (F.sequenceA_ $ replicate value (PR.satisfy (> 0))) - --- quadratic complexity -{-# INLINE sequenceA #-} -sequenceA :: Monad m => Int -> Stream m Int -> m Int -sequenceA value xs = do - x <- Stream.parse (TR.sequenceA (replicate value (PR.satisfy (> 0)))) xs - return $ length x - --- quadratic complexity -{-# INLINE sequence #-} -sequence :: Monad m => Int -> Stream m Int -> m Int -sequence value xs = do - x <- Stream.parse (TR.sequence (replicate value (PR.satisfy (> 0)))) xs - return $ length x - -{-# INLINE sequence_ #-} -sequence_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sequence_ value xs = - Stream.parse (foldr f (return ()) (replicate value (PR.takeBetween 0 1 Fold.drain))) xs - - where - - {-# INLINE f #-} - f m k = m >>= (\_ -> k) - --- choice using the "Alternative" instance with direct style parser type has --- quadratic performance complexity. 
--- -{-# INLINE choiceAsum #-} -choiceAsum :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -choiceAsum value = - Stream.parse (F.asum (replicate value (PR.satisfy (< 0))) - AP.<|> PR.satisfy (> 0)) - -{- -{-# INLINE choice #-} -choice :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -choice value = - Stream.parse - (PR.choice (replicate value (PR.satisfy (< 0))) AP.<|> PR.satisfy (> 0)) --} - -------------------------------------------------------------------------------- --- Parsing with unfolds -------------------------------------------------------------------------------- - -{-# INLINE parseManyUnfoldArrays #-} -parseManyUnfoldArrays :: Int -> [Array.Array Int] -> IO () -parseManyUnfoldArrays count arrays = do - let src = Producer.source (Just (Producer.OuterLoop arrays)) - let parser = PR.fromFold (Fold.take count Fold.drain) - let readSrc = - Producer.producer - $ Producer.concat Producer.fromList Array.producer - let streamParser = - Producer.simplify (Producer.parseMany parser readSrc) - Stream.fold Fold.drain $ Stream.unfold streamParser src - -------------------------------------------------------------------------------- --- Stream transformation -------------------------------------------------------------------------------- - -{-# INLINE parseMany #-} -parseMany :: Monad m => Int -> Stream m Int -> m () -parseMany n = - Stream.fold Fold.drain - . fmap getSum - . Stream.catRights . Stream.parseMany (PR.fromFold $ Fold.take n Fold.mconcat) - . fmap Sum - -{-# INLINE parseManyGroupBy #-} -parseManyGroupBy :: Monad m => (Int -> Int -> Bool) -> Stream m Int -> m () -parseManyGroupBy cmp = - Stream.fold Fold.drain . Stream.parseMany (PR.groupBy cmp Fold.drain) - -{-# INLINE parseManyGroupsRolling #-} -parseManyGroupsRolling :: Monad m => Bool -> Stream m Int -> m () -parseManyGroupsRolling b = - Stream.fold Fold.drain - . 
Stream.parseMany (PR.groupByRolling (\_ _ -> b) Fold.drain) - -{-# INLINE parseManyGroupsRollingEither #-} -parseManyGroupsRollingEither :: Monad m => - (Int -> Int -> Bool) -> Int -> m () -parseManyGroupsRollingEither cmp value = do - sourceUnfoldrM value 1 - & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) - & Stream.fold Fold.drain - -{-# INLINE parseManyGroupsRollingEitherAlt #-} -parseManyGroupsRollingEitherAlt :: Monad m => - (Int -> Int -> Bool) -> Int -> m () -parseManyGroupsRollingEitherAlt cmp value = do - sourceUnfoldrM value 1 - -- Make the input unsorted. - & fmap (\x -> if even x then x + 2 else x) - & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) - & Stream.fold Fold.drain - -{-# INLINE parseIterate #-} -parseIterate :: Monad m => Int -> Stream m Int -> m () -parseIterate n = - Stream.fold Fold.drain - . fmap getSum - . Stream.catRights - . Stream.parseIterate - (PR.fromFold . Fold.take n . Fold.sconcat) - (Sum 0) - . fmap Sum - -{-# INLINE concatSequence #-} -concatSequence :: Monad m => Stream m Int -> m (Either ParseError ()) -concatSequence = - Stream.parse $ PR.sequence (Stream.repeat PR.one) Fold.drain - -------------------------------------------------------------------------------- --- Benchmarks -------------------------------------------------------------------------------- - -instance NFData ParseError where - {-# INLINE rnf #-} - rnf (ParseError x) = rnf x - -benchmarks :: Int -> BenchEnv -> [Array.Array Int] -> [(SpaceComplexity, Benchmark)] -benchmarks value env arrays = - [ (SpaceO_1, benchIOSink value "takeBetween" $ takeBetween value) - , (SpaceO_1, benchIOSink value "takeWhile" $ takeWhile value) - , (SpaceO_1, benchIOSink value "takeWhileP" $ takeWhileP value) - , (SpaceO_1, benchIOSink value "takeP" $ takeP value) - , (SpaceO_1, benchIOSink value "dropWhile" $ dropWhile value) - , (SpaceO_1, benchIOSink value "takeBeginBy" $ takeBeginBy value) - , (SpaceO_1, benchIOSink value 
"takeEndBy_" $ takeEndBy_ value) - , (SpaceO_1, benchIOSink value "groupBy" $ groupBy) - , (SpaceO_1, benchIOSink value "groupByRolling" $ groupByRolling) - , (SpaceO_1, benchIOSink value "wordBy" $ wordBy value) - , (SpaceO_1, benchIOSink value "sepBy (words)" sepByWords) - , (SpaceO_1, benchIOSink value "sepByAll (words)" sepByAllWords) - , (SpaceO_1, benchIOSink value "sepBy1 (words)" sepByWords1) - , (SpaceO_1, benchIOSink value "deintercalate" $ deintercalate value) - , (SpaceO_1, benchIOSink value "deintercalate1" $ deintercalate1 value) - , (SpaceO_1, benchIOSink value "deintercalateAll" $ deintercalateAll value) - -- Applicative and Monad - , (SpaceO_1, benchIOSink value "splitAp2" $ splitAp2 value) - , (SpaceO_1, benchIOSink value "splitAp4" $ splitAp4 value) - , (SpaceO_1, benchIOSink value "splitAp8" $ splitAp8 value) - , (SpaceO_1, benchIOSink value "splitApBefore" $ splitApBefore value) - , (SpaceO_1, benchIOSink value "splitApAfter" $ splitApAfter value) - , (SpaceO_1, benchIOSink value "splitWith2" $ splitWith2 value) - , (SpaceO_1, benchIOSink value "span" $ span value) - , (SpaceO_1, benchIOSink value "spanBy" $ spanBy value) - , (SpaceO_1, benchIOSink value "spanByRolling" $ spanByRolling value) - , (SpaceO_1, benchIOSink value "monad2" $ monad value) - , (SpaceO_1, benchIOSink value "monad4" $ monad4 value) - , (SpaceO_1, benchIOSink value "monad8" $ monad8 value) - -- Alternative - , (SpaceO_1, benchIOSink value "alt2parseMany" $ altSmall value) - , (SpaceO_1, benchIOSink value "alt2" $ alt2 value) - , (SpaceO_1, benchIOSink value "alt4" $ alt4 value) - , (SpaceO_1, benchIOSink value "alt8" $ alt8 value) - , (SpaceO_1, benchIOSink value "alt16" $ alt16 value) - , (SpaceO_1, benchIOSink value "many" many) - , (SpaceO_1, benchIOSink value "many (wordBy even)" $ manyWordByEven) - , (SpaceO_1, benchIOSink value "some" some) - , (SpaceO_1, benchIOSink value "manyTill" $ manyTill value) - , (SpaceO_1, benchIOSink value "parseMany" $ parseMany value) - 
, (SpaceO_1, benchIOSink value "parseMany (take 1)" (parseMany 1)) - , (SpaceO_1, benchIOSink value "parseMany (take all)" (parseMany value)) - , (SpaceO_1, benchIOSink value "parseMany (groupBy (<))" (parseManyGroupBy (<))) - , (SpaceO_1, benchIOSink value "parseMany (groupBy (==))" (parseManyGroupBy (==))) - , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (bound groups)" - $ parseManyGroupsRolling False) - , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (1 group)" - $ parseManyGroupsRolling True) - , (SpaceO_1, bench "parseMany groupRollingByEither (Left)" - $ nfIO $ parseManyGroupsRollingEitherLeft) - , (SpaceO_1, bench "parseMany groupRollingByEither (Right)" - $ nfIO $ parseManyGroupsRollingEitherRight) - , (SpaceO_1, bench "parseMany groupRollingByEither (Alternating)" - $ nfIO $ parseManyGroupsRollingEitherAlt1) - , (SpaceO_1, benchIOSink value "parseIterate (take 1)" (parseIterate 1)) - , (SpaceO_1, benchIOSink value "parseIterate (take all)" (parseIterate value)) - , (SpaceO_1, benchIOSink value "concatSequence" concatSequence) - {- - , benchIOSink value "tee" $ teeAllAny value - , benchIOSink value "teeFst" $ teeFstAllAny value - , benchIOSink value "shortest" $ shortestAllAny value - , benchIOSink value "longest" $ longestAllAny value - -} - , (SpaceO_1, benchIOSink value "streamEqBy" (streamEqBy value)) - , (SpaceO_1, mkBench ("parseMany (Fold.take " ++ show (bigSize env) ++ " Fold.sum)") env - $ \inh _ -> noinline parseManyChunksOfSum (bigSize env) inh) - , (SpaceO_1, mkBench "parseMany (Fold.take 1 Fold.sum)" env - $ \inh _ -> inline parseManyChunksOfSum 1 inh) - , (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take all" - $ nfIO $ parseManyUnfoldArrays value arrays) - , (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take 1" - $ nfIO $ parseManyUnfoldArrays 1 arrays) - , (HeapO_n, benchIOSink value "takeEQ" $ takeEQ value) - , (HeapO_n, benchIOSink value "takeGE" $ takeGE value) - - -- lookahead benchmark holds the entire input till 
end - , (HeapO_n, benchIOSink value "lookAhead" $ lookAhead value) - - -- o-n-heap because of backtracking - , (HeapO_n, benchIOSrc sourceEscapedFrames value "takeFramedByEsc_" - $ takeFramedByEsc_ value) - - -- non-linear time complexity (parserD) - , (HeapO_n, benchIOSink value "split_" $ split_ value) - -- XXX Takes lot of space when run on a long stream, why? - , (HeapO_n, benchIOSink value "monad16" $ monad16 value) - - -- These show non-linear time complexity. - -- They accumulate the results in a list. - , (HeapO_n, benchIOSink value "sepBy1" sepBy1) - , (HeapO_n, benchIOSink value "manyAlt" manyAlt) - , (HeapO_n, benchIOSink value "someAlt" someAlt) - , (HeapO_n, benchIOSink value "listEqBy" (listEqBy value)) - , (SpaceO_n, benchIOSink value "sequenceA/100" $ sequenceA (value `div` 100)) - , (SpaceO_n, benchIOSink value "sequenceA_/100" $ sequenceA_ (value `div` 100)) - , (SpaceO_n, benchIOSink value "sequence/100" $ sequence (value `div` 100)) - , (SpaceO_n, benchIOSink value "sequence_/100" $ sequence_ (value `div` 100)) - , (SpaceO_n, benchIOSink value "choice (asum)/100" $ choiceAsum (value `div` 100)) - -- , benchIOSink value "choice/100" $ choice (value `div` 100) - ] - where - - {-# NOINLINE parseManyGroupsRollingEitherLeft #-} - parseManyGroupsRollingEitherLeft = parseManyGroupsRollingEither (<) value - - {-# NOINLINE parseManyGroupsRollingEitherRight #-} - parseManyGroupsRollingEitherRight = parseManyGroupsRollingEither (>) value - - {-# NOINLINE parseManyGroupsRollingEitherAlt1 #-} - parseManyGroupsRollingEitherAlt1 = - parseManyGroupsRollingEitherAlt (>) value + alloc value = Stream.fold Fold.toList $ Array.chunksOf 100 $ streamUnfoldrM value 0 + + allBenchmarks env arrays value = + let allBenches = benchmarkList value env arrays + get x = map snd $ filter ((==) x . 
fst) allBenches + o_1_space = get SpaceO_1 + o_n_heap = get HeapO_n + o_n_space = get SpaceO_n + in + [ bgroup (o_1_space_prefix moduleName) o_1_space + , bgroup (o_n_heap_prefix moduleName) o_n_heap + , bgroup (o_n_space_prefix moduleName) o_n_space + ] +#else + -- Enable FUSION_CHECK macro at the beginning of the file + -- Enable one benchmark below, and run the benchmark + -- Check the .dump-simpl output + let value = 100000 + -- let input = streamUnfoldrM value 1 + -- manyTill value input + -- deintercalate value input + -- deintercalate1 value input + -- deintercalateAll value input + -- sepByWords input + -- sepByAllWords input + -- sepBy1 input + -- sepByWords1 input + takeFramedByEsc_ value (sourceEscapedFrames value 1) + return () +#endif diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Alternative.hs b/benchmark/Streamly/Benchmark/Data/Parser/Alternative.hs new file mode 100644 index 0000000000..0f930308b8 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Alternative.hs @@ -0,0 +1,264 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Alternative +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Alternative + ( + benchmarks + ) where + +import Control.Applicative ((<|>)) +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Fold (Fold(..)) +import Streamly.Internal.Data.Parser + (ParseError(..), Parser(..), Initial(..), Step(..), Final(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Control.Applicative as AP +import qualified Data.Foldable as F +import qualified Streamly.Internal.Data.Fold as Fold +import 
qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common + +------------------------------------------------------------------------------- +-- Parsers +------------------------------------------------------------------------------- + +{-# INLINE manyWordByEven #-} +manyWordByEven :: Monad m => Stream m Int -> m (Either ParseError ()) +manyWordByEven = Stream.parse (PR.many (PR.wordBy even Fold.drain) Fold.drain) + +{-# INLINE many #-} +many :: Monad m => Stream m Int -> m (Either ParseError Int) +many = Stream.parse (PR.many (PR.satisfy (> 0)) Fold.length) + +{-# INLINE some #-} +some :: Monad m => Stream m Int -> m (Either ParseError Int) +some = Stream.parse (PR.some (PR.satisfy (> 0)) Fold.length) + +{-# INLINE manyAlt #-} +manyAlt :: Monad m => Stream m Int -> m Int +manyAlt xs = do + x <- Stream.parse (AP.many (PR.satisfy (> 0))) xs + return $ Prelude.length x + +{-# INLINE someAlt #-} +someAlt :: Monad m => Stream m Int -> m Int +someAlt xs = do + x <- Stream.parse (AP.some (PR.satisfy (> 0))) xs + return $ Prelude.length x + +-- XXX dropWhile with applicative does not fuse +-- PR.dropWhile (<= (value * 1 `div` 4)) *> PR.die "alt" +{-# INLINE takeWhileFail #-} +takeWhileFail :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeWhileFail predicate (Fold fstep finitial _ ffinal) = + Parser step initial extract + + where + + initial = do + res <- finitial + return $ case res of + Fold.Partial s -> IPartial s + Fold.Done b -> IDone b + + step s a = + if predicate a + then do + fres <- fstep s a + return + $ case fres of + Fold.Partial s1 -> SPartial 1 s1 + Fold.Done b -> SDone 1 b + else return $ SError "fail" + + extract s = fmap (FDone 0) (ffinal s) + +{-# INLINE alt2 #-} +alt2 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt2 value = + Stream.parse + (PR.alt + (takeWhileFail (<= (value `div` 2)) Fold.drain) + 
(PR.dropWhile (<= value)) + ) + +{- HLINT ignore "Evaluate"-} +{-# INLINE alt4 #-} +alt4 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt4 value = + Stream.parse + ( takeWhileFail (<= (value * 1 `div` 4)) Fold.drain + <|> takeWhileFail (<= (value * 2 `div` 4)) Fold.drain + <|> takeWhileFail (<= (value * 3 `div` 4)) Fold.drain + <|> PR.dropWhile (<= value) + ) + +{-# INLINE alt8 #-} +alt8 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt8 value = + Stream.parse + ( takeWhileFail (<= (value * 1 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 2 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 3 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 4 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 5 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 6 `div` 8)) Fold.drain + <|> takeWhileFail (<= (value * 7 `div` 8)) Fold.drain + <|> PR.dropWhile (<= value) + ) + +{-# INLINE alt16 #-} +alt16 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +alt16 value = + Stream.parse + ( takeWhileFail (<= (value * 1 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 2 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 3 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 4 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 5 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 6 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 8 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 9 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 10 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 11 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 12 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 13 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 14 `div` 16)) Fold.drain + <|> takeWhileFail (<= (value * 15 `div` 16)) Fold.drain + <|> PR.dropWhile (<= value) + ) + +{-# INLINE altSmall #-} +altSmall :: Monad m + => Int -> Stream m Int -> m () +altSmall value = + 
Stream.fold Fold.drain . + Stream.parseMany + (PR.alt + (PR.satisfy (>= value) *> PR.die "alt") + (PR.satisfy (<= value)) + ) + +{- +{-# INLINE teeAllAny #-} +teeAllAny :: Monad m + => Int -> Stream m Int -> m ((), ()) +teeAllAny value = + Stream.parse + (PR.teeWith (,) + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) + +{-# INLINE teeFstAllAny #-} +teeFstAllAny :: Monad m + => Int -> Stream m Int -> m ((), ()) +teeFstAllAny value = + Stream.parse + (PR.teeWithFst (,) + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) + +{-# INLINE shortestAllAny #-} +shortestAllAny :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +shortestAllAny value = + Stream.parse + (PR.shortest + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) + +{-# INLINE longestAllAny #-} +longestAllAny :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +longestAllAny value = + Stream.parse + (PR.longest + (PR.dropWhile (<= value)) + (PR.dropWhile (<= value)) + ) +-} + +------------------------------------------------------------------------------- +-- Choice +------------------------------------------------------------------------------- + +-- choice using the "Alternative" instance with direct style parser type has +-- quadratic performance complexity. 
+-- +{-# INLINE choiceAsum #-} +choiceAsum :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) +choiceAsum value = + Stream.parse (F.asum (replicate value (PR.satisfy (< 0))) + AP.<|> PR.satisfy (> 0)) + +{- +{-# INLINE choice #-} +choice :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) +choice value = + Stream.parse + (PR.choice (replicate value (PR.satisfy (< 0))) AP.<|> PR.satisfy (> 0)) +-} + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Alternative + (SpaceO_1, benchIOSink value "alt2parseMany" $ altSmall value) + , (SpaceO_1, benchIOSink value "alt2" $ alt2 value) + , (SpaceO_1, benchIOSink value "alt4" $ alt4 value) + , (SpaceO_1, benchIOSink value "alt8" $ alt8 value) + , (SpaceO_1, benchIOSink value "alt16" $ alt16 value) + + -- O_n as they accumulate the results in a list. + , (HeapO_n, benchIOSink value "manyAlt" manyAlt) + , (HeapO_n, benchIOSink value "someAlt" someAlt) + , (SpaceO_n, benchIOSink value "choice (asum)/100" $ choiceAsum (value `div` 100)) + -- , benchIOSink value "choice/100" $ choice (value `div` 100) + + -- Sequential Repetition + -- XXX requires @-fspec-constr-recursive=12@. 
+ , (SpaceO_1, benchIOSink value "many" many) + , (SpaceO_1, benchIOSink value "many (wordBy even)" manyWordByEven) + , (SpaceO_1, benchIOSink value "some" some) + + {- + , benchIOSink value "tee" $ teeAllAny value + , benchIOSink value "teeFst" $ teeFstAllAny value + , benchIOSink value "shortest" $ shortestAllAny value + , benchIOSink value "longest" $ longestAllAny value + -} + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Applicative.hs b/benchmark/Streamly/Benchmark/Data/Parser/Applicative.hs new file mode 100644 index 0000000000..360f016814 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Applicative.hs @@ -0,0 +1,187 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Applicative +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Applicative + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Data.Foldable as F +import qualified Data.Traversable as TR +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common +import Prelude hiding (sequence, sequence_, sequenceA) + +{-# INLINE splitAp2 #-} +splitAp2 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ((), ())) +splitAp2 value = + Stream.parse + ((,) + <$> PR.dropWhile (<= (value `div` 2)) + <*> PR.dropWhile (<= value) + ) + +{- HLINT ignore "Evaluate"-} +{-# INLINE splitAp4 #-} 
+splitAp4 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitAp4 value = + Stream.parse + ( (\() () () () -> ()) + <$> PR.dropWhile (<= (value * 1 `div` 4)) + <*> PR.dropWhile (<= (value * 2 `div` 4)) + <*> PR.dropWhile (<= (value * 3 `div` 4)) + <*> PR.dropWhile (<= value) + ) + +{-# INLINE splitAp8 #-} +splitAp8 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitAp8 value = + Stream.parse + ( (\() () () () () () () () -> ()) + <$> PR.dropWhile (<= (value * 1 `div` 8)) + <*> PR.dropWhile (<= (value * 2 `div` 8)) + <*> PR.dropWhile (<= (value * 3 `div` 8)) + <*> PR.dropWhile (<= (value * 4 `div` 8)) + <*> PR.dropWhile (<= (value * 5 `div` 8)) + <*> PR.dropWhile (<= (value * 6 `div` 8)) + <*> PR.dropWhile (<= (value * 7 `div` 8)) + <*> PR.dropWhile (<= value) + ) + +{-# INLINE splitApBefore #-} +splitApBefore :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitApBefore value = + Stream.parse + ( PR.dropWhile (<= (value `div` 2)) + *> PR.dropWhile (<= value) + ) + +{-# INLINE splitApAfter #-} +splitApAfter :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +splitApAfter value = + Stream.parse + ( PR.dropWhile (<= (value `div` 2)) + <* PR.dropWhile (<= value) + ) + +{-# INLINE splitWith2 #-} +splitWith2 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ((), ())) +splitWith2 value = + Stream.parse + (PR.splitWith (,) + (PR.dropWhile (<= (value `div` 2))) + (PR.dropWhile (<= value)) + ) + +{-# INLINE split_ #-} +split_ :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +split_ value = + Stream.parse + (PR.split_ + (PR.dropWhile (<= (value `div` 2))) + (PR.dropWhile (<= value)) + ) + +------------------------------------------------------------------------------- +-- +------------------------------------------------------------------------------- + +-- XXX The timing of this increased 3x after the stepify extract changes. 
+{-# INLINE sequenceA_ #-} +sequenceA_ :: Monad m => Int -> Int -> m (Either ParseError ()) +sequenceA_ value = +{- HLINT ignore "Use replicateM_"-} + Stream.parse (F.sequenceA_ $ replicate value (PR.satisfy (> 0))) + . streamUnfoldrM value + +-- quadratic complexity +{-# INLINE sequenceA #-} +sequenceA :: Monad m => Int -> Int -> m Int +sequenceA value n = do + x <- Stream.parse (TR.sequenceA (replicate value (PR.satisfy (> 0)))) + (streamUnfoldrM value n) + return $ length x + +-- quadratic complexity +{-# INLINE sequence #-} +sequence :: Monad m => Int -> Int -> m Int +sequence value n = do + x <- Stream.parse (TR.sequence (replicate value (PR.satisfy (> 0)))) + (streamUnfoldrM value n) + return $ length x + +{-# INLINE sequence_ #-} +sequence_ :: Monad m => Int -> Int -> m (Either ParseError ()) +sequence_ value n = + Stream.parse (foldr f (return ()) (replicate value (PR.takeBetween 0 1 Fold.drain))) + (streamUnfoldrM value n) + + where + + {-# INLINE f #-} + f m k = m >> k + +{-# INLINE concatSequence #-} +concatSequence :: Monad m => Stream m Int -> m (Either ParseError ()) +concatSequence = + Stream.parse $ PR.sequence (Stream.repeat PR.one) Fold.drain + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Applicative + (SpaceO_1, benchIOSink value "splitAp2" $ splitAp2 value) + , (SpaceO_1, benchIOSink value "splitAp4" $ splitAp4 value) + , (SpaceO_1, benchIOSink value "splitAp8" $ splitAp8 value) + , (SpaceO_1, benchIOSink value "splitApBefore" $ splitApBefore value) + , (SpaceO_1, benchIOSink value "splitApAfter" $ splitApAfter value) + , (SpaceO_1, benchIOSink value "splitWith2" $ splitWith2 value) + -- non-linear time complexity (parserD) + , (HeapO_n, benchIOSink 
value "split_" $ split_ value) + + -- Sequential Collection + -- Accumulate the results in a list. + , (SpaceO_n, benchIOSink1 "sequenceA/100" $ sequenceA (value `div` 100)) + , (SpaceO_n, benchIOSink1 "sequenceA_/100" $ sequenceA_ (value `div` 100)) + , (SpaceO_n, benchIOSink1 "sequence/100" $ sequence (value `div` 100)) + , (SpaceO_n, benchIOSink1 "sequence_/100" $ sequence_ (value `div` 100)) + , (SpaceO_1, benchIOSink value "concatSequence" concatSequence) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Common.hs b/benchmark/Streamly/Benchmark/Data/Parser/Common.hs new file mode 100644 index 0000000000..de6e91c64b --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Common.hs @@ -0,0 +1,45 @@ +-- | +-- Module : Streamly.Benchmark.Data.Parser.Common +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Common + ( + benchIOSink + , benchIOSrc + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Stream (Stream) +import System.Random (randomRIO) + +import Test.Tasty.Bench +import Streamly.Benchmark.Common + +-- XXX use benchIOSink1 instead, this is to be removed +-- | Takes a fold method, and uses it with a default source. +{-# INLINE benchIOSink #-} +benchIOSink + :: NFData b + => Int -> String -> (Stream IO Int -> IO b) -> Benchmark +benchIOSink value name f = + bench name $ nfIO $ randomRIO (1,1) >>= f . streamUnfoldrM value + +-- XXX use benchIOSrc1 instead, this is to be removed +{-# INLINE benchIOSrc #-} +benchIOSrc + :: NFData b + => (Int -> Int -> Stream IO a) + -> Int + -> String + -> (Stream IO a -> IO b) + -> Benchmark +benchIOSrc src value name f = + bench name $ nfIO $ randomRIO (1,1) >>= f . 
src value diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Groups.hs b/benchmark/Streamly/Benchmark/Data/Parser/Groups.hs new file mode 100644 index 0000000000..cb85971476 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Groups.hs @@ -0,0 +1,225 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Groups +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Groups + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Functor (($>)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Internal.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common +import Prelude hiding (takeWhile, dropWhile, span) + +------------------------------------------------------------------------------- +-- Utilities +------------------------------------------------------------------------------- + +-- | Generates something like this: { { \{ \{ } }. The stream consists of +-- three parts, the first part is contains a sequence of `{`. The second part +-- contains a sequence pf escaped values `\{`. The third part contains a +-- sequence of `}`. 
+{-# INLINE sourceEscapedFrames #-} +sourceEscapedFrames :: + Monad m + => Int + -> Int + -> Stream m Char +sourceEscapedFrames value = Stream.unfoldrM step + + where + + bs = '\\' + cbOpen = '{' + cbClose = '}' + value1 = value `div` 4 + + step cnt + | cnt > 4 * value1 = return Nothing + | cnt <= value1 = return $ Just (cbOpen, cnt + 1) + | cnt > 3 * value1 = return $ Just (cbClose, cnt + 1) + | otherwise = + return + $ Just + $ if (cnt - value1) `mod` 2 == 1 + then (bs, cnt + 1) + else (cbOpen, cnt + 1) + +------------------------------------------------------------------------------- +-- Parsers +------------------------------------------------------------------------------- + +{-# INLINE takeBetween #-} +takeBetween :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeBetween value = Stream.parse (PR.takeBetween 0 value Fold.drain) + +{-# INLINE takeEQ #-} +takeEQ :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeEQ value = Stream.parse (PR.takeEQ value Fold.drain) + +{-# INLINE takeGE #-} +takeGE :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeGE value = Stream.parse (PR.takeGE value Fold.drain) + +{-# INLINE dropWhile #-} +dropWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +dropWhile value = Stream.parse (PR.dropWhile (<= value)) + +{-# INLINE takeBeginBy #-} +takeBeginBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +takeBeginBy value stream = do + let stream2 = value `Stream.cons` stream + Stream.parse (PR.takeBeginBy (== value) Fold.drain) stream2 + +takeFramedByEsc_ :: Monad m => Int -> Stream m Char -> m (Either ParseError ()) +takeFramedByEsc_ _ = Stream.parse parser + + where + + isEsc = (== '\\') + isBegin = (== '{') + isEnd = (== '}') + + parser = PR.takeFramedByEsc_ isEsc isBegin isEnd Fold.drain + +{-# INLINE listEqBy #-} +listEqBy :: Int -> Stream IO Int -> IO (Either ParseError [Int]) +listEqBy len = Stream.parse (PR.listEqBy (==) [1 .. 
len]) + +{-# INLINE streamEqBy #-} +streamEqBy :: Int -> Stream IO Int -> IO (Either ParseError ()) +streamEqBy len = Stream.parse (PR.streamEqBy (==) (Stream.enumerateFromTo 1 len)) + +{-# INLINE takeWhile #-} +takeWhile :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +takeWhile value = Stream.parse (PR.takeWhile (<= value) Fold.drain) + +takeWhileP :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +takeWhileP value = + Stream.parse (PR.takeWhileP (<= value) (PR.takeWhile (<= value - 1) Fold.drain)) + +{-# INLINE takeP #-} +takeP :: Monad m => Int -> Stream m a -> m (Either ParseError ()) +takeP value = Stream.parse (PR.takeP value (PR.fromFold Fold.drain)) + +{-# INLINE groupBy #-} +groupBy :: Monad m => Stream m Int -> m (Either ParseError ()) +groupBy = Stream.parse (PR.groupBy (<=) Fold.drain) + +{-# INLINE groupByRolling #-} +groupByRolling :: Monad m => Stream m Int -> m (Either ParseError ()) +groupByRolling = Stream.parse (PR.groupByRolling (<=) Fold.drain) + +{-# INLINE wordBy #-} +wordBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +wordBy value = Stream.parse (PR.wordBy (>= value) Fold.drain) + +{-# INLINE takeEndBy_ #-} +takeEndBy_ :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +takeEndBy_ value = Stream.parse (PR.takeEndBy_ (>= value) (PR.fromFold Fold.drain)) + +------------------------------------------------------------------------------- +-- Spanning +------------------------------------------------------------------------------- + +{-# INLINE span #-} +span :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) +span value = Stream.parse (PR.span (<= (value `div` 2)) Fold.drain Fold.drain) + +{-# INLINE spanBy #-} +spanBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ((), ())) +spanBy value = + Stream.parse (PR.spanBy (\_ i -> i <= (value `div` 2)) Fold.drain Fold.drain) + +{-# INLINE spanByRolling #-} +spanByRolling :: Monad m => Int -> Stream m Int -> m (Either 
ParseError ((), ())) +spanByRolling value = + Stream.parse (PR.spanByRolling (\_ i -> i <= value `div` 2) Fold.drain Fold.drain) + +------------------------------------------------------------------------------- +-- +------------------------------------------------------------------------------- + +{-# INLINE lookAhead #-} +lookAhead :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +lookAhead value = + Stream.parse (PR.lookAhead (PR.takeWhile (<= value) Fold.drain) $> ()) + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- lookahead sequence length + -- lookahead benchmark holds the entire input till end + (HeapO_n, benchIOSink value "lookAhead" $ lookAhead value) + + -- take sequence by length + , (SpaceO_1, benchIOSink value "takeBetween" $ takeBetween value) + -- XXX requires @-fspec-constr-recursive=12@. + , (HeapO_n, benchIOSink value "takeEQ" $ takeEQ value) + , (HeapO_n, benchIOSink value "takeGE" $ takeGE value) + , (SpaceO_1, benchIOSink value "takeP" $ takeP value) + + -- Match exact sequence + -- O_n because of the list accumulation + , (HeapO_n, benchIOSink value "listEqBy" (listEqBy value)) + , (SpaceO_1, benchIOSink value "streamEqBy" (streamEqBy value)) + + -- sequence matching a predicate + , (SpaceO_1, benchIOSink value "takeWhile" $ takeWhile value) + -- XXX requires @-fspec-constr-recursive=12@. 
+ , (SpaceO_1, benchIOSink value "takeWhileP" $ takeWhileP value) + , (SpaceO_1, benchIOSink value "dropWhile" $ dropWhile value) + + -- sequence begin/end by known elements + , (SpaceO_1, benchIOSink value "takeEndBy_" $ takeEndBy_ value) + , (SpaceO_1, benchIOSink value "takeBeginBy" $ takeBeginBy value) + -- XXX requires @-fspec-constr-recursive=12@. + , (SpaceO_1, benchIOSink value "wordBy" $ wordBy value) + + -- Group sequence by + , (SpaceO_1, benchIOSink value "groupBy" groupBy) + -- XXX requires @-fspec-constr-recursive=12@. + , (SpaceO_1, benchIOSink value "groupByRolling" groupByRolling) + + -- Framing + -- o-n-heap because of backtracking + , (HeapO_n, benchIOSrc sourceEscapedFrames value "takeFramedByEsc_" + $ takeFramedByEsc_ value) + + -- Spanning + , (SpaceO_1, benchIOSink value "span" $ span value) + , (SpaceO_1, benchIOSink value "spanBy" $ spanBy value) + , (SpaceO_1, benchIOSink value "spanByRolling" $ spanByRolling value) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Interleave.hs b/benchmark/Streamly/Benchmark/Data/Parser/Interleave.hs new file mode 100644 index 0000000000..583a539fc8 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Interleave.hs @@ -0,0 +1,120 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Interleave +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Interleave + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Streamly.Internal.Data.Fold as Fold +import qualified 
Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common + +------------------------------------------------------------------------------- +-- Parsers +------------------------------------------------------------------------------- + +{-# INLINE sepByWords #-} +sepByWords :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByWords = Stream.parse (wrds even Fold.drain) + where + wrds p = PR.sepBy (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) + +{-# INLINE sepByAllWords #-} +sepByAllWords :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByAllWords = Stream.parse (wrds even Fold.drain) + where + wrds p = PR.sepByAll (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) + +-- Returning a list to compare with the sepBy1 in ParserK +{-# INLINE sepBy1 #-} +sepBy1 :: Monad m => Stream m Int -> m (Either ParseError [Int]) +sepBy1 xs = do + Stream.parse (PR.sepBy1 (PR.satisfy odd) (PR.satisfy even) Fold.toList) xs + +{-# INLINE sepByWords1 #-} +sepByWords1 :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByWords1 = Stream.parse (wrds even Fold.drain) + where + wrds p = PR.sepBy1 (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) + +{-# INLINE deintercalate #-} +deintercalate :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +deintercalate _ = Stream.parse (partition even) + + where + + partition p = + PR.deintercalate + (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain + +{-# INLINE deintercalate1 #-} +deintercalate1 :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +deintercalate1 _ = Stream.parse (partition even) + + where + + partition p = + PR.deintercalate1 + (PR.takeWhile (not . 
p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain + +{-# INLINE deintercalateAll #-} +deintercalateAll :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +deintercalateAll _ = Stream.parse (partition even) + + where + + partition p = + PR.deintercalateAll + (PR.takeWhile (not . p) Fold.sum) (PR.takeWhile p Fold.sum) Fold.drain + +{-# INLINE manyTill #-} +manyTill :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) +manyTill value = + Stream.parse (PR.manyTill (PR.satisfy (> 0)) (PR.satisfy (== value)) Fold.length) + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Interleaved Repetition + (SpaceO_1, benchIOSink value "deintercalate" $ deintercalate value) + , (SpaceO_1, benchIOSink value "deintercalate1" $ deintercalate1 value) + , (SpaceO_1, benchIOSink value "deintercalateAll" $ deintercalateAll value) + + -- Accumulates the results in a list. 
+ , (HeapO_n, benchIOSink value "sepBy1" sepBy1) + , (SpaceO_1, benchIOSink value "sepBy1 (words)" sepByWords1) + , (SpaceO_1, benchIOSink value "sepBy (words)" sepByWords) + , (SpaceO_1, benchIOSink value "sepByAll (words)" sepByAllWords) + , (SpaceO_1, benchIOSink value "manyTill" $ manyTill value) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Monad.hs b/benchmark/Streamly/Benchmark/Data/Parser/Monad.hs new file mode 100644 index 0000000000..4de684e3ca --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Monad.hs @@ -0,0 +1,124 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Monad +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Monad + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Monoid (Sum(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) +import Test.Tasty.Bench (Benchmark) + +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Internal.Data.Stream as Stream + +import Streamly.Benchmark.Common +import Streamly.Benchmark.Data.Parser.Common + +{-# INLINE monad #-} +monad :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad value = + Stream.parse + $ do + PR.dropWhile (<= (value `div` 2)) + PR.dropWhile (<= value) + +{-# INLINE monad4 #-} +monad4 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad4 value = + Stream.parse $ do + PR.dropWhile (<= (value `div` 4)) + PR.dropWhile (<= (value `div` 2)) + PR.dropWhile (<= (value * 3 `div` 4)) + PR.dropWhile (<= value) + +{- HLINT 
ignore "Evaluate"-} +{-# INLINE monad8 #-} +monad8 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad8 value = + Stream.parse $ do + PR.dropWhile (<= (value * 1 `div` 8)) + PR.dropWhile (<= (value * 2 `div` 8)) + PR.dropWhile (<= (value * 3 `div` 8)) + PR.dropWhile (<= (value * 4 `div` 8)) + PR.dropWhile (<= (value * 5 `div` 8)) + PR.dropWhile (<= (value * 6 `div` 8)) + PR.dropWhile (<= (value * 7 `div` 8)) + PR.dropWhile (<= value) + +{-# INLINE monad16 #-} +monad16 :: Monad m + => Int -> Stream m Int -> m (Either ParseError ()) +monad16 value = + Stream.parse $ do + PR.dropWhile (<= (value * 1 `div` 16)) + PR.dropWhile (<= (value * 2 `div` 16)) + PR.dropWhile (<= (value * 3 `div` 16)) + PR.dropWhile (<= (value * 4 `div` 16)) + PR.dropWhile (<= (value * 5 `div` 16)) + PR.dropWhile (<= (value * 6 `div` 16)) + PR.dropWhile (<= (value * 7 `div` 16)) + PR.dropWhile (<= (value * 8 `div` 16)) + PR.dropWhile (<= (value * 9 `div` 16)) + PR.dropWhile (<= (value * 10 `div` 16)) + PR.dropWhile (<= (value * 11 `div` 16)) + PR.dropWhile (<= (value * 12 `div` 16)) + PR.dropWhile (<= (value * 13 `div` 16)) + PR.dropWhile (<= (value * 14 `div` 16)) + PR.dropWhile (<= (value * 15 `div` 16)) + PR.dropWhile (<= value) + +{-# INLINE parseIterate #-} +parseIterate :: Monad m => Int -> Stream m Int -> m () +parseIterate n = + Stream.fold Fold.drain + . fmap getSum + . Stream.catRights + . Stream.parseIterate + (PR.fromFold . Fold.take n . Fold.sconcat) + (Sum 0) + . 
fmap Sum + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- Monad + (SpaceO_1, benchIOSink value "monad2" $ monad value) + , (SpaceO_1, benchIOSink value "monad4" $ monad4 value) + , (SpaceO_1, benchIOSink value "monad8" $ monad8 value) + -- XXX Takes lot of space when run on a long stream, why? + , (HeapO_n, benchIOSink value "monad16" $ monad16 value) + + -- parseIterate + , (SpaceO_1, benchIOSink value "parseIterate (take 1)" (parseIterate 1)) + , (SpaceO_1, benchIOSink value "parseIterate (take all)" (parseIterate value)) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Producer.hs b/benchmark/Streamly/Benchmark/Data/Parser/Producer.hs new file mode 100644 index 0000000000..739c275003 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Producer.hs @@ -0,0 +1,67 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Producer +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Producer + ( + benchmarks + ) where + +import Control.DeepSeq (NFData(..)) +import Streamly.Internal.Data.Parser (ParseError(..)) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream +import qualified Streamly.Internal.Data.Producer as Producer + +import Test.Tasty.Bench 
hiding (env) +import Streamly.Benchmark.Common + +------------------------------------------------------------------------------- +-- Parsing with unfolds +------------------------------------------------------------------------------- + +{-# INLINE parseManyUnfoldArrays #-} +parseManyUnfoldArrays :: Int -> [Array.Array Int] -> IO () +parseManyUnfoldArrays count arrays = do + let src = Producer.source (Just (Producer.OuterLoop arrays)) + let parser = PR.fromFold (Fold.take count Fold.drain) + let readSrc = + Producer.producer + $ Producer.concat Producer.fromList Array.producer + let streamParser = + Producer.simplify (Producer.parseMany parser readSrc) + Stream.fold Fold.drain $ Stream.unfold streamParser src + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [Array.Array Int] -> [(SpaceComplexity, Benchmark)] +benchmarks value arrays = + [ + -- parseMany Unfolds + (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take all" + $ nfIO $ parseManyUnfoldArrays value arrays) + , (SpaceO_1, bench "parseMany/Unfold/1000 arrays/take 1" + $ nfIO $ parseManyUnfoldArrays 1 arrays) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs b/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs new file mode 100644 index 0000000000..8ccf107f47 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Parser/Sequence.hs @@ -0,0 +1,152 @@ +#undef FUSION_CHECK +#ifdef FUSION_CHECK +{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} +#endif + +-- Compiling parseMany with higher values of spec-constr-recursive hogs a lot +-- of memory and takes too much time. Fusion plugin alleviates the problem +-- though. 
+{-# OPTIONS_GHC -fspec-constr-recursive=10 #-} + +-- | +-- Module : Streamly.Benchmark.Data.Parser.Sequence +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +module Streamly.Benchmark.Data.Parser.Sequence + ( + benchmarks + , benchmarksFileIO + ) where + +import Control.DeepSeq (NFData(..)) +import Data.Function ((&)) +import Data.Monoid (Sum(..)) +import GHC.Magic (inline) +import GHC.Magic (noinline) +import System.IO (Handle) +import Streamly.Internal.Data.Parser (ParseError(..)) +import Streamly.Internal.Data.Stream (Stream) + +import qualified Streamly.FileSystem.Handle as Handle +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Data.Stream as Stream + +import Test.Tasty.Bench hiding (env) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle +import Streamly.Benchmark.Data.Parser.Common + +------------------------------------------------------------------------------- +-- Stream transformation +------------------------------------------------------------------------------- + +{-# INLINE parseMany #-} +parseMany :: Monad m => Int -> Stream m Int -> m () +parseMany n = + Stream.fold Fold.drain + . fmap getSum + . Stream.catRights . Stream.parseMany (PR.fromFold $ Fold.take n Fold.mconcat) + . fmap Sum + +{-# INLINE parseManyGroupBy #-} +parseManyGroupBy :: Monad m => (Int -> Int -> Bool) -> Stream m Int -> m () +parseManyGroupBy cmp = + Stream.fold Fold.drain . Stream.parseMany (PR.groupBy cmp Fold.drain) + +{-# INLINE parseManyGroupsRolling #-} +parseManyGroupsRolling :: Monad m => Bool -> Stream m Int -> m () +parseManyGroupsRolling b = + Stream.fold Fold.drain + . 
Stream.parseMany (PR.groupByRolling (\_ _ -> b) Fold.drain) + +{-# INLINE parseManyGroupsRollingEither #-} +parseManyGroupsRollingEither :: Monad m => + (Int -> Int -> Bool) -> Int -> m () +parseManyGroupsRollingEither cmp value = do + streamUnfoldrM value 1 + & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) + & Stream.fold Fold.drain + +{-# INLINE parseManyGroupsRollingEitherAlt #-} +parseManyGroupsRollingEitherAlt :: Monad m => + (Int -> Int -> Bool) -> Int -> m () +parseManyGroupsRollingEitherAlt cmp value = do + streamUnfoldrM value 1 + -- Make the input unsorted. + & fmap (\x -> if even x then x + 2 else x) + & Stream.parseMany (PR.groupByRollingEither cmp Fold.drain Fold.drain) + & Stream.fold Fold.drain + +------------------------------------------------------------------------------- +-- Benchmarks +------------------------------------------------------------------------------- + +instance NFData ParseError where + {-# INLINE rnf #-} + rnf (ParseError x) = rnf x + +benchmarks :: Int -> [(SpaceComplexity, Benchmark)] +benchmarks value = + [ + -- parseMany + (SpaceO_1, benchIOSink value "parseMany" $ parseMany value) + , (SpaceO_1, benchIOSink value "parseMany (take 1)" (parseMany 1)) + , (SpaceO_1, benchIOSink value "parseMany (take all)" (parseMany value)) + , (SpaceO_1, benchIOSink value "parseMany (groupBy (<))" (parseManyGroupBy (<))) + -- requires -fspec-constr-recursive=10 + , (SpaceO_1, benchIOSink value "parseMany (groupBy (==))" (parseManyGroupBy (==))) + -- requires -fspec-constr-recursive=10 + , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (bound groups)" + $ parseManyGroupsRolling False) + , (SpaceO_1, benchIOSink value "parseMany groupRollingBy (1 group)" + $ parseManyGroupsRolling True) + , (SpaceO_1, bench "parseMany groupRollingByEither (Left)" + $ nfIO parseManyGroupsRollingEitherLeft) + , (SpaceO_1, bench "parseMany groupRollingByEither (Right)" + $ nfIO parseManyGroupsRollingEitherRight) + -- requires 
-fspec-constr-recursive=10 + , (SpaceO_1, bench "parseMany groupRollingByEither (Alternating)" + $ nfIO parseManyGroupsRollingEitherAlt1) + ] + + where + + {-# NOINLINE parseManyGroupsRollingEitherLeft #-} + parseManyGroupsRollingEitherLeft = parseManyGroupsRollingEither (<) value + + {-# NOINLINE parseManyGroupsRollingEitherRight #-} + parseManyGroupsRollingEitherRight = parseManyGroupsRollingEither (>) value + + {-# NOINLINE parseManyGroupsRollingEitherAlt1 #-} + parseManyGroupsRollingEitherAlt1 = + parseManyGroupsRollingEitherAlt (>) value + +------------------------------------------------------------------------------- +-- parseMany with FileIO +------------------------------------------------------------------------------- + +parseManyChunksOfSum :: Int -> Handle -> IO Int +parseManyChunksOfSum n inh = + Stream.fold Fold.length + $ Stream.parseMany + (PR.fromFold $ Fold.take n Fold.sum) + (Stream.unfold Handle.reader inh) + +benchmarksFileIO :: BenchEnv -> [(SpaceComplexity, Benchmark)] +benchmarksFileIO env = + [ + -- parseMany with file IO + (SpaceO_1, mkBench ("parseMany (Fold.take " ++ show (bigSize env) ++ " Fold.sum)") env + $ \inh _ -> noinline parseManyChunksOfSum (bigSize env) inh) + , (SpaceO_1, mkBench "parseMany (Fold.take 1 Fold.sum)" env + $ \inh _ -> inline parseManyChunksOfSum 1 inh) + ] diff --git a/benchmark/Streamly/Benchmark/Data/ParserMain.hs b/benchmark/Streamly/Benchmark/Data/ParserMain.hs deleted file mode 100644 index 611be9667f..0000000000 --- a/benchmark/Streamly/Benchmark/Data/ParserMain.hs +++ /dev/null @@ -1,79 +0,0 @@ -#undef FUSION_CHECK -#ifdef FUSION_CHECK -{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -dsuppress-all #-} -#endif - --- | --- Module : Streamly.Benchmark.Data.Parser --- Copyright : (c) 2020 Composewell Technologies --- --- License : BSD-3-Clause --- Maintainer : streamly@composewell.com - -{-# LANGUAGE CPP #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# OPTIONS_GHC -Wno-orphans 
#-} - -module Main - ( - main - ) where - -import Prelude hiding - (any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile, span) - -import qualified Streamly.Internal.Data.Array as Array -import qualified Streamly.Internal.Data.Fold as Fold -import qualified Streamly.Data.Stream as Stream - -import Test.Tasty.Bench hiding (env) -import Streamly.Benchmark.Common -import Streamly.Benchmark.Common.Handle -import Streamly.Benchmark.Data.Parser - -moduleName :: String -moduleName = "Data.Parser" - -------------------------------------------------------------------------------- --- Driver -------------------------------------------------------------------------------- - -main :: IO () -main = do -#ifndef FUSION_CHECK - env <- mkHandleBenchEnv - runWithCLIOptsEnv defaultStreamSize alloc (allBenchmarks env) - - where - - alloc value = Stream.fold Fold.toList $ Array.chunksOf 100 $ sourceUnfoldrM value 0 - - allBenchmarks env arrays value = - let allBenches = benchmarks value env arrays - get x = map snd $ filter ((==) x . 
fst) allBenches - o_1_space = get SpaceO_1 - o_n_heap = get HeapO_n - o_n_space = get SpaceO_n - in - [ bgroup (o_1_space_prefix moduleName) o_1_space - , bgroup (o_n_heap_prefix moduleName) o_n_heap - , bgroup (o_n_space_prefix moduleName) o_n_space - ] -#else - -- Enable FUSION_CHECK macro at the beginning of the file - -- Enable one benchmark below, and run the benchmark - -- Check the .dump-simpl output - let value = 100000 - -- let input = sourceUnfoldrM value 1 - -- manyTill value input - -- deintercalate value input - -- deintercalate1 value input - -- deintercalateAll value input - -- sepByWords input - -- sepByAllWords input - -- sepBy1 input - -- sepByWords1 input - takeFramedByEsc_ value (sourceEscapedFrames value 1) - return () -#endif diff --git a/benchmark/Streamly/Benchmark/Data/Stream.hs b/benchmark/Streamly/Benchmark/Data/Stream.hs index 120b2de6f6..f59a2515f2 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream.hs @@ -25,6 +25,7 @@ import qualified Stream.Generate as Generation import qualified Stream.Lift as Lift import qualified Stream.Reduce as NestedFold import qualified Stream.Split as Split +import qualified Stream.SplitChunks as SplitChunks import qualified Stream.Transform as Transformation import Streamly.Benchmark.Common @@ -61,6 +62,7 @@ main = do , Elimination.benchmarks moduleName size , Exceptions.benchmarks moduleName env size , Split.benchmarks moduleName env + , SplitChunks.benchmarks moduleName env , Transformation.benchmarks moduleName size , NestedFold.benchmarks moduleName size , Lift.benchmarks moduleName size diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Common.hs b/benchmark/Streamly/Benchmark/Data/Stream/Common.hs index 6ae127ca4b..d441e303a2 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Common.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Common.hs @@ -79,15 +79,12 @@ import Control.DeepSeq (NFData) import Control.Exception (try) import GHC.Exception 
(ErrorCall) import System.Random (randomRIO) +import Streamly.Internal.Data.Stream (Stream) import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Pipe as Pipe import qualified Streamly.Internal.Data.Scanl as Scanl import qualified Streamly.Internal.Data.Scanr as Scanr - - -import Streamly.Internal.Data.Stream (Stream) -import qualified Streamly.Internal.Data.Stream as D import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench @@ -107,7 +104,7 @@ append = Stream.append {-# INLINE append2 #-} append2 :: Monad m => Stream m a -> Stream m a -> Stream m a -append2 = D.append +append2 = Stream.append {-# INLINE drain #-} drain :: Monad m => Stream m a -> m () @@ -201,7 +198,7 @@ benchIO name f = bench name $ nfIO $ randomRIO (1,1) >>= f sourceConcatMapId :: (Monad m) => Int -> Int -> Stream m (Stream m Int) sourceConcatMapId value n = - Stream.fromList $ fmap (D.fromEffect . return) [n..n+value] + Stream.fromList $ fmap (Stream.fromEffect . 
return) [n..n+value] {-# INLINE apDiscardFst #-} apDiscardFst :: MonadAsync m => diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs b/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs index 2f21775fc9..c1bbd62e6d 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs @@ -35,7 +35,6 @@ import qualified Streamly.Internal.Data.Fold as Fold #ifdef INSPECTION import GHC.Types (SPEC(..)) import Test.Inspection -import qualified Streamly.Internal.Data.Stream as D #endif import Stream.Common @@ -581,7 +580,7 @@ eqByPure value n = eqBy' (sourceUnfoldr value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'eqByPure inspect $ 'eqByPure `hasNoType` ''SPEC -inspect $ 'eqByPure `hasNoType` ''D.Step +inspect $ 'eqByPure `hasNoType` ''S.Step #endif {-# INLINE eqInstance #-} @@ -603,7 +602,7 @@ cmpByPure value n = cmpBy' (sourceUnfoldr value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'cmpByPure inspect $ 'cmpByPure `hasNoType` ''SPEC -inspect $ 'cmpByPure `hasNoType` ''D.Step +inspect $ 'cmpByPure `hasNoType` ''S.Step #endif {-# INLINE ordInstance #-} @@ -642,7 +641,7 @@ eqBy value n = eqBy' (sourceUnfoldrM value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'eqBy inspect $ 'eqBy `hasNoType` ''SPEC -inspect $ 'eqBy `hasNoType` ''D.Step +inspect $ 'eqBy `hasNoType` ''S.Step #endif {-# INLINE cmpBy #-} @@ -652,7 +651,7 @@ cmpBy value n = cmpBy' (sourceUnfoldrM value n) #ifdef INSPECTION inspect $ hasNoTypeClasses 'cmpBy inspect $ 'cmpBy `hasNoType` ''SPEC -inspect $ 'cmpBy `hasNoType` ''D.Step +inspect $ 'cmpBy `hasNoType` ''S.Step #endif o_1_space_elimination_multi_stream :: Int -> [Benchmark] diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs b/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs index d669bc242d..8af6a8200d 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Expand.hs @@ -23,20 +23,19 @@ module 
Stream.Expand (benchmarks) where #ifdef INSPECTION import GHC.Types (SPEC(..)) import Test.Inspection - -import qualified Streamly.Internal.Data.Stream as D #endif -import qualified Stream.Common as Common -import qualified Streamly.Internal.Data.Unfold as UF - import Streamly.Data.Stream (Stream) import Streamly.Data.Unfold (Unfold) + +import qualified Stream.Common as Common +import qualified Streamly.Internal.Data.Unfold as UF import qualified Streamly.Internal.Data.Stream as S import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.Data.StreamK as StreamK + import Test.Tasty.Bench import Stream.Common import Streamly.Benchmark.Common @@ -218,7 +217,7 @@ unfoldSched outer inner n = inspect $ hasNoTypeClasses 'unfoldSched -- inspect $ 'unfoldSched `hasNoType` ''SPEC -- inspect $ 'unfoldSched `hasNoType` --- ''D.ConcatUnfoldInterleaveState +-- ''Stream.ConcatUnfoldInterleaveState #endif o_1_space_joining :: Int -> [Benchmark] @@ -328,7 +327,7 @@ unfoldEach outer inner start = drain $ #ifdef INSPECTION inspect $ hasNoTypeClasses 'unfoldEach -inspect $ 'unfoldEach `hasNoType` ''D.UnfoldEachState +inspect $ 'unfoldEach `hasNoType` ''Stream.UnfoldEachState inspect $ 'unfoldEach `hasNoType` ''SPEC #endif diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs b/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs index 4ba9fc1784..ff68396978 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Generate.hs @@ -17,28 +17,20 @@ module Stream.Generate (benchmarks) where import Control.DeepSeq (NFData(..)) import Control.Monad.IO.Class (MonadIO) import Data.Functor.Identity (Identity(..)) +import Streamly.Internal.Data.Stream (Stream) import Streamly.Internal.Data.Time.Units (AbsTime) import qualified GHC.Exts as GHC import qualified Streamly.Internal.Data.Fold 
as Fold - -import Stream.Common -import Streamly.Internal.Data.Stream (Stream) import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench +import Stream.Common import Streamly.Benchmark.Common import qualified Prelude import Prelude hiding (repeat, replicate, iterate) -------------------------------------------------------------------------------- --- Generation -------------------------------------------------------------------------------- - -toStreamD :: a -> a -toStreamD = id - ------------------------------------------------------------------------------- -- fromList ------------------------------------------------------------------------------- @@ -177,8 +169,8 @@ o_1_space_generation value = , benchIOSrc "fracFromTo" (sourceFracFromTo value) , benchIOSrc "fromList" (sourceFromList value) , benchIOSrc "fromListM" (sourceFromListM value) - , benchPureSrc "IsList.fromList" (toStreamD . sourceIsList value) - , benchPureSrc "IsString.fromString" (toStreamD . sourceIsString value) + , benchPureSrc "IsList.fromList" (sourceIsList value) + , benchPureSrc "IsString.fromString" (sourceIsString value) , benchIOSrc "enumerateFrom" (enumerateFrom value) , benchIOSrc "enumerateFromTo" (enumerateFromTo value) , benchIOSrc "enumerateFromThen" (enumerateFromThen value) diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs b/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs index 059e79d762..b034719a83 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Lift.hs @@ -16,11 +16,10 @@ import Control.Monad.State.Strict (StateT, get, put) import Data.Functor.Identity (Identity) import Stream.Common (sourceUnfoldr, sourceUnfoldrM, benchIOSrc) import System.Random (randomRIO) +import Streamly.Internal.Data.Stream (Stream) import qualified Stream.Common as Common import qualified Streamly.Internal.Data.Fold as Fold - -import Streamly.Internal.Data.Stream (Stream) import qualified 
Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs b/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs index c1a892d06a..debf5b047b 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs @@ -19,12 +19,11 @@ import Control.Monad.IO.Class (MonadIO(..)) import Data.Maybe (isJust) import Data.Monoid (Sum(..)) import GHC.Generics (Generic) +import Streamly.Internal.Data.Stream (Stream) -import qualified Streamly.Internal.Data.Refold.Type as Refold -import qualified Streamly.Internal.Data.Fold as FL import qualified Stream.Common as Common - -import Streamly.Internal.Data.Stream (Stream) +import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Refold.Type as Refold import qualified Streamly.Internal.Data.Stream as S import Test.Tasty.Bench @@ -398,6 +397,7 @@ o_1_space_pipesX4 value = [ bgroup "pipesX4" [ benchIOSink value "mapM" (transformMapM 4) , benchIOSink value "compose" (transformComposeMapM 4) + -- XXX requires @-fspec-constr-recursive=16@. 
, benchIOSink value "tee" (transformTeeMapM 4) #ifdef DEVBUILD -- XXX this take 1 GB memory to compile diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Split.hs b/benchmark/Streamly/Benchmark/Data/Stream/Split.hs index b91805c526..2beee72ea1 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Split.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Split.hs @@ -30,7 +30,6 @@ import qualified Streamly.Internal.Data.Array as Array import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.FileSystem.Handle as Handle -import qualified Streamly.Internal.Unicode.Stream as Unicode import Test.Tasty.Bench hiding (env) import Prelude hiding (last, length) @@ -245,6 +244,8 @@ o_1_space_reduce_read_split env = takeEndBySeq "aaaa" inh , mkBench "takeEndBySeq word abcdefgh" env $ \inh _ -> takeEndBySeq "abcdefgh" inh + + -- XXX takeEndBySeq KR requires @-fspec-constr-recursive=12@. , mkBench "takeEndBySeq KR abcdefghi" env $ \inh _ -> takeEndBySeq "abcdefghi" inh , mkBench "takeEndBySeq KR catcatcatcatcat" env $ \inh _ -> @@ -277,28 +278,8 @@ o_1_space_reduce_read_split env = ] ] --- | Split on a character sequence. 
-splitOnSeqUtf8 :: String -> Handle -> IO Int -splitOnSeqUtf8 str inh = - (Stream.fold Fold.length - $ Stream.splitSepBySeq_ (Array.fromList str) Fold.drain - $ Unicode.decodeUtf8Chunks - $ Handle.readChunks inh) -- >>= print - -o_1_space_reduce_toChunks_split :: BenchEnv -> [Benchmark] -o_1_space_reduce_toChunks_split env = - [ bgroup "FileSplitSeqUtf8" - [ mkBenchSmall "splitOnSeqUtf8 word abcdefgh" - env $ \inh _ -> splitOnSeqUtf8 "abcdefgh" inh - , mkBenchSmall "splitOnSeqUtf8 KR abcdefghijklmnopqrstuvwxyz" - env $ \inh _ -> splitOnSeqUtf8 "abcdefghijklmnopqrstuvwxyz" inh - ] - ] - benchmarks :: String -> BenchEnv -> [Benchmark] benchmarks moduleName env = - [ bgroup (o_1_space_prefix moduleName) $ concat - [ o_1_space_reduce_read_split env - , o_1_space_reduce_toChunks_split env - ] + [ bgroup (o_1_space_prefix moduleName) $ + o_1_space_reduce_read_split env ] diff --git a/benchmark/Streamly/Benchmark/Data/Stream/SplitChunks.hs b/benchmark/Streamly/Benchmark/Data/Stream/SplitChunks.hs new file mode 100644 index 0000000000..1954c4bd33 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Stream/SplitChunks.hs @@ -0,0 +1,72 @@ + +-- | +-- Module : Stream.SplitChunks +-- Copyright : (c) 2019 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC + +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} + +#ifdef __HADDOCK_VERSION__ +#undef INSPECTION +#endif + +#ifdef INSPECTION +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} +#endif + +module Stream.SplitChunks (benchmarks) where + +import System.IO (Handle) + +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.Fold as Fold +import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.FileSystem.Handle as Handle +import qualified Streamly.Internal.Unicode.Stream as Unicode + +import Test.Tasty.Bench hiding 
(env) +import Prelude hiding (last, length) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle + +#ifdef INSPECTION +import Streamly.Internal.Data.Stream (Step(..)) + +import qualified Streamly.Internal.Data.MutArray as MutArray +import qualified Streamly.Internal.Data.Unfold as Unfold + +import Test.Inspection +#endif + +------------------------------------------------------------------------------- +-- reduce with splitting transformations +------------------------------------------------------------------------------- + +-- | Split on a character sequence. +splitOnSeqUtf8 :: String -> Handle -> IO Int +splitOnSeqUtf8 str inh = + Stream.fold Fold.length + $ Stream.splitSepBySeq_ (Array.fromList str) Fold.drain + $ Unicode.decodeUtf8Chunks + $ Handle.readChunks inh -- >>= print + +o_1_space_reduce_toChunks_split :: BenchEnv -> [Benchmark] +o_1_space_reduce_toChunks_split env = + [ bgroup "FileSplitSeqUtf8" + [ mkBenchSmall "splitOnSeqUtf8 word abcdefgh" + env $ \inh _ -> splitOnSeqUtf8 "abcdefgh" inh + , mkBenchSmall "splitOnSeqUtf8 KR abcdefghijklmnopqrstuvwxyz" + env $ \inh _ -> splitOnSeqUtf8 "abcdefghijklmnopqrstuvwxyz" inh + ] + ] + +benchmarks :: String -> BenchEnv -> [Benchmark] +benchmarks moduleName env = + [ bgroup (o_1_space_prefix moduleName) $ + o_1_space_reduce_toChunks_split env + ] diff --git a/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs b/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs index 0bb0ae6b78..03cb2a8230 100644 --- a/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs +++ b/benchmark/Streamly/Benchmark/Data/Stream/Transform.hs @@ -27,12 +27,11 @@ import Control.Monad.IO.Class (MonadIO(..)) import Streamly.Internal.Data.Stream (Stream) import System.Random (randomRIO) +import qualified Stream.Common as Common import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Scanl as Scanl - -import qualified Stream.Common as Common -import qualified 
Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.Data.Unfold as Unfold import Test.Tasty.Bench import Stream.Common hiding (scanl') @@ -406,6 +405,7 @@ o_1_space_filteringX4 value = , benchIOSink value "drop-all" (dropAll value 4) , benchIOSink value "dropWhile-true" (dropWhileTrue value 4) , benchIOSink value "dropWhileM-true" (dropWhileMTrue value 4) + -- XXX requires @-fspec-constr-recursive=12@. , benchIOSink value "dropWhile-false" @@ -465,7 +465,9 @@ o_1_space_inserting value = o_1_space_insertingX4 :: Int -> [Benchmark] o_1_space_insertingX4 value = [ bgroup "insertingX4" - [ benchIOSink value "intersperse" (intersperse value 4) + [ + -- XXX requires @-fspec-constr-recursive=16@. + benchIOSink value "intersperse" (intersperse value 4) , benchIOSink value "insertBy" (insertBy value 4) ] ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index b03b2cd5f2..a777acc683 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -249,6 +249,7 @@ chunksOf :: Int -> Handle -> IO Int chunksOf n inh = S.fold Fold.length $ A.chunksOf n (S.unfold FH.reader inh) +-- XXX all these require @-fspec-constr-recursive=12@. 
o_1_space_reduce_read_grouped :: BenchEnv -> [Benchmark] o_1_space_reduce_read_grouped env = [ bgroup "reduce/read/chunks" diff --git a/benchmark/Streamly/Benchmark/Unicode/Stream.hs b/benchmark/Streamly/Benchmark/Unicode/Stream.hs index d0329f66ee..d95d39499f 100644 --- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs +++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs @@ -28,7 +28,7 @@ import Streamly.Data.Stream (Stream) import Streamly.Data.Fold (Fold) import Prelude hiding (last, length) import System.IO (Handle) -import Streamly.Internal.System.IO (arrayPayloadSize) +-- import Streamly.Internal.System.IO (arrayPayloadSize) import qualified Streamly.Data.Array as Array import qualified Streamly.Data.Fold as Fold @@ -37,7 +37,7 @@ import qualified Streamly.Internal.Data.Unfold as Unfold import qualified Streamly.Internal.FileSystem.Handle as Handle import qualified Streamly.Internal.Unicode.Array as UnicodeArr import qualified Streamly.Internal.Unicode.Stream as Unicode -import qualified Streamly.Internal.Data.Array as Array +-- import qualified Streamly.Internal.Data.Array as Array import Test.Tasty.Bench hiding (env) import Streamly.Benchmark.Common @@ -89,6 +89,7 @@ splitOnSuffix => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b splitOnSuffix predicate f = Stream.foldMany (Fold.takeEndBy_ predicate f) +-- XXX require @-fspec-constr-recursive=12@. {-# NOINLINE linesUnlinesCopy #-} linesUnlinesCopy :: Handle -> Handle -> IO () linesUnlinesCopy inh outh = @@ -108,6 +109,7 @@ linesUnlinesArrayWord8Copy inh outh = $ Stream.unfold Handle.reader inh -- XXX splitSuffixOn requires -funfolding-use-threshold=150 for better fusion +-- XXX require @-fspec-constr-recursive=12@. -- | Lines and unlines {-# NOINLINE linesUnlinesArrayCharCopy #-} linesUnlinesArrayCharCopy :: Handle -> Handle -> IO () @@ -176,6 +178,7 @@ wordsUnwordsCopy inh outh = -- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step #endif +-- XXX require @-fspec-constr-recursive=12@. 
{-# NOINLINE wordsUnwordsCharArrayCopy #-} wordsUnwordsCharArrayCopy :: Handle -> Handle -> IO () wordsUnwordsCharArrayCopy inh outh = @@ -186,6 +189,7 @@ wordsUnwordsCharArrayCopy inh outh = $ Unicode.decodeLatin1 $ Stream.unfold Handle.reader inh +-- XXX all these require @-fspec-constr-recursive=12@. o_1_space_copy_read_group_ungroup :: BenchEnv -> [Benchmark] o_1_space_copy_read_group_ungroup env = [ bgroup "ungroup-group" @@ -258,6 +262,7 @@ _copyStreamUtf8' inh outh = $ Unicode.decodeUtf8' $ Stream.unfold Handle.reader inh +{- -- | Copy file {-# NOINLINE copyStreamUtf16 #-} copyStreamUtf16 :: Handle -> Handle -> IO () @@ -266,8 +271,11 @@ copyStreamUtf16 inh outh = $ fmap Array.unsafeCast $ Array.chunksOf (arrayPayloadSize (16 * 1024)) $ Unicode.encodeUtf16le' $ Unicode.decodeUtf16le + -- XXX we have a commented implementation of mkEvenW8Chunks in + -- streamly-core Unicode module, copy it here to enable this benchmark. $ Array.concat $ fmap Array.unsafeCast $ Unicode.mkEvenW8Chunks $ Handle.readChunks inh + -} #ifdef INSPECTION inspect $ hasNoTypeClasses '_copyStreamUtf8' @@ -310,6 +318,7 @@ _copyStreamUtf8Parser inh outh = (Unicode.parseCharUtf8With Unicode.TransliterateCodingFailure) $ Stream.unfold Handle.reader inh +-- XXX all these require @-fspec-constr-recursive=12@. o_1_space_decode_encode_read :: BenchEnv -> [Benchmark] o_1_space_decode_encode_read env = [ bgroup "decode-encode" @@ -330,8 +339,10 @@ o_1_space_decode_encode_read env = $ \inh outh -> _copyStreamUtf8Parser inh outh , mkBenchSmall "encodeUtf8 . decodeUtf8" env $ \inh outh -> copyStreamUtf8 inh outh + {- , mkBenchSmall "encodeUtf16 . 
decodeUtf16" env $ \inh outh -> copyStreamUtf16 inh outh + -} ] ] diff --git a/benchmark/lib/Streamly/Benchmark/Common.hs b/benchmark/lib/Streamly/Benchmark/Common.hs index 213d5acefd..5c8d4298ba 100644 --- a/benchmark/lib/Streamly/Benchmark/Common.hs +++ b/benchmark/lib/Streamly/Benchmark/Common.hs @@ -18,6 +18,7 @@ module Streamly.Benchmark.Common , runWithCLIOptsEnv , runWithCLIOpts + , streamUnfoldrM , benchIOSink1 , benchPure , benchPureSink1 @@ -54,8 +55,9 @@ import Control.DeepSeq (NFData(..)) import Data.Functor.Identity (Identity, runIdentity) import System.Random (randomRIO) +import Streamly.Internal.Data.Stream (Stream) import qualified Streamly.Internal.Data.Fold as Fold -import qualified Streamly.Internal.Data.Stream as S +import qualified Streamly.Internal.Data.Stream as Stream import Test.Tasty.Bench @@ -81,6 +83,15 @@ o_n_stack_prefix name = name ++ "/o-n-stack" -- Benchmarking utilities ------------------------------------------------------------------------------- +{-# INLINE streamUnfoldrM #-} +streamUnfoldrM :: Monad m => Int -> Int -> Stream m Int +streamUnfoldrM value n = Stream.unfoldrM step n + where + step cnt = + if cnt > n + value + then return Nothing + else return (Just (cnt, cnt + 1)) + -- XXX once we convert all the functions to use this we can rename this to -- benchIOSink {-# INLINE benchIOSink1 #-} @@ -110,12 +121,12 @@ benchPureSink1 name f = bench name $ nfIO $ randomRIO (1,1) >>= return . runIdentity . f {-# INLINE benchPureSrc #-} -benchPureSrc :: String -> (Int -> S.Stream Identity a) -> Benchmark +benchPureSrc :: String -> (Int -> Stream Identity a) -> Benchmark benchPureSrc name src = benchPure name src (runIdentity . 
drain) where - drain = S.fold Fold.drain + drain = Stream.fold Fold.drain ------------------------------------------------------------------------------- -- String/List generation for read instances diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 27174f978a..b3addc9cc4 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -228,7 +228,7 @@ common bench-options include-dirs: . ghc-options: -rtsopts -with-rtsopts "-t" if flag(limit-build-mem) - ghc-options: +RTS -M400M -RTS + ghc-options: -j1 +RTS -M400M -RTS build-depends: streamly-benchmarks == 0.0.0 -- Some benchmarks are threaded some are not @@ -238,7 +238,7 @@ common bench-options-threaded -- trigger only with these options. ghc-options: -threaded -rtsopts -with-rtsopts "-t -N2" if flag(limit-build-mem) - ghc-options: +RTS -M400M -RTS + ghc-options: -j1 +RTS -M400M -RTS build-depends: streamly-benchmarks == 0.0.0 ------------------------------------------------------------------------------- @@ -256,8 +256,8 @@ benchmark Data.Array buildable: False else buildable: True - if flag(limit-build-mem) - ghc-options: +RTS -M1500M -RTS + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M600M -RTS benchmark Data.Array.Generic import: bench-options @@ -270,8 +270,8 @@ benchmark Data.Array.Generic buildable: False else buildable: True - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M500M -RTS benchmark Data.Array.Stream import: bench-options @@ -293,6 +293,8 @@ benchmark Data.Fold buildable: False else buildable: True + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M500M -RTS benchmark Data.Fold.Prelude import: bench-options @@ -319,8 +321,8 @@ benchmark Data.MutArray import: bench-options type: exitcode-stdio-1.0 main-is: Streamly/Benchmark/Data/MutArray.hs - if flag(limit-build-mem) - ghc-options: +RTS -M1500M 
-RTS + if flag(limit-build-mem) && !flag(fusion-plugin) + ghc-options: +RTS -M500M -RTS if flag(use-streamly-core) buildable: False else @@ -330,19 +332,26 @@ benchmark Data.Parser import: bench-options type: exitcode-stdio-1.0 hs-source-dirs: . - main-is: Streamly/Benchmark/Data/ParserMain.hs + main-is: Streamly/Benchmark/Data/Parser.hs other-modules: - Streamly.Benchmark.Data.Parser + Streamly.Benchmark.Data.Parser.Common + , Streamly.Benchmark.Data.Parser.Alternative + , Streamly.Benchmark.Data.Parser.Applicative + , Streamly.Benchmark.Data.Parser.Monad + , Streamly.Benchmark.Data.Parser.Interleave + , Streamly.Benchmark.Data.Parser.Sequence + , Streamly.Benchmark.Data.Parser.Producer + , Streamly.Benchmark.Data.Parser.Groups if impl(ghcjs) buildable: False else buildable: True - build-depends: exceptions >= 0.8 && < 0.11 + build-depends: exceptions >= 0.8 && < 0.11 if flag(limit-build-mem) - if flag(dev) - ghc-options: +RTS -M3000M -RTS + if flag(dev) || !flag(fusion-plugin) + ghc-options: +RTS -M750M -RTS else - ghc-options: +RTS -M2500M -RTS + ghc-options: +RTS -M500M -RTS benchmark Data.ParserK import: bench-options @@ -384,8 +393,6 @@ benchmark Data.RingArray import: bench-options type: exitcode-stdio-1.0 main-is: Streamly/Benchmark/Data/RingArray.hs - if flag(limit-build-mem) - ghc-options: +RTS -M1500M -RTS benchmark Data.Scanl import: bench-options @@ -421,8 +428,6 @@ benchmark Data.Serialize Streamly.Benchmark.Data.Serialize.TH Streamly.Benchmark.Data.Serialize.RecCompatible Streamly.Benchmark.Data.Serialize.RecNonCompatible - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS benchmark Data.SmallArray import: bench-options @@ -439,20 +444,22 @@ benchmark Data.Stream hs-source-dirs: Streamly/Benchmark/Data main-is: Stream.hs other-modules: - Stream.Generate + Stream.Common Stream.Eliminate - Stream.Transform - Stream.Reduce + Stream.Exceptions Stream.Expand + Stream.Generate Stream.Lift + Stream.Reduce Stream.Split - Stream.Common - 
Stream.Exceptions + -- XXX uses lot of memory + Stream.SplitChunks + Stream.Transform if flag(limit-build-mem) if flag(dev) - ghc-options: +RTS -M3500M -RTS + ghc-options: +RTS -M1000M -RTS else - ghc-options: +RTS -M2500M -RTS + ghc-options: +RTS -M500M -RTS benchmark Data.Stream.Adaptive import: bench-options-threaded @@ -611,33 +618,31 @@ benchmark Data.Unfold.Prelude else buildable: True -benchmark FileSystem.Handle +benchmark FileSystem.DirIO import: bench-options type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/FileSystem - main-is: Handle.hs - other-modules: - Handle.Read - , Handle.ReadWrite - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + main-is: DirIO.hs if flag(use-streamly-core) buildable: False else buildable: True + -- Fix this benchmark for Windows + if os(windows) + buildable: False -benchmark FileSystem.DirIO +benchmark FileSystem.Handle import: bench-options type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/FileSystem - main-is: DirIO.hs + main-is: Handle.hs + other-modules: + Handle.Read + , Handle.ReadWrite if flag(use-streamly-core) buildable: False else buildable: True - -- Fix this benchmark for Windows - if os(windows) - buildable: False benchmark Unicode.Char import: bench-options @@ -666,8 +671,6 @@ benchmark Unicode.Stream type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/Unicode main-is: Stream.hs - if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS if flag(use-streamly-core) buildable: False else diff --git a/bin/ghc-make.sh b/bin/ghc-make.sh new file mode 100755 index 0000000000..3357014808 --- /dev/null +++ b/bin/ghc-make.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +curdir=`pwd` +SRC_ROOT=${curdir}/core +BLD_ROOT=${curdir}/build +RSP_FILE=${BLD_ROOT}/ghc-streamly.rsp +CONFIG_FILE=${BLD_ROOT}/src/config.h + +mkdir -p ${BLD_ROOT} +CABAL_MACROS=${BLD_ROOT}/cabal_macros.h + +if test ! 
-f "$CABAL_MACROS" +then + echo "$CABAL_MACROS: not found" + echo "Use cabal to generate cabal_macros.h and place it in build dir." + exit 1 +fi + +# configure creates config.h in a directory reflecting the relative path of +# config.h.in in the source tree. We keep that src path in the include path and +# then all source include paths are translated to include paths relative to the +# build dir. So that all such paths get included. +# + +# -I${BLD_ROOT}/src/doctest +# -I${BLD_ROOT}/src/Streamly/Internal/Data +# -I${BLD_ROOT}/src/Streamly/Internal/Data/Array +# -I${BLD_ROOT}/src/Streamly/Internal/Data/Stream + +if test ! -f "$CONFIG_FILE" +then + cd ${BLD_ROOT} + ${SRC_ROOT}/configure + cd $curdir +fi + +RTS_INCLUDE_DIRS=$(ghc-pkg field rts include-dirs --simple-output) +RTS_INCLUDE_FLAGS="" +for dir in $RTS_INCLUDE_DIRS; do + RTS_INCLUDE_FLAGS="$RTS_INCLUDE_FLAGS -I$dir" +done + +CPP_FLAGS="\ +-I${SRC_ROOT}/src \ +-I${SRC_ROOT}/src/doctest \ +-I${SRC_ROOT}/src/Streamly/Internal/Data \ +-I${SRC_ROOT}/src/Streamly/Internal/Data/Array \ +-I${SRC_ROOT}/src/Streamly/Internal/Data/Stream \ +-I${BLD_ROOT}/src \ +$RTS_INCLUDE_FLAGS" + +# Use cabal build with -v option. Copy paste compiler flags from the +# response file arguments shown in the output. + +if test ! 
-f "$RSP_FILE" +then +cat << EOF > ${RSP_FILE} +--make +-static -dynamic-too -dynosuf dyn_o -dynhisuf dyn_hi +'-package-env=-' +-this-unit-id streamly-core-0.4.0-temp +-outputdir ${BLD_ROOT} -odir ${BLD_ROOT} -hidir ${BLD_ROOT} -hiedir ${BLD_ROOT} -stubdir ${BLD_ROOT} +-i -i${SRC_ROOT}/src -i${BLD_ROOT}/src +-optP-include -optP${CABAL_MACROS} +$CPP_FLAGS +-XHaskell2010 +-XBangPatterns +-XConstraintKinds +-XDeriveDataTypeable +-XDeriveGeneric +-XDeriveTraversable +-XExistentialQuantification +-XFlexibleContexts +-XFlexibleInstances +-XGeneralizedNewtypeDeriving +-XInstanceSigs +-XKindSignatures +-XLambdaCase +-XMultiParamTypeClasses +-XRankNTypes +-XScopedTypeVariables +-XStandaloneDeriving +-XTupleSections +-XTypeApplications +-XTypeOperators +-XCApiFFI +-XCPP +-XDefaultSignatures +-XMagicHash +-XRecordWildCards +-XStandaloneKindSignatures +-XQuantifiedConstraints +-Weverything +-Wno-implicit-prelude +-Wno-missing-deriving-strategies +-Wno-missing-exported-signatures +-Wno-missing-import-lists +-Wno-missing-local-signatures +-Wno-missing-safe-haskell-mode +-Wno-missed-specialisations +-Wno-all-missed-specialisations +-Wno-monomorphism-restriction +-Wno-prepositive-qualified-module +-Wno-unsafe +-Wno-missing-kind-signatures +-Wno-redundant-bang-patterns +-Wno-operator-whitespace +-Wno-missing-role-annotations +-Wno-missing-poly-kind-signatures +-O2 +-fdicts-strict +'-fspec-constr-recursive=16' +'-fmax-worker-args=16' +-Rghc-timing +EOF +fi + +HSC2HS_FLAGS="$CPP_FLAGS -D__GLASGOW_HASKELL__=910 -i${CABAL_MACROS}" + +HSC_FILES=$(find "$SRC_ROOT" -name '*.hsc' -printf '%P\n') +for f in $HSC_FILES; do + src="$SRC_ROOT/$f" + dst="$BLD_ROOT/${f%.hsc}.hs" + + mkdir -p "$(dirname "$dst")" + #echo "src=$src, dst=$dst" + echo "hsc2hs $HSC2HS_FLAGS $src -o $dst" + hsc2hs $HSC2HS_FLAGS "$src" -o "$dst" +done + +# -fplugin Fusion.Plugin \ +# -ddump-to-file \ +# -ddump-simpl \ + +ghc @${RSP_FILE} \ + +RTS -M400M -RTS \ + $* diff --git a/cabal.project b/cabal.project index 
e1fe3bfc99..d0102b2061 100644 --- a/cabal.project +++ b/cabal.project @@ -3,3 +3,6 @@ packages: streamly.cabal , test/streamly-tests.cabal , benchmark/streamly-benchmarks.cabal , bench-test-lib/bench-test-lib.cabal + +-- For debugging heap overflow +jobs: 1 diff --git a/cabal.project.Werror b/cabal.project.Werror index 281c05e992..d01b665f45 100644 --- a/cabal.project.Werror +++ b/cabal.project.Werror @@ -5,6 +5,9 @@ packages: core bench-test-lib +-- For debugging heap overflow +jobs: 1 + package streamly ghc-options: -Werror diff --git a/cabal.project.report b/cabal.project.report index 49517512af..2902b5f710 100644 --- a/cabal.project.report +++ b/cabal.project.report @@ -10,33 +10,28 @@ package bench-report package bench-show flags: +no-charts -source-repository-package - type: git - location: https://github.com/composewell/streamly.git - tag: 8500b6446dae07e763370608625f26eff5c84600 - -source-repository-package - type: git - location: https://github.com/composewell/streamly.git - tag: 8500b6446dae07e763370608625f26eff5c84600 - subdir: core - source-repository-package type: git location: https://github.com/composewell/bench-report.git - tag: d012e03bea201b6af605f4c4a7f2eb4d36452c5f + tag: 26a70093f9da76144fefe9b5305b5f928ef59caf source-repository-package type: git location: https://github.com/composewell/streamly-coreutils.git - tag: 2b62fc242b9645a85124ef8c52c8212d316f24dc - -source-repository-package - type: git - location: https://github.com/composewell/streamly-shell.git - tag: 070ff21fda7aab8ca45b08b330746299a91eb981 - -source-repository-package - type: git - location: https://github.com/composewell/streamly-process.git - tag: 9af2f96971d3365be6f4e75d7ded0d3ae5458fbf + tag: fa180060c7510c89d2767980ca6f7ec7011d04b9 + +--source-repository-package +-- type: git +-- location: https://github.com/composewell/streamly.git +-- tag: 8500b6446dae07e763370608625f26eff5c84600 +-- +--source-repository-package +-- type: git +-- location: 
https://github.com/composewell/streamly.git +-- tag: 8500b6446dae07e763370608625f26eff5c84600 +-- subdir: core + +--source-repository-package +-- type: git +-- location: https://github.com/composewell/streamly-process.git +-- tag: 9af2f96971d3365be6f4e75d7ded0d3ae5458fbf diff --git a/core/src/Streamly/Internal/Data/MutArray.hs b/core/src/Streamly/Internal/Data/MutArray.hs index 1d29de4ee2..95961ec0dd 100644 --- a/core/src/Streamly/Internal/Data/MutArray.hs +++ b/core/src/Streamly/Internal/Data/MutArray.hs @@ -74,7 +74,7 @@ import Streamly.Internal.Data.Fold.Type (Fold) import qualified Streamly.Internal.Data.IORef as IORef import qualified Streamly.Internal.Data.RingArray as RingArray import qualified Streamly.Internal.Data.Serialize.Type as Serialize -import qualified Streamly.Internal.Data.Stream.Nesting as Stream +import qualified Streamly.Internal.Data.Stream.Parse as Stream import qualified Streamly.Internal.Data.Stream.Type as Stream import qualified Streamly.Internal.Data.Fold.Type as Fold -- import qualified Streamly.Internal.Data.Stream.Transform as Stream diff --git a/core/src/Streamly/Internal/Data/Stream.hs b/core/src/Streamly/Internal/Data/Stream.hs index 812bb4d1c0..4569111e3e 100644 --- a/core/src/Streamly/Internal/Data/Stream.hs +++ b/core/src/Streamly/Internal/Data/Stream.hs @@ -23,6 +23,7 @@ module Streamly.Internal.Data.Stream , module Streamly.Internal.Data.Stream.Lift , module Streamly.Internal.Data.Stream.Transformer , module Streamly.Internal.Data.Stream.Nesting + , module Streamly.Internal.Data.Stream.Parse , module Streamly.Internal.Data.Stream.Transform , module Streamly.Internal.Data.Stream.Top , module Streamly.Internal.Data.Stream.Container @@ -36,6 +37,7 @@ import Streamly.Internal.Data.Stream.Exception import Streamly.Internal.Data.Stream.Lift import Streamly.Internal.Data.Stream.Transformer import Streamly.Internal.Data.Stream.Nesting +import Streamly.Internal.Data.Stream.Parse import Streamly.Internal.Data.Stream.Transform import 
Streamly.Internal.Data.Stream.Top import Streamly.Internal.Data.Stream.Container diff --git a/core/src/Streamly/Internal/Data/Stream/Eliminate.hs b/core/src/Streamly/Internal/Data/Stream/Eliminate.hs index 084b4481c9..edfce3b297 100644 --- a/core/src/Streamly/Internal/Data/Stream/Eliminate.hs +++ b/core/src/Streamly/Internal/Data/Stream/Eliminate.hs @@ -83,7 +83,7 @@ import qualified Streamly.Internal.Data.Array.Type as Array import qualified Streamly.Internal.Data.Fold as Fold import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.ParserDrivers as Drivers -import qualified Streamly.Internal.Data.Stream.Nesting as Nesting +import qualified Streamly.Internal.Data.Stream.Parse as Nesting import qualified Streamly.Internal.Data.Stream.Transform as StreamD import Prelude hiding diff --git a/core/src/Streamly/Internal/Data/Stream/Nesting.hs b/core/src/Streamly/Internal/Data/Stream/Nesting.hs index 30f93dc0da..55f230a15f 100644 --- a/core/src/Streamly/Internal/Data/Stream/Nesting.hs +++ b/core/src/Streamly/Internal/Data/Stream/Nesting.hs @@ -8,31 +8,22 @@ -- Stability : experimental -- Portability : GHC -- --- This module contains transformations involving multiple streams, unfolds or --- folds. There are two types of transformations generational or eliminational. --- Generational transformations are like the "Generate" module but they --- generate a stream by combining streams instead of elements. Eliminational --- transformations are like the "Eliminate" module but they transform a stream --- by eliminating parts of the stream instead of eliminating the whole stream. +-- This module contains unfolding (generational) transformations involving +-- multiple streams, unfolds or folds. There are two types of transformations +-- generational or eliminational. Generational transformations are like the +-- "Generate" module but they generate a stream by combining streams instead of +-- elements. 
Eliminational transformations are like the "Eliminate" module but +-- they transform a stream by eliminating parts of the stream instead of +-- eliminating the whole stream. -- -- These combinators involve transformation, generation, elimination so can be -- classified under any of those. - --- The zipWithM combinator in this module has been adapted from the vector --- package (c) Roman Leshchinskiy. -- -- Flipped versions can be named as: -- mapFor/forEach, concatFor, unfoldStepFor (only step function) -- foreach would be better for streams than mapFor as map could be used for any -- type not just containers with multiple elements. -- --- Flipped versions for folding streams: --- groupsFor :: stream -> fold -> stream (flipped groupsWhile) --- --- Flipped versions for folds: --- foldMany :: outer fold -> inner fold -> fold (original version) --- groupFoldFor :: inner fold -> outer fold -> fold (flipped version) --- groupStepFor :: inner fold -> outer fold step -> fold (flipped version) -- This can be convenient for defining the outer fold step using a lambda. -- module Streamly.Internal.Data.Stream.Nesting @@ -140,62 +131,6 @@ module Streamly.Internal.Data.Stream.Nesting , fairSchedForM , fairSchedFor - -- * Eliminate - -- | Folding and Parsing chunks of streams to eliminate nested streams. - -- Functions generally ending in these shapes: - -- - -- @ - -- f (Fold m a b) -> t m a -> t m b - -- f (Parser a m b) -> t m a -> t m b - -- @ - - -- ** Folding - -- | Apply folds on a stream. - , foldSequence - , foldIterateM - - -- ** Parsing - -- | Parsing is opposite to flattening. 'parseMany' is dual to concatMap or - -- unfoldEach concatMap generates a stream from single values in a - -- stream and flattens, parseMany does the opposite of flattening by - -- splitting the stream and then folds each such split to single value in - -- the output stream. 
- , parseMany - , parseManyPos - , parseSequence - , parseManyTill - , parseIterate - , parseIteratePos - - -- ** Grouping - -- | Group segments of a stream and fold. Special case of parsing. - , groupsWhile - , groupsRollingBy - - -- ** Splitting - -- | A special case of parsing. - , takeEndBySeq - , takeEndBySeq_ - , wordsBy - , splitSepBySeq_ - , splitEndBySeq - , splitEndBySeq_ - , splitOnSuffixSeq -- internal - - , splitBeginBy_ - , splitEndBySeqOneOf - , splitSepBySeqOneOf - - -- * Transform (Nested Containers) - -- | Opposite to compact in ArrayStream - , splitInnerBy -- XXX innerSplitOn - , splitInnerBySuffix -- XXX innerSplitOnSuffix - - -- * Reduce By Streams - , dropPrefix - , dropInfix - , dropSuffix - -- * Deprecated , interpose , interposeM @@ -210,10 +145,6 @@ module Streamly.Internal.Data.Stream.Nesting , interleaveMin , interleaveFst , interleaveFstSuffix - , parseManyD - , parseIterateD - , groupsBy - , splitOnSeq ) where @@ -221,38 +152,19 @@ where #include "inline.hs" #include "ArrayMacros.h" -import Control.Exception (assert) -import Control.Monad.IO.Class (MonadIO(..)) -import Data.Bits (shiftR, shiftL, (.|.), (.&.)) -import Data.Proxy (Proxy(..)) -import Data.Word (Word32) import Fusion.Plugin.Types (Fuse(..)) -import GHC.Types (SPEC(..)) - -import Streamly.Internal.Data.Array.Type (Array(..)) import Streamly.Internal.Data.Fold.Type (Fold(..)) -import Streamly.Internal.Data.MutArray.Type (MutArray(..)) -import Streamly.Internal.Data.Parser (ParseError(..), ParseErrorPos) -import Streamly.Internal.Data.RingArray (RingArray(..)) import Streamly.Internal.Data.SVar.Type (adaptState) -import Streamly.Internal.Data.Unbox (Unbox(..)) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) -import qualified Streamly.Internal.Data.Array.Type as A -import qualified Streamly.Internal.Data.MutArray.Type as MutArray -import qualified Streamly.Internal.Data.Fold as FL -import qualified Streamly.Internal.Data.Parser as PR -import qualified 
Streamly.Internal.Data.Parser as PRD -import qualified Streamly.Internal.Data.ParserDrivers as Drivers -import qualified Streamly.Internal.Data.RingArray as RB import qualified Streamly.Internal.Data.Stream.Generate as Stream import qualified Streamly.Internal.Data.Unfold.Type as Unfold import Streamly.Internal.Data.Stream.Transform (intersperse, intersperseEndByM) -import Streamly.Internal.Data.Stream.Type hiding (splitAt) +import Streamly.Internal.Data.Stream.Type -import Prelude hiding (concatMap, mapM, zipWith, splitAt) +import Prelude hiding (concatMap, zipWith) #include "DocTestDataStream.hs" @@ -1925,2057 +1837,3 @@ unfoldEachSepBySeq seed unf str = unfoldEach unf $ intersperse seed str intercalate :: Monad m => Unfold m b c -> b -> Stream m b -> Stream m c intercalate u x = unfoldEachSepBySeq x u - ------------------------------------------------------------------------------- --- Folding ------------------------------------------------------------------------------- - --- | Apply a stream of folds to an input stream and emit the results in the --- output stream. --- --- /Unimplemented/ --- -{-# INLINE foldSequence #-} -foldSequence - :: -- Monad m => - Stream m (Fold m a b) - -> Stream m a - -> Stream m b -foldSequence _f _m = undefined - -{-# ANN type FIterState Fuse #-} -data FIterState s f m a b - = FIterInit s f - | forall fs. FIterStream s (fs -> a -> m (FL.Step fs b)) fs (fs -> m b) - (fs -> m b) - | FIterYield b (FIterState s f m a b) - | FIterStop - --- | Iterate a fold generator on a stream. The initial value @b@ is used to --- generate the first fold, the fold is applied on the stream and the result of --- the fold is used to generate the next fold and so on. 
--- --- Usage: --- --- >>> import Data.Monoid (Sum(..)) --- >>> f x = return (Fold.take 2 (Fold.sconcat x)) --- >>> s = fmap Sum $ Stream.fromList [1..10] --- >>> Stream.fold Fold.toList $ fmap getSum $ Stream.foldIterateM f (pure 0) s --- [3,10,21,36,55,55] --- --- This is the streaming equivalent of monad like sequenced application of --- folds where next fold is dependent on the previous fold. --- --- /Pre-release/ --- -{-# INLINE_NORMAL foldIterateM #-} -foldIterateM :: - Monad m => (b -> m (FL.Fold m a b)) -> m b -> Stream m a -> Stream m b -foldIterateM func seed0 (Stream step state) = - Stream stepOuter (FIterInit state seed0) - - where - - {-# INLINE iterStep #-} - iterStep from st fstep extract final = do - res <- from - return - $ Skip - $ case res of - FL.Partial fs -> FIterStream st fstep fs extract final - FL.Done fb -> FIterYield fb $ FIterInit st (return fb) - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (FIterInit st seed) = do - (FL.Fold fstep initial extract final) <- seed >>= func - iterStep initial st fstep extract final - stepOuter gst (FIterStream st fstep fs extract final) = do - r <- step (adaptState gst) st - case r of - Yield x s -> do - iterStep (fstep fs x) s fstep extract final - Skip s -> return $ Skip $ FIterStream s fstep fs extract final - Stop -> do - b <- final fs - return $ Skip $ FIterYield b FIterStop - stepOuter _ (FIterYield a next) = return $ Yield a next - stepOuter _ FIterStop = return Stop - ------------------------------------------------------------------------------- --- Parsing ------------------------------------------------------------------------------- - --- | Apply a 'Parser' repeatedly on a stream and emit the parsed values in the --- output stream. 
--- --- Usage: --- --- >>> s = Stream.fromList [1..10] --- >>> parser = Parser.takeBetween 0 2 Fold.sum --- >>> Stream.toList $ Stream.parseMany parser s --- [Right 3,Right 7,Right 11,Right 15,Right 19] --- --- This is the streaming equivalent of the 'Streamly.Data.Parser.many' parse --- combinator. --- --- Known Issues: When the parser fails there is no way to get the remaining --- stream. --- -{-# INLINE parseMany #-} -parseMany - :: Monad m - => PRD.Parser a m b - -> Stream m a - -> Stream m (Either ParseError b) -parseMany = Drivers.parseMany - --- | Like 'parseMany' but includes stream position information in the error --- messages. --- -{-# INLINE parseManyPos #-} -parseManyPos - :: Monad m - => PRD.Parser a m b - -> Stream m a - -> Stream m (Either ParseErrorPos b) -parseManyPos = Drivers.parseManyPos - -{-# DEPRECATED parseManyD "Please use parseMany instead." #-} -{-# INLINE parseManyD #-} -parseManyD - :: Monad m - => PR.Parser a m b - -> Stream m a - -> Stream m (Either ParseError b) -parseManyD = parseMany - --- | Apply a stream of parsers to an input stream and emit the results in the --- output stream. --- --- /Unimplemented/ --- -{-# INLINE parseSequence #-} -parseSequence - :: -- Monad m => - Stream m (PR.Parser a m b) - -> Stream m a - -> Stream m b -parseSequence _f _m = undefined - --- XXX Change the parser arguments' order - --- | @parseManyTill collect test stream@ tries the parser @test@ on the input, --- if @test@ fails it backtracks and tries @collect@, after @collect@ succeeds --- @test@ is tried again and so on. The parser stops when @test@ succeeds. The --- output of @test@ is discarded and the output of @collect@ is emitted in the --- output stream. The parser fails if @collect@ fails. --- --- /Unimplemented/ --- -{-# INLINE parseManyTill #-} -parseManyTill :: - -- MonadThrow m => - PR.Parser a m b - -> PR.Parser a m x - -> Stream m a - -> Stream m b -parseManyTill = undefined - --- | Iterate a parser generating function on a stream. 
The initial value @b@ is --- used to generate the first parser, the parser is applied on the stream and --- the result is used to generate the next parser and so on. --- --- Example: --- --- >>> import Data.Monoid (Sum(..)) --- >>> s = Stream.fromList [1..10] --- >>> Stream.toList $ fmap getSum $ Stream.catRights $ Stream.parseIterate (\b -> Parser.takeBetween 0 2 (Fold.sconcat b)) (Sum 0) $ fmap Sum s --- [3,10,21,36,55,55] --- --- This is the streaming equivalent of monad like sequenced application of --- parsers where next parser is dependent on the previous parser. --- --- /Pre-release/ --- -{-# INLINE parseIterate #-} -parseIterate - :: Monad m - => (b -> PRD.Parser a m b) - -> b - -> Stream m a - -> Stream m (Either ParseError b) -parseIterate = Drivers.parseIterate - --- | Like 'parseIterate' but includes stream position information in the error --- messages. --- -{-# INLINE parseIteratePos #-} -parseIteratePos - :: Monad m - => (b -> PRD.Parser a m b) - -> b - -> Stream m a - -> Stream m (Either ParseErrorPos b) -parseIteratePos = Drivers.parseIteratePos - -{-# DEPRECATED parseIterateD "Please use parseIterate instead." #-} -{-# INLINE parseIterateD #-} -parseIterateD - :: Monad m - => (b -> PR.Parser a m b) - -> b - -> Stream m a - -> Stream m (Either ParseError b) -parseIterateD = parseIterate - ------------------------------------------------------------------------------- --- Grouping ------------------------------------------------------------------------------- - -data GroupByState st fs a b - = GroupingInit st - | GroupingDo st !fs - | GroupingInitWith st !a - | GroupingDoWith st !fs !a - | GroupingYield !b (GroupByState st fs a b) - | GroupingDone - --- | Keep collecting items in a group as long as the comparison function --- returns true. The comparison function is @cmp old new@ where @old@ is the --- first item in the group and @new@ is the incoming item being tested for --- membership of the group. 
The collected items are folded by the supplied --- fold. --- --- Definition: --- --- >>> groupsWhile cmp f = Stream.parseMany (Parser.groupBy cmp f) -{-# INLINE_NORMAL groupsWhile #-} -groupsWhile :: Monad m - => (a -> a -> Bool) - -> Fold m a b - -> Stream m a - -> Stream m b -{- -groupsWhile eq fld = parseMany (PRD.groupBy eq fld) --} -groupsWhile cmp (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter (GroupingInit state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (GroupingInit st) = do - -- XXX Note that if the stream stops without yielding a single element - -- in the group we discard the "initial" effect. - res <- initial - return - $ case res of - FL.Partial s -> Skip $ GroupingDo st s - FL.Done b -> Yield b $ GroupingInit st - stepOuter gst (GroupingDo st fs) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - Skip s -> return $ Skip $ GroupingDo s fs - Stop -> final fs >> return Stop - - where - - go !_ prev stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC prev s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - else do - r <- final acc - return $ Yield r (GroupingInitWith s x) - Skip s -> go SPEC prev s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingInitWith st x) = do - res <- initial - return - $ case res of - FL.Partial s -> Skip $ GroupingDoWith st s x - FL.Done b -> Yield b $ GroupingInitWith st x - stepOuter gst (GroupingDoWith st fs prev) = do - res <- fstep fs prev - case res of - FL.Partial fs1 -> go SPEC st fs1 - FL.Done b -> return $ Yield b (GroupingInit st) - - where - - -- XXX code duplicated from the previous equation - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp 
prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - else do - r <- final acc - return $ Yield r (GroupingInitWith s x) - Skip s -> go SPEC s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingYield _ _) = error "groupsWhile: Unreachable" - stepOuter _ GroupingDone = return Stop - --- | The argument order of the comparison function in `groupsWhile` is --- different than that of `groupsBy`. --- --- In `groupsBy` the comparison function takes the next element as the first --- argument and the previous element as the second argument. In `groupsWhile` --- the first argument is the previous element and second argument is the next --- element. -{-# DEPRECATED groupsBy "Please use groupsWhile instead. Please note the change in the argument order of the comparison function." #-} -{-# INLINE_NORMAL groupsBy #-} -groupsBy :: Monad m - => (a -> a -> Bool) - -> Fold m a b - -> Stream m a - -> Stream m b -groupsBy cmp = groupsWhile (flip cmp) - --- | --- --- Definition: --- --- >>> groupsRollingBy cmp f = Stream.parseMany (Parser.groupByRolling cmp f) --- -{-# INLINE_NORMAL groupsRollingBy #-} -groupsRollingBy :: Monad m - => (a -> a -> Bool) - -> Fold m a b - -> Stream m a - -> Stream m b -{- -groupsRollingBy eq fld = parseMany (PRD.groupByRolling eq fld) --} -groupsRollingBy cmp (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter (GroupingInit state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (GroupingInit st) = do - -- XXX Note that if the stream stops without yielding a single element - -- in the group we discard the "initial" effect. 
- res <- initial - return - $ case res of - FL.Partial fs -> Skip $ GroupingDo st fs - FL.Done fb -> Yield fb $ GroupingInit st - stepOuter gst (GroupingDo st fs) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done fb -> return $ Yield fb (GroupingInit s) - Skip s -> return $ Skip $ GroupingDo s fs - Stop -> final fs >> return Stop - - where - - go !_ prev stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done b -> return $ Yield b (GroupingInit s) - else do - r <- final acc - return $ Yield r (GroupingInitWith s x) - Skip s -> go SPEC prev s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingInitWith st x) = do - res <- initial - return - $ case res of - FL.Partial s -> Skip $ GroupingDoWith st s x - FL.Done b -> Yield b $ GroupingInitWith st x - stepOuter gst (GroupingDoWith st fs previous) = do - res <- fstep fs previous - case res of - FL.Partial s -> go SPEC previous st s - FL.Done b -> return $ Yield b (GroupingInit st) - - where - - -- XXX GHC: groupsWhile has one less parameter in this go loop and it - -- fuses. However, groupsRollingBy does not fuse, removing the prev - -- parameter makes it fuse. Something needs to be fixed in GHC. The - -- workaround for this is noted in the comments below. - go !_ prev !stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC x s fs1 - FL.Done b -> return $ Yield b (GroupingInit st) - else do - {- - r <- final acc - return $ Yield r (GroupingInitWith s x) - -} - -- The code above does not let groupBy fuse. We use the - -- alternative code below instead. 
Instead of jumping - -- to GroupingInitWith state, we unroll the code of - -- GroupingInitWith state here to help GHC with stream - -- fusion. - result <- initial - r <- final acc - return - $ Yield r - $ case result of - FL.Partial fsi -> GroupingDoWith s fsi x - FL.Done b -> GroupingYield b (GroupingInit s) - Skip s -> go SPEC prev s acc - Stop -> do - r <- final acc - return $ Yield r GroupingDone - stepOuter _ (GroupingYield r next) = return $ Yield r next - stepOuter _ GroupingDone = return Stop - ------------------------------------------------------------------------------- --- Splitting - by a predicate ------------------------------------------------------------------------------- - -data WordsByState st fs b - = WordsByInit st - | WordsByDo st !fs - | WordsByDone - | WordsByYield !b (WordsByState st fs b) - --- | Split the stream after stripping leading, trailing, and repeated --- separators determined by the predicate supplied. The tokens after splitting --- are collected by the supplied fold. In other words, the tokens are parsed in --- the same way as words are parsed from whitespace separated text. --- --- >>> f x = Stream.toList $ Stream.wordsBy (== '.') Fold.toList $ Stream.fromList x --- >>> f "a.b" --- ["a","b"] --- >>> f "a..b" --- ["a","b"] --- >>> f ".a..b." 
--- ["a","b"] --- -{-# INLINE_NORMAL wordsBy #-} -wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -wordsBy predicate (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter (WordsByInit state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter _ (WordsByInit st) = do - res <- initial - return - $ case res of - FL.Partial s -> Skip $ WordsByDo st s - FL.Done b -> Yield b (WordsByInit st) - - stepOuter gst (WordsByDo st fs) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - if predicate x - then do - resi <- initial - return - $ case resi of - FL.Partial fs1 -> Skip $ WordsByDo s fs1 - FL.Done b -> Yield b (WordsByInit s) - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC s fs1 - FL.Done b -> return $ Yield b (WordsByInit s) - Skip s -> return $ Skip $ WordsByDo s fs - Stop -> final fs >> return Stop - - where - - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if predicate x - then do - {- - r <- final acc - return $ Yield r (WordsByInit s) - -} - -- The above code does not fuse well. Need to check why - -- GHC is not able to simplify it well. Using the code - -- below, instead of jumping through the WordsByInit - -- state always, we directly go to WordsByDo state in - -- the common case of Partial. 
- resi <- initial - r <- final acc - return - $ Yield r - $ case resi of - FL.Partial fs1 -> WordsByDo s fs1 - FL.Done b -> WordsByYield b (WordsByInit s) - else do - r <- fstep acc x - case r of - FL.Partial fs1 -> go SPEC s fs1 - FL.Done b -> return $ Yield b (WordsByInit s) - Skip s -> go SPEC s acc - Stop -> do - r <- final acc - return $ Yield r WordsByDone - - stepOuter _ WordsByDone = return Stop - - stepOuter _ (WordsByYield b next) = return $ Yield b next - ------------------------------------------------------------------------------- --- Splitting on a sequence ------------------------------------------------------------------------------- - --- String search algorithms: --- http://www-igm.univ-mlv.fr/~lecroq/string/index.html - --- XXX Can GHC find a way to modularise this? Can we write different cases --- i.e.g single element, word hash, karp-rabin as different functions and then --- be able to combine them into a single state machine? - -{-# ANN type TakeEndBySeqState Fuse #-} -data TakeEndBySeqState mba rb rh ck w s b x = - TakeEndBySeqInit - | TakeEndBySeqYield !b (TakeEndBySeqState mba rb rh ck w s b x) - | TakeEndBySeqDone - - | TakeEndBySeqSingle s x - - | TakeEndBySeqWordInit !Int !w s - | TakeEndBySeqWordLoop !w s - | TakeEndBySeqWordDone !Int !w - - | TakeEndBySeqKRInit s mba - | TakeEndBySeqKRInit1 s mba !Int - | TakeEndBySeqKRLoop s mba !rh !ck - | TakeEndBySeqKRCheck s mba !rh - | TakeEndBySeqKRDone !Int rb - --- | If the pattern is empty the output stream is empty. -{-# INLINE_NORMAL takeEndBySeqWith #-} -takeEndBySeqWith - :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) - => Bool - -> Array a - -> Stream m a - -> Stream m a -takeEndBySeqWith withSep patArr (Stream step state) = - Stream stepOuter TakeEndBySeqInit - - where - - patLen = A.length patArr - patBytes = A.byteLength patArr - maxIndex = patLen - 1 - maxOffset = patBytes - SIZE_OF(a) - elemBits = SIZE_OF(a) * 8 - - -- For word pattern case - wordMask :: Word - wordMask = (1 `shiftL` (elemBits * patLen)) - 1 - - elemMask :: Word - elemMask = (1 `shiftL` elemBits) - 1 - - wordPat :: Word - wordPat = wordMask .&. A.foldl' addToWord 0 patArr - - addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - -- For Rabin-Karp search - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - skip = return . Skip - - {-# INLINE yield #-} - yield x !s = skip $ TakeEndBySeqYield x s - - {-# INLINE_LATE stepOuter #-} - stepOuter _ TakeEndBySeqInit = do - -- XXX When we statically specify the method compiler is able to - -- simplify the code better and removes the handling of other states. - -- When it is determined dynamically, the code is less efficient. For - -- example, the single element search degrades by 80% if the handling - -- of other cases is present. We need to investigate this further but - -- until then we can guide the compiler statically where we can. If we - -- want to use single element search statically then we can use - -- takeEndBy instead. - -- - -- XXX Is there a way for GHC to statically determine patLen when we - -- use an array created from a static string as pattern e.g. "\n". 
- case () of - _ | patLen == 0 -> return Stop - | patLen == 1 -> do - pat <- liftIO $ A.unsafeGetIndexIO 0 patArr - return $ Skip $ TakeEndBySeqSingle state pat - | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - return $ Skip $ TakeEndBySeqWordInit 0 0 state - | otherwise -> do - (MutArray mba _ _ _) :: MutArray a <- - liftIO $ MutArray.emptyOf patLen - skip $ TakeEndBySeqKRInit state mba - - --------------------- - -- Single yield point - --------------------- - - stepOuter _ (TakeEndBySeqYield x next) = return $ Yield x next - - ----------------- - -- Done - ----------------- - - stepOuter _ TakeEndBySeqDone = return Stop - - ----------------- - -- Single Pattern - ----------------- - - stepOuter gst (TakeEndBySeqSingle st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> - if pat /= x - then yield x (TakeEndBySeqSingle s pat) - else do - if withSep - then yield x TakeEndBySeqDone - else return Stop - Skip s -> skip $ TakeEndBySeqSingle s pat - Stop -> return Stop - - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - - -- Note: Karp-Rabin is roughly 15% slower than word hash for a 2 element - -- pattern. This may be useful for common cases like splitting lines using - -- "\r\n". - stepOuter _ (TakeEndBySeqWordDone 0 _) = do - return Stop - stepOuter _ (TakeEndBySeqWordDone n wrd) = do - let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) - in yield - (toEnum $ fromIntegral old) - (TakeEndBySeqWordDone (n - 1) wrd) - - -- XXX If we remove this init state for perf experiment the time taken - -- reduces to half, there may be some optimization opportunity here. - stepOuter gst (TakeEndBySeqWordInit idx wrd st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd1 = addToWord wrd x - next - | idx /= maxIndex = - TakeEndBySeqWordInit (idx + 1) wrd1 s - | wrd1 .&. 
wordMask /= wordPat = - TakeEndBySeqWordLoop wrd1 s - | otherwise = TakeEndBySeqDone - if withSep - then yield x next - else skip next - Skip s -> skip $ TakeEndBySeqWordInit idx wrd s - Stop -> - if withSep - then return Stop - else skip $ TakeEndBySeqWordDone idx wrd - - stepOuter gst (TakeEndBySeqWordLoop wrd st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - -- XXX Never use a lazy expression as state, that causes issues - -- in simplification because the state argument of Yield is - -- lazy, maybe we can make that strict. - let wrd1 = addToWord wrd x - old = (wordMask .&. wrd) - `shiftR` (elemBits * (patLen - 1)) - !y = - if withSep - then x - else toEnum $ fromIntegral old - -- Note: changing the nesting order of if and yield makes a - -- difference in performance. - if wrd1 .&. wordMask /= wordPat - then yield y (TakeEndBySeqWordLoop wrd1 s) - else yield y TakeEndBySeqDone - Skip s -> skip $ TakeEndBySeqWordLoop wrd s - Stop -> - if withSep - then return Stop - else skip $ TakeEndBySeqWordDone patLen wrd - - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - - stepOuter gst (TakeEndBySeqKRInit st0 mba) = do - res <- step (adaptState gst) st0 - case res of - Yield x s -> do - liftIO $ pokeAt 0 mba x - if withSep - then yield x (TakeEndBySeqKRInit1 s mba (SIZE_OF(a))) - else skip $ TakeEndBySeqKRInit1 s mba (SIZE_OF(a)) - Skip s -> skip $ TakeEndBySeqKRInit s mba - Stop -> return Stop - - stepOuter gst (TakeEndBySeqKRInit1 st mba offset) = do - res <- step (adaptState gst) st - let arr :: Array a = Array - { arrContents = mba - , arrStart = 0 - , arrEnd = patBytes - } - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - let next = - if offset /= maxOffset - then TakeEndBySeqKRInit1 s mba (offset + SIZE_OF(a)) - else - let ringHash = A.foldl' addCksum 0 arr - in if ringHash == patHash - then TakeEndBySeqKRCheck s mba 0 - else TakeEndBySeqKRLoop s mba 0 ringHash - if withSep 
- then yield x next - else skip next - Skip s -> skip $ TakeEndBySeqKRInit1 s mba offset - Stop -> do - if withSep - then return Stop - else do - let rb = RingArray - { ringContents = mba - , ringSize = offset - , ringHead = 0 - } - in skip $ TakeEndBySeqKRDone offset rb - - stepOuter gst (TakeEndBySeqKRLoop st mba rh cksum) = do - res <- step (adaptState gst) st - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - case res of - Yield x s -> do - (rb1, old) <- liftIO (RB.replace rb x) - let cksum1 = deltaCksum cksum old x - let rh1 = ringHead rb1 - next = - if cksum1 /= patHash - then TakeEndBySeqKRLoop s mba rh1 cksum1 - else TakeEndBySeqKRCheck s mba rh1 - if withSep - then yield x next - else yield old next - Skip s -> skip $ TakeEndBySeqKRLoop s mba rh cksum - Stop -> do - if withSep - then return Stop - else skip $ TakeEndBySeqKRDone patBytes rb - - stepOuter _ (TakeEndBySeqKRCheck st mba rh) = do - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - matches <- liftIO $ RB.eqArray rb patArr - if matches - then return Stop - else skip $ TakeEndBySeqKRLoop st mba rh patHash - - stepOuter _ (TakeEndBySeqKRDone 0 _) = return Stop - stepOuter _ (TakeEndBySeqKRDone len rb) = do - assert (len >= 0) (return ()) - old <- RB.unsafeGetHead rb - let rb1 = RB.moveForward rb - yield old $ TakeEndBySeqKRDone (len - SIZE_OF(a)) rb1 - --- | Take the stream until the supplied sequence is encountered. Take the --- sequence as well and stop. --- --- Usage: --- --- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq (Array.fromList pat) $ Stream.fromList xs --- >>> f "fgh" "abcdefghijk" --- "abcdefgh" --- >>> f "lmn" "abcdefghijk" --- "abcdefghijk" --- >>> f "" "abcdefghijk" --- "" --- -{-# INLINE takeEndBySeq #-} -takeEndBySeq - :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Stream m a - -> Stream m a -takeEndBySeq = takeEndBySeqWith True - --- | Take the stream until the supplied sequence is encountered. Do not take --- the sequence. --- --- Usage: --- --- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq_ (Array.fromList pat) $ Stream.fromList xs --- >>> f "fgh" "abcdefghijk" --- "abcde" --- >>> f "lmn" "abcdefghijk" --- "abcdefghijk" --- >>> f "" "abcdefghijk" --- "" --- -{-# INLINE takeEndBySeq_ #-} -takeEndBySeq_ - :: forall m a. (MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Stream m a - -> Stream m a -takeEndBySeq_ = takeEndBySeqWith False - -{- --- TODO can we unify the splitting operations using a splitting configuration --- like in the split package. --- -data SplitStyle = Infix | Suffix | Prefix deriving (Eq, Show) -data SplitOptions = SplitOptions - { style :: SplitStyle - , withSep :: Bool -- ^ keep the separators in output - -- , compact :: Bool -- ^ treat multiple consecutive separators as one - -- , trimHead :: Bool -- ^ drop blank at head - -- , trimTail :: Bool -- ^ drop blank at tail - } --} - --- XXX using "fs" as the last arg in Constructors may simplify the code a bit, --- because we can use the constructor directly without having to create "jump" --- functions. 
-{-# ANN type SplitOnSeqState Fuse #-} -data SplitOnSeqState mba rb rh ck w fs s b x = - SplitOnSeqInit - | SplitOnSeqYield b (SplitOnSeqState mba rb rh ck w fs s b x) - | SplitOnSeqDone - - | SplitOnSeqEmpty !fs s - - | SplitOnSeqSingle0 !fs s x - | SplitOnSeqSingle !fs s x - - | SplitOnSeqWordInit0 !fs s - | SplitOnSeqWordInit Int Word !fs s - | SplitOnSeqWordLoop !w s !fs - | SplitOnSeqWordDone Int !fs !w - - | SplitOnSeqKRInit0 Int !fs s mba - | SplitOnSeqKRInit Int !fs s mba - | SplitOnSeqKRLoop fs s mba !rh !ck - | SplitOnSeqKRCheck fs s mba !rh - | SplitOnSeqKRDone Int !fs rb - - | SplitOnSeqReinit (fs -> SplitOnSeqState mba rb rh ck w fs s b x) - --- XXX Need to fix empty stream split behavior - --- | Like 'splitSepBy_' but splits the stream on a sequence of elements rather than --- a single element. Parses a sequence of tokens separated by an infixed --- separator e.g. @a;b;c@ is parsed as @a@, @b@, @c@. If the pattern is empty --- then each element is a match, thus the fold is finalized on each element. --- --- >>> splitSepBy p xs = Stream.fold Fold.toList $ Stream.splitSepBySeq_ (Array.fromList p) Fold.toList (Stream.fromList xs) --- --- >>> splitSepBy "" "" --- [] --- --- >>> splitSepBy "" "a...b" --- ["a",".",".",".","b"] --- --- >>> splitSepBy ".." "" --- [] --- --- >>> splitSepBy ".." "a...b" --- ["a",".b"] --- --- >>> splitSepBy ".." "abc" --- ["abc"] --- --- >>> splitSepBy ".." ".." --- ["",""] --- --- >>> splitSepBy "." ".a" --- ["","a"] --- --- >>> splitSepBy "." "a." --- ["a",""] --- --- Uses Rabin-Karp algorithm for substring search. --- -{-# INLINE_NORMAL splitSepBySeq_ #-} -splitSepBySeq_, splitOnSeq - :: forall m a b. 
(MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter SplitOnSeqInit - - where - - patLen = A.length patArr - patBytes = A.byteLength patArr - maxIndex = patLen - 1 - maxOffset = patBytes - SIZE_OF(a) - elemBits = SIZE_OF(a) * 8 - - -- For word pattern case - wordMask :: Word - wordMask = (1 `shiftL` (elemBits * patLen)) - 1 - - elemMask :: Word - elemMask = (1 `shiftL` elemBits) - 1 - - wordPat :: Word - wordPat = wordMask .&. A.foldl' addToWord 0 patArr - - addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - -- For Rabin-Karp search - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - skip = return . Skip - - nextAfterInit nextGen stepRes = - case stepRes of - FL.Partial s -> nextGen s - FL.Done b -> SplitOnSeqYield b (SplitOnSeqReinit nextGen) - - {-# INLINE yieldReinit #-} - yieldReinit nextGen fs = - initial >>= skip . SplitOnSeqYield fs . 
nextAfterInit nextGen - - {-# INLINE_LATE stepOuter #-} - stepOuter _ SplitOnSeqInit = do - res <- initial - case res of - FL.Partial acc - | patLen == 0 -> - return $ Skip $ SplitOnSeqEmpty acc state - | patLen == 1 -> do - pat <- liftIO $ A.unsafeGetIndexIO 0 patArr - return $ Skip $ SplitOnSeqSingle0 acc state pat - | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - return $ Skip $ SplitOnSeqWordInit0 acc state - | otherwise -> do - (MutArray mba _ _ _) :: MutArray a <- - liftIO $ MutArray.emptyOf patLen - skip $ SplitOnSeqKRInit0 0 acc state mba - FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit - - stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next - - --------------------------- - -- Checkpoint - --------------------------- - - stepOuter _ (SplitOnSeqReinit nextGen) = - initial >>= skip . nextAfterInit nextGen - - --------------------------- - -- Empty pattern - --------------------------- - - stepOuter gst (SplitOnSeqEmpty acc st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - r <- fstep acc x - b1 <- - case r of - FL.Partial acc1 -> final acc1 - FL.Done b -> return b - let jump c = SplitOnSeqEmpty c s - in yieldReinit jump b1 - Skip s -> skip (SplitOnSeqEmpty acc s) - Stop -> final acc >> return Stop - - ----------------- - -- Done - ----------------- - - stepOuter _ SplitOnSeqDone = return Stop - - ----------------- - -- Single Pattern - ----------------- - - stepOuter gst (SplitOnSeqSingle0 fs st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - -- XXX This code block is duplicated in SplitOnSeqSingle state - let jump c = SplitOnSeqSingle c s pat - if pat == x - then final fs >>= yieldReinit jump - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> - pure $ Skip $ SplitOnSeqSingle fs1 s pat - FL.Done b -> yieldReinit jump b - Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat - Stop -> final fs >> pure Stop - - stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do - go SPEC fs0 
st0 - - where - - -- The local loop increases allocations by 6% but improves CPU - -- performance by 14%. - go !_ !fs !st = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSeqSingle c s pat - if pat == x - then final fs >>= yieldReinit jump - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> go SPEC fs1 s - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC fs s - Stop -> do - r <- final fs - return $ Skip $ SplitOnSeqYield r SplitOnSeqDone - - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - - -- Note: We fill the matching buffer before we emit anything, in case it - -- matches and we have to drop it. Though we could be more eager in - -- emitting as soon as we know that the pattern cannot match. But still the - -- worst case will remain the same, in case a match is going to happen we - -- will have to delay until the very end. - - stepOuter _ (SplitOnSeqWordDone 0 fs _) = do - r <- final fs - skip $ SplitOnSeqYield r SplitOnSeqDone - stepOuter _ (SplitOnSeqWordDone n fs wrd) = do - let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) - r <- fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> skip $ SplitOnSeqWordDone (n - 1) fs1 wrd - FL.Done b -> do - let jump c = SplitOnSeqWordDone (n - 1) c wrd - yieldReinit jump b - - stepOuter gst (SplitOnSeqWordInit0 fs st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> - let wrd1 = addToWord 0 x - in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s - Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s - Stop -> final fs >> pure Stop - - stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) = - go SPEC idx0 wrd0 st0 - - where - - {-# INLINE go #-} - go !_ !idx !wrd !st = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd1 = addToWord wrd x - if idx == maxIndex - then do - if wrd1 .&. 
wordMask == wordPat - then do - let jump c = SplitOnSeqWordInit 0 0 c s - final fs >>= yieldReinit jump - else skip $ SplitOnSeqWordLoop wrd1 s fs - else go SPEC (idx + 1) wrd1 s - Skip s -> go SPEC idx wrd s - Stop -> do - if idx /= 0 - then skip $ SplitOnSeqWordDone idx fs wrd - else do - r <- final fs - skip $ SplitOnSeqYield r SplitOnSeqDone - - stepOuter gst (SplitOnSeqWordLoop wrd0 st0 fs0) = - go SPEC wrd0 st0 fs0 - - where - - -- This loop does not affect allocations but it improves the CPU - -- performance signifcantly compared to looping using state. - {-# INLINE go #-} - go !_ !wrd !st !fs = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSeqWordInit 0 0 c s - wrd1 = addToWord wrd x - old = (wordMask .&. wrd) - `shiftR` (elemBits * (patLen - 1)) - r <- fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> do - if wrd1 .&. wordMask == wordPat - then final fs1 >>= yieldReinit jump - else go SPEC wrd1 s fs1 - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC wrd s fs - Stop -> skip $ SplitOnSeqWordDone patLen fs wrd - - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - - -- XXX Document this pattern for writing efficient code. Loop around only - -- required elements in the recursive loop, build the structures being - -- manipulated locally e.g. we are passing only mba, here and build an - -- array using patLen and arrStart from the surrounding context. 
- - stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba - Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba - Stop -> final fs >> pure Stop - - stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - if offset == maxOffset - then do - let arr :: Array a = Array - { arrContents = mba - , arrStart = 0 - , arrEnd = patBytes - } - let ringHash = A.foldl' addCksum 0 arr - if ringHash == patHash && A.byteEq arr patArr - then skip $ SplitOnSeqKRCheck fs s mba 0 - else skip $ SplitOnSeqKRLoop fs s mba 0 ringHash - else skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba - Skip s -> skip $ SplitOnSeqKRInit offset fs s mba - Stop -> do - let rb = RingArray - { ringContents = mba - , ringSize = offset - , ringHead = 0 - } - skip $ SplitOnSeqKRDone offset fs rb - - -- XXX The recursive "go" is more efficient than the state based recursion - -- code commented out below. Perhaps its more efficient because of - -- factoring out "mba" outside the loop. - -- - stepOuter gst (SplitOnSeqKRLoop fs0 st0 mba rh0 cksum0) = - go SPEC fs0 st0 rh0 cksum0 - - where - - go !_ !fs !st !rh !cksum = do - res <- step (adaptState gst) st - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - case res of - Yield x s -> do - (rb1, old) <- liftIO (RB.replace rb x) - r <- fstep fs old - case r of - FL.Partial fs1 -> do - let cksum1 = deltaCksum cksum old x - let rh1 = ringHead rb1 - if cksum1 == patHash - then skip $ SplitOnSeqKRCheck fs1 s mba rh1 - else go SPEC fs1 s rh1 cksum1 - FL.Done b -> do - -- XXX the old code looks wrong as we are resetting - -- the ring head but the ring still has old - -- elements as we are not resetting the size. 
- let jump c = SplitOnSeqKRInit 0 c s mba - yieldReinit jump b - Skip s -> go SPEC fs s rh cksum - Stop -> skip $ SplitOnSeqKRDone patBytes fs rb - - -- XXX The following code is 5 times slower compared to the recursive loop - -- based code above. Need to investigate why. One possibility is that the - -- go loop above does not thread around the ring buffer (rb). This code may - -- be causing the state to bloat and getting allocated on each iteration. - -- We can check the cmm/asm code to confirm. If so a good GHC solution to - -- such problem is needed. One way to avoid this could be to use unboxed - -- mutable state? - {- - stepOuter gst (SplitOnSeqKRLoop fs st rb rh cksum) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum1 = deltaCksum cksum old x - fs1 <- fstep fs old - if (cksum1 == patHash) - then do - r <- done fs1 - skip $ SplitOnSeqYield r $ SplitOnSeqKRInit 0 s rb rh - else do - rh1 <- liftIO (RB.unsafeInsert rb rh x) - skip $ SplitOnSeqKRLoop fs1 s rb rh1 cksum1 - Skip s -> skip $ SplitOnSeqKRLoop fs s rb rh cksum - Stop -> skip $ SplitOnSeqKRDone patLen fs rb rh - -} - - stepOuter _ (SplitOnSeqKRCheck fs st mba rh) = do - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - res <- liftIO $ RB.eqArray rb patArr - if res - then do - r <- final fs - let jump c = SplitOnSeqKRInit 0 c st mba - yieldReinit jump r - else skip $ SplitOnSeqKRLoop fs st mba rh patHash - - stepOuter _ (SplitOnSeqKRDone 0 fs _) = do - r <- final fs - skip $ SplitOnSeqYield r SplitOnSeqDone - stepOuter _ (SplitOnSeqKRDone len fs rb) = do - assert (len >= 0) (return ()) - old <- RB.unsafeGetHead rb - let rb1 = RB.moveForward rb - r <- fstep fs old - case r of - FL.Partial fs1 -> skip $ SplitOnSeqKRDone (len - SIZE_OF(a)) fs1 rb1 - FL.Done b -> do - let jump c = SplitOnSeqKRDone (len - SIZE_OF(a)) c rb1 - yieldReinit jump b - -RENAME(splitOnSeq,splitSepBySeq_) - -{-# ANN type 
SplitOnSuffixSeqState Fuse #-} -data SplitOnSuffixSeqState mba rb rh ck w fs s b x = - SplitOnSuffixSeqInit - | SplitOnSuffixSeqYield b (SplitOnSuffixSeqState mba rb rh ck w fs s b x) - | SplitOnSuffixSeqDone - - | SplitOnSuffixSeqEmpty !fs s - - | SplitOnSuffixSeqSingleInit !fs s x - | SplitOnSuffixSeqSingle !fs s x - - | SplitOnSuffixSeqWordInit !fs s - | SplitOnSuffixSeqWordLoop !w s !fs - | SplitOnSuffixSeqWordDone Int !fs !w - - | SplitOnSuffixSeqKRInit !fs s mba - | SplitOnSuffixSeqKRInit1 !fs s mba - | SplitOnSuffixSeqKRLoop fs s mba !rh !ck - | SplitOnSuffixSeqKRCheck fs s mba !rh - | SplitOnSuffixSeqKRDone Int !fs rb - - | SplitOnSuffixSeqReinit - (fs -> SplitOnSuffixSeqState mba rb rh ck w fs s b x) - --- | @splitOnSuffixSeq withSep pat fld input@ splits the input using @pat@ as a --- suffixed separator, the resulting split segments are fed to the fold @fld@. --- If @withSep@ is True then the separator sequence is also suffixed with the --- split segments. --- --- /Internal/ -{-# INLINE_NORMAL splitOnSuffixSeq #-} -splitOnSuffixSeq - :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) - => Bool - -> Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitOnSuffixSeq withSep patArr (Fold fstep initial _ final) (Stream step state) = - Stream stepOuter SplitOnSuffixSeqInit - - where - - patLen = A.length patArr - patBytes = A.byteLength patArr - maxIndex = patLen - 1 - maxOffset = patBytes - SIZE_OF(a) - elemBits = SIZE_OF(a) * 8 - - -- For word pattern case - wordMask :: Word - wordMask = (1 `shiftL` (elemBits * patLen)) - 1 - - elemMask :: Word - elemMask = (1 `shiftL` elemBits) - 1 - - wordPat :: Word - wordPat = wordMask .&. A.foldl' addToWord 0 patArr - - addToWord wd a = (wd `shiftL` elemBits) .|. 
fromIntegral (fromEnum a) - - nextAfterInit nextGen stepRes = - case stepRes of - FL.Partial s -> nextGen s - FL.Done b -> - SplitOnSuffixSeqYield b (SplitOnSuffixSeqReinit nextGen) - - {-# INLINE yieldReinit #-} - yieldReinit nextGen fs = - initial >>= skip . SplitOnSuffixSeqYield fs . nextAfterInit nextGen - - -- For single element pattern case - {-# INLINE processYieldSingle #-} - processYieldSingle pat x s fs = do - let jump c = SplitOnSuffixSeqSingleInit c s pat - if pat == x - then do - r <- if withSep then fstep fs x else return $ FL.Partial fs - b1 <- - case r of - FL.Partial fs1 -> final fs1 - FL.Done b -> return b - yieldReinit jump b1 - else do - r <- fstep fs x - case r of - FL.Partial fs1 -> skip $ SplitOnSuffixSeqSingle fs1 s pat - FL.Done b -> yieldReinit jump b - - -- For Rabin-Karp search - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - skip = return . Skip - - {-# INLINE_LATE stepOuter #-} - stepOuter _ SplitOnSuffixSeqInit = do - res <- initial - case res of - FL.Partial fs - | patLen == 0 -> - skip $ SplitOnSuffixSeqEmpty fs state - | patLen == 1 -> do - pat <- liftIO $ A.unsafeGetIndexIO 0 patArr - skip $ SplitOnSuffixSeqSingleInit fs state pat - | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> - skip $ SplitOnSuffixSeqWordInit fs state - | otherwise -> do - (MutArray mba _ _ _) :: MutArray a <- - liftIO $ MutArray.emptyOf patLen - skip $ SplitOnSuffixSeqKRInit fs state mba - FL.Done fb -> skip $ SplitOnSuffixSeqYield fb SplitOnSuffixSeqInit - - stepOuter _ (SplitOnSuffixSeqYield x next) = return $ Yield x next - - --------------------------- - -- Reinit - --------------------------- - - stepOuter _ (SplitOnSuffixSeqReinit nextGen) = - initial >>= skip . 
nextAfterInit nextGen - - --------------------------- - -- Empty pattern - --------------------------- - - stepOuter gst (SplitOnSuffixSeqEmpty acc st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSuffixSeqEmpty c s - r <- fstep acc x - b1 <- - case r of - FL.Partial fs -> final fs - FL.Done b -> return b - yieldReinit jump b1 - Skip s -> skip (SplitOnSuffixSeqEmpty acc s) - Stop -> final acc >> return Stop - - ----------------- - -- Done - ----------------- - - stepOuter _ SplitOnSuffixSeqDone = return Stop - - ----------------- - -- Single Pattern - ----------------- - - stepOuter gst (SplitOnSuffixSeqSingleInit fs st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> processYieldSingle pat x s fs - Skip s -> skip $ SplitOnSuffixSeqSingleInit fs s pat - Stop -> final fs >> return Stop - - stepOuter gst (SplitOnSuffixSeqSingle fs st pat) = do - res <- step (adaptState gst) st - case res of - Yield x s -> processYieldSingle pat x s fs - Skip s -> skip $ SplitOnSuffixSeqSingle fs s pat - Stop -> do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - - stepOuter _ (SplitOnSuffixSeqWordDone 0 fs _) = do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - stepOuter _ (SplitOnSuffixSeqWordDone n fs wrd) = do - let old = elemMask .&. 
(wrd `shiftR` (elemBits * (n - 1))) - r <- fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> skip $ SplitOnSuffixSeqWordDone (n - 1) fs1 wrd - FL.Done b -> do - let jump c = SplitOnSuffixSeqWordDone (n - 1) c wrd - yieldReinit jump b - - stepOuter gst (SplitOnSuffixSeqWordInit fs0 st0) = do - res <- step (adaptState gst) st0 - case res of - Yield x s -> do - let wrd = addToWord 0 x - r <- if withSep then fstep fs0 x else return $ FL.Partial fs0 - case r of - FL.Partial fs1 -> go SPEC 1 wrd s fs1 - FL.Done b -> do - let jump c = SplitOnSuffixSeqWordInit c s - yieldReinit jump b - Skip s -> skip (SplitOnSuffixSeqWordInit fs0 s) - Stop -> final fs0 >> return Stop - - where - - {-# INLINE go #-} - go !_ !idx !wrd !st !fs = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSuffixSeqWordInit c s - let wrd1 = addToWord wrd x - r <- if withSep then fstep fs x else return $ FL.Partial fs - case r of - FL.Partial fs1 - | idx /= maxIndex -> - go SPEC (idx + 1) wrd1 s fs1 - | wrd1 .&. wordMask /= wordPat -> - skip $ SplitOnSuffixSeqWordLoop wrd1 s fs1 - | otherwise -> - final fs1 >>= yieldReinit jump - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC idx wrd s fs - Stop -> - if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else skip $ SplitOnSuffixSeqWordDone idx fs wrd - - stepOuter gst (SplitOnSuffixSeqWordLoop wrd0 st0 fs0) = - go SPEC wrd0 st0 fs0 - - where - - {-# INLINE go #-} - go !_ !wrd !st !fs = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let jump c = SplitOnSuffixSeqWordInit c s - wrd1 = addToWord wrd x - old = (wordMask .&. wrd) - `shiftR` (elemBits * (patLen - 1)) - r <- - if withSep - then fstep fs x - else fstep fs (toEnum $ fromIntegral old) - case r of - FL.Partial fs1 -> - if wrd1 .&. 
wordMask == wordPat - then final fs1 >>= yieldReinit jump - else go SPEC wrd1 s fs1 - FL.Done b -> yieldReinit jump b - Skip s -> go SPEC wrd s fs - Stop -> - if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else skip $ SplitOnSuffixSeqWordDone patLen fs wrd - - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - - stepOuter gst (SplitOnSuffixSeqKRInit fs st0 mba) = do - res <- step (adaptState gst) st0 - case res of - Yield x s -> do - liftIO $ pokeAt 0 mba x - r <- if withSep then fstep fs x else return $ FL.Partial fs - case r of - FL.Partial fs1 -> - skip $ SplitOnSuffixSeqKRInit1 fs1 s mba - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRInit c s mba - yieldReinit jump b - Skip s -> skip $ SplitOnSuffixSeqKRInit fs s mba - Stop -> final fs >> return Stop - - stepOuter gst (SplitOnSuffixSeqKRInit1 fs0 st0 mba) = do - go SPEC (SIZE_OF(a)) st0 fs0 - - where - - go !_ !offset st !fs = do - res <- step (adaptState gst) st - let arr :: Array a = Array - { arrContents = mba - , arrStart = 0 - , arrEnd = patBytes - } - case res of - Yield x s -> do - liftIO $ pokeAt offset mba x - r <- if withSep then fstep fs x else return $ FL.Partial fs - let ringHash = A.foldl' addCksum 0 arr - case r of - FL.Partial fs1 - | offset /= maxOffset -> - go SPEC (offset + SIZE_OF(a)) s fs1 - | ringHash == patHash -> - skip $ SplitOnSuffixSeqKRCheck fs1 s mba 0 - | otherwise -> - skip $ SplitOnSuffixSeqKRLoop - fs1 s mba 0 ringHash - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRInit c s mba - yieldReinit jump b - Skip s -> go SPEC offset s fs - Stop -> do - -- do not issue a blank segment when we end at pattern - if offset == maxOffset && A.byteEq arr patArr - then final fs >> return Stop - else if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else do - let rb = RingArray - { ringContents = mba - , ringSize = offset - , ringHead = 0 - } - in 
skip $ SplitOnSuffixSeqKRDone offset fs rb - - stepOuter gst (SplitOnSuffixSeqKRLoop fs0 st0 mba rh0 cksum0) = - go SPEC fs0 st0 rh0 cksum0 - - where - - go !_ !fs !st !rh !cksum = do - res <- step (adaptState gst) st - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - case res of - Yield x s -> do - (rb1, old) <- liftIO (RB.replace rb x) - let cksum1 = deltaCksum cksum old x - let rh1 = ringHead rb1 - r <- if withSep then fstep fs x else fstep fs old - case r of - FL.Partial fs1 -> - if cksum1 /= patHash - then go SPEC fs1 s rh1 cksum1 - else skip $ SplitOnSuffixSeqKRCheck fs1 s mba rh1 - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRInit c s mba - yieldReinit jump b - Skip s -> go SPEC fs s rh cksum - Stop -> do - if withSep - then do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - else skip $ SplitOnSuffixSeqKRDone patBytes fs rb - - stepOuter _ (SplitOnSuffixSeqKRCheck fs st mba rh) = do - let rb = RingArray - { ringContents = mba - , ringSize = patBytes - , ringHead = rh - } - matches <- liftIO $ RB.eqArray rb patArr - if matches - then do - r <- final fs - let jump c = SplitOnSuffixSeqKRInit c st mba - yieldReinit jump r - else skip $ SplitOnSuffixSeqKRLoop fs st mba rh patHash - - stepOuter _ (SplitOnSuffixSeqKRDone 0 fs _) = do - r <- final fs - skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone - stepOuter _ (SplitOnSuffixSeqKRDone len fs rb) = do - assert (len >= 0) (return ()) - old <- RB.unsafeGetHead rb - let rb1 = RB.moveForward rb - r <- fstep fs old - case r of - FL.Partial fs1 -> - skip $ SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) fs1 rb1 - FL.Done b -> do - let jump c = SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) c rb1 - yieldReinit jump b - --- | Parses a sequence of tokens suffixed by a separator e.g. @a;b;c;@ is --- parsed as @a;@, @b;@, @c;@. If the pattern is empty the input stream is --- returned as it is. 
--- --- Equivalent to the following: --- --- >>> splitEndBySeq pat f = Stream.foldMany (Fold.takeEndBySeq pat f) --- --- Usage: --- --- >>> f p = Stream.splitEndBySeq (Array.fromList p) Fold.toList --- >>> splitEndBy p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) --- --- >>> splitEndBy "" "" --- [] --- --- >>> splitEndBy "" "a...b" --- ["a",".",".",".","b"] --- --- >>> splitEndBy ".." "" --- [] --- --- --- >>> splitEndBy ".." "a...b" --- ["a..",".b"] --- --- --- >>> splitEndBy ".." "abc" --- ["abc"] --- --- >>> splitEndBy ".." ".." --- [".."] --- --- >>> splitEndBy "." ".a" --- [".","a"] --- --- >>> splitEndBy "." "a." --- ["a."] --- --- Uses Rabin-Karp algorithm for substring search. --- -{-# INLINE_NORMAL splitEndBySeq #-} -splitEndBySeq - :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitEndBySeq = splitOnSuffixSeq True - --- | Like 'splitEndBySeq' but drops the separators and returns only the tokens. --- --- Equivalent to the following: --- --- >>> splitEndBySeq_ pat f = Stream.foldMany (Fold.takeEndBySeq_ pat f) --- --- Usage: --- --- >>> f p = Stream.splitEndBySeq_ (Array.fromList p) Fold.toList --- >>> splitEndBy_ p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) --- --- >>> splitEndBy_ "" "" --- [] --- --- >>> splitEndBy_ "" "a...b" --- ["a",".",".",".","b"] --- --- >>> splitEndBy_ ".." "" --- [] --- --- >>> splitEndBy_ ".." "a...b" --- ["a",".b"] --- --- >>> splitEndBy_ ".." "abc" --- ["abc"] --- --- >>> splitEndBy_ ".." ".." --- [""] --- --- >>> splitEndBy_ "." ".a" --- ["","a"] --- --- >>> splitEndBy_ "." "a." --- ["a"] --- --- Uses Rabin-Karp algorithm for substring search. --- -{-# INLINE_NORMAL splitEndBySeq_ #-} -splitEndBySeq_ - :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) - => Array a - -> Fold m a b - -> Stream m a - -> Stream m b -splitEndBySeq_ = splitOnSuffixSeq False - --- Implement this as a fold or a parser instead. 
--- This can be implemented easily using Rabin Karp - --- | Split post any one of the given patterns. --- --- /Unimplemented/ -{-# INLINE splitEndBySeqOneOf #-} -splitEndBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => - [Array a] -> Fold m a b -> Stream m a -> Stream m b -splitEndBySeqOneOf _subseq _f _m = undefined - --- | Split on a prefixed separator element, dropping the separator. The --- supplied 'Fold' is applied on the split segments. --- --- @ --- > splitOnPrefix' p xs = Stream.toList $ Stream.splitOnPrefix p (Fold.toList) (Stream.fromList xs) --- > splitOnPrefix' (== '.') ".a.b" --- ["a","b"] --- @ --- --- An empty stream results in an empty output stream: --- @ --- > splitOnPrefix' (== '.') "" --- [] --- @ --- --- An empty segment consisting of only a prefix is folded to the default output --- of the fold: --- --- @ --- > splitOnPrefix' (== '.') "." --- [""] --- --- > splitOnPrefix' (== '.') ".a.b." --- ["a","b",""] --- --- > splitOnPrefix' (== '.') ".a..b" --- ["a","","b"] --- --- @ --- --- A prefix is optional at the beginning of the stream: --- --- @ --- > splitOnPrefix' (== '.') "a" --- ["a"] --- --- > splitOnPrefix' (== '.') "a.b" --- ["a","b"] --- @ --- --- 'splitOnPrefix' is an inverse of 'intercalatePrefix' with a single element: --- --- > Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList . Stream.splitOnPrefix (== '.') Fold.toList === id --- --- Assuming the input stream does not contain the separator: --- --- > Stream.splitOnPrefix (== '.') Fold.toList . 
Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList === id --- --- /Unimplemented/ -{-# INLINE splitBeginBy_ #-} -splitBeginBy_ :: -- (MonadCatch m) => - (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitBeginBy_ _predicate _f = undefined - -- parseMany (Parser.sliceBeginBy predicate f) - --- Int list examples for splitOn: --- --- >>> splitList [] [1,2,3,3,4] --- > [[1],[2],[3],[3],[4]] --- --- >>> splitList [5] [1,2,3,3,4] --- > [[1,2,3,3,4]] --- --- >>> splitList [1] [1,2,3,3,4] --- > [[],[2,3,3,4]] --- --- >>> splitList [4] [1,2,3,3,4] --- > [[1,2,3,3],[]] --- --- >>> splitList [2] [1,2,3,3,4] --- > [[1],[3,3,4]] --- --- >>> splitList [3] [1,2,3,3,4] --- > [[1,2],[],[4]] --- --- >>> splitList [3,3] [1,2,3,3,4] --- > [[1,2],[4]] --- --- >>> splitList [1,2,3,3,4] [1,2,3,3,4] --- > [[],[]] - --- This can be implemented easily using Rabin Karp --- | Split on any one of the given patterns. --- --- /Unimplemented/ --- -{-# INLINE splitSepBySeqOneOf #-} -splitSepBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => - [Array a] -> Fold m a b -> Stream m a -> Stream m b -splitSepBySeqOneOf _subseq _f _m = - undefined -- D.fromStreamD $ D.splitOnAny f subseq (D.toStreamD m) - ------------------------------------------------------------------------------- --- Nested Container Transformation ------------------------------------------------------------------------------- - -{-# ANN type SplitState Fuse #-} -data SplitState s arr - = SplitInitial s - | SplitBuffering s arr - | SplitSplitting s arr - | SplitYielding arr (SplitState s arr) - | SplitFinishing - --- XXX An alternative approach would be to use a partial fold (Fold m a b) to --- split using a splitBy like combinator. The Fold would consume upto the --- separator and return any leftover which can then be fed to the next fold. --- --- We can revisit this once we have partial folds/parsers. --- --- | Performs infix separator style splitting. 
-{-# INLINE_NORMAL splitInnerBy #-} -splitInnerBy - :: Monad m - => (f a -> m (f a, Maybe (f a))) -- splitter - -> (f a -> f a -> m (f a)) -- joiner - -> Stream m (f a) - -> Stream m (f a) -splitInnerBy splitter joiner (Stream step1 state1) = - Stream step (SplitInitial state1) - - where - - {-# INLINE_LATE step #-} - step gst (SplitInitial st) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - return $ case mx2 of - Nothing -> Skip (SplitBuffering s x1) - Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitInitial s) - Stop -> return Stop - - step gst (SplitBuffering st buf) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - buf' <- joiner buf x1 - return $ case mx2 of - Nothing -> Skip (SplitBuffering s buf') - Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitBuffering s buf) - Stop -> return $ Skip (SplitYielding buf SplitFinishing) - - step _ (SplitSplitting st buf) = do - (x1, mx2) <- splitter buf - return $ case mx2 of - Nothing -> Skip $ SplitBuffering st x1 - Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) - - step _ (SplitYielding x next) = return $ Yield x next - step _ SplitFinishing = return Stop - --- | Performs infix separator style splitting. -{-# INLINE_NORMAL splitInnerBySuffix #-} -splitInnerBySuffix - :: Monad m - => (f a -> Bool) -- isEmpty? 
- -> (f a -> m (f a, Maybe (f a))) -- splitter - -> (f a -> f a -> m (f a)) -- joiner - -> Stream m (f a) - -> Stream m (f a) -splitInnerBySuffix isEmpty splitter joiner (Stream step1 state1) = - Stream step (SplitInitial state1) - - where - - {-# INLINE_LATE step #-} - step gst (SplitInitial st) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - return $ case mx2 of - Nothing -> Skip (SplitBuffering s x1) - Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitInitial s) - Stop -> return Stop - - step gst (SplitBuffering st buf) = do - r <- step1 gst st - case r of - Yield x s -> do - (x1, mx2) <- splitter x - buf' <- joiner buf x1 - return $ case mx2 of - Nothing -> Skip (SplitBuffering s buf') - Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) - Skip s -> return $ Skip (SplitBuffering s buf) - Stop -> - return $ - if isEmpty buf - then Stop - else Skip (SplitYielding buf SplitFinishing) - - step _ (SplitSplitting st buf) = do - (x1, mx2) <- splitter buf - return $ case mx2 of - Nothing -> Skip $ SplitBuffering st x1 - Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) - - step _ (SplitYielding x next) = return $ Yield x next - step _ SplitFinishing = return Stop - ------------------------------------------------------------------------------- --- Trimming ------------------------------------------------------------------------------- - --- | Drop prefix from the input stream if present. --- --- Space: @O(1)@ --- --- See also stripPrefix. --- --- /Unimplemented/ -{-# INLINE dropPrefix #-} -dropPrefix :: - -- (Monad m, Eq a) => - Stream m a -> Stream m a -> Stream m a -dropPrefix = error "Not implemented yet!" - --- | Drop all matching infix from the input stream if present. Infix stream --- may be consumed multiple times. --- --- Space: @O(n)@ where n is the length of the infix. --- --- See also stripInfix. 
--- --- /Unimplemented/ -{-# INLINE dropInfix #-} -dropInfix :: - -- (Monad m, Eq a) => - Stream m a -> Stream m a -> Stream m a -dropInfix = error "Not implemented yet!" - --- | Drop suffix from the input stream if present. Suffix stream may be --- consumed multiple times. --- --- Space: @O(n)@ where n is the length of the suffix. --- --- See also stripSuffix. --- --- /Unimplemented/ -{-# INLINE dropSuffix #-} -dropSuffix :: - -- (Monad m, Eq a) => - Stream m a -> Stream m a -> Stream m a -dropSuffix = error "Not implemented yet!" diff --git a/core/src/Streamly/Internal/Data/Stream/Parse.hs b/core/src/Streamly/Internal/Data/Stream/Parse.hs new file mode 100644 index 0000000000..3cbfc7384e --- /dev/null +++ b/core/src/Streamly/Internal/Data/Stream/Parse.hs @@ -0,0 +1,2179 @@ +{-# LANGUAGE CPP #-} +-- | +-- Module : Streamly.Internal.Data.Stream.Parse +-- Copyright : (c) 2018 Composewell Technologies +-- (c) Roman Leshchinskiy 2008-2010 +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- +-- This module contains folding (eliminating) transformations involving +-- multiple streams. folds. There are two types of transformations generational +-- or eliminational. Eliminational transformations are like the "Eliminate" +-- module but they transform a stream by eliminating parts of the stream +-- instead of eliminating the whole stream. +-- +-- These combinators involve transformation, generation, elimination so can be +-- classified under any of those. + +-- Flipped versions can be named as: +-- groupsFor :: stream -> fold -> stream (flipped groupsWhile) +-- +-- Flipped versions for folds: +-- foldMany :: outer fold -> inner fold -> fold (original version) +-- groupFoldFor :: inner fold -> outer fold -> fold (flipped version) +-- groupStepFor :: inner fold -> outer fold step -> fold (flipped version) +-- This can be convenient for defining the outer fold step using a lambda. 
+-- +module Streamly.Internal.Data.Stream.Parse + ( + -- * Eliminate + -- | Folding and Parsing chunks of streams to eliminate nested streams. + -- Functions generally ending in these shapes: + -- + -- @ + -- f (Fold m a b) -> t m a -> t m b + -- f (Parser a m b) -> t m a -> t m b + -- @ + + -- ** Folding + -- | Apply folds on a stream. + foldSequence + , foldIterateM + + -- ** Parsing + -- | Parsing is opposite to flattening. 'parseMany' is dual to concatMap or + -- unfoldEach concatMap generates a stream from single values in a + -- stream and flattens, parseMany does the opposite of flattening by + -- splitting the stream and then folds each such split to single value in + -- the output stream. + , parseMany + , parseManyPos + , parseSequence + , parseManyTill + , parseIterate + , parseIteratePos + + -- ** Grouping + -- | Group segments of a stream and fold. Special case of parsing. + , groupsWhile + , groupsRollingBy + + -- ** Splitting + -- | A special case of parsing. + , takeEndBySeq + , takeEndBySeq_ + , wordsBy + , splitSepBySeq_ + , splitEndBySeq + , splitEndBySeq_ + , splitOnSuffixSeq -- internal + + , splitBeginBy_ + , splitEndBySeqOneOf + , splitSepBySeqOneOf + + -- * Transform (Nested Containers) + -- | Opposite to compact in ArrayStream + , splitInnerBy -- XXX innerSplitOn + , splitInnerBySuffix -- XXX innerSplitOnSuffix + + -- * Reduce By Streams + , dropPrefix + , dropInfix + , dropSuffix + + -- * Deprecated + , parseManyD + , parseIterateD + , groupsBy + , splitOnSeq + ) +where + +#include "deprecation.h" +#include "inline.hs" +#include "ArrayMacros.h" + +import Control.Exception (assert) +import Control.Monad.IO.Class (MonadIO(..)) +import Data.Bits (shiftR, shiftL, (.|.), (.&.)) +import Data.Proxy (Proxy(..)) +import Data.Word (Word32) +import Fusion.Plugin.Types (Fuse(..)) +import GHC.Types (SPEC(..)) + +import Streamly.Internal.Data.Array.Type (Array(..)) +import Streamly.Internal.Data.Fold.Type (Fold(..)) +import 
Streamly.Internal.Data.MutArray.Type (MutArray(..)) +import Streamly.Internal.Data.Parser (ParseError(..), ParseErrorPos) +import Streamly.Internal.Data.RingArray (RingArray(..)) +import Streamly.Internal.Data.SVar.Type (adaptState) +import Streamly.Internal.Data.Unbox (Unbox(..)) + +import qualified Streamly.Internal.Data.Array.Type as A +import qualified Streamly.Internal.Data.MutArray.Type as MutArray +import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Parser as PR +import qualified Streamly.Internal.Data.Parser as PRD +import qualified Streamly.Internal.Data.ParserDrivers as Drivers +import qualified Streamly.Internal.Data.RingArray as RB + +import Streamly.Internal.Data.Stream.Type + +#include "DocTestDataStream.hs" + +------------------------------------------------------------------------------ +-- Folding +------------------------------------------------------------------------------ + +-- | Apply a stream of folds to an input stream and emit the results in the +-- output stream. +-- +-- /Unimplemented/ +-- +{-# INLINE foldSequence #-} +foldSequence + :: -- Monad m => + Stream m (Fold m a b) + -> Stream m a + -> Stream m b +foldSequence _f _m = undefined + +{-# ANN type FIterState Fuse #-} +data FIterState s f m a b + = FIterInit s f + | forall fs. FIterStream s (fs -> a -> m (FL.Step fs b)) fs (fs -> m b) + (fs -> m b) + | FIterYield b (FIterState s f m a b) + | FIterStop + +-- | Iterate a fold generator on a stream. The initial value @b@ is used to +-- generate the first fold, the fold is applied on the stream and the result of +-- the fold is used to generate the next fold and so on. 
+-- +-- Usage: +-- +-- >>> import Data.Monoid (Sum(..)) +-- >>> f x = return (Fold.take 2 (Fold.sconcat x)) +-- >>> s = fmap Sum $ Stream.fromList [1..10] +-- >>> Stream.fold Fold.toList $ fmap getSum $ Stream.foldIterateM f (pure 0) s +-- [3,10,21,36,55,55] +-- +-- This is the streaming equivalent of monad like sequenced application of +-- folds where next fold is dependent on the previous fold. +-- +-- /Pre-release/ +-- +{-# INLINE_NORMAL foldIterateM #-} +foldIterateM :: + Monad m => (b -> m (FL.Fold m a b)) -> m b -> Stream m a -> Stream m b +foldIterateM func seed0 (Stream step state) = + Stream stepOuter (FIterInit state seed0) + + where + + {-# INLINE iterStep #-} + iterStep from st fstep extract final = do + res <- from + return + $ Skip + $ case res of + FL.Partial fs -> FIterStream st fstep fs extract final + FL.Done fb -> FIterYield fb $ FIterInit st (return fb) + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (FIterInit st seed) = do + (FL.Fold fstep initial extract final) <- seed >>= func + iterStep initial st fstep extract final + stepOuter gst (FIterStream st fstep fs extract final) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + iterStep (fstep fs x) s fstep extract final + Skip s -> return $ Skip $ FIterStream s fstep fs extract final + Stop -> do + b <- final fs + return $ Skip $ FIterYield b FIterStop + stepOuter _ (FIterYield a next) = return $ Yield a next + stepOuter _ FIterStop = return Stop + +------------------------------------------------------------------------------ +-- Parsing +------------------------------------------------------------------------------ + +-- | Apply a 'Parser' repeatedly on a stream and emit the parsed values in the +-- output stream. 
+-- +-- Usage: +-- +-- >>> s = Stream.fromList [1..10] +-- >>> parser = Parser.takeBetween 0 2 Fold.sum +-- >>> Stream.toList $ Stream.parseMany parser s +-- [Right 3,Right 7,Right 11,Right 15,Right 19] +-- +-- This is the streaming equivalent of the 'Streamly.Data.Parser.many' parse +-- combinator. +-- +-- Known Issues: When the parser fails there is no way to get the remaining +-- stream. +-- +{-# INLINE parseMany #-} +parseMany + :: Monad m + => PRD.Parser a m b + -> Stream m a + -> Stream m (Either ParseError b) +parseMany = Drivers.parseMany + +-- | Like 'parseMany' but includes stream position information in the error +-- messages. +-- +{-# INLINE parseManyPos #-} +parseManyPos + :: Monad m + => PRD.Parser a m b + -> Stream m a + -> Stream m (Either ParseErrorPos b) +parseManyPos = Drivers.parseManyPos + +{-# DEPRECATED parseManyD "Please use parseMany instead." #-} +{-# INLINE parseManyD #-} +parseManyD + :: Monad m + => PR.Parser a m b + -> Stream m a + -> Stream m (Either ParseError b) +parseManyD = parseMany + +-- | Apply a stream of parsers to an input stream and emit the results in the +-- output stream. +-- +-- /Unimplemented/ +-- +{-# INLINE parseSequence #-} +parseSequence + :: -- Monad m => + Stream m (PR.Parser a m b) + -> Stream m a + -> Stream m b +parseSequence _f _m = undefined + +-- XXX Change the parser arguments' order + +-- | @parseManyTill collect test stream@ tries the parser @test@ on the input, +-- if @test@ fails it backtracks and tries @collect@, after @collect@ succeeds +-- @test@ is tried again and so on. The parser stops when @test@ succeeds. The +-- output of @test@ is discarded and the output of @collect@ is emitted in the +-- output stream. The parser fails if @collect@ fails. +-- +-- /Unimplemented/ +-- +{-# INLINE parseManyTill #-} +parseManyTill :: + -- MonadThrow m => + PR.Parser a m b + -> PR.Parser a m x + -> Stream m a + -> Stream m b +parseManyTill = undefined + +-- | Iterate a parser generating function on a stream. 
The initial value @b@ is +-- used to generate the first parser, the parser is applied on the stream and +-- the result is used to generate the next parser and so on. +-- +-- Example: +-- +-- >>> import Data.Monoid (Sum(..)) +-- >>> s = Stream.fromList [1..10] +-- >>> Stream.toList $ fmap getSum $ Stream.catRights $ Stream.parseIterate (\b -> Parser.takeBetween 0 2 (Fold.sconcat b)) (Sum 0) $ fmap Sum s +-- [3,10,21,36,55,55] +-- +-- This is the streaming equivalent of monad like sequenced application of +-- parsers where next parser is dependent on the previous parser. +-- +-- /Pre-release/ +-- +{-# INLINE parseIterate #-} +parseIterate + :: Monad m + => (b -> PRD.Parser a m b) + -> b + -> Stream m a + -> Stream m (Either ParseError b) +parseIterate = Drivers.parseIterate + +-- | Like 'parseIterate' but includes stream position information in the error +-- messages. +-- +{-# INLINE parseIteratePos #-} +parseIteratePos + :: Monad m + => (b -> PRD.Parser a m b) + -> b + -> Stream m a + -> Stream m (Either ParseErrorPos b) +parseIteratePos = Drivers.parseIteratePos + +{-# DEPRECATED parseIterateD "Please use parseIterate instead." #-} +{-# INLINE parseIterateD #-} +parseIterateD + :: Monad m + => (b -> PR.Parser a m b) + -> b + -> Stream m a + -> Stream m (Either ParseError b) +parseIterateD = parseIterate + +------------------------------------------------------------------------------ +-- Grouping +------------------------------------------------------------------------------ + +data GroupByState st fs a b + = GroupingInit st + | GroupingDo st !fs + | GroupingInitWith st !a + | GroupingDoWith st !fs !a + | GroupingYield !b (GroupByState st fs a b) + | GroupingDone + +-- | Keep collecting items in a group as long as the comparison function +-- returns true. The comparison function is @cmp old new@ where @old@ is the +-- first item in the group and @new@ is the incoming item being tested for +-- membership of the group. 
The collected items are folded by the supplied +-- fold. +-- +-- Definition: +-- +-- >>> groupsWhile cmp f = Stream.parseMany (Parser.groupBy cmp f) +{-# INLINE_NORMAL groupsWhile #-} +groupsWhile :: Monad m + => (a -> a -> Bool) + -> Fold m a b + -> Stream m a + -> Stream m b +{- +groupsWhile eq fld = parseMany (PRD.groupBy eq fld) +-} +groupsWhile cmp (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter (GroupingInit state) + + where + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (GroupingInit st) = do + -- XXX Note that if the stream stops without yielding a single element + -- in the group we discard the "initial" effect. + res <- initial + return + $ case res of + FL.Partial s -> Skip $ GroupingDo st s + FL.Done b -> Yield b $ GroupingInit st + stepOuter gst (GroupingDo st fs) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + Skip s -> return $ Skip $ GroupingDo s fs + Stop -> final fs >> return Stop + + where + + go !_ prev stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC prev s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + else do + r <- final acc + return $ Yield r (GroupingInitWith s x) + Skip s -> go SPEC prev s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingInitWith st x) = do + res <- initial + return + $ case res of + FL.Partial s -> Skip $ GroupingDoWith st s x + FL.Done b -> Yield b $ GroupingInitWith st x + stepOuter gst (GroupingDoWith st fs prev) = do + res <- fstep fs prev + case res of + FL.Partial fs1 -> go SPEC st fs1 + FL.Done b -> return $ Yield b (GroupingInit st) + + where + + -- XXX code duplicated from the previous equation + go !_ stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp 
prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + else do + r <- final acc + return $ Yield r (GroupingInitWith s x) + Skip s -> go SPEC s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingYield _ _) = error "groupsWhile: Unreachable" + stepOuter _ GroupingDone = return Stop + +-- | The argument order of the comparison function in `groupsWhile` is +-- different than that of `groupsBy`. +-- +-- In `groupsBy` the comparison function takes the next element as the first +-- argument and the previous element as the second argument. In `groupsWhile` +-- the first argument is the previous element and second argument is the next +-- element. +{-# DEPRECATED groupsBy "Please use groupsWhile instead. Please note the change in the argument order of the comparison function." #-} +{-# INLINE_NORMAL groupsBy #-} +groupsBy :: Monad m + => (a -> a -> Bool) + -> Fold m a b + -> Stream m a + -> Stream m b +groupsBy cmp = groupsWhile (flip cmp) + +-- | +-- +-- Definition: +-- +-- >>> groupsRollingBy cmp f = Stream.parseMany (Parser.groupByRolling cmp f) +-- +{-# INLINE_NORMAL groupsRollingBy #-} +groupsRollingBy :: Monad m + => (a -> a -> Bool) + -> Fold m a b + -> Stream m a + -> Stream m b +{- +groupsRollingBy eq fld = parseMany (PRD.groupByRolling eq fld) +-} +groupsRollingBy cmp (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter (GroupingInit state) + + where + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (GroupingInit st) = do + -- XXX Note that if the stream stops without yielding a single element + -- in the group we discard the "initial" effect. 
+ res <- initial + return + $ case res of + FL.Partial fs -> Skip $ GroupingDo st fs + FL.Done fb -> Yield fb $ GroupingInit st + stepOuter gst (GroupingDo st fs) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done fb -> return $ Yield fb (GroupingInit s) + Skip s -> return $ Skip $ GroupingDo s fs + Stop -> final fs >> return Stop + + where + + go !_ prev stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done b -> return $ Yield b (GroupingInit s) + else do + r <- final acc + return $ Yield r (GroupingInitWith s x) + Skip s -> go SPEC prev s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingInitWith st x) = do + res <- initial + return + $ case res of + FL.Partial s -> Skip $ GroupingDoWith st s x + FL.Done b -> Yield b $ GroupingInitWith st x + stepOuter gst (GroupingDoWith st fs previous) = do + res <- fstep fs previous + case res of + FL.Partial s -> go SPEC previous st s + FL.Done b -> return $ Yield b (GroupingInit st) + + where + + -- XXX GHC: groupsWhile has one less parameter in this go loop and it + -- fuses. However, groupsRollingBy does not fuse, removing the prev + -- parameter makes it fuse. Something needs to be fixed in GHC. The + -- workaround for this is noted in the comments below. + go !_ prev !stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if cmp prev x + then do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC x s fs1 + FL.Done b -> return $ Yield b (GroupingInit st) + else do + {- + r <- final acc + return $ Yield r (GroupingInitWith s x) + -} + -- The code above does not let groupBy fuse. We use the + -- alternative code below instead. 
Instead of jumping + -- to GroupingInitWith state, we unroll the code of + -- GroupingInitWith state here to help GHC with stream + -- fusion. + result <- initial + r <- final acc + return + $ Yield r + $ case result of + FL.Partial fsi -> GroupingDoWith s fsi x + FL.Done b -> GroupingYield b (GroupingInit s) + Skip s -> go SPEC prev s acc + Stop -> do + r <- final acc + return $ Yield r GroupingDone + stepOuter _ (GroupingYield r next) = return $ Yield r next + stepOuter _ GroupingDone = return Stop + +------------------------------------------------------------------------------ +-- Splitting - by a predicate +------------------------------------------------------------------------------ + +data WordsByState st fs b + = WordsByInit st + | WordsByDo st !fs + | WordsByDone + | WordsByYield !b (WordsByState st fs b) + +-- | Split the stream after stripping leading, trailing, and repeated +-- separators determined by the predicate supplied. The tokens after splitting +-- are collected by the supplied fold. In other words, the tokens are parsed in +-- the same way as words are parsed from whitespace separated text. +-- +-- >>> f x = Stream.toList $ Stream.wordsBy (== '.') Fold.toList $ Stream.fromList x +-- >>> f "a.b" +-- ["a","b"] +-- >>> f "a..b" +-- ["a","b"] +-- >>> f ".a..b." 
+-- ["a","b"] +-- +{-# INLINE_NORMAL wordsBy #-} +wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b +wordsBy predicate (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter (WordsByInit state) + + where + + {-# INLINE_LATE stepOuter #-} + stepOuter _ (WordsByInit st) = do + res <- initial + return + $ case res of + FL.Partial s -> Skip $ WordsByDo st s + FL.Done b -> Yield b (WordsByInit st) + + stepOuter gst (WordsByDo st fs) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + if predicate x + then do + resi <- initial + return + $ case resi of + FL.Partial fs1 -> Skip $ WordsByDo s fs1 + FL.Done b -> Yield b (WordsByInit s) + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC s fs1 + FL.Done b -> return $ Yield b (WordsByInit s) + Skip s -> return $ Skip $ WordsByDo s fs + Stop -> final fs >> return Stop + + where + + go !_ stt !acc = do + res <- step (adaptState gst) stt + case res of + Yield x s -> do + if predicate x + then do + {- + r <- final acc + return $ Yield r (WordsByInit s) + -} + -- The above code does not fuse well. Need to check why + -- GHC is not able to simplify it well. Using the code + -- below, instead of jumping through the WordsByInit + -- state always, we directly go to WordsByDo state in + -- the common case of Partial. 
+ resi <- initial + r <- final acc + return + $ Yield r + $ case resi of + FL.Partial fs1 -> WordsByDo s fs1 + FL.Done b -> WordsByYield b (WordsByInit s) + else do + r <- fstep acc x + case r of + FL.Partial fs1 -> go SPEC s fs1 + FL.Done b -> return $ Yield b (WordsByInit s) + Skip s -> go SPEC s acc + Stop -> do + r <- final acc + return $ Yield r WordsByDone + + stepOuter _ WordsByDone = return Stop + + stepOuter _ (WordsByYield b next) = return $ Yield b next + +------------------------------------------------------------------------------ +-- Splitting on a sequence +------------------------------------------------------------------------------ + +-- String search algorithms: +-- http://www-igm.univ-mlv.fr/~lecroq/string/index.html + +-- XXX Can GHC find a way to modularise this? Can we write different cases +-- i.e.g single element, word hash, karp-rabin as different functions and then +-- be able to combine them into a single state machine? + +{-# ANN type TakeEndBySeqState Fuse #-} +data TakeEndBySeqState mba rb rh ck w s b x = + TakeEndBySeqInit + | TakeEndBySeqYield !b (TakeEndBySeqState mba rb rh ck w s b x) + | TakeEndBySeqDone + + | TakeEndBySeqSingle s x + + | TakeEndBySeqWordInit !Int !w s + | TakeEndBySeqWordLoop !w s + | TakeEndBySeqWordDone !Int !w + + | TakeEndBySeqKRInit s mba + | TakeEndBySeqKRInit1 s mba !Int + | TakeEndBySeqKRLoop s mba !rh !ck + | TakeEndBySeqKRCheck s mba !rh + | TakeEndBySeqKRDone !Int rb + +-- | If the pattern is empty the output stream is empty. +{-# INLINE_NORMAL takeEndBySeqWith #-} +takeEndBySeqWith + :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) + => Bool + -> Array a + -> Stream m a + -> Stream m a +takeEndBySeqWith withSep patArr (Stream step state) = + Stream stepOuter TakeEndBySeqInit + + where + + patLen = A.length patArr + patBytes = A.byteLength patArr + maxIndex = patLen - 1 + maxOffset = patBytes - SIZE_OF(a) + elemBits = SIZE_OF(a) * 8 + + -- For word pattern case + wordMask :: Word + wordMask = (1 `shiftL` (elemBits * patLen)) - 1 + + elemMask :: Word + elemMask = (1 `shiftL` elemBits) - 1 + + wordPat :: Word + wordPat = wordMask .&. A.foldl' addToWord 0 patArr + + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + + -- For Rabin-Karp search + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + + skip = return . Skip + + {-# INLINE yield #-} + yield x !s = skip $ TakeEndBySeqYield x s + + {-# INLINE_LATE stepOuter #-} + stepOuter _ TakeEndBySeqInit = do + -- XXX When we statically specify the method compiler is able to + -- simplify the code better and removes the handling of other states. + -- When it is determined dynamically, the code is less efficient. For + -- example, the single element search degrades by 80% if the handling + -- of other cases is present. We need to investigate this further but + -- until then we can guide the compiler statically where we can. If we + -- want to use single element search statically then we can use + -- takeEndBy instead. + -- + -- XXX Is there a way for GHC to statically determine patLen when we + -- use an array created from a static string as pattern e.g. "\n". 
+ case () of + _ | patLen == 0 -> return Stop + | patLen == 1 -> do + pat <- liftIO $ A.unsafeGetIndexIO 0 patArr + return $ Skip $ TakeEndBySeqSingle state pat + | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> + return $ Skip $ TakeEndBySeqWordInit 0 0 state + | otherwise -> do + (MutArray mba _ _ _) :: MutArray a <- + liftIO $ MutArray.emptyOf patLen + skip $ TakeEndBySeqKRInit state mba + + --------------------- + -- Single yield point + --------------------- + + stepOuter _ (TakeEndBySeqYield x next) = return $ Yield x next + + ----------------- + -- Done + ----------------- + + stepOuter _ TakeEndBySeqDone = return Stop + + ----------------- + -- Single Pattern + ----------------- + + stepOuter gst (TakeEndBySeqSingle st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> + if pat /= x + then yield x (TakeEndBySeqSingle s pat) + else do + if withSep + then yield x TakeEndBySeqDone + else return Stop + Skip s -> skip $ TakeEndBySeqSingle s pat + Stop -> return Stop + + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + + -- Note: Karp-Rabin is roughly 15% slower than word hash for a 2 element + -- pattern. This may be useful for common cases like splitting lines using + -- "\r\n". + stepOuter _ (TakeEndBySeqWordDone 0 _) = do + return Stop + stepOuter _ (TakeEndBySeqWordDone n wrd) = do + let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) + in yield + (toEnum $ fromIntegral old) + (TakeEndBySeqWordDone (n - 1) wrd) + + -- XXX If we remove this init state for perf experiment the time taken + -- reduces to half, there may be some optimization opportunity here. + stepOuter gst (TakeEndBySeqWordInit idx wrd st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let wrd1 = addToWord wrd x + next + | idx /= maxIndex = + TakeEndBySeqWordInit (idx + 1) wrd1 s + | wrd1 .&. 
wordMask /= wordPat = + TakeEndBySeqWordLoop wrd1 s + | otherwise = TakeEndBySeqDone + if withSep + then yield x next + else skip next + Skip s -> skip $ TakeEndBySeqWordInit idx wrd s + Stop -> + if withSep + then return Stop + else skip $ TakeEndBySeqWordDone idx wrd + + stepOuter gst (TakeEndBySeqWordLoop wrd st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + -- XXX Never use a lazy expression as state, that causes issues + -- in simplification because the state argument of Yield is + -- lazy, maybe we can make that strict. + let wrd1 = addToWord wrd x + old = (wordMask .&. wrd) + `shiftR` (elemBits * (patLen - 1)) + !y = + if withSep + then x + else toEnum $ fromIntegral old + -- Note: changing the nesting order of if and yield makes a + -- difference in performance. + if wrd1 .&. wordMask /= wordPat + then yield y (TakeEndBySeqWordLoop wrd1 s) + else yield y TakeEndBySeqDone + Skip s -> skip $ TakeEndBySeqWordLoop wrd s + Stop -> + if withSep + then return Stop + else skip $ TakeEndBySeqWordDone patLen wrd + + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + + stepOuter gst (TakeEndBySeqKRInit st0 mba) = do + res <- step (adaptState gst) st0 + case res of + Yield x s -> do + liftIO $ pokeAt 0 mba x + if withSep + then yield x (TakeEndBySeqKRInit1 s mba (SIZE_OF(a))) + else skip $ TakeEndBySeqKRInit1 s mba (SIZE_OF(a)) + Skip s -> skip $ TakeEndBySeqKRInit s mba + Stop -> return Stop + + stepOuter gst (TakeEndBySeqKRInit1 st mba offset) = do + res <- step (adaptState gst) st + let arr :: Array a = Array + { arrContents = mba + , arrStart = 0 + , arrEnd = patBytes + } + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + let next = + if offset /= maxOffset + then TakeEndBySeqKRInit1 s mba (offset + SIZE_OF(a)) + else + let ringHash = A.foldl' addCksum 0 arr + in if ringHash == patHash + then TakeEndBySeqKRCheck s mba 0 + else TakeEndBySeqKRLoop s mba 0 ringHash + if withSep 
+ then yield x next + else skip next + Skip s -> skip $ TakeEndBySeqKRInit1 s mba offset + Stop -> do + if withSep + then return Stop + else do + let rb = RingArray + { ringContents = mba + , ringSize = offset + , ringHead = 0 + } + in skip $ TakeEndBySeqKRDone offset rb + + stepOuter gst (TakeEndBySeqKRLoop st mba rh cksum) = do + res <- step (adaptState gst) st + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + case res of + Yield x s -> do + (rb1, old) <- liftIO (RB.replace rb x) + let cksum1 = deltaCksum cksum old x + let rh1 = ringHead rb1 + next = + if cksum1 /= patHash + then TakeEndBySeqKRLoop s mba rh1 cksum1 + else TakeEndBySeqKRCheck s mba rh1 + if withSep + then yield x next + else yield old next + Skip s -> skip $ TakeEndBySeqKRLoop s mba rh cksum + Stop -> do + if withSep + then return Stop + else skip $ TakeEndBySeqKRDone patBytes rb + + stepOuter _ (TakeEndBySeqKRCheck st mba rh) = do + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + matches <- liftIO $ RB.eqArray rb patArr + if matches + then return Stop + else skip $ TakeEndBySeqKRLoop st mba rh patHash + + stepOuter _ (TakeEndBySeqKRDone 0 _) = return Stop + stepOuter _ (TakeEndBySeqKRDone len rb) = do + assert (len >= 0) (return ()) + old <- RB.unsafeGetHead rb + let rb1 = RB.moveForward rb + yield old $ TakeEndBySeqKRDone (len - SIZE_OF(a)) rb1 + +-- | Take the stream until the supplied sequence is encountered. Take the +-- sequence as well and stop. +-- +-- Usage: +-- +-- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq (Array.fromList pat) $ Stream.fromList xs +-- >>> f "fgh" "abcdefghijk" +-- "abcdefgh" +-- >>> f "lmn" "abcdefghijk" +-- "abcdefghijk" +-- >>> f "" "abcdefghijk" +-- "" +-- +{-# INLINE takeEndBySeq #-} +takeEndBySeq + :: forall m a. 
(MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Stream m a + -> Stream m a +takeEndBySeq = takeEndBySeqWith True + +-- | Take the stream until the supplied sequence is encountered. Do not take +-- the sequence. +-- +-- Usage: +-- +-- >>> f pat xs = Stream.toList $ Stream.takeEndBySeq_ (Array.fromList pat) $ Stream.fromList xs +-- >>> f "fgh" "abcdefghijk" +-- "abcde" +-- >>> f "lmn" "abcdefghijk" +-- "abcdefghijk" +-- >>> f "" "abcdefghijk" +-- "" +-- +{-# INLINE takeEndBySeq_ #-} +takeEndBySeq_ + :: forall m a. (MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Stream m a + -> Stream m a +takeEndBySeq_ = takeEndBySeqWith False + +{- +-- TODO can we unify the splitting operations using a splitting configuration +-- like in the split package. +-- +data SplitStyle = Infix | Suffix | Prefix deriving (Eq, Show) +data SplitOptions = SplitOptions + { style :: SplitStyle + , withSep :: Bool -- ^ keep the separators in output + -- , compact :: Bool -- ^ treat multiple consecutive separators as one + -- , trimHead :: Bool -- ^ drop blank at head + -- , trimTail :: Bool -- ^ drop blank at tail + } +-} + +-- XXX using "fs" as the last arg in Constructors may simplify the code a bit, +-- because we can use the constructor directly without having to create "jump" +-- functions. 
+{-# ANN type SplitOnSeqState Fuse #-} +data SplitOnSeqState mba rb rh ck w fs s b x = + SplitOnSeqInit + | SplitOnSeqYield b (SplitOnSeqState mba rb rh ck w fs s b x) + | SplitOnSeqDone + + | SplitOnSeqEmpty !fs s + + | SplitOnSeqSingle0 !fs s x + | SplitOnSeqSingle !fs s x + + | SplitOnSeqWordInit0 !fs s + | SplitOnSeqWordInit Int Word !fs s + | SplitOnSeqWordLoop !w s !fs + | SplitOnSeqWordDone Int !fs !w + + | SplitOnSeqKRInit0 Int !fs s mba + | SplitOnSeqKRInit Int !fs s mba + | SplitOnSeqKRLoop fs s mba !rh !ck + | SplitOnSeqKRCheck fs s mba !rh + | SplitOnSeqKRDone Int !fs rb + + | SplitOnSeqReinit (fs -> SplitOnSeqState mba rb rh ck w fs s b x) + +-- XXX Need to fix empty stream split behavior + +-- | Like 'splitSepBy_' but splits the stream on a sequence of elements rather than +-- a single element. Parses a sequence of tokens separated by an infixed +-- separator e.g. @a;b;c@ is parsed as @a@, @b@, @c@. If the pattern is empty +-- then each element is a match, thus the fold is finalized on each element. +-- +-- >>> splitSepBy p xs = Stream.fold Fold.toList $ Stream.splitSepBySeq_ (Array.fromList p) Fold.toList (Stream.fromList xs) +-- +-- >>> splitSepBy "" "" +-- [] +-- +-- >>> splitSepBy "" "a...b" +-- ["a",".",".",".","b"] +-- +-- >>> splitSepBy ".." "" +-- [] +-- +-- >>> splitSepBy ".." "a...b" +-- ["a",".b"] +-- +-- >>> splitSepBy ".." "abc" +-- ["abc"] +-- +-- >>> splitSepBy ".." ".." +-- ["",""] +-- +-- >>> splitSepBy "." ".a" +-- ["","a"] +-- +-- >>> splitSepBy "." "a." +-- ["a",""] +-- +-- Uses Rabin-Karp algorithm for substring search. +-- +{-# INLINE_NORMAL splitSepBySeq_ #-} +splitSepBySeq_, splitOnSeq + :: forall m a b. 
(MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitSepBySeq_ patArr (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter SplitOnSeqInit + + where + + patLen = A.length patArr + patBytes = A.byteLength patArr + maxIndex = patLen - 1 + maxOffset = patBytes - SIZE_OF(a) + elemBits = SIZE_OF(a) * 8 + + -- For word pattern case + wordMask :: Word + wordMask = (1 `shiftL` (elemBits * patLen)) - 1 + + elemMask :: Word + elemMask = (1 `shiftL` elemBits) - 1 + + wordPat :: Word + wordPat = wordMask .&. A.foldl' addToWord 0 patArr + + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + + -- For Rabin-Karp search + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + + skip = return . Skip + + nextAfterInit nextGen stepRes = + case stepRes of + FL.Partial s -> nextGen s + FL.Done b -> SplitOnSeqYield b (SplitOnSeqReinit nextGen) + + {-# INLINE yieldReinit #-} + yieldReinit nextGen fs = + initial >>= skip . SplitOnSeqYield fs . 
nextAfterInit nextGen + + {-# INLINE_LATE stepOuter #-} + stepOuter _ SplitOnSeqInit = do + res <- initial + case res of + FL.Partial acc + | patLen == 0 -> + return $ Skip $ SplitOnSeqEmpty acc state + | patLen == 1 -> do + pat <- liftIO $ A.unsafeGetIndexIO 0 patArr + return $ Skip $ SplitOnSeqSingle0 acc state pat + | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> + return $ Skip $ SplitOnSeqWordInit0 acc state + | otherwise -> do + (MutArray mba _ _ _) :: MutArray a <- + liftIO $ MutArray.emptyOf patLen + skip $ SplitOnSeqKRInit0 0 acc state mba + FL.Done b -> skip $ SplitOnSeqYield b SplitOnSeqInit + + stepOuter _ (SplitOnSeqYield x next) = return $ Yield x next + + --------------------------- + -- Checkpoint + --------------------------- + + stepOuter _ (SplitOnSeqReinit nextGen) = + initial >>= skip . nextAfterInit nextGen + + --------------------------- + -- Empty pattern + --------------------------- + + stepOuter gst (SplitOnSeqEmpty acc st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + r <- fstep acc x + b1 <- + case r of + FL.Partial acc1 -> final acc1 + FL.Done b -> return b + let jump c = SplitOnSeqEmpty c s + in yieldReinit jump b1 + Skip s -> skip (SplitOnSeqEmpty acc s) + Stop -> final acc >> return Stop + + ----------------- + -- Done + ----------------- + + stepOuter _ SplitOnSeqDone = return Stop + + ----------------- + -- Single Pattern + ----------------- + + stepOuter gst (SplitOnSeqSingle0 fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + -- XXX This code block is duplicated in SplitOnSeqSingle state + let jump c = SplitOnSeqSingle c s pat + if pat == x + then final fs >>= yieldReinit jump + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> + pure $ Skip $ SplitOnSeqSingle fs1 s pat + FL.Done b -> yieldReinit jump b + Skip s -> pure $ Skip $ SplitOnSeqSingle0 fs s pat + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqSingle fs0 st0 pat) = do + go SPEC fs0 
st0 + + where + + -- The local loop increases allocations by 6% but improves CPU + -- performance by 14%. + go !_ !fs !st = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSeqSingle c s pat + if pat == x + then final fs >>= yieldReinit jump + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> go SPEC fs1 s + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC fs s + Stop -> do + r <- final fs + return $ Skip $ SplitOnSeqYield r SplitOnSeqDone + + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + + -- Note: We fill the matching buffer before we emit anything, in case it + -- matches and we have to drop it. Though we could be more eager in + -- emitting as soon as we know that the pattern cannot match. But still the + -- worst case will remain the same, in case a match is going to happen we + -- will have to delay until the very end. + + stepOuter _ (SplitOnSeqWordDone 0 fs _) = do + r <- final fs + skip $ SplitOnSeqYield r SplitOnSeqDone + stepOuter _ (SplitOnSeqWordDone n fs wrd) = do + let old = elemMask .&. (wrd `shiftR` (elemBits * (n - 1))) + r <- fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> skip $ SplitOnSeqWordDone (n - 1) fs1 wrd + FL.Done b -> do + let jump c = SplitOnSeqWordDone (n - 1) c wrd + yieldReinit jump b + + stepOuter gst (SplitOnSeqWordInit0 fs st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> + let wrd1 = addToWord 0 x + in pure $ Skip $ SplitOnSeqWordInit 1 wrd1 fs s + Skip s -> pure $ Skip $ SplitOnSeqWordInit0 fs s + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqWordInit idx0 wrd0 fs st0) = + go SPEC idx0 wrd0 st0 + + where + + {-# INLINE go #-} + go !_ !idx !wrd !st = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let wrd1 = addToWord wrd x + if idx == maxIndex + then do + if wrd1 .&. 
wordMask == wordPat + then do + let jump c = SplitOnSeqWordInit 0 0 c s + final fs >>= yieldReinit jump + else skip $ SplitOnSeqWordLoop wrd1 s fs + else go SPEC (idx + 1) wrd1 s + Skip s -> go SPEC idx wrd s + Stop -> do + if idx /= 0 + then skip $ SplitOnSeqWordDone idx fs wrd + else do + r <- final fs + skip $ SplitOnSeqYield r SplitOnSeqDone + + stepOuter gst (SplitOnSeqWordLoop wrd0 st0 fs0) = + go SPEC wrd0 st0 fs0 + + where + + -- This loop does not affect allocations but it improves the CPU + -- performance signifcantly compared to looping using state. + {-# INLINE go #-} + go !_ !wrd !st !fs = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSeqWordInit 0 0 c s + wrd1 = addToWord wrd x + old = (wordMask .&. wrd) + `shiftR` (elemBits * (patLen - 1)) + r <- fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> do + if wrd1 .&. wordMask == wordPat + then final fs1 >>= yieldReinit jump + else go SPEC wrd1 s fs1 + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC wrd s fs + Stop -> skip $ SplitOnSeqWordDone patLen fs wrd + + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + + -- XXX Document this pattern for writing efficient code. Loop around only + -- required elements in the recursive loop, build the structures being + -- manipulated locally e.g. we are passing only mba, here and build an + -- array using patLen and arrStart from the surrounding context. 
+ + stepOuter gst (SplitOnSeqKRInit0 offset fs st mba) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba + Skip s -> skip $ SplitOnSeqKRInit0 offset fs s mba + Stop -> final fs >> pure Stop + + stepOuter gst (SplitOnSeqKRInit offset fs st mba) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + if offset == maxOffset + then do + let arr :: Array a = Array + { arrContents = mba + , arrStart = 0 + , arrEnd = patBytes + } + let ringHash = A.foldl' addCksum 0 arr + if ringHash == patHash && A.byteEq arr patArr + then skip $ SplitOnSeqKRCheck fs s mba 0 + else skip $ SplitOnSeqKRLoop fs s mba 0 ringHash + else skip $ SplitOnSeqKRInit (offset + SIZE_OF(a)) fs s mba + Skip s -> skip $ SplitOnSeqKRInit offset fs s mba + Stop -> do + let rb = RingArray + { ringContents = mba + , ringSize = offset + , ringHead = 0 + } + skip $ SplitOnSeqKRDone offset fs rb + + -- XXX The recursive "go" is more efficient than the state based recursion + -- code commented out below. Perhaps its more efficient because of + -- factoring out "mba" outside the loop. + -- + stepOuter gst (SplitOnSeqKRLoop fs0 st0 mba rh0 cksum0) = + go SPEC fs0 st0 rh0 cksum0 + + where + + go !_ !fs !st !rh !cksum = do + res <- step (adaptState gst) st + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + case res of + Yield x s -> do + (rb1, old) <- liftIO (RB.replace rb x) + r <- fstep fs old + case r of + FL.Partial fs1 -> do + let cksum1 = deltaCksum cksum old x + let rh1 = ringHead rb1 + if cksum1 == patHash + then skip $ SplitOnSeqKRCheck fs1 s mba rh1 + else go SPEC fs1 s rh1 cksum1 + FL.Done b -> do + -- XXX the old code looks wrong as we are resetting + -- the ring head but the ring still has old + -- elements as we are not resetting the size. 
+ let jump c = SplitOnSeqKRInit 0 c s mba + yieldReinit jump b + Skip s -> go SPEC fs s rh cksum + Stop -> skip $ SplitOnSeqKRDone patBytes fs rb + + -- XXX The following code is 5 times slower compared to the recursive loop + -- based code above. Need to investigate why. One possibility is that the + -- go loop above does not thread around the ring buffer (rb). This code may + -- be causing the state to bloat and getting allocated on each iteration. + -- We can check the cmm/asm code to confirm. If so a good GHC solution to + -- such problem is needed. One way to avoid this could be to use unboxed + -- mutable state? + {- + stepOuter gst (SplitOnSeqKRLoop fs st rb rh cksum) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum1 = deltaCksum cksum old x + fs1 <- fstep fs old + if (cksum1 == patHash) + then do + r <- done fs1 + skip $ SplitOnSeqYield r $ SplitOnSeqKRInit 0 s rb rh + else do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + skip $ SplitOnSeqKRLoop fs1 s rb rh1 cksum1 + Skip s -> skip $ SplitOnSeqKRLoop fs s rb rh cksum + Stop -> skip $ SplitOnSeqKRDone patLen fs rb rh + -} + + stepOuter _ (SplitOnSeqKRCheck fs st mba rh) = do + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + res <- liftIO $ RB.eqArray rb patArr + if res + then do + r <- final fs + let jump c = SplitOnSeqKRInit 0 c st mba + yieldReinit jump r + else skip $ SplitOnSeqKRLoop fs st mba rh patHash + + stepOuter _ (SplitOnSeqKRDone 0 fs _) = do + r <- final fs + skip $ SplitOnSeqYield r SplitOnSeqDone + stepOuter _ (SplitOnSeqKRDone len fs rb) = do + assert (len >= 0) (return ()) + old <- RB.unsafeGetHead rb + let rb1 = RB.moveForward rb + r <- fstep fs old + case r of + FL.Partial fs1 -> skip $ SplitOnSeqKRDone (len - SIZE_OF(a)) fs1 rb1 + FL.Done b -> do + let jump c = SplitOnSeqKRDone (len - SIZE_OF(a)) c rb1 + yieldReinit jump b + +RENAME(splitOnSeq,splitSepBySeq_) + +{-# ANN type 
SplitOnSuffixSeqState Fuse #-} +data SplitOnSuffixSeqState mba rb rh ck w fs s b x = + SplitOnSuffixSeqInit + | SplitOnSuffixSeqYield b (SplitOnSuffixSeqState mba rb rh ck w fs s b x) + | SplitOnSuffixSeqDone + + | SplitOnSuffixSeqEmpty !fs s + + | SplitOnSuffixSeqSingleInit !fs s x + | SplitOnSuffixSeqSingle !fs s x + + | SplitOnSuffixSeqWordInit !fs s + | SplitOnSuffixSeqWordLoop !w s !fs + | SplitOnSuffixSeqWordDone Int !fs !w + + | SplitOnSuffixSeqKRInit !fs s mba + | SplitOnSuffixSeqKRInit1 !fs s mba + | SplitOnSuffixSeqKRLoop fs s mba !rh !ck + | SplitOnSuffixSeqKRCheck fs s mba !rh + | SplitOnSuffixSeqKRDone Int !fs rb + + | SplitOnSuffixSeqReinit + (fs -> SplitOnSuffixSeqState mba rb rh ck w fs s b x) + +-- | @splitOnSuffixSeq withSep pat fld input@ splits the input using @pat@ as a +-- suffixed separator, the resulting split segments are fed to the fold @fld@. +-- If @withSep@ is True then the separator sequence is also suffixed with the +-- split segments. +-- +-- /Internal/ +{-# INLINE_NORMAL splitOnSuffixSeq #-} +splitOnSuffixSeq + :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) + => Bool + -> Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitOnSuffixSeq withSep patArr (Fold fstep initial _ final) (Stream step state) = + Stream stepOuter SplitOnSuffixSeqInit + + where + + patLen = A.length patArr + patBytes = A.byteLength patArr + maxIndex = patLen - 1 + maxOffset = patBytes - SIZE_OF(a) + elemBits = SIZE_OF(a) * 8 + + -- For word pattern case + wordMask :: Word + wordMask = (1 `shiftL` (elemBits * patLen)) - 1 + + elemMask :: Word + elemMask = (1 `shiftL` elemBits) - 1 + + wordPat :: Word + wordPat = wordMask .&. A.foldl' addToWord 0 patArr + + addToWord wd a = (wd `shiftL` elemBits) .|. 
fromIntegral (fromEnum a) + + nextAfterInit nextGen stepRes = + case stepRes of + FL.Partial s -> nextGen s + FL.Done b -> + SplitOnSuffixSeqYield b (SplitOnSuffixSeqReinit nextGen) + + {-# INLINE yieldReinit #-} + yieldReinit nextGen fs = + initial >>= skip . SplitOnSuffixSeqYield fs . nextAfterInit nextGen + + -- For single element pattern case + {-# INLINE processYieldSingle #-} + processYieldSingle pat x s fs = do + let jump c = SplitOnSuffixSeqSingleInit c s pat + if pat == x + then do + r <- if withSep then fstep fs x else return $ FL.Partial fs + b1 <- + case r of + FL.Partial fs1 -> final fs1 + FL.Done b -> return b + yieldReinit jump b1 + else do + r <- fstep fs x + case r of + FL.Partial fs1 -> skip $ SplitOnSuffixSeqSingle fs1 s pat + FL.Done b -> yieldReinit jump b + + -- For Rabin-Karp search + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + + skip = return . Skip + + {-# INLINE_LATE stepOuter #-} + stepOuter _ SplitOnSuffixSeqInit = do + res <- initial + case res of + FL.Partial fs + | patLen == 0 -> + skip $ SplitOnSuffixSeqEmpty fs state + | patLen == 1 -> do + pat <- liftIO $ A.unsafeGetIndexIO 0 patArr + skip $ SplitOnSuffixSeqSingleInit fs state pat + | SIZE_OF(a) * patLen <= sizeOf (Proxy :: Proxy Word) -> + skip $ SplitOnSuffixSeqWordInit fs state + | otherwise -> do + (MutArray mba _ _ _) :: MutArray a <- + liftIO $ MutArray.emptyOf patLen + skip $ SplitOnSuffixSeqKRInit fs state mba + FL.Done fb -> skip $ SplitOnSuffixSeqYield fb SplitOnSuffixSeqInit + + stepOuter _ (SplitOnSuffixSeqYield x next) = return $ Yield x next + + --------------------------- + -- Reinit + --------------------------- + + stepOuter _ (SplitOnSuffixSeqReinit nextGen) = + initial >>= skip . 
nextAfterInit nextGen + + --------------------------- + -- Empty pattern + --------------------------- + + stepOuter gst (SplitOnSuffixSeqEmpty acc st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSuffixSeqEmpty c s + r <- fstep acc x + b1 <- + case r of + FL.Partial fs -> final fs + FL.Done b -> return b + yieldReinit jump b1 + Skip s -> skip (SplitOnSuffixSeqEmpty acc s) + Stop -> final acc >> return Stop + + ----------------- + -- Done + ----------------- + + stepOuter _ SplitOnSuffixSeqDone = return Stop + + ----------------- + -- Single Pattern + ----------------- + + stepOuter gst (SplitOnSuffixSeqSingleInit fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> processYieldSingle pat x s fs + Skip s -> skip $ SplitOnSuffixSeqSingleInit fs s pat + Stop -> final fs >> return Stop + + stepOuter gst (SplitOnSuffixSeqSingle fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> processYieldSingle pat x s fs + Skip s -> skip $ SplitOnSuffixSeqSingle fs s pat + Stop -> do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + + stepOuter _ (SplitOnSuffixSeqWordDone 0 fs _) = do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + stepOuter _ (SplitOnSuffixSeqWordDone n fs wrd) = do + let old = elemMask .&. 
(wrd `shiftR` (elemBits * (n - 1))) + r <- fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> skip $ SplitOnSuffixSeqWordDone (n - 1) fs1 wrd + FL.Done b -> do + let jump c = SplitOnSuffixSeqWordDone (n - 1) c wrd + yieldReinit jump b + + stepOuter gst (SplitOnSuffixSeqWordInit fs0 st0) = do + res <- step (adaptState gst) st0 + case res of + Yield x s -> do + let wrd = addToWord 0 x + r <- if withSep then fstep fs0 x else return $ FL.Partial fs0 + case r of + FL.Partial fs1 -> go SPEC 1 wrd s fs1 + FL.Done b -> do + let jump c = SplitOnSuffixSeqWordInit c s + yieldReinit jump b + Skip s -> skip (SplitOnSuffixSeqWordInit fs0 s) + Stop -> final fs0 >> return Stop + + where + + {-# INLINE go #-} + go !_ !idx !wrd !st !fs = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSuffixSeqWordInit c s + let wrd1 = addToWord wrd x + r <- if withSep then fstep fs x else return $ FL.Partial fs + case r of + FL.Partial fs1 + | idx /= maxIndex -> + go SPEC (idx + 1) wrd1 s fs1 + | wrd1 .&. wordMask /= wordPat -> + skip $ SplitOnSuffixSeqWordLoop wrd1 s fs1 + | otherwise -> + final fs1 >>= yieldReinit jump + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC idx wrd s fs + Stop -> + if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else skip $ SplitOnSuffixSeqWordDone idx fs wrd + + stepOuter gst (SplitOnSuffixSeqWordLoop wrd0 st0 fs0) = + go SPEC wrd0 st0 fs0 + + where + + {-# INLINE go #-} + go !_ !wrd !st !fs = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let jump c = SplitOnSuffixSeqWordInit c s + wrd1 = addToWord wrd x + old = (wordMask .&. wrd) + `shiftR` (elemBits * (patLen - 1)) + r <- + if withSep + then fstep fs x + else fstep fs (toEnum $ fromIntegral old) + case r of + FL.Partial fs1 -> + if wrd1 .&. 
wordMask == wordPat + then final fs1 >>= yieldReinit jump + else go SPEC wrd1 s fs1 + FL.Done b -> yieldReinit jump b + Skip s -> go SPEC wrd s fs + Stop -> + if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else skip $ SplitOnSuffixSeqWordDone patLen fs wrd + + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + + stepOuter gst (SplitOnSuffixSeqKRInit fs st0 mba) = do + res <- step (adaptState gst) st0 + case res of + Yield x s -> do + liftIO $ pokeAt 0 mba x + r <- if withSep then fstep fs x else return $ FL.Partial fs + case r of + FL.Partial fs1 -> + skip $ SplitOnSuffixSeqKRInit1 fs1 s mba + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRInit c s mba + yieldReinit jump b + Skip s -> skip $ SplitOnSuffixSeqKRInit fs s mba + Stop -> final fs >> return Stop + + stepOuter gst (SplitOnSuffixSeqKRInit1 fs0 st0 mba) = do + go SPEC (SIZE_OF(a)) st0 fs0 + + where + + go !_ !offset st !fs = do + res <- step (adaptState gst) st + let arr :: Array a = Array + { arrContents = mba + , arrStart = 0 + , arrEnd = patBytes + } + case res of + Yield x s -> do + liftIO $ pokeAt offset mba x + r <- if withSep then fstep fs x else return $ FL.Partial fs + let ringHash = A.foldl' addCksum 0 arr + case r of + FL.Partial fs1 + | offset /= maxOffset -> + go SPEC (offset + SIZE_OF(a)) s fs1 + | ringHash == patHash -> + skip $ SplitOnSuffixSeqKRCheck fs1 s mba 0 + | otherwise -> + skip $ SplitOnSuffixSeqKRLoop + fs1 s mba 0 ringHash + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRInit c s mba + yieldReinit jump b + Skip s -> go SPEC offset s fs + Stop -> do + -- do not issue a blank segment when we end at pattern + if offset == maxOffset && A.byteEq arr patArr + then final fs >> return Stop + else if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else do + let rb = RingArray + { ringContents = mba + , ringSize = offset + , ringHead = 0 + } + in 
skip $ SplitOnSuffixSeqKRDone offset fs rb + + stepOuter gst (SplitOnSuffixSeqKRLoop fs0 st0 mba rh0 cksum0) = + go SPEC fs0 st0 rh0 cksum0 + + where + + go !_ !fs !st !rh !cksum = do + res <- step (adaptState gst) st + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + case res of + Yield x s -> do + (rb1, old) <- liftIO (RB.replace rb x) + let cksum1 = deltaCksum cksum old x + let rh1 = ringHead rb1 + r <- if withSep then fstep fs x else fstep fs old + case r of + FL.Partial fs1 -> + if cksum1 /= patHash + then go SPEC fs1 s rh1 cksum1 + else skip $ SplitOnSuffixSeqKRCheck fs1 s mba rh1 + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRInit c s mba + yieldReinit jump b + Skip s -> go SPEC fs s rh cksum + Stop -> do + if withSep + then do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + else skip $ SplitOnSuffixSeqKRDone patBytes fs rb + + stepOuter _ (SplitOnSuffixSeqKRCheck fs st mba rh) = do + let rb = RingArray + { ringContents = mba + , ringSize = patBytes + , ringHead = rh + } + matches <- liftIO $ RB.eqArray rb patArr + if matches + then do + r <- final fs + let jump c = SplitOnSuffixSeqKRInit c st mba + yieldReinit jump r + else skip $ SplitOnSuffixSeqKRLoop fs st mba rh patHash + + stepOuter _ (SplitOnSuffixSeqKRDone 0 fs _) = do + r <- final fs + skip $ SplitOnSuffixSeqYield r SplitOnSuffixSeqDone + stepOuter _ (SplitOnSuffixSeqKRDone len fs rb) = do + assert (len >= 0) (return ()) + old <- RB.unsafeGetHead rb + let rb1 = RB.moveForward rb + r <- fstep fs old + case r of + FL.Partial fs1 -> + skip $ SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) fs1 rb1 + FL.Done b -> do + let jump c = SplitOnSuffixSeqKRDone (len - SIZE_OF(a)) c rb1 + yieldReinit jump b + +-- | Parses a sequence of tokens suffixed by a separator e.g. @a;b;c;@ is +-- parsed as @a;@, @b;@, @c;@. If the pattern is empty the input stream is +-- returned as it is. 
+-- +-- Equivalent to the following: +-- +-- >>> splitEndBySeq pat f = Stream.foldMany (Fold.takeEndBySeq pat f) +-- +-- Usage: +-- +-- >>> f p = Stream.splitEndBySeq (Array.fromList p) Fold.toList +-- >>> splitEndBy p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) +-- +-- >>> splitEndBy "" "" +-- [] +-- +-- >>> splitEndBy "" "a...b" +-- ["a",".",".",".","b"] +-- +-- >>> splitEndBy ".." "" +-- [] +-- +-- +-- >>> splitEndBy ".." "a...b" +-- ["a..",".b"] +-- +-- +-- >>> splitEndBy ".." "abc" +-- ["abc"] +-- +-- >>> splitEndBy ".." ".." +-- [".."] +-- +-- >>> splitEndBy "." ".a" +-- [".","a"] +-- +-- >>> splitEndBy "." "a." +-- ["a."] +-- +-- Uses Rabin-Karp algorithm for substring search. +-- +{-# INLINE_NORMAL splitEndBySeq #-} +splitEndBySeq + :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitEndBySeq = splitOnSuffixSeq True + +-- | Like 'splitEndBySeq' but drops the separators and returns only the tokens. +-- +-- Equivalent to the following: +-- +-- >>> splitEndBySeq_ pat f = Stream.foldMany (Fold.takeEndBySeq_ pat f) +-- +-- Usage: +-- +-- >>> f p = Stream.splitEndBySeq_ (Array.fromList p) Fold.toList +-- >>> splitEndBy_ p xs = Stream.fold Fold.toList $ f p (Stream.fromList xs) +-- +-- >>> splitEndBy_ "" "" +-- [] +-- +-- >>> splitEndBy_ "" "a...b" +-- ["a",".",".",".","b"] +-- +-- >>> splitEndBy_ ".." "" +-- [] +-- +-- >>> splitEndBy_ ".." "a...b" +-- ["a",".b"] +-- +-- >>> splitEndBy_ ".." "abc" +-- ["abc"] +-- +-- >>> splitEndBy_ ".." ".." +-- [""] +-- +-- >>> splitEndBy_ "." ".a" +-- ["","a"] +-- +-- >>> splitEndBy_ "." "a." +-- ["a"] +-- +-- Uses Rabin-Karp algorithm for substring search. +-- +{-# INLINE_NORMAL splitEndBySeq_ #-} +splitEndBySeq_ + :: forall m a b. (MonadIO m, Unbox a, Enum a, Eq a) + => Array a + -> Fold m a b + -> Stream m a + -> Stream m b +splitEndBySeq_ = splitOnSuffixSeq False + +-- Implement this as a fold or a parser instead. 
+-- This can be implemented easily using Rabin Karp + +-- | Split post any one of the given patterns. +-- +-- /Unimplemented/ +{-# INLINE splitEndBySeqOneOf #-} +splitEndBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => + [Array a] -> Fold m a b -> Stream m a -> Stream m b +splitEndBySeqOneOf _subseq _f _m = undefined + +-- | Split on a prefixed separator element, dropping the separator. The +-- supplied 'Fold' is applied on the split segments. +-- +-- @ +-- > splitOnPrefix' p xs = Stream.toList $ Stream.splitOnPrefix p (Fold.toList) (Stream.fromList xs) +-- > splitOnPrefix' (== '.') ".a.b" +-- ["a","b"] +-- @ +-- +-- An empty stream results in an empty output stream: +-- @ +-- > splitOnPrefix' (== '.') "" +-- [] +-- @ +-- +-- An empty segment consisting of only a prefix is folded to the default output +-- of the fold: +-- +-- @ +-- > splitOnPrefix' (== '.') "." +-- [""] +-- +-- > splitOnPrefix' (== '.') ".a.b." +-- ["a","b",""] +-- +-- > splitOnPrefix' (== '.') ".a..b" +-- ["a","","b"] +-- +-- @ +-- +-- A prefix is optional at the beginning of the stream: +-- +-- @ +-- > splitOnPrefix' (== '.') "a" +-- ["a"] +-- +-- > splitOnPrefix' (== '.') "a.b" +-- ["a","b"] +-- @ +-- +-- 'splitOnPrefix' is an inverse of 'intercalatePrefix' with a single element: +-- +-- > Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList . Stream.splitOnPrefix (== '.') Fold.toList === id +-- +-- Assuming the input stream does not contain the separator: +-- +-- > Stream.splitOnPrefix (== '.') Fold.toList . 
Stream.intercalatePrefix (Stream.fromPure '.') Unfold.fromList === id +-- +-- /Unimplemented/ +{-# INLINE splitBeginBy_ #-} +splitBeginBy_ :: -- (MonadCatch m) => + (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b +splitBeginBy_ _predicate _f = undefined + -- parseMany (Parser.sliceBeginBy predicate f) + +-- Int list examples for splitOn: +-- +-- >>> splitList [] [1,2,3,3,4] +-- > [[1],[2],[3],[3],[4]] +-- +-- >>> splitList [5] [1,2,3,3,4] +-- > [[1,2,3,3,4]] +-- +-- >>> splitList [1] [1,2,3,3,4] +-- > [[],[2,3,3,4]] +-- +-- >>> splitList [4] [1,2,3,3,4] +-- > [[1,2,3,3],[]] +-- +-- >>> splitList [2] [1,2,3,3,4] +-- > [[1],[3,3,4]] +-- +-- >>> splitList [3] [1,2,3,3,4] +-- > [[1,2],[],[4]] +-- +-- >>> splitList [3,3] [1,2,3,3,4] +-- > [[1,2],[4]] +-- +-- >>> splitList [1,2,3,3,4] [1,2,3,3,4] +-- > [[],[]] + +-- This can be implemented easily using Rabin Karp +-- | Split on any one of the given patterns. +-- +-- /Unimplemented/ +-- +{-# INLINE splitSepBySeqOneOf #-} +splitSepBySeqOneOf :: -- (Monad m, Unboxed a, Integral a) => + [Array a] -> Fold m a b -> Stream m a -> Stream m b +splitSepBySeqOneOf _subseq _f _m = + undefined -- D.fromStreamD $ D.splitOnAny f subseq (D.toStreamD m) + +------------------------------------------------------------------------------ +-- Nested Container Transformation +------------------------------------------------------------------------------ + +{-# ANN type SplitState Fuse #-} +data SplitState s arr + = SplitInitial s + | SplitBuffering s arr + | SplitSplitting s arr + | SplitYielding arr (SplitState s arr) + | SplitFinishing + +-- XXX An alternative approach would be to use a partial fold (Fold m a b) to +-- split using a splitBy like combinator. The Fold would consume upto the +-- separator and return any leftover which can then be fed to the next fold. +-- +-- We can revisit this once we have partial folds/parsers. +-- +-- | Performs infix separator style splitting. 
+{-# INLINE_NORMAL splitInnerBy #-} +splitInnerBy + :: Monad m + => (f a -> m (f a, Maybe (f a))) -- splitter + -> (f a -> f a -> m (f a)) -- joiner + -> Stream m (f a) + -> Stream m (f a) +splitInnerBy splitter joiner (Stream step1 state1) = + Stream step (SplitInitial state1) + + where + + {-# INLINE_LATE step #-} + step gst (SplitInitial st) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + return $ case mx2 of + Nothing -> Skip (SplitBuffering s x1) + Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitInitial s) + Stop -> return Stop + + step gst (SplitBuffering st buf) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + buf' <- joiner buf x1 + return $ case mx2 of + Nothing -> Skip (SplitBuffering s buf') + Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitBuffering s buf) + Stop -> return $ Skip (SplitYielding buf SplitFinishing) + + step _ (SplitSplitting st buf) = do + (x1, mx2) <- splitter buf + return $ case mx2 of + Nothing -> Skip $ SplitBuffering st x1 + Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) + + step _ (SplitYielding x next) = return $ Yield x next + step _ SplitFinishing = return Stop + +-- | Performs infix separator style splitting. +{-# INLINE_NORMAL splitInnerBySuffix #-} +splitInnerBySuffix + :: Monad m + => (f a -> Bool) -- isEmpty? 
+ -> (f a -> m (f a, Maybe (f a))) -- splitter + -> (f a -> f a -> m (f a)) -- joiner + -> Stream m (f a) + -> Stream m (f a) +splitInnerBySuffix isEmpty splitter joiner (Stream step1 state1) = + Stream step (SplitInitial state1) + + where + + {-# INLINE_LATE step #-} + step gst (SplitInitial st) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + return $ case mx2 of + Nothing -> Skip (SplitBuffering s x1) + Just x2 -> Skip (SplitYielding x1 (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitInitial s) + Stop -> return Stop + + step gst (SplitBuffering st buf) = do + r <- step1 gst st + case r of + Yield x s -> do + (x1, mx2) <- splitter x + buf' <- joiner buf x1 + return $ case mx2 of + Nothing -> Skip (SplitBuffering s buf') + Just x2 -> Skip (SplitYielding buf' (SplitSplitting s x2)) + Skip s -> return $ Skip (SplitBuffering s buf) + Stop -> + return $ + if isEmpty buf + then Stop + else Skip (SplitYielding buf SplitFinishing) + + step _ (SplitSplitting st buf) = do + (x1, mx2) <- splitter buf + return $ case mx2 of + Nothing -> Skip $ SplitBuffering st x1 + Just x2 -> Skip $ SplitYielding x1 (SplitSplitting st x2) + + step _ (SplitYielding x next) = return $ Yield x next + step _ SplitFinishing = return Stop + +------------------------------------------------------------------------------ +-- Trimming +------------------------------------------------------------------------------ + +-- | Drop prefix from the input stream if present. +-- +-- Space: @O(1)@ +-- +-- See also stripPrefix. +-- +-- /Unimplemented/ +{-# INLINE dropPrefix #-} +dropPrefix :: + -- (Monad m, Eq a) => + Stream m a -> Stream m a -> Stream m a +dropPrefix = error "Not implemented yet!" + +-- | Drop all matching infix from the input stream if present. Infix stream +-- may be consumed multiple times. +-- +-- Space: @O(n)@ where n is the length of the infix. +-- +-- See also stripInfix. 
+-- +-- /Unimplemented/ +{-# INLINE dropInfix #-} +dropInfix :: + -- (Monad m, Eq a) => + Stream m a -> Stream m a -> Stream m a +dropInfix = error "Not implemented yet!" + +-- | Drop suffix from the input stream if present. Suffix stream may be +-- consumed multiple times. +-- +-- Space: @O(n)@ where n is the length of the suffix. +-- +-- See also stripSuffix. +-- +-- /Unimplemented/ +{-# INLINE dropSuffix #-} +dropSuffix :: + -- (Monad m, Eq a) => + Stream m a -> Stream m a -> Stream m a +dropSuffix = error "Not implemented yet!" diff --git a/core/src/Streamly/Internal/Unicode/Decode.hs b/core/src/Streamly/Internal/Unicode/Decode.hs new file mode 100644 index 0000000000..b9a0ea856b --- /dev/null +++ b/core/src/Streamly/Internal/Unicode/Decode.hs @@ -0,0 +1,979 @@ +{-# LANGUAGE CPP #-} +-- | +-- Module : Streamly.Internal.Unicode.Decode +-- Copyright : (c) 2018 Composewell Technologies +-- (c) Bjoern Hoehrmann 2008-2009 +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC +-- + +module Streamly.Internal.Unicode.Decode + ( + -- * Setup + -- | To execute the code examples provided in this module in ghci, please + -- run the following commands first. + -- + -- $setup + + -- XXX Use to/from instead of encode/decode for more compact naming. + + -- * Construction (Decoding) + decodeLatin1 + + -- ** UTF-8 Byte Stream Decoding + , CodingFailureMode(..) + , writeCharUtf8' + , parseCharUtf8With + , decodeUtf8 + , decodeUtf8' + , decodeUtf8_ + + -- ** UTF-16 Byte Stream Decoding + , decodeUtf16le' + , decodeUtf16le + + -- ** Resumable UTF-8 Byte Stream Decoding + , DecodeError(..) + , DecodeState + , CodePoint + , decodeUtf8Either + , resumeDecodeUtf8Either + + -- ** UTF-8 Array Stream Decoding + , decodeUtf8Chunks + , decodeUtf8Chunks' + , decodeUtf8Chunks_ + -- , fromUtf8ChunksEndByLn + + -- * StreamD UTF8 Encoding / Decoding transformations. 
+ , decodeUtf8D + , decodeUtf8D' + , decodeUtf8D_ + , decodeUtf8EitherD + , resumeDecodeUtf8EitherD + + -- * Decoding String Literals + , fromStr# + + -- * Word16 Utilities + -- , mkEvenW8Chunks + , swapByteOrder + + -- * Deprecations + , decodeUtf8Lax + ) +where + + +#include "inline.hs" + +-- MachDeps.h includes ghcautoconf.h that defines WORDS_BIGENDIAN for big endian +-- systems. +#include "MachDeps.h" + +import Control.Monad (void) +import Control.Monad.IO.Class (MonadIO, liftIO) +import Data.Bits (shiftR, shiftL, (.|.), (.&.)) +import Data.Word (Word8, Word16) +import Foreign.Marshal.Alloc (mallocBytes) +import Foreign.Storable (Storable(..)) +#ifndef __GHCJS__ +import Fusion.Plugin.Types (Fuse(..)) +#endif +import GHC.Base (assert, unsafeChr) +import GHC.Exts (Addr#) +import GHC.Ptr (Ptr (..), plusPtr) +import System.IO.Unsafe (unsafePerformIO) +import Streamly.Internal.Data.Array.Type (Array(..)) +import Streamly.Internal.Data.MutByteArray.Type (MutByteArray) +import Streamly.Internal.Data.Parser (Parser) +import Streamly.Internal.Data.Stream (Stream) +import Streamly.Internal.Data.Stream (Step (..)) +import Streamly.Internal.Data.SVar.Type (adaptState) +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) +import Streamly.Internal.Data.Unbox (Unbox(peekAt)) +import Streamly.Internal.System.IO (unsafeInlineIO) + +import qualified Streamly.Data.Fold as Fold +import qualified Streamly.Internal.Data.Parser as Parser (Parser) +import qualified Streamly.Internal.Data.Parser as ParserD +import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.Data.Stream as D + +import Prelude hiding (lines, words, unlines, unwords) + +#include "DocTestUnicodeStream.hs" + +------------------------------------------------------------------------------- +-- Latin1 decoding +------------------------------------------------------------------------------- + +-- | Decode a stream of bytes to Unicode characters by mapping each byte to a +-- 
corresponding Unicode 'Char' in 0-255 range. +-- +{-# INLINE decodeLatin1 #-} +decodeLatin1 :: Monad m => Stream m Word8 -> Stream m Char +decodeLatin1 = fmap (unsafeChr . fromIntegral) + +------------------------------------------------------------------------------- +-- UTF-8 decoding +------------------------------------------------------------------------------- + +-- Int helps in cheaper conversion from Int to Char +type CodePoint = Int +type DecodeState = Word8 + +-- We can divide the errors in three general categories: +-- * A non-starter was encountered in a begin state +-- * A starter was encountered without completing a codepoint +-- * The last codepoint was not complete (input underflow) +-- +-- Need to separate resumable and non-resumable error. In case of non-resumable +-- error we can also provide the failing byte. In case of resumable error the +-- state can be opaque. +-- +data DecodeError = DecodeError !DecodeState !CodePoint deriving Show + +-- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + +-- XXX Use names decodeSuccess = 0, decodeFailure = 12 + +decodeTable :: [Word8] +decodeTable = [ + -- The first part of the table maps bytes to character classes that + -- to reduce the size of the transition table and create bitmasks. + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + -- The second part is a transition table that maps a combination + -- of a state of the automaton and a character class to a state. 
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12 + ] + +{-# INLINE utf8dLength #-} +utf8dLength :: Int +utf8dLength = length decodeTable + +-- | We do not want to garbage collect this and free the memory, we want to +-- keep this persistent. We don't know how to do that with GHC without having a +-- reference in some global structure. So we use a hack, use mallocBytes so +-- that the GC has no way to free it. +{-# NOINLINE utf8d #-} +utf8d :: Ptr Word8 +utf8d = unsafePerformIO $ do + let size = utf8dLength + p <- liftIO $ mallocBytes size + void $ D.fold + (Fold.foldlM' (\b a -> poke b a >> return (b `plusPtr` 1)) (return p)) + (D.fromList decodeTable) + return p + +-- | Return element at the specified index without checking the bounds. +-- and without touching the foreign ptr. +{-# INLINE_NORMAL unsafePeekElemOff #-} +unsafePeekElemOff :: forall a. Storable a => Ptr a -> Int -> a +unsafePeekElemOff p i = + let !x = unsafeInlineIO $ peekElemOff p i + in x + +-- XXX We can use a fromPtr stream to implement it. +{-# INLINE showMemory #-} +showMemory :: + forall a. (Show a, Storable a) => Ptr a -> Ptr a -> String +showMemory cur end + | cur < end = + let cur1 = cur `plusPtr` sizeOf (undefined :: a) + in show (unsafeInlineIO $ peek cur) ++ " " ++ showMemory cur1 end +showMemory _ _ = "" + +-- decode is split into two separate cases to avoid branching instructions. +-- From the higher level flow we already know which case we are in so we can +-- call the appropriate decode function. 
+-- +-- When the state is 0 +{-# INLINE decode0 #-} +decode0 :: Ptr Word8 -> Word8 -> Tuple' DecodeState CodePoint +decode0 table byte = + let !t = table `unsafePeekElemOff` fromIntegral byte + !codep' = (0xff `shiftR` fromIntegral t) .&. fromIntegral byte + !state' = table `unsafePeekElemOff` (256 + fromIntegral t) + in assert ((byte > 0x7f || error showByte) + && (state' /= 0 || error (showByte ++ showTable))) + (Tuple' state' codep') + + where + + utf8tableEnd = table `plusPtr` 364 + showByte = "Streamly: decode0: byte: " ++ show byte + showTable = " table: " ++ showMemory table utf8tableEnd + +-- When the state is not 0 +{-# INLINE decode1 #-} +decode1 + :: Ptr Word8 + -> DecodeState + -> CodePoint + -> Word8 + -> Tuple' DecodeState CodePoint +decode1 table state codep byte = + -- Remember codep is Int type! + -- Can it be unsafe to convert the resulting Int to Char? + let !t = table `unsafePeekElemOff` fromIntegral byte + !codep' = (fromIntegral byte .&. 0x3f) .|. (codep `shiftL` 6) + !state' = table `unsafePeekElemOff` + (256 + fromIntegral state + fromIntegral t) + in assert (codep' <= 0x10FFFF + || error (showByte ++ showState state codep)) + (Tuple' state' codep') + where + + utf8tableEnd = table `plusPtr` 364 + showByte = "Streamly: decode1: byte: " ++ show byte + showState st cp = + " state: " ++ show st ++ + " codepoint: " ++ show cp ++ + " table: " ++ showMemory table utf8tableEnd + +------------------------------------------------------------------------------- +-- Resumable UTF-8 decoding +------------------------------------------------------------------------------- + +-- Strangely, GHCJS hangs linking template-haskell with this +#ifndef __GHCJS__ +{-# ANN type UTF8DecodeState Fuse #-} +#endif +data UTF8DecodeState s a + = UTF8DecodeInit s + | UTF8DecodeInit1 s Word8 + | UTF8DecodeFirst s Word8 + | UTF8Decoding s !DecodeState !CodePoint + | YieldAndContinue a (UTF8DecodeState s a) + | Done + +{- HLINT ignore "Use if" -} +{-# INLINE_NORMAL 
resumeDecodeUtf8EitherD #-} +resumeDecodeUtf8EitherD + :: Monad m + => DecodeState + -> CodePoint + -> D.Stream m Word8 + -> D.Stream m (Either DecodeError Char) +resumeDecodeUtf8EitherD dst codep (D.Stream step state) = + let stt = + if dst == 0 + then UTF8DecodeInit state + else UTF8Decoding state dst codep + in D.Stream (step' utf8d) stt + where + {-# INLINE_LATE step' #-} + step' _ gst (UTF8DecodeInit st) = do + r <- step (adaptState gst) st + return $ case r of + Yield x s -> Skip (UTF8DecodeInit1 s x) + Skip s -> Skip (UTF8DecodeInit s) + Stop -> Skip Done + + step' _ _ (UTF8DecodeInit1 st x) = do + -- Note: It is important to use a ">" instead of a "<=" test + -- here for GHC to generate code layout for default branch + -- prediction for the common case. This is fragile and might + -- change with the compiler versions, we need a more reliable + -- "likely" primitive to control branch predication. + case x > 0x7f of + False -> + return $ Skip $ YieldAndContinue + (Right $ unsafeChr (fromIntegral x)) + (UTF8DecodeInit st) + -- Using a separate state here generates a jump to a + -- separate code block in the core which seems to perform + -- slightly better for the non-ascii case. + True -> return $ Skip $ UTF8DecodeFirst st x + + -- XXX should we merge it with UTF8DecodeInit1? + step' table _ (UTF8DecodeFirst st x) = do + let (Tuple' sv cp) = decode0 table x + return $ + case sv of + 12 -> + Skip $ YieldAndContinue (Left $ DecodeError 0 (fromIntegral x)) + (UTF8DecodeInit st) + 0 -> error "unreachable state" + _ -> Skip (UTF8Decoding st sv cp) + + -- We recover by trying the new byte x a starter of a new codepoint. + -- XXX on error need to report the next byte "x" as well. 
+ -- XXX need to use the same recovery in array decoding routine as well + step' table gst (UTF8Decoding st statePtr codepointPtr) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + return $ + case sv of + 0 -> Skip $ YieldAndContinue (Right $ unsafeChr cp) + (UTF8DecodeInit s) + 12 -> + Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) + (UTF8DecodeInit1 s x) + _ -> Skip (UTF8Decoding s sv cp) + Skip s -> return $ Skip (UTF8Decoding s statePtr codepointPtr) + Stop -> return $ Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) Done + + step' _ _ (YieldAndContinue c s) = return $ Yield c s + step' _ _ Done = return Stop + +-- XXX We can use just one API, and define InitState = 0 and InitCodePoint = 0 +-- to use as starting state. +-- +{-# INLINE_NORMAL decodeUtf8EitherD #-} +decodeUtf8EitherD :: Monad m + => D.Stream m Word8 -> D.Stream m (Either DecodeError Char) +decodeUtf8EitherD = resumeDecodeUtf8EitherD 0 0 + +-- | +-- +-- /Pre-release/ +{-# INLINE decodeUtf8Either #-} +decodeUtf8Either :: Monad m + => Stream m Word8 -> Stream m (Either DecodeError Char) +decodeUtf8Either = decodeUtf8EitherD + +-- | +-- +-- /Pre-release/ +{-# INLINE resumeDecodeUtf8Either #-} +resumeDecodeUtf8Either + :: Monad m + => DecodeState + -> CodePoint + -> Stream m Word8 + -> Stream m (Either DecodeError Char) +resumeDecodeUtf8Either = resumeDecodeUtf8EitherD + +------------------------------------------------------------------------------- +-- One shot decoding +------------------------------------------------------------------------------- + +data CodingFailureMode + = TransliterateCodingFailure + | ErrorOnCodingFailure + | DropOnCodingFailure + deriving (Show) + +{-# INLINE replacementChar #-} +replacementChar :: Char +replacementChar = '\xFFFD' + +data UTF8CharDecodeState a + = UTF8CharDecodeInit + | UTF8CharDecoding !DecodeState !CodePoint + +{-# INLINE 
parseCharUtf8WithD #-} +parseCharUtf8WithD :: + Monad m => CodingFailureMode -> ParserD.Parser Word8 m Char +parseCharUtf8WithD cfm = ParserD.Parser (step' utf8d) initial extract + + where + + prefix = "Streamly.Internal.Data.Stream.parseCharUtf8WithD:" + + {-# INLINE initial #-} + initial = return $ ParserD.IPartial UTF8CharDecodeInit + + handleError err souldBackTrack = + case cfm of + ErrorOnCodingFailure -> ParserD.SError err + TransliterateCodingFailure -> + case souldBackTrack of + True -> ParserD.SDone 0 replacementChar + False -> ParserD.SDone 1 replacementChar + DropOnCodingFailure -> + case souldBackTrack of + True -> ParserD.SContinue 0 UTF8CharDecodeInit + False -> ParserD.SContinue 1 UTF8CharDecodeInit + + {-# INLINE step' #-} + step' table UTF8CharDecodeInit x = + -- Note: It is important to use a ">" instead of a "<=" test + -- here for GHC to generate code layout for default branch + -- prediction for the common case. This is fragile and might + -- change with the compiler versions, we need a more reliable + -- "likely" primitive to control branch predication. 
+ return $ case x > 0x7f of + False -> ParserD.SDone 1 $ unsafeChr $ fromIntegral x + True -> + let (Tuple' sv cp) = decode0 table x + in case sv of + 12 -> + let msg = prefix + ++ "Invalid first UTF8 byte" ++ show x + in handleError msg False + 0 -> error $ prefix ++ "unreachable state" + _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) + + step' table (UTF8CharDecoding statePtr codepointPtr) x = return $ + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + in case sv of + 0 -> ParserD.SDone 1 $ unsafeChr cp + 12 -> + let msg = prefix + ++ "Invalid subsequent UTF8 byte" + ++ show x + ++ "in state" + ++ show statePtr + ++ "accumulated value" + ++ show codepointPtr + in handleError msg True + _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) + + {-# INLINE extract #-} + extract UTF8CharDecodeInit = error $ prefix ++ "Not enough input" + extract (UTF8CharDecoding _ _) = + case cfm of + ErrorOnCodingFailure -> + return $ ParserD.FError $ prefix ++ "Not enough input" + TransliterateCodingFailure -> + return (ParserD.FDone 0 replacementChar) + -- XXX We shouldn't error out here. There is no way to represent an + -- empty parser result unless we return a "Maybe" type. + DropOnCodingFailure -> error $ prefix ++ "Not enough input" + +-- XXX This should ideally accept a "CodingFailureMode" and perform appropriate +-- error handling. This isn't possible now as "TransliterateCodingFailure"'s +-- workflow requires backtracking 1 element. This can be revisited once "Fold" +-- supports backtracking. +{-# INLINE writeCharUtf8' #-} +writeCharUtf8' :: Monad m => Parser Word8 m Char +writeCharUtf8' = parseCharUtf8WithD ErrorOnCodingFailure + +-- XXX The initial idea was to have "parseCharUtf8" and offload the error +-- handling to another parser. 
So, say we had "parseCharUtf8'", +-- +-- >>> parseCharUtf8Smart = parseCharUtf8' <|> Parser.fromPure replacementChar +-- +-- But unfortunately parseCharUtf8Smart used in conjunction with "parseMany" - +-- that is "parseMany parseCharUtf8Smart" on a stream causes the heap to +-- overflow. Even a heap size of 500 MB was not sufficient. +-- +-- This needs to be investigated further. +{-# INLINE parseCharUtf8With #-} +parseCharUtf8With :: + Monad m => CodingFailureMode -> Parser.Parser Word8 m Char +parseCharUtf8With = parseCharUtf8WithD + +-- XXX write it as a parser and use parseMany to decode a stream, need to check +-- if that preserves the same performance. Or we can use a resumable parser +-- that parses a chunk at a time. +-- +-- XXX Implement this in terms of decodeUtf8Either. Need to make sure that +-- decodeUtf8Either preserves the performance characteristics. +-- +-- XXX Currently this requires @-fspec-constr-recursive=16@ for best perf but +-- it takes too much memory and compile time. Reimplement to avoid that. 
+{-# INLINE_NORMAL decodeUtf8WithD #-} +decodeUtf8WithD :: Monad m + => CodingFailureMode -> D.Stream m Word8 -> D.Stream m Char +decodeUtf8WithD cfm (D.Stream step state) = + D.Stream (step' utf8d) (UTF8DecodeInit state) + + where + + prefix = "Streamly.Internal.Data.Stream.decodeUtf8With: " + + {-# INLINE handleError #-} + handleError e s = + case cfm of + ErrorOnCodingFailure -> error e + TransliterateCodingFailure -> YieldAndContinue replacementChar s + DropOnCodingFailure -> s + + {-# INLINE handleUnderflow #-} + handleUnderflow = + case cfm of + ErrorOnCodingFailure -> error $ prefix ++ "Not enough input" + TransliterateCodingFailure -> YieldAndContinue replacementChar Done + DropOnCodingFailure -> Done + + {-# INLINE_LATE step' #-} + step' _ gst (UTF8DecodeInit st) = do + r <- step (adaptState gst) st + return $ case r of + Yield x s -> Skip (UTF8DecodeInit1 s x) + Skip s -> Skip (UTF8DecodeInit s) + Stop -> Skip Done + + step' _ _ (UTF8DecodeInit1 st x) = do + -- Note: It is important to use a ">" instead of a "<=" test + -- here for GHC to generate code layout for default branch + -- prediction for the common case. This is fragile and might + -- change with the compiler versions, we need a more reliable + -- "likely" primitive to control branch predication. + case x > 0x7f of + False -> + return $ Skip $ YieldAndContinue + (unsafeChr (fromIntegral x)) + (UTF8DecodeInit st) + -- Using a separate state here generates a jump to a + -- separate code block in the core which seems to perform + -- slightly better for the non-ascii case. + True -> return $ Skip $ UTF8DecodeFirst st x + + -- XXX should we merge it with UTF8DecodeInit1? 
+ step' table _ (UTF8DecodeFirst st x) = do + let (Tuple' sv cp) = decode0 table x + return $ + case sv of + 12 -> + let msg = prefix ++ "Invalid first UTF8 byte " ++ show x + in Skip $ handleError msg (UTF8DecodeInit st) + 0 -> error "unreachable state" + _ -> Skip (UTF8Decoding st sv cp) + + -- We recover by trying the new byte x as a starter of a new codepoint. + -- XXX need to use the same recovery in array decoding routine as well + step' table gst (UTF8Decoding st statePtr codepointPtr) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + return $ case sv of + 0 -> Skip $ YieldAndContinue + (unsafeChr cp) (UTF8DecodeInit s) + 12 -> + let msg = prefix + ++ "Invalid subsequent UTF8 byte " + ++ show x + ++ " in state " + ++ show statePtr + ++ " accumulated value " + ++ show codepointPtr + in Skip $ handleError msg (UTF8DecodeInit1 s x) + _ -> Skip (UTF8Decoding s sv cp) + Skip s -> return $ + Skip (UTF8Decoding s statePtr codepointPtr) + Stop -> return $ Skip handleUnderflow + + step' _ _ (YieldAndContinue c s) = return $ Yield c s + step' _ _ Done = return Stop + +{-# INLINE decodeUtf8D #-} +decodeUtf8D :: Monad m => D.Stream m Word8 -> D.Stream m Char +decodeUtf8D = decodeUtf8WithD TransliterateCodingFailure + +-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. +-- Any invalid codepoint encountered is replaced with the unicode replacement +-- character. +-- +{-# INLINE decodeUtf8 #-} +decodeUtf8 :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8 = decodeUtf8D + +{-# INLINE decodeUtf8D' #-} +decodeUtf8D' :: Monad m => D.Stream m Word8 -> D.Stream m Char +decodeUtf8D' = decodeUtf8WithD ErrorOnCodingFailure + +-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. +-- The function throws an error if an invalid codepoint is encountered. 
+-- +{-# INLINE decodeUtf8' #-} +decodeUtf8' :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8' = decodeUtf8D' + +{-# INLINE decodeUtf8D_ #-} +decodeUtf8D_ :: Monad m => D.Stream m Word8 -> D.Stream m Char +decodeUtf8D_ = decodeUtf8WithD DropOnCodingFailure + +-- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. +-- Any invalid codepoint encountered is dropped. +-- +{-# INLINE decodeUtf8_ #-} +decodeUtf8_ :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8_ = decodeUtf8D_ + +-- | Same as 'decodeUtf8' +-- +{-# DEPRECATED decodeUtf8Lax "Please use 'decodeUtf8' instead" #-} +{-# INLINE decodeUtf8Lax #-} +decodeUtf8Lax :: Monad m => Stream m Word8 -> Stream m Char +decodeUtf8Lax = decodeUtf8 + +------------------------------------------------------------------------------- +-- Decoding Utf16 +------------------------------------------------------------------------------- + +{- +data MkEvenW8ChunksState s w8 arr + = MECSInit s + | MECSBuffer w8 s + | MECSYieldAndInit arr s + | MECSYieldAndBuffer arr w8 s + +-- | Ensure chunks of even length. This can be used before casting the arrays to +-- Word16. Use this API when interacting with external data. +-- +-- The chunks are split and merged accordingly to create arrays of even length. +-- If the sum of length of all the arrays in the stream is odd then the trailing +-- byte of the last array is dropped. +-- +{-# INLINE_NORMAL mkEvenW8Chunks #-} +mkEvenW8Chunks :: Monad m => Stream m (Array Word8) -> Stream m (Array Word8) +mkEvenW8Chunks (D.Stream step state) = D.Stream step1 (MECSInit state) + + where + + {-# INLINE_LATE step1 #-} + step1 gst (MECSInit st) = do + r <- step (adaptState gst) st + return $ + case r of + Yield arr st1 -> + let len = Array.length arr + in if (len .&. 
1) == 1 + then let arr1 = Array.unsafeSliceOffLen 0 (len - 1) arr + remElem = Array.unsafeGetIndex (len - 1) arr + in Yield arr1 (MECSBuffer remElem st1) + else Yield arr (MECSInit st1) + Skip s -> Skip (MECSInit s) + Stop -> Stop + step1 gst (MECSBuffer remElem st) = do + r <- step (adaptState gst) st + return $ + case r of + Yield arr st1 | Array.length arr == 0 -> + Skip (MECSBuffer remElem st1) + Yield arr st1 | Array.length arr == 1 -> + let fstElem = Array.unsafeGetIndex 0 arr + w16 = Array.fromList [remElem, fstElem] + in Yield w16 (MECSInit st1) + Yield arr st1 -> + let len = Array.length arr + in if (len .&. 1) == 1 + then let arr1 = Array.unsafeSliceOffLen 1 (len - 1) arr + fstElem = Array.unsafeGetIndex 0 arr + w16 = Array.fromList [remElem, fstElem] + in Yield w16 (MECSYieldAndInit arr1 st1) + else let arr1 = Array.unsafeSliceOffLen 1 (len - 2) arr + fstElem = Array.unsafeGetIndex 0 arr + lstElem = Array.unsafeGetIndex (len - 1) arr + w16 = Array.fromList [remElem, fstElem] + in Yield w16 + (MECSYieldAndBuffer arr1 lstElem st1) + Skip s -> Skip (MECSBuffer remElem s) + Stop -> Stop -- Here the last Word8 is lost + step1 _ (MECSYieldAndInit arr st) = + pure $ Yield arr (MECSInit st) + step1 _ (MECSYieldAndBuffer arr lastElem st) = + pure $ Yield arr (MECSBuffer lastElem st) +-} + +-- | Swap the byte order of Word16 +-- +-- > swapByteOrder 0xABCD == 0xCDAB +-- > swapByteOrder . swapByteOrder == id +{-# INLINE swapByteOrder #-} +swapByteOrder :: Word16 -> Word16 +swapByteOrder w = (w `shiftL` 8) .|. 
(w `shiftR` 8) + +{-# INLINE utf16LowSurrogate #-} +utf16LowSurrogate :: Word16 +utf16LowSurrogate = 0xDC00 + +{-# INLINE utf16HighSurrogate #-} +utf16HighSurrogate :: Word16 +utf16HighSurrogate = 0xD800 + +data DecodeUtf16WithState w c s + = U16NoSurrogate s + | U16HighSurrogate w s + | U16D + | U16YAndC c (DecodeUtf16WithState w c s) + +{-# INLINE_NORMAL decodeUtf16With #-} +decodeUtf16With :: + Monad m + => CodingFailureMode + -> D.Stream m Word16 + -> D.Stream m Char +decodeUtf16With cfm (D.Stream step state) = + D.Stream step1 (U16NoSurrogate state) + + where + + prefix = "Streamly.Internal.Unicode.Stream.decodeUtf16With: " + + {-# INLINE combineSurrogates #-} + combineSurrogates hi lo = + let first10 = fromIntegral (hi - utf16HighSurrogate) `shiftL` 10 + second10 = fromIntegral (lo - utf16LowSurrogate) + in unsafeChr (0x10000 + (first10 .|. second10)) + + {-# INLINE transliterateOrError #-} + transliterateOrError e s = + case cfm of + ErrorOnCodingFailure -> error e + TransliterateCodingFailure -> U16YAndC replacementChar s + DropOnCodingFailure -> s + + {-# INLINE inputUnderflow #-} + inputUnderflow = + case cfm of + ErrorOnCodingFailure -> error $ prefix ++ "Input Underflow" + TransliterateCodingFailure -> U16YAndC replacementChar U16D + DropOnCodingFailure -> U16D + + {-# INLINE_LATE step1 #-} + step1 gst (U16NoSurrogate st) = do + r <- step (adaptState gst) st + pure $ + case r of + Yield x st1 + | x < 0xD800 || x > 0xDFFF -> + Yield (unsafeChr (fromIntegral x)) (U16NoSurrogate st1) + | x >= 0xD800 && x <= 0xDBFF -> + Skip (U16HighSurrogate x st1) + | otherwise -> + let msg = prefix + ++ "Invalid first UTF16 word " ++ show x + in Skip $ + transliterateOrError msg (U16NoSurrogate st1) + Skip st1 -> Skip (U16NoSurrogate st1) + Stop -> Stop + step1 gst (U16HighSurrogate hi st) = do + r <- step (adaptState gst) st + pure $ + case r of + Yield x st1 + | x >= 0xDC00 && x <= 0xDFFF -> + Yield (combineSurrogates hi x) (U16NoSurrogate st1) + | otherwise -> + let 
msg = prefix + ++ "Invalid subsequent UTF16 word " ++ show x + ++ " in state " ++ show hi + in Skip $ + transliterateOrError msg (U16NoSurrogate st1) + Skip st1 -> Skip (U16HighSurrogate hi st1) + Stop -> Skip inputUnderflow + step1 _ (U16YAndC x st) = pure $ Yield x st + step1 _ U16D = pure Stop + +{-# INLINE decodeUtf16' #-} +decodeUtf16' :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16' = decodeUtf16With ErrorOnCodingFailure + +{-# INLINE decodeUtf16 #-} +decodeUtf16 :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16 = decodeUtf16With TransliterateCodingFailure + +-- | Similar to 'decodeUtf16le' but throws an error if an invalid codepoint is +-- encountered. +-- +{-# INLINE decodeUtf16le' #-} +decodeUtf16le' :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16le' = + decodeUtf16' +#ifdef WORDS_BIGENDIAN + . fmap swapByteOrder +#endif + +-- | Decode a UTF-16 encoded stream to a stream of Unicode characters. Any +-- invalid codepoint encountered is replaced with the unicode replacement +-- character. +-- +-- The Word16s are expected to be in the little-endian byte order. +-- +{-# INLINE decodeUtf16le #-} +decodeUtf16le :: Monad m => Stream m Word16 -> Stream m Char +decodeUtf16le = + decodeUtf16 +#ifdef WORDS_BIGENDIAN + . 
fmap swapByteOrder +#endif + +------------------------------------------------------------------------------- +-- Decoding Array Streams +------------------------------------------------------------------------------- + +#ifndef __GHCJS__ +{-# ANN type FlattenState Fuse #-} +#endif +data FlattenState s + = OuterLoop s !(Maybe (DecodeState, CodePoint)) + | InnerLoopDecodeInit s MutByteArray !Int !Int + | InnerLoopDecodeFirst s MutByteArray !Int !Int Word8 + | InnerLoopDecoding s MutByteArray !Int !Int + !DecodeState !CodePoint + | YAndC !Char (FlattenState s) -- These constructors can be + -- encoded in the UTF8DecodeState + -- type, I prefer to keep these + -- flat even though that means + -- coming up with new names + | D + +-- The normal decodeUtf8 above should fuse with flattenArrays +-- to create this exact code but it doesn't for some reason, as of now this +-- remains the fastest way I could figure out to decodeUtf8. +-- +-- XXX Add Proper error messages +{-# INLINE_NORMAL decodeUtf8ArraysWithD #-} +decodeUtf8ArraysWithD :: + MonadIO m + => CodingFailureMode + -> D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8ArraysWithD cfm (D.Stream step state) = + D.Stream (step' utf8d) (OuterLoop state Nothing) + where + {-# INLINE transliterateOrError #-} + transliterateOrError e s = + case cfm of + ErrorOnCodingFailure -> error e + TransliterateCodingFailure -> YAndC replacementChar s + DropOnCodingFailure -> s + {-# INLINE inputUnderflow #-} + inputUnderflow = + case cfm of + ErrorOnCodingFailure -> + error $ + show "Streamly.Internal.Data.Stream." 
+ ++ "decodeUtf8ArraysWith: Input Underflow" + TransliterateCodingFailure -> YAndC replacementChar D + DropOnCodingFailure -> D + {-# INLINE_LATE step' #-} + step' _ gst (OuterLoop st Nothing) = do + r <- step (adaptState gst) st + return $ + case r of + Yield Array {..} s -> + Skip (InnerLoopDecodeInit s arrContents arrStart arrEnd) + Skip s -> Skip (OuterLoop s Nothing) + Stop -> Skip D + step' _ gst (OuterLoop st dst@(Just (ds, cp))) = do + r <- step (adaptState gst) st + return $ + case r of + Yield Array {..} s -> + Skip (InnerLoopDecoding s arrContents arrStart arrEnd ds cp) + Skip s -> Skip (OuterLoop s dst) + Stop -> Skip inputUnderflow + step' _ _ (InnerLoopDecodeInit st _ p end) + | p == end = do + return $ Skip $ OuterLoop st Nothing + step' _ _ (InnerLoopDecodeInit st contents p end) = do + x <- liftIO $ peekAt p contents + -- Note: It is important to use a ">" instead of a "<=" test here for + -- GHC to generate code layout for default branch prediction for the + -- common case. This is fragile and might change with the compiler + -- versions, we need a more reliable "likely" primitive to control + -- branch predication. + case x > 0x7f of + False -> + return $ Skip $ YAndC + (unsafeChr (fromIntegral x)) + (InnerLoopDecodeInit st contents (p + 1) end) + -- Using a separate state here generates a jump to a separate code + -- block in the core which seems to perform slightly better for the + -- non-ascii case. + True -> return $ Skip $ InnerLoopDecodeFirst st contents p end x + + step' table _ (InnerLoopDecodeFirst st contents p end x) = do + let (Tuple' sv cp) = decode0 table x + return $ + case sv of + 12 -> + Skip $ + transliterateOrError + ( + "Streamly.Internal.Data.Stream." 
+ ++ "decodeUtf8ArraysWith: Invalid UTF8" + ++ " codepoint encountered" + ) + (InnerLoopDecodeInit st contents (p + 1) end) + 0 -> error "unreachable state" + _ -> Skip (InnerLoopDecoding st contents (p + 1) end sv cp) + step' _ _ (InnerLoopDecoding st _ p end sv cp) + | p == end = return $ Skip $ OuterLoop st (Just (sv, cp)) + step' table _ (InnerLoopDecoding st contents p end statePtr codepointPtr) = do + x <- liftIO $ peekAt p contents + let (Tuple' sv cp) = decode1 table statePtr codepointPtr x + return $ + case sv of + 0 -> + Skip $ + YAndC + (unsafeChr cp) + (InnerLoopDecodeInit st contents (p + 1) end) + 12 -> + Skip $ + transliterateOrError + ( + "Streamly.Internal.Data.Stream." + ++ "decodeUtf8ArraysWith: Invalid UTF8" + ++ " codepoint encountered" + ) + (InnerLoopDecodeInit st contents (p + 1) end) + _ -> + Skip + (InnerLoopDecoding st contents (p + 1) end sv cp) + step' _ _ (YAndC c s) = return $ Yield c s + step' _ _ D = return Stop + +-- | Like 'decodeUtf8' but for a chunked stream. It may be slightly faster than +-- flattening the stream and then decoding with 'decodeUtf8'. +{-# INLINE decodeUtf8Chunks #-} +decodeUtf8Chunks :: + MonadIO m + => D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8Chunks = decodeUtf8ArraysWithD TransliterateCodingFailure + +-- | Like 'decodeUtf8\'' but for a chunked stream. It may be slightly faster +-- than flattening the stream and then decoding with 'decodeUtf8\''. +{-# INLINE decodeUtf8Chunks' #-} +decodeUtf8Chunks' :: + MonadIO m + => D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8Chunks' = decodeUtf8ArraysWithD ErrorOnCodingFailure + +-- | Like 'decodeUtf8_' but for a chunked stream. It may be slightly faster +-- than flattening the stream and then decoding with 'decodeUtf8_'. 
+{-# INLINE decodeUtf8Chunks_ #-} +decodeUtf8Chunks_ :: + MonadIO m + => D.Stream m (Array Word8) + -> D.Stream m Char +decodeUtf8Chunks_ = decodeUtf8ArraysWithD DropOnCodingFailure + +------------------------------------------------------------------------------- +-- Decoding string literals +------------------------------------------------------------------------------- + +-- XXX decodeCString# + +-- | Read UTF-8 encoded bytes as chars from an 'Addr#' until a 0 byte is +-- encountered, the 0 byte is not included in the stream. +-- +-- /Unsafe:/ The caller is responsible for safe addressing. +-- +-- Note that this is completely safe when reading from Haskell string +-- literals because they are guaranteed to be NULL terminated: +-- +-- >>> Stream.fold Fold.toList (Unicode.fromStr# "Haskell"#) +-- "Haskell" +-- +{-# INLINE fromStr# #-} +fromStr# :: MonadIO m => Addr# -> Stream m Char +fromStr# addr = decodeUtf8 $ Stream.fromCString# addr diff --git a/core/src/Streamly/Internal/Unicode/Encode.hs b/core/src/Streamly/Internal/Unicode/Encode.hs new file mode 100644 index 0000000000..df33732d7f --- /dev/null +++ b/core/src/Streamly/Internal/Unicode/Encode.hs @@ -0,0 +1,396 @@ +{-# LANGUAGE CPP #-} +-- | +-- Module : Streamly.Internal.Unicode.Encode +-- Copyright : (c) 2018 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : experimental +-- Portability : GHC + +module Streamly.Internal.Unicode.Encode + ( + -- * Setup + -- | To execute the code examples provided in this module in ghci, please + -- run the following commands first. + -- + -- $setup + + -- XXX Use to/from instead of encode/decode for more compact naming. 
+ + -- * Elimination (Encoding) + -- ** Latin1 Encoding to Byte Stream + encodeLatin1 + , encodeLatin1' + , encodeLatin1_ + + -- ** UTF-8 Encoding to Byte Stream + , readCharUtf8' + , readCharUtf8 + , readCharUtf8_ + , encodeUtf8 + , encodeUtf8' + , encodeUtf8_ + , encodeStrings + + -- ** UTF-8 Encoding to Chunk Stream + -- , toUtf8Chunks + -- , toUtf8Chunks' + -- , toUtf8Chunks_ + -- , toUtf8ChunksEndByLn + + -- , toPinnedUtf8Chunks + -- , toPinnedUtf8Chunks' + -- , toPinnedUtf8Chunks_ + -- , toPinnedUtf8ChunksEndByLn + + -- ** UTF-16 Encoding to Byte Stream + , encodeUtf16le' + , encodeUtf16le + + -- * StreamD UTF8 Encoding / Decoding transformations. + , encodeUtf8D + , encodeUtf8D' + , encodeUtf8D_ + + -- * Word16 Utilities + -- , swapByteOrder + + -- * Deprecations + , encodeLatin1Lax + , encodeUtf8Lax + ) +where + +#include "inline.hs" + +-- MachDeps.h includes ghcautoconf.h that defines WORDS_BIGENDIAN for big endian +-- systems. +#include "MachDeps.h" + +import Control.Monad.IO.Class (MonadIO) +import Data.Bits (shiftR, shiftL, (.|.), (.&.)) +import Data.Char (chr, ord) +import Data.Word (Word8, Word16) +import GHC.Base (assert) +import GHC.IO.Encoding.Failure (isSurrogate) +import Streamly.Internal.Data.Array.Type (Array(..)) +import Streamly.Internal.Data.Stream (Stream) +import Streamly.Internal.Data.Stream (Step (..)) +import Streamly.Internal.Data.Unfold.Type (Unfold(..)) + +import qualified Streamly.Data.Unfold as Unfold +import qualified Streamly.Internal.Data.Array as Array +import qualified Streamly.Internal.Data.Stream as Stream +import qualified Streamly.Internal.Data.Stream as D + +#include "DocTestUnicodeStream.hs" + +------------------------------------------------------------------------------- +-- Latin1 encoding +------------------------------------------------------------------------------- + +-- | Encode a stream of Unicode characters to bytes by mapping each character +-- to a byte in 0-255 range. 
Throws an error if the input stream contains +-- characters beyond 255. +-- +{-# INLINE encodeLatin1' #-} +encodeLatin1' :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1' = fmap convert + where + convert c = + let codepoint = ord c + in if codepoint > 255 + then error $ "Streamly.Unicode.encodeLatin1 invalid " ++ + "input char codepoint " ++ show codepoint + else fromIntegral codepoint + +-- XXX Should we instead replace the invalid chars by NUL or whitespace or some +-- other control char? That may affect the perf a bit but may be a better +-- behavior. +-- +-- | Like 'encodeLatin1'' but silently maps input codepoints beyond 255 to +-- arbitrary Latin1 chars in 0-255 range. No error or exception is thrown when +-- such mapping occurs. +-- +{-# INLINE encodeLatin1 #-} +encodeLatin1 :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1 = fmap (fromIntegral . ord) + +-- | Like 'encodeLatin1' but drops the input characters beyond 255. +-- +{-# INLINE encodeLatin1_ #-} +encodeLatin1_ :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1_ = fmap (fromIntegral . ord) . Stream.filter (<= chr 255) + +-- | Same as 'encodeLatin1' +-- +{-# DEPRECATED encodeLatin1Lax "Please use 'encodeLatin1' instead" #-} +{-# INLINE encodeLatin1Lax #-} +encodeLatin1Lax :: Monad m => Stream m Char -> Stream m Word8 +encodeLatin1Lax = encodeLatin1 + +------------------------------------------------------------------------------- +-- UTF-8 decoding +------------------------------------------------------------------------------- + +-- Int helps in cheaper conversion from Int to Char +type CodePoint = Int +type DecodeState = Word8 + +-- We can divide the errors in three general categories: +-- * A non-starter was encountered in a begin state +-- * A starter was encountered without completing a codepoint +-- * The last codepoint was not complete (input underflow) +-- +-- Need to separate resumable and non-resumable error. 
In case of non-resumable +-- error we can also provide the failing byte. In case of resumable error the +-- state can be opaque. +-- +data DecodeError = DecodeError !DecodeState !CodePoint deriving Show + +------------------------------------------------------------------------------- +-- One shot decoding +------------------------------------------------------------------------------- + +data CodingFailureMode + = TransliterateCodingFailure + | ErrorOnCodingFailure + | DropOnCodingFailure + deriving (Show) + +-- XXX this is defined in both Encode/Decode modules. + +-- | Swap the byte order of Word16 +-- +-- > swapByteOrder 0xABCD == 0xCDAB +-- > swapByteOrder . swapByteOrder == id +{-# INLINE _swapByteOrder #-} +_swapByteOrder :: Word16 -> Word16 +_swapByteOrder w = (w `shiftL` 8) .|. (w `shiftR` 8) + +------------------------------------------------------------------------------- +-- Encoding Unicode (UTF-8) Characters +------------------------------------------------------------------------------- + +data WList a = WCons !a !(WList a) | WNil + +-- UTF-8 primitives, Lifted from GHC.IO.Encoding.UTF8. + +{-# INLINE ord2 #-} +ord2 :: Char -> (WList Word8) +ord2 c = assert (n >= 0x80 && n <= 0x07ff) (WCons x1 (WCons x2 WNil)) + where + n = ord c + x1 = fromIntegral $ (n `shiftR` 6) + 0xC0 + x2 = fromIntegral $ (n .&. 0x3F) + 0x80 + +{-# INLINE ord3 #-} +ord3 :: Char -> (WList Word8) +ord3 c = assert (n >= 0x0800 && n <= 0xffff) (WCons x1 (WCons x2 (WCons x3 WNil))) + where + n = ord c + x1 = fromIntegral $ (n `shiftR` 12) + 0xE0 + x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 + x3 = fromIntegral $ (n .&. 0x3F) + 0x80 + +{-# INLINE ord4 #-} +ord4 :: Char -> (WList Word8) +ord4 c = assert (n >= 0x10000) (WCons x1 (WCons x2 (WCons x3 (WCons x4 WNil)))) + where + n = ord c + x1 = fromIntegral $ (n `shiftR` 18) + 0xF0 + x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80 + x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 + x4 = fromIntegral $ (n .&. 
0x3F) + 0x80 + +{-# INLINE_NORMAL readCharUtf8With #-} +readCharUtf8With :: Monad m => (WList Word8) -> Unfold m Char Word8 +readCharUtf8With surr = Unfold step inject + + where + + inject c = + return $ case ord c of + x | x <= 0x7F -> fromIntegral x `WCons` WNil + | x <= 0x7FF -> ord2 c + | x <= 0xFFFF -> if isSurrogate c then surr else ord3 c + | otherwise -> ord4 c + + {-# INLINE_LATE step #-} + step WNil = return Stop + step (WCons x xs) = return $ Yield x xs + +{-# INLINE_NORMAL readCharUtf8' #-} +readCharUtf8' :: Monad m => Unfold m Char Word8 +readCharUtf8' = + readCharUtf8With $ + error "Streamly.Internal.Unicode.readCharUtf8': Encountered a surrogate" + +-- More yield points improve performance, but I am not sure if they can cause +-- too much code bloat or some trouble with fusion. So keeping only two yield +-- points for now, one for the ascii chars (fast path) and one for all other +-- paths (slow path). +{-# INLINE_NORMAL encodeUtf8D' #-} +encodeUtf8D' :: Monad m => D.Stream m Char -> D.Stream m Word8 +encodeUtf8D' = D.unfoldEach readCharUtf8' + +-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. When +-- any invalid character (U+D800-U+D8FF) is encountered in the input stream the +-- function errors out. +-- +{-# INLINE encodeUtf8' #-} +encodeUtf8' :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8' = encodeUtf8D' + +{-# INLINE_NORMAL readCharUtf8 #-} +readCharUtf8 :: Monad m => Unfold m Char Word8 +readCharUtf8 = readCharUtf8With $ WCons 239 (WCons 191 (WCons 189 WNil)) + +-- | See section "3.9 Unicode Encoding Forms" in +-- https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf +-- +{-# INLINE_NORMAL encodeUtf8D #-} +encodeUtf8D :: Monad m => D.Stream m Char -> D.Stream m Word8 +encodeUtf8D = D.unfoldEach readCharUtf8 + +-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. 
Any +-- Invalid characters (U+D800-U+D8FF) in the input stream are replaced by the +-- Unicode replacement character U+FFFD. +-- +{-# INLINE encodeUtf8 #-} +encodeUtf8 :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8 = encodeUtf8D + +{-# INLINE_NORMAL readCharUtf8_ #-} +readCharUtf8_ :: Monad m => Unfold m Char Word8 +readCharUtf8_ = readCharUtf8With WNil + +{-# INLINE_NORMAL encodeUtf8D_ #-} +encodeUtf8D_ :: Monad m => D.Stream m Char -> D.Stream m Word8 +encodeUtf8D_ = D.unfoldEach readCharUtf8_ + +-- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any +-- Invalid characters (U+D800-U+D8FF) in the input stream are dropped. +-- +{-# INLINE encodeUtf8_ #-} +encodeUtf8_ :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8_ = encodeUtf8D_ + +-- | Same as 'encodeUtf8' +-- +{-# DEPRECATED encodeUtf8Lax "Please use 'encodeUtf8' instead" #-} +{-# INLINE encodeUtf8Lax #-} +encodeUtf8Lax :: Monad m => Stream m Char -> Stream m Word8 +encodeUtf8Lax = encodeUtf8 + +------------------------------------------------------------------------------- +-- Encoding to Utf16 +------------------------------------------------------------------------------- + +{-# INLINE utf16LowSurrogate #-} +utf16LowSurrogate :: Word16 +utf16LowSurrogate = 0xDC00 + +{-# INLINE utf16HighSurrogate #-} +utf16HighSurrogate :: Word16 +utf16HighSurrogate = 0xD800 + +{-# INLINE_NORMAL readCharUtf16With #-} +readCharUtf16With :: Monad m => WList Word16 -> Unfold m Char Word16 +readCharUtf16With invalidReplacement = Unfold step inject + + where + + inject c = + return $ case ord c of + x | x < 0xD800 -> fromIntegral x `WCons` WNil + | x > 0xDFFF && x <= 0xFFFF -> fromIntegral x `WCons` WNil + | x >= 0x10000 && x <= 0x10FFFF -> + let u = x - 0x10000 -- 20 bits + h = utf16HighSurrogate + + fromIntegral (u `shiftR` 10) -- 10 bits + l = utf16LowSurrogate + + fromIntegral (u .&. 
0x3FF) -- 10 bits + in WCons h $ WCons l WNil + | otherwise -> invalidReplacement + + {-# INLINE_LATE step #-} + step WNil = return Stop + step (WCons x xs) = return $ Yield x xs + +{-# INLINE encodeUtf16' #-} +encodeUtf16' :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16' = D.unfoldEach (readCharUtf16With errString) + where + errString = + error + $ "Streamly.Internal.Unicode.encodeUtf16': Encountered an \ + invalid character" + +{-# INLINE encodeUtf16 #-} +encodeUtf16 :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16 = D.unfoldEach (readCharUtf16With WNil) + +-- | Similar to 'encodeUtf16le' but throws an error if any invalid character is +-- encountered. +-- +{-# INLINE encodeUtf16le' #-} +encodeUtf16le' :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16le' = +#ifdef WORDS_BIGENDIAN + fmap _swapByteOrder . +#endif + encodeUtf16' + +-- | Encode a stream of Unicode characters to a UTF-16 encoded stream. Any +-- invalid characters in the input stream are replaced by the Unicode +-- replacement character U+FFFD. +-- +-- The resulting Word16s are encoded in little-endian byte order. +-- +{-# INLINE encodeUtf16le #-} +encodeUtf16le :: Monad m => Stream m Char -> Stream m Word16 +encodeUtf16le = +#ifdef WORDS_BIGENDIAN + fmap _swapByteOrder . +#endif + encodeUtf16 + +------------------------------------------------------------------------------- +-- Encode streams of containers +------------------------------------------------------------------------------- + +-- | Encode a container to @Array Word8@ provided an unfold to covert it to a +-- Char stream and an encoding function. +-- +-- /Internal/ +{-# INLINE encodeObject #-} +encodeObject :: MonadIO m => + (Stream m Char -> Stream m Word8) + -> Unfold m a Char + -> a + -> m (Array Word8) +encodeObject encode u = Stream.fold Array.create . encode . Stream.unfold u + +-- | Encode a stream of container objects using the supplied encoding scheme. +-- Each object is encoded as an @Array Word8@. 
+-- +-- /Internal/ +{-# INLINE encodeObjects #-} +encodeObjects :: MonadIO m => + (Stream m Char -> Stream m Word8) + -> Unfold m a Char + -> Stream m a + -> Stream m (Array Word8) +encodeObjects encode u = Stream.mapM (encodeObject encode u) + +-- | Encode a stream of 'String' using the supplied encoding scheme. Each +-- string is encoded as an @Array Word8@. +-- +{-# INLINE encodeStrings #-} +encodeStrings :: MonadIO m => + (Stream m Char -> Stream m Word8) + -> Stream m String + -> Stream m (Array Word8) +encodeStrings encode = encodeObjects encode Unfold.fromList diff --git a/core/src/Streamly/Internal/Unicode/Stream.hs b/core/src/Streamly/Internal/Unicode/Stream.hs index 04ca5718ec..a444b93184 100644 --- a/core/src/Streamly/Internal/Unicode/Stream.hs +++ b/core/src/Streamly/Internal/Unicode/Stream.hs @@ -2,7 +2,6 @@ -- | -- Module : Streamly.Internal.Unicode.Stream -- Copyright : (c) 2018 Composewell Technologies --- (c) Bjoern Hoehrmann 2008-2009 -- -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com @@ -18,65 +17,9 @@ module Streamly.Internal.Unicode.Stream -- -- $setup - -- XXX Use to/from instead of encode/decode for more compact naming. + module Streamly.Internal.Unicode.Encode + , module Streamly.Internal.Unicode.Decode - -- * Construction (Decoding) - decodeLatin1 - - -- ** UTF-8 Byte Stream Decoding - , CodingFailureMode(..) - , writeCharUtf8' - , parseCharUtf8With - , decodeUtf8 - , decodeUtf8' - , decodeUtf8_ - - -- ** UTF-16 Byte Stream Decoding - , decodeUtf16le' - , decodeUtf16le - - -- ** Resumable UTF-8 Byte Stream Decoding - , DecodeError(..) 
- , DecodeState - , CodePoint - , decodeUtf8Either - , resumeDecodeUtf8Either - - -- ** UTF-8 Array Stream Decoding - , decodeUtf8Chunks - , decodeUtf8Chunks' - , decodeUtf8Chunks_ - -- , fromUtf8ChunksEndByLn - - -- * Elimination (Encoding) - -- ** Latin1 Encoding to Byte Stream - , encodeLatin1 - , encodeLatin1' - , encodeLatin1_ - - -- ** UTF-8 Encoding to Byte Stream - , readCharUtf8' - , readCharUtf8 - , readCharUtf8_ - , encodeUtf8 - , encodeUtf8' - , encodeUtf8_ - , encodeStrings - - -- ** UTF-8 Encoding to Chunk Stream - -- , toUtf8Chunks - -- , toUtf8Chunks' - -- , toUtf8Chunks_ - -- , toUtf8ChunksEndByLn - - -- , toPinnedUtf8Chunks - -- , toPinnedUtf8Chunks' - -- , toPinnedUtf8Chunks_ - -- , toPinnedUtf8ChunksEndByLn - - -- ** UTF-16 Encoding to Byte Stream - , encodeUtf16le' - , encodeUtf16le {- -- * Operations on character strings , strip -- (dropAround isSpace) @@ -89,1206 +32,30 @@ module Streamly.Internal.Unicode.Stream , words -- foldWords , unlines -- unfoldLines , unwords -- unfoldWords - - -- * StreamD UTF8 Encoding / Decoding transformations. - , decodeUtf8D - , decodeUtf8D' - , decodeUtf8D_ - , encodeUtf8D - , encodeUtf8D' - , encodeUtf8D_ - , decodeUtf8EitherD - , resumeDecodeUtf8EitherD - - -- * Decoding String Literals - , fromStr# - - -- * Word16 Utilities - , mkEvenW8Chunks - , swapByteOrder - - -- * Deprecations - , decodeUtf8Lax - , encodeLatin1Lax - , encodeUtf8Lax ) where #include "inline.hs" --- MachDeps.h includes ghcautoconf.h that defines WORDS_BIGENDIAN for big endian --- systems. 
-#include "MachDeps.h" - -import Control.Monad (void) -import Control.Monad.IO.Class (MonadIO, liftIO) -import Data.Bits (shiftR, shiftL, (.|.), (.&.)) -import Data.Char (chr, ord) +import Control.Monad.IO.Class (MonadIO) +import Data.Char (ord) #if MIN_VERSION_base(4,17,0) import Data.Char (generalCategory, GeneralCategory(Space)) #endif -import Data.Word (Word8, Word16) -import Foreign.Marshal.Alloc (mallocBytes) -import Foreign.Storable (Storable(..)) -#ifndef __GHCJS__ -import Fusion.Plugin.Types (Fuse(..)) -#endif -import GHC.Base (assert, unsafeChr) -import GHC.Exts (Addr#) -import GHC.IO.Encoding.Failure (isSurrogate) -import GHC.Ptr (Ptr (..), plusPtr) -import System.IO.Unsafe (unsafePerformIO) -import Streamly.Internal.Data.Array.Type (Array(..)) -import Streamly.Internal.Data.MutByteArray.Type (MutByteArray) import Streamly.Internal.Data.Fold (Fold) -import Streamly.Internal.Data.Parser (Parser) import Streamly.Internal.Data.Stream (Stream) -import Streamly.Internal.Data.Stream (Step (..)) -import Streamly.Internal.Data.SVar.Type (adaptState) -import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) -import Streamly.Internal.Data.Unbox (Unbox(peekAt)) import Streamly.Internal.Data.Unfold.Type (Unfold(..)) -import Streamly.Internal.System.IO (unsafeInlineIO) import qualified Streamly.Data.Fold as Fold -import qualified Streamly.Data.Unfold as Unfold -import qualified Streamly.Internal.Data.Array as Array -import qualified Streamly.Internal.Data.Parser as Parser (Parser) -import qualified Streamly.Internal.Data.Parser as ParserD import qualified Streamly.Internal.Data.Stream as Stream import qualified Streamly.Internal.Data.Stream as D import Prelude hiding (lines, words, unlines, unwords) -#include "DocTestUnicodeStream.hs" - -------------------------------------------------------------------------------- --- Latin1 decoding -------------------------------------------------------------------------------- - --- | Decode a stream of bytes to Unicode 
characters by mapping each byte to a --- corresponding Unicode 'Char' in 0-255 range. --- -{-# INLINE decodeLatin1 #-} -decodeLatin1 :: Monad m => Stream m Word8 -> Stream m Char -decodeLatin1 = fmap (unsafeChr . fromIntegral) - -------------------------------------------------------------------------------- --- Latin1 encoding -------------------------------------------------------------------------------- - --- | Encode a stream of Unicode characters to bytes by mapping each character --- to a byte in 0-255 range. Throws an error if the input stream contains --- characters beyond 255. --- -{-# INLINE encodeLatin1' #-} -encodeLatin1' :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1' = fmap convert - where - convert c = - let codepoint = ord c - in if codepoint > 255 - then error $ "Streamly.Unicode.encodeLatin1 invalid " ++ - "input char codepoint " ++ show codepoint - else fromIntegral codepoint - --- XXX Should we instead replace the invalid chars by NUL or whitespace or some --- other control char? That may affect the perf a bit but may be a better --- behavior. --- --- | Like 'encodeLatin1'' but silently maps input codepoints beyond 255 to --- arbitrary Latin1 chars in 0-255 range. No error or exception is thrown when --- such mapping occurs. --- -{-# INLINE encodeLatin1 #-} -encodeLatin1 :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1 = fmap (fromIntegral . ord) - --- | Like 'encodeLatin1' but drops the input characters beyond 255. --- -{-# INLINE encodeLatin1_ #-} -encodeLatin1_ :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1_ = fmap (fromIntegral . ord) . 
Stream.filter (<= chr 255) - --- | Same as 'encodeLatin1' --- -{-# DEPRECATED encodeLatin1Lax "Please use 'encodeLatin1' instead" #-} -{-# INLINE encodeLatin1Lax #-} -encodeLatin1Lax :: Monad m => Stream m Char -> Stream m Word8 -encodeLatin1Lax = encodeLatin1 - -------------------------------------------------------------------------------- --- UTF-8 decoding -------------------------------------------------------------------------------- - --- Int helps in cheaper conversion from Int to Char -type CodePoint = Int -type DecodeState = Word8 - --- We can divide the errors in three general categories: --- * A non-starter was encountered in a begin state --- * A starter was encountered without completing a codepoint --- * The last codepoint was not complete (input underflow) --- --- Need to separate resumable and non-resumable error. In case of non-resumable --- error we can also provide the failing byte. In case of resumable error the --- state can be opaque. --- -data DecodeError = DecodeError !DecodeState !CodePoint deriving Show - --- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. - --- XXX Use names decodeSuccess = 0, decodeFailure = 12 - -decodeTable :: [Word8] -decodeTable = [ - -- The first part of the table maps bytes to character classes that - -- to reduce the size of the transition table and create bitmasks. 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - - -- The second part is a transition table that maps a combination - -- of a state of the automaton and a character class to a state. - 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, - 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, - 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, - 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, - 12,36,12,12,12,12,12,12,12,12,12,12 - ] - -{-# INLINE utf8dLength #-} -utf8dLength :: Int -utf8dLength = length decodeTable - --- | We do not want to garbage collect this and free the memory, we want to --- keep this persistent. We don't know how to do that with GHC without having a --- reference in some global structure. So we use a hack, use mallocBytes so --- that the GC has no way to free it. -{-# NOINLINE utf8d #-} -utf8d :: Ptr Word8 -utf8d = unsafePerformIO $ do - let size = utf8dLength - p <- liftIO $ mallocBytes size - void $ D.fold - (Fold.foldlM' (\b a -> poke b a >> return (b `plusPtr` 1)) (return p)) - (D.fromList decodeTable) - return p - --- | Return element at the specified index without checking the bounds. --- and without touching the foreign ptr. -{-# INLINE_NORMAL unsafePeekElemOff #-} -unsafePeekElemOff :: forall a. Storable a => Ptr a -> Int -> a -unsafePeekElemOff p i = - let !x = unsafeInlineIO $ peekElemOff p i - in x - --- XXX We can use a fromPtr stream to implement it. 
-{-# INLINE showMemory #-} -showMemory :: - forall a. (Show a, Storable a) => Ptr a -> Ptr a -> String -showMemory cur end - | cur < end = - let cur1 = cur `plusPtr` sizeOf (undefined :: a) - in show (unsafeInlineIO $ peek cur) ++ " " ++ showMemory cur1 end -showMemory _ _ = "" - --- decode is split into two separate cases to avoid branching instructions. --- From the higher level flow we already know which case we are in so we can --- call the appropriate decode function. --- --- When the state is 0 -{-# INLINE decode0 #-} -decode0 :: Ptr Word8 -> Word8 -> Tuple' DecodeState CodePoint -decode0 table byte = - let !t = table `unsafePeekElemOff` fromIntegral byte - !codep' = (0xff `shiftR` fromIntegral t) .&. fromIntegral byte - !state' = table `unsafePeekElemOff` (256 + fromIntegral t) - in assert ((byte > 0x7f || error showByte) - && (state' /= 0 || error (showByte ++ showTable))) - (Tuple' state' codep') - - where - - utf8tableEnd = table `plusPtr` 364 - showByte = "Streamly: decode0: byte: " ++ show byte - showTable = " table: " ++ showMemory table utf8tableEnd - --- When the state is not 0 -{-# INLINE decode1 #-} -decode1 - :: Ptr Word8 - -> DecodeState - -> CodePoint - -> Word8 - -> Tuple' DecodeState CodePoint -decode1 table state codep byte = - -- Remember codep is Int type! - -- Can it be unsafe to convert the resulting Int to Char? - let !t = table `unsafePeekElemOff` fromIntegral byte - !codep' = (fromIntegral byte .&. 0x3f) .|. 
(codep `shiftL` 6) - !state' = table `unsafePeekElemOff` - (256 + fromIntegral state + fromIntegral t) - in assert (codep' <= 0x10FFFF - || error (showByte ++ showState state codep)) - (Tuple' state' codep') - where - - utf8tableEnd = table `plusPtr` 364 - showByte = "Streamly: decode1: byte: " ++ show byte - showState st cp = - " state: " ++ show st ++ - " codepoint: " ++ show cp ++ - " table: " ++ showMemory table utf8tableEnd - -------------------------------------------------------------------------------- --- Resumable UTF-8 decoding -------------------------------------------------------------------------------- - --- Strangely, GHCJS hangs linking template-haskell with this -#ifndef __GHCJS__ -{-# ANN type UTF8DecodeState Fuse #-} -#endif -data UTF8DecodeState s a - = UTF8DecodeInit s - | UTF8DecodeInit1 s Word8 - | UTF8DecodeFirst s Word8 - | UTF8Decoding s !DecodeState !CodePoint - | YieldAndContinue a (UTF8DecodeState s a) - | Done - -{-# INLINE_NORMAL resumeDecodeUtf8EitherD #-} -resumeDecodeUtf8EitherD - :: Monad m - => DecodeState - -> CodePoint - -> D.Stream m Word8 - -> D.Stream m (Either DecodeError Char) -resumeDecodeUtf8EitherD dst codep (D.Stream step state) = - let stt = - if dst == 0 - then UTF8DecodeInit state - else UTF8Decoding state dst codep - in D.Stream (step' utf8d) stt - where - {-# INLINE_LATE step' #-} - step' _ gst (UTF8DecodeInit st) = do - r <- step (adaptState gst) st - return $ case r of - Yield x s -> Skip (UTF8DecodeInit1 s x) - Skip s -> Skip (UTF8DecodeInit s) - Stop -> Skip Done - - step' _ _ (UTF8DecodeInit1 st x) = do - -- Note: It is important to use a ">" instead of a "<=" test - -- here for GHC to generate code layout for default branch - -- prediction for the common case. This is fragile and might - -- change with the compiler versions, we need a more reliable - -- "likely" primitive to control branch predication. 
- case x > 0x7f of - False -> - return $ Skip $ YieldAndContinue - (Right $ unsafeChr (fromIntegral x)) - (UTF8DecodeInit st) - -- Using a separate state here generates a jump to a - -- separate code block in the core which seems to perform - -- slightly better for the non-ascii case. - True -> return $ Skip $ UTF8DecodeFirst st x - - -- XXX should we merge it with UTF8DecodeInit1? - step' table _ (UTF8DecodeFirst st x) = do - let (Tuple' sv cp) = decode0 table x - return $ - case sv of - 12 -> - Skip $ YieldAndContinue (Left $ DecodeError 0 (fromIntegral x)) - (UTF8DecodeInit st) - 0 -> error "unreachable state" - _ -> Skip (UTF8Decoding st sv cp) - - -- We recover by trying the new byte x a starter of a new codepoint. - -- XXX on error need to report the next byte "x" as well. - -- XXX need to use the same recovery in array decoding routine as well - step' table gst (UTF8Decoding st statePtr codepointPtr) = do - r <- step (adaptState gst) st - case r of - Yield x s -> do - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - return $ - case sv of - 0 -> Skip $ YieldAndContinue (Right $ unsafeChr cp) - (UTF8DecodeInit s) - 12 -> - Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) - (UTF8DecodeInit1 s x) - _ -> Skip (UTF8Decoding s sv cp) - Skip s -> return $ Skip (UTF8Decoding s statePtr codepointPtr) - Stop -> return $ Skip $ YieldAndContinue (Left $ DecodeError statePtr codepointPtr) Done - - step' _ _ (YieldAndContinue c s) = return $ Yield c s - step' _ _ Done = return Stop - --- XXX We can use just one API, and define InitState = 0 and InitCodePoint = 0 --- to use as starting state. 
--- -{-# INLINE_NORMAL decodeUtf8EitherD #-} -decodeUtf8EitherD :: Monad m - => D.Stream m Word8 -> D.Stream m (Either DecodeError Char) -decodeUtf8EitherD = resumeDecodeUtf8EitherD 0 0 - --- | --- --- /Pre-release/ -{-# INLINE decodeUtf8Either #-} -decodeUtf8Either :: Monad m - => Stream m Word8 -> Stream m (Either DecodeError Char) -decodeUtf8Either = decodeUtf8EitherD - --- | --- --- /Pre-release/ -{-# INLINE resumeDecodeUtf8Either #-} -resumeDecodeUtf8Either - :: Monad m - => DecodeState - -> CodePoint - -> Stream m Word8 - -> Stream m (Either DecodeError Char) -resumeDecodeUtf8Either = resumeDecodeUtf8EitherD - -------------------------------------------------------------------------------- --- One shot decoding -------------------------------------------------------------------------------- - -data CodingFailureMode - = TransliterateCodingFailure - | ErrorOnCodingFailure - | DropOnCodingFailure - deriving (Show) - -{-# INLINE replacementChar #-} -replacementChar :: Char -replacementChar = '\xFFFD' - -data UTF8CharDecodeState a - = UTF8CharDecodeInit - | UTF8CharDecoding !DecodeState !CodePoint - -{-# INLINE parseCharUtf8WithD #-} -parseCharUtf8WithD :: - Monad m => CodingFailureMode -> ParserD.Parser Word8 m Char -parseCharUtf8WithD cfm = ParserD.Parser (step' utf8d) initial extract - - where - - prefix = "Streamly.Internal.Data.Stream.parseCharUtf8WithD:" - - {-# INLINE initial #-} - initial = return $ ParserD.IPartial UTF8CharDecodeInit - - handleError err souldBackTrack = - case cfm of - ErrorOnCodingFailure -> ParserD.SError err - TransliterateCodingFailure -> - case souldBackTrack of - True -> ParserD.SDone 0 replacementChar - False -> ParserD.SDone 1 replacementChar - DropOnCodingFailure -> - case souldBackTrack of - True -> ParserD.SContinue 0 UTF8CharDecodeInit - False -> ParserD.SContinue 1 UTF8CharDecodeInit - - {-# INLINE step' #-} - step' table UTF8CharDecodeInit x = - -- Note: It is important to use a ">" instead of a "<=" test - -- here for GHC 
to generate code layout for default branch - -- prediction for the common case. This is fragile and might - -- change with the compiler versions, we need a more reliable - -- "likely" primitive to control branch predication. - return $ case x > 0x7f of - False -> ParserD.SDone 1 $ unsafeChr $ fromIntegral x - True -> - let (Tuple' sv cp) = decode0 table x - in case sv of - 12 -> - let msg = prefix - ++ "Invalid first UTF8 byte" ++ show x - in handleError msg False - 0 -> error $ prefix ++ "unreachable state" - _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) - - step' table (UTF8CharDecoding statePtr codepointPtr) x = return $ - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - in case sv of - 0 -> ParserD.SDone 1 $ unsafeChr cp - 12 -> - let msg = prefix - ++ "Invalid subsequent UTF8 byte" - ++ show x - ++ "in state" - ++ show statePtr - ++ "accumulated value" - ++ show codepointPtr - in handleError msg True - _ -> ParserD.SContinue 1 (UTF8CharDecoding sv cp) - - {-# INLINE extract #-} - extract UTF8CharDecodeInit = error $ prefix ++ "Not enough input" - extract (UTF8CharDecoding _ _) = - case cfm of - ErrorOnCodingFailure -> - return $ ParserD.FError $ prefix ++ "Not enough input" - TransliterateCodingFailure -> - return (ParserD.FDone 0 replacementChar) - -- XXX We shouldn't error out here. There is no way to represent an - -- empty parser result unless we return a "Maybe" type. - DropOnCodingFailure -> error $ prefix ++ "Not enough input" - --- XXX This should ideally accept a "CodingFailureMode" and perform appropriate --- error handling. This isn't possible now as "TransliterateCodingFailure"'s --- workflow requires backtracking 1 element. This can be revisited once "Fold" --- supports backtracking. -{-# INLINE writeCharUtf8' #-} -writeCharUtf8' :: Monad m => Parser Word8 m Char -writeCharUtf8' = parseCharUtf8WithD ErrorOnCodingFailure - --- XXX The initial idea was to have "parseCharUtf8" and offload the error --- handling to another parser. 
So, say we had "parseCharUtf8'", --- --- >>> parseCharUtf8Smart = parseCharUtf8' <|> Parser.fromPure replacementChar --- --- But unfortunately parseCharUtf8Smart used in conjunction with "parseMany" - --- that is "parseMany parseCharUtf8Smart" on a stream causes the heap to --- overflow. Even a heap size of 500 MB was not sufficient. --- --- This needs to be investigated futher. -{-# INLINE parseCharUtf8With #-} -parseCharUtf8With :: - Monad m => CodingFailureMode -> Parser.Parser Word8 m Char -parseCharUtf8With = parseCharUtf8WithD - --- XXX write it as a parser and use parseMany to decode a stream, need to check --- if that preserves the same performance. Or we can use a resumable parser --- that parses a chunk at a time. --- --- XXX Implement this in terms of decodeUtf8Either. Need to make sure that --- decodeUtf8Either preserves the performance characterstics. --- -{-# INLINE_NORMAL decodeUtf8WithD #-} -decodeUtf8WithD :: Monad m - => CodingFailureMode -> D.Stream m Word8 -> D.Stream m Char -decodeUtf8WithD cfm (D.Stream step state) = - D.Stream (step' utf8d) (UTF8DecodeInit state) - - where - - prefix = "Streamly.Internal.Data.Stream.decodeUtf8With: " - - {-# INLINE handleError #-} - handleError e s = - case cfm of - ErrorOnCodingFailure -> error e - TransliterateCodingFailure -> YieldAndContinue replacementChar s - DropOnCodingFailure -> s - - {-# INLINE handleUnderflow #-} - handleUnderflow = - case cfm of - ErrorOnCodingFailure -> error $ prefix ++ "Not enough input" - TransliterateCodingFailure -> YieldAndContinue replacementChar Done - DropOnCodingFailure -> Done - - {-# INLINE_LATE step' #-} - step' _ gst (UTF8DecodeInit st) = do - r <- step (adaptState gst) st - return $ case r of - Yield x s -> Skip (UTF8DecodeInit1 s x) - Skip s -> Skip (UTF8DecodeInit s) - Stop -> Skip Done - - step' _ _ (UTF8DecodeInit1 st x) = do - -- Note: It is important to use a ">" instead of a "<=" test - -- here for GHC to generate code layout for default branch - -- 
prediction for the common case. This is fragile and might - -- change with the compiler versions, we need a more reliable - -- "likely" primitive to control branch predication. - case x > 0x7f of - False -> - return $ Skip $ YieldAndContinue - (unsafeChr (fromIntegral x)) - (UTF8DecodeInit st) - -- Using a separate state here generates a jump to a - -- separate code block in the core which seems to perform - -- slightly better for the non-ascii case. - True -> return $ Skip $ UTF8DecodeFirst st x - - -- XXX should we merge it with UTF8DecodeInit1? - step' table _ (UTF8DecodeFirst st x) = do - let (Tuple' sv cp) = decode0 table x - return $ - case sv of - 12 -> - let msg = prefix ++ "Invalid first UTF8 byte " ++ show x - in Skip $ handleError msg (UTF8DecodeInit st) - 0 -> error "unreachable state" - _ -> Skip (UTF8Decoding st sv cp) - - -- We recover by trying the new byte x as a starter of a new codepoint. - -- XXX need to use the same recovery in array decoding routine as well - step' table gst (UTF8Decoding st statePtr codepointPtr) = do - r <- step (adaptState gst) st - case r of - Yield x s -> do - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - return $ case sv of - 0 -> Skip $ YieldAndContinue - (unsafeChr cp) (UTF8DecodeInit s) - 12 -> - let msg = prefix - ++ "Invalid subsequent UTF8 byte " - ++ show x - ++ " in state " - ++ show statePtr - ++ " accumulated value " - ++ show codepointPtr - in Skip $ handleError msg (UTF8DecodeInit1 s x) - _ -> Skip (UTF8Decoding s sv cp) - Skip s -> return $ - Skip (UTF8Decoding s statePtr codepointPtr) - Stop -> return $ Skip handleUnderflow - - step' _ _ (YieldAndContinue c s) = return $ Yield c s - step' _ _ Done = return Stop - -{-# INLINE decodeUtf8D #-} -decodeUtf8D :: Monad m => D.Stream m Word8 -> D.Stream m Char -decodeUtf8D = decodeUtf8WithD TransliterateCodingFailure - --- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. 
--- Any invalid codepoint encountered is replaced with the unicode replacement --- character. --- -{-# INLINE decodeUtf8 #-} -decodeUtf8 :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8 = decodeUtf8D - -{-# INLINE decodeUtf8D' #-} -decodeUtf8D' :: Monad m => D.Stream m Word8 -> D.Stream m Char -decodeUtf8D' = decodeUtf8WithD ErrorOnCodingFailure - --- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. --- The function throws an error if an invalid codepoint is encountered. --- -{-# INLINE decodeUtf8' #-} -decodeUtf8' :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8' = decodeUtf8D' +import Streamly.Internal.Unicode.Encode +import Streamly.Internal.Unicode.Decode -{-# INLINE decodeUtf8D_ #-} -decodeUtf8D_ :: Monad m => D.Stream m Word8 -> D.Stream m Char -decodeUtf8D_ = decodeUtf8WithD DropOnCodingFailure - --- | Decode a UTF-8 encoded bytestream to a stream of Unicode characters. --- Any invalid codepoint encountered is dropped. --- -{-# INLINE decodeUtf8_ #-} -decodeUtf8_ :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8_ = decodeUtf8D_ - --- | Same as 'decodeUtf8' --- -{-# DEPRECATED decodeUtf8Lax "Please use 'decodeUtf8' instead" #-} -{-# INLINE decodeUtf8Lax #-} -decodeUtf8Lax :: Monad m => Stream m Word8 -> Stream m Char -decodeUtf8Lax = decodeUtf8 - -------------------------------------------------------------------------------- --- Decoding Utf16 -------------------------------------------------------------------------------- - -data MkEvenW8ChunksState s w8 arr - = MECSInit s - | MECSBuffer w8 s - | MECSYieldAndInit arr s - | MECSYieldAndBuffer arr w8 s - --- | Ensure chunks of even length. This can be used before casting the arrays to --- Word16. Use this API when interacting with external data. --- --- The chunks are split and merged accordingly to create arrays of even length. --- If the sum of length of all the arrays in the stream is odd then the trailing --- byte of the last array is dropped. 
--- -{-# INLINE_NORMAL mkEvenW8Chunks #-} -mkEvenW8Chunks :: Monad m => Stream m (Array Word8) -> Stream m (Array Word8) -mkEvenW8Chunks (D.Stream step state) = D.Stream step1 (MECSInit state) - - where - - {-# INLINE_LATE step1 #-} - step1 gst (MECSInit st) = do - r <- step (adaptState gst) st - return $ - case r of - Yield arr st1 -> - let len = Array.length arr - in if (len .&. 1) == 1 - then let arr1 = Array.unsafeSliceOffLen 0 (len - 1) arr - remElem = Array.unsafeGetIndex (len - 1) arr - in Yield arr1 (MECSBuffer remElem st1) - else Yield arr (MECSInit st1) - Skip s -> Skip (MECSInit s) - Stop -> Stop - step1 gst (MECSBuffer remElem st) = do - r <- step (adaptState gst) st - return $ - case r of - Yield arr st1 | Array.length arr == 0 -> - Skip (MECSBuffer remElem st1) - Yield arr st1 | Array.length arr == 1 -> - let fstElem = Array.unsafeGetIndex 0 arr - w16 = Array.fromList [remElem, fstElem] - in Yield w16 (MECSInit st1) - Yield arr st1 -> - let len = Array.length arr - in if (len .&. 1) == 1 - then let arr1 = Array.unsafeSliceOffLen 1 (len - 1) arr - fstElem = Array.unsafeGetIndex 0 arr - w16 = Array.fromList [remElem, fstElem] - in Yield w16 (MECSYieldAndInit arr1 st1) - else let arr1 = Array.unsafeSliceOffLen 1 (len - 2) arr - fstElem = Array.unsafeGetIndex 0 arr - lstElem = Array.unsafeGetIndex (len - 1) arr - w16 = Array.fromList [remElem, fstElem] - in Yield w16 - (MECSYieldAndBuffer arr1 lstElem st1) - Skip s -> Skip (MECSBuffer remElem s) - Stop -> Stop -- Here the last Word8 is lost - step1 _ (MECSYieldAndInit arr st) = - pure $ Yield arr (MECSInit st) - step1 _ (MECSYieldAndBuffer arr lastElem st) = - pure $ Yield arr (MECSBuffer lastElem st) - --- | Swap the byte order of Word16 --- --- > swapByteOrder 0xABCD == 0xCDAB --- > swapByteOrder . swapByteOrder == id -{-# INLINE swapByteOrder #-} -swapByteOrder :: Word16 -> Word16 -swapByteOrder w = (w `shiftL` 8) .|. 
(w `shiftR` 8) - -data DecodeUtf16WithState w c s - = U16NoSurrogate s - | U16HighSurrogate w s - | U16D - | U16YAndC c (DecodeUtf16WithState w c s) - -{-# INLINE_NORMAL decodeUtf16With #-} -decodeUtf16With :: - Monad m - => CodingFailureMode - -> D.Stream m Word16 - -> D.Stream m Char -decodeUtf16With cfm (D.Stream step state) = - D.Stream step1 (U16NoSurrogate state) - - where - - prefix = "Streamly.Internal.Unicode.Stream.decodeUtf16With: " - - {-# INLINE combineSurrogates #-} - combineSurrogates hi lo = - let first10 = fromIntegral (hi - utf16HighSurrogate) `shiftL` 10 - second10 = fromIntegral (lo - utf16LowSurrogate) - in unsafeChr (0x10000 + (first10 .|. second10)) - - {-# INLINE transliterateOrError #-} - transliterateOrError e s = - case cfm of - ErrorOnCodingFailure -> error e - TransliterateCodingFailure -> U16YAndC replacementChar s - DropOnCodingFailure -> s - - {-# INLINE inputUnderflow #-} - inputUnderflow = - case cfm of - ErrorOnCodingFailure -> error $ prefix ++ "Input Underflow" - TransliterateCodingFailure -> U16YAndC replacementChar U16D - DropOnCodingFailure -> U16D - - {-# INLINE_LATE step1 #-} - step1 gst (U16NoSurrogate st) = do - r <- step (adaptState gst) st - pure $ - case r of - Yield x st1 - | x < 0xD800 || x > 0xDFFF -> - Yield (unsafeChr (fromIntegral x)) (U16NoSurrogate st1) - | x >= 0xD800 && x <= 0xDBFF -> - Skip (U16HighSurrogate x st1) - | otherwise -> - let msg = prefix - ++ "Invalid first UTF16 word " ++ show x - in Skip $ - transliterateOrError msg (U16NoSurrogate st1) - Skip st1 -> Skip (U16NoSurrogate st1) - Stop -> Stop - step1 gst (U16HighSurrogate hi st) = do - r <- step (adaptState gst) st - pure $ - case r of - Yield x st1 - | x >= 0xDC00 && x <= 0xDFFF -> - Yield (combineSurrogates hi x) (U16NoSurrogate st1) - | otherwise -> - let msg = prefix - ++ "Invalid subsequent UTF16 word " ++ show x - ++ " in state " ++ show hi - in Skip $ - transliterateOrError msg (U16NoSurrogate st1) - Skip st1 -> Skip (U16HighSurrogate hi 
st1) - Stop -> Skip inputUnderflow - step1 _ (U16YAndC x st) = pure $ Yield x st - step1 _ U16D = pure Stop - -{-# INLINE decodeUtf16' #-} -decodeUtf16' :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16' = decodeUtf16With ErrorOnCodingFailure - -{-# INLINE decodeUtf16 #-} -decodeUtf16 :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16 = decodeUtf16With TransliterateCodingFailure - --- | Similar to 'decodeUtf16le' but throws an error if an invalid codepoint is --- encountered. --- -{-# INLINE decodeUtf16le' #-} -decodeUtf16le' :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16le' = - decodeUtf16' -#ifdef WORDS_BIGENDIAN - . fmap swapByteOrder -#endif - --- | Decode a UTF-16 encoded stream to a stream of Unicode characters. Any --- invalid codepoint encountered is replaced with the unicode replacement --- character. --- --- The Word16s are expected to be in the little-endian byte order. --- -{-# INLINE decodeUtf16le #-} -decodeUtf16le :: Monad m => Stream m Word16 -> Stream m Char -decodeUtf16le = - decodeUtf16 -#ifdef WORDS_BIGENDIAN - . 
fmap swapByteOrder -#endif - -------------------------------------------------------------------------------- --- Decoding Array Streams -------------------------------------------------------------------------------- - -#ifndef __GHCJS__ -{-# ANN type FlattenState Fuse #-} -#endif -data FlattenState s - = OuterLoop s !(Maybe (DecodeState, CodePoint)) - | InnerLoopDecodeInit s MutByteArray !Int !Int - | InnerLoopDecodeFirst s MutByteArray !Int !Int Word8 - | InnerLoopDecoding s MutByteArray !Int !Int - !DecodeState !CodePoint - | YAndC !Char (FlattenState s) -- These constructors can be - -- encoded in the UTF8DecodeState - -- type, I prefer to keep these - -- flat even though that means - -- coming up with new names - | D - --- The normal decodeUtf8 above should fuse with flattenArrays --- to create this exact code but it doesn't for some reason, as of now this --- remains the fastest way I could figure out to decodeUtf8. --- --- XXX Add Proper error messages -{-# INLINE_NORMAL decodeUtf8ArraysWithD #-} -decodeUtf8ArraysWithD :: - MonadIO m - => CodingFailureMode - -> D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8ArraysWithD cfm (D.Stream step state) = - D.Stream (step' utf8d) (OuterLoop state Nothing) - where - {-# INLINE transliterateOrError #-} - transliterateOrError e s = - case cfm of - ErrorOnCodingFailure -> error e - TransliterateCodingFailure -> YAndC replacementChar s - DropOnCodingFailure -> s - {-# INLINE inputUnderflow #-} - inputUnderflow = - case cfm of - ErrorOnCodingFailure -> - error $ - show "Streamly.Internal.Data.Stream." 
- ++ "decodeUtf8ArraysWith: Input Underflow" - TransliterateCodingFailure -> YAndC replacementChar D - DropOnCodingFailure -> D - {-# INLINE_LATE step' #-} - step' _ gst (OuterLoop st Nothing) = do - r <- step (adaptState gst) st - return $ - case r of - Yield Array {..} s -> - Skip (InnerLoopDecodeInit s arrContents arrStart arrEnd) - Skip s -> Skip (OuterLoop s Nothing) - Stop -> Skip D - step' _ gst (OuterLoop st dst@(Just (ds, cp))) = do - r <- step (adaptState gst) st - return $ - case r of - Yield Array {..} s -> - Skip (InnerLoopDecoding s arrContents arrStart arrEnd ds cp) - Skip s -> Skip (OuterLoop s dst) - Stop -> Skip inputUnderflow - step' _ _ (InnerLoopDecodeInit st _ p end) - | p == end = do - return $ Skip $ OuterLoop st Nothing - step' _ _ (InnerLoopDecodeInit st contents p end) = do - x <- liftIO $ peekAt p contents - -- Note: It is important to use a ">" instead of a "<=" test here for - -- GHC to generate code layout for default branch prediction for the - -- common case. This is fragile and might change with the compiler - -- versions, we need a more reliable "likely" primitive to control - -- branch predication. - case x > 0x7f of - False -> - return $ Skip $ YAndC - (unsafeChr (fromIntegral x)) - (InnerLoopDecodeInit st contents (p + 1) end) - -- Using a separate state here generates a jump to a separate code - -- block in the core which seems to perform slightly better for the - -- non-ascii case. - True -> return $ Skip $ InnerLoopDecodeFirst st contents p end x - - step' table _ (InnerLoopDecodeFirst st contents p end x) = do - let (Tuple' sv cp) = decode0 table x - return $ - case sv of - 12 -> - Skip $ - transliterateOrError - ( - "Streamly.Internal.Data.Stream." 
- ++ "decodeUtf8ArraysWith: Invalid UTF8" - ++ " codepoint encountered" - ) - (InnerLoopDecodeInit st contents (p + 1) end) - 0 -> error "unreachable state" - _ -> Skip (InnerLoopDecoding st contents (p + 1) end sv cp) - step' _ _ (InnerLoopDecoding st _ p end sv cp) - | p == end = return $ Skip $ OuterLoop st (Just (sv, cp)) - step' table _ (InnerLoopDecoding st contents p end statePtr codepointPtr) = do - x <- liftIO $ peekAt p contents - let (Tuple' sv cp) = decode1 table statePtr codepointPtr x - return $ - case sv of - 0 -> - Skip $ - YAndC - (unsafeChr cp) - (InnerLoopDecodeInit st contents (p + 1) end) - 12 -> - Skip $ - transliterateOrError - ( - "Streamly.Internal.Data.Stream." - ++ "decodeUtf8ArraysWith: Invalid UTF8" - ++ " codepoint encountered" - ) - (InnerLoopDecodeInit st contents (p + 1) end) - _ -> - Skip - (InnerLoopDecoding st contents (p + 1) end sv cp) - step' _ _ (YAndC c s) = return $ Yield c s - step' _ _ D = return Stop - --- | Like 'decodeUtf8' but for a chunked stream. It may be slightly faster than --- flattening the stream and then decoding with 'decodeUtf8'. -{-# INLINE decodeUtf8Chunks #-} -decodeUtf8Chunks :: - MonadIO m - => D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8Chunks = decodeUtf8ArraysWithD TransliterateCodingFailure - --- | Like 'decodeUtf8\'' but for a chunked stream. It may be slightly faster --- than flattening the stream and then decoding with 'decodeUtf8\''. -{-# INLINE decodeUtf8Chunks' #-} -decodeUtf8Chunks' :: - MonadIO m - => D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8Chunks' = decodeUtf8ArraysWithD ErrorOnCodingFailure - --- | Like 'decodeUtf8_' but for a chunked stream. It may be slightly faster --- than flattening the stream and then decoding with 'decodeUtf8_'. 
-{-# INLINE decodeUtf8Chunks_ #-} -decodeUtf8Chunks_ :: - MonadIO m - => D.Stream m (Array Word8) - -> D.Stream m Char -decodeUtf8Chunks_ = decodeUtf8ArraysWithD DropOnCodingFailure - -------------------------------------------------------------------------------- --- Encoding Unicode (UTF-8) Characters -------------------------------------------------------------------------------- - -data WList a = WCons !a !(WList a) | WNil - --- UTF-8 primitives, Lifted from GHC.IO.Encoding.UTF8. - -{-# INLINE ord2 #-} -ord2 :: Char -> (WList Word8) -ord2 c = assert (n >= 0x80 && n <= 0x07ff) (WCons x1 (WCons x2 WNil)) - where - n = ord c - x1 = fromIntegral $ (n `shiftR` 6) + 0xC0 - x2 = fromIntegral $ (n .&. 0x3F) + 0x80 - -{-# INLINE ord3 #-} -ord3 :: Char -> (WList Word8) -ord3 c = assert (n >= 0x0800 && n <= 0xffff) (WCons x1 (WCons x2 (WCons x3 WNil))) - where - n = ord c - x1 = fromIntegral $ (n `shiftR` 12) + 0xE0 - x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 - x3 = fromIntegral $ (n .&. 0x3F) + 0x80 - -{-# INLINE ord4 #-} -ord4 :: Char -> (WList Word8) -ord4 c = assert (n >= 0x10000) (WCons x1 (WCons x2 (WCons x3 (WCons x4 WNil)))) - where - n = ord c - x1 = fromIntegral $ (n `shiftR` 18) + 0xF0 - x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80 - x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80 - x4 = fromIntegral $ (n .&. 
0x3F) + 0x80 - -{-# INLINE_NORMAL readCharUtf8With #-} -readCharUtf8With :: Monad m => (WList Word8) -> Unfold m Char Word8 -readCharUtf8With surr = Unfold step inject - - where - - inject c = - return $ case ord c of - x | x <= 0x7F -> fromIntegral x `WCons` WNil - | x <= 0x7FF -> ord2 c - | x <= 0xFFFF -> if isSurrogate c then surr else ord3 c - | otherwise -> ord4 c - - {-# INLINE_LATE step #-} - step WNil = return Stop - step (WCons x xs) = return $ Yield x xs - -{-# INLINE_NORMAL readCharUtf8' #-} -readCharUtf8' :: Monad m => Unfold m Char Word8 -readCharUtf8' = - readCharUtf8With $ - error "Streamly.Internal.Unicode.readCharUtf8': Encountered a surrogate" - --- More yield points improve performance, but I am not sure if they can cause --- too much code bloat or some trouble with fusion. So keeping only two yield --- points for now, one for the ascii chars (fast path) and one for all other --- paths (slow path). -{-# INLINE_NORMAL encodeUtf8D' #-} -encodeUtf8D' :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D' = D.unfoldEach readCharUtf8' - --- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. When --- any invalid character (U+D800-U+D8FF) is encountered in the input stream the --- function errors out. --- -{-# INLINE encodeUtf8' #-} -encodeUtf8' :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8' = encodeUtf8D' - -{-# INLINE_NORMAL readCharUtf8 #-} -readCharUtf8 :: Monad m => Unfold m Char Word8 -readCharUtf8 = readCharUtf8With $ WCons 239 (WCons 191 (WCons 189 WNil)) - --- | See section "3.9 Unicode Encoding Forms" in --- https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf --- -{-# INLINE_NORMAL encodeUtf8D #-} -encodeUtf8D :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D = D.unfoldEach readCharUtf8 - --- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. 
Any --- Invalid characters (U+D800-U+D8FF) in the input stream are replaced by the --- Unicode replacement character U+FFFD. --- -{-# INLINE encodeUtf8 #-} -encodeUtf8 :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8 = encodeUtf8D - -{-# INLINE_NORMAL readCharUtf8_ #-} -readCharUtf8_ :: Monad m => Unfold m Char Word8 -readCharUtf8_ = readCharUtf8With WNil - -{-# INLINE_NORMAL encodeUtf8D_ #-} -encodeUtf8D_ :: Monad m => D.Stream m Char -> D.Stream m Word8 -encodeUtf8D_ = D.unfoldEach readCharUtf8_ - --- | Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any --- Invalid characters (U+D800-U+D8FF) in the input stream are dropped. --- -{-# INLINE encodeUtf8_ #-} -encodeUtf8_ :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8_ = encodeUtf8D_ - --- | Same as 'encodeUtf8' --- -{-# DEPRECATED encodeUtf8Lax "Please use 'encodeUtf8' instead" #-} -{-# INLINE encodeUtf8Lax #-} -encodeUtf8Lax :: Monad m => Stream m Char -> Stream m Word8 -encodeUtf8Lax = encodeUtf8 - -------------------------------------------------------------------------------- --- Encoding to Utf16 -------------------------------------------------------------------------------- - -{-# INLINE utf16LowSurrogate #-} -utf16LowSurrogate :: Word16 -utf16LowSurrogate = 0xDC00 - -{-# INLINE utf16HighSurrogate #-} -utf16HighSurrogate :: Word16 -utf16HighSurrogate = 0xD800 - -{-# INLINE_NORMAL readCharUtf16With #-} -readCharUtf16With :: Monad m => WList Word16 -> Unfold m Char Word16 -readCharUtf16With invalidReplacement = Unfold step inject - - where - - inject c = - return $ case ord c of - x | x < 0xD800 -> fromIntegral x `WCons` WNil - | x > 0xDFFF && x <= 0xFFFF -> fromIntegral x `WCons` WNil - | x >= 0x10000 && x <= 0x10FFFF -> - let u = x - 0x10000 -- 20 bits - h = utf16HighSurrogate - + fromIntegral (u `shiftR` 10) -- 10 bits - l = utf16LowSurrogate - + fromIntegral (u .&. 
0x3FF) -- 10 bits - in WCons h $ WCons l WNil - | otherwise -> invalidReplacement - - {-# INLINE_LATE step #-} - step WNil = return Stop - step (WCons x xs) = return $ Yield x xs - -{-# INLINE encodeUtf16' #-} -encodeUtf16' :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16' = D.unfoldEach (readCharUtf16With errString) - where - errString = - error - $ "Streamly.Internal.Unicode.encodeUtf16': Encountered an \ - invalid character" - -{-# INLINE encodeUtf16 #-} -encodeUtf16 :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16 = D.unfoldEach (readCharUtf16With WNil) - --- | Similar to 'encodeUtf16le' but throws an error if any invalid character is --- encountered. --- -{-# INLINE encodeUtf16le' #-} -encodeUtf16le' :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16le' = -#ifdef WORDS_BIGENDIAN - fmap swapByteOrder . -#endif - encodeUtf16' - --- | Encode a stream of Unicode characters to a UTF-16 encoded stream. Any --- invalid characters in the input stream are replaced by the Unicode --- replacement character U+FFFD. --- --- The resulting Word16s are encoded in little-endian byte order. --- -{-# INLINE encodeUtf16le #-} -encodeUtf16le :: Monad m => Stream m Char -> Stream m Word16 -encodeUtf16le = -#ifdef WORDS_BIGENDIAN - fmap swapByteOrder . -#endif - encodeUtf16 - -------------------------------------------------------------------------------- --- Decoding string literals -------------------------------------------------------------------------------- - --- XXX decodeCString# - --- | Read UTF-8 encoded bytes as chars from an 'Addr#' until a 0 byte is --- encountered, the 0 byte is not included in the stream. --- --- /Unsafe:/ The caller is responsible for safe addressing. 
--- --- Note that this is completely safe when reading from Haskell string --- literals because they are guaranteed to be NULL terminated: --- --- >>> Stream.fold Fold.toList (Unicode.fromStr# "Haskell"#) --- "Haskell" --- -{-# INLINE fromStr# #-} -fromStr# :: MonadIO m => Addr# -> Stream m Char -fromStr# addr = decodeUtf8 $ Stream.fromCString# addr - -------------------------------------------------------------------------------- --- Encode streams of containers -------------------------------------------------------------------------------- - --- | Encode a container to @Array Word8@ provided an unfold to covert it to a --- Char stream and an encoding function. --- --- /Internal/ -{-# INLINE encodeObject #-} -encodeObject :: MonadIO m => - (Stream m Char -> Stream m Word8) - -> Unfold m a Char - -> a - -> m (Array Word8) -encodeObject encode u = Stream.fold Array.create . encode . Stream.unfold u - --- | Encode a stream of container objects using the supplied encoding scheme. --- Each object is encoded as an @Array Word8@. --- --- /Internal/ -{-# INLINE encodeObjects #-} -encodeObjects :: MonadIO m => - (Stream m Char -> Stream m Word8) - -> Unfold m a Char - -> Stream m a - -> Stream m (Array Word8) -encodeObjects encode u = Stream.mapM (encodeObject encode u) - --- | Encode a stream of 'String' using the supplied encoding scheme. Each --- string is encoded as an @Array Word8@. 
--- -{-# INLINE encodeStrings #-} -encodeStrings :: MonadIO m => - (Stream m Char -> Stream m Word8) - -> Stream m String - -> Stream m (Array Word8) -encodeStrings encode = encodeObjects encode Unfold.fromList +#include "DocTestUnicodeStream.hs" {- ------------------------------------------------------------------------------- @@ -1360,7 +127,7 @@ isSpace c -- /Pre-release/ {-# INLINE words #-} words :: Monad m => Fold m Char b -> Stream m Char -> Stream m b -words f = D.wordsBy isSpace f +words = D.wordsBy isSpace -- | Unfold a stream to character streams using the supplied 'Unfold' -- and concat the results suffixing a newline character @\\n@ to each stream. diff --git a/core/src/doctest/DocTestUnicodeStream.hs b/core/src/doctest/DocTestUnicodeStream.hs index 7bde1b55f0..83f4a70eb5 100644 --- a/core/src/doctest/DocTestUnicodeStream.hs +++ b/core/src/doctest/DocTestUnicodeStream.hs @@ -1,12 +1,15 @@ {- $setup >>> :m +>>> import qualified Data.Char as Char >>> import qualified Streamly.Data.Fold as Fold >>> import qualified Streamly.Data.Stream as Stream +>>> import qualified Streamly.Data.Unfold as Unfold >>> import qualified Streamly.Unicode.Stream as Unicode For APIs that have not been released yet. 
>>> :set -XMagicHash +>>> import qualified Streamly.Internal.Data.Stream as Stream >>> import qualified Streamly.Internal.Unicode.Stream as Unicode -} diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index 027aed6943..d0d743ad3f 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -201,7 +201,7 @@ common compile-options -Wall-missed-specialisations if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + ghc-options: -j1 +RTS -M500M -RTS if flag(internal-use-unliftio) cpp-options: -DUSE_UNLIFTIO @@ -515,6 +515,7 @@ library , Streamly.Internal.Data.Stream.Generate , Streamly.Internal.Data.Stream.Lift , Streamly.Internal.Data.Stream.Nesting + , Streamly.Internal.Data.Stream.Parse , Streamly.Internal.Data.Stream.Step , Streamly.Internal.Data.Stream.Top , Streamly.Internal.Data.Stream.Transform @@ -550,6 +551,9 @@ library , Streamly.Internal.FileSystem.Path.Common , Streamly.Internal.FileSystem.DirOptions + , Streamly.Internal.Unicode.Encode + , Streamly.Internal.Unicode.Decode + if flag(internal-dev) exposed-modules: Streamly.Internal.Data.StreamK.Alt diff --git a/docs/User/Project/Changelog.md b/docs/User/Project/Changelog.md index cb23f869b8..f87c8c2215 100644 --- a/docs/User/Project/Changelog.md +++ b/docs/User/Project/Changelog.md @@ -4,6 +4,42 @@ ## Unreleased +* The following deprecated modules have been removed: + * Streamly.Data.Array.Foreign + * Streamly.Data.Fold.Tee + * Streamly.Prelude + +* The following deprecated internal modules have been removed: + * Streamly.Internal.Data.SVar + * Streamly.Internal.Data.Stream.SVar + * Streamly.Internal.Data.Stream.Serial + * Streamly.Internal.Data.Stream.Zip + * Streamly.Internal.Data.Stream.Async + * Streamly.Internal.Data.Stream.Parallel + * Streamly.Internal.Data.Stream.Ahead + * Streamly.Internal.Data.Stream.ZipAsync + * Streamly.Internal.Data.Stream.IsStream + * Streamly.Internal.Data.Unfold.SVar + * Streamly.Internal.Data.Stream.Common + * 
Streamly.Internal.Data.Stream.IsStream.Type + * Streamly.Internal.Data.Stream.IsStream.Generate + * Streamly.Internal.Data.Stream.IsStream.Eliminate + * Streamly.Internal.Data.Stream.IsStream.Transform + * Streamly.Internal.Data.Stream.IsStream.Expand + * Streamly.Internal.Data.Stream.IsStream.Reduce + * Streamly.Internal.Data.Stream.IsStream.Exception + * Streamly.Internal.Data.Stream.IsStream.Lift + * Streamly.Internal.Data.Stream.IsStream.Top + * Streamly.Internal.Data.Stream.IsStream.Combinators + * Streamly.Internal.Data.Stream.IsStream.Common + * Streamly.Internal.Data.Stream.IsStream.Enumeration + * Streamly.Internal.Data.Stream.SVar.Generate + * Streamly.Internal.Data.Stream.SVar.Eliminate + * Streamly.Internal.Data.Fold.SVar + * Streamly.Internal.Data.SVar.Worker + * Streamly.Internal.Data.SVar.Dispatch + * Streamly.Internal.Data.SVar.Pull + ## 0.11.0 See diff --git a/hie.yaml b/hie.yaml index 517d6025d2..085a65e20a 100644 --- a/hie.yaml +++ b/hie.yaml @@ -18,6 +18,8 @@ cradle: cabal: - path: "./benchmark/lib/" component: "lib:streamly-benchmarks" + - path: "./benchmark/NanoBenchmarks.hs" + component: "exe:nano-bench" - path: "./benchmark/Streamly/Benchmark/Data/Array.hs" component: "bench:Data.Array" - path: "./benchmark/Streamly/Benchmark/Data/Array/Generic.hs" @@ -26,7 +28,7 @@ cradle: component: "bench:Data.Array.Stream" - path: "./benchmark/Streamly/Benchmark/Data/Fold.hs" component: "bench:Data.Fold" - - path: "./benchmark/Streamly/Benchmark/Data/Fold/Prelude.hs" + - path: "./benchmark/Streamly/Benchmark/Data/Fold/Prelude.hs" component: "bench:Data.Fold.Prelude" - path: "./benchmark/Streamly/Benchmark/Data/Fold/Window.hs" component: "bench:Data.Fold.Window" @@ -36,44 +38,50 @@ cradle: component: "bench:Data.Parser" - path: "./benchmark/Streamly/Benchmark/Data/ParserK.hs" component: "bench:Data.ParserK" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Common.hs" - component: "bench:Data.Stream" - - path: 
"./benchmark/Streamly/Benchmark/Data/Stream/Concurrent.hs" - component: "bench:Data.Stream.Concurrent" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentCommon.hs" - component: "bench:Data.Stream.Concurrent" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentInterleaved.hs" - component: "bench:Data.Stream.ConcurrentInterleaved" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentEager.hs" - component: "bench:Data.Stream.ConcurrentEager" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/ConcurrentOrdered.hs" - component: "bench:Data.Stream.ConcurrentOrdered" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Eliminate.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Exceptions.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Expand.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Generate.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Lift.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Reduce.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Split.hs" - component: "bench:Data.Stream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Transform.hs" + - path: "./benchmark/Streamly/Benchmark/Data/ParserK.hs" + component: "bench:Data.ParserK.Chunked" + - path: "./benchmark/Streamly/Benchmark/Data/ParserK.hs" + component: "bench:Data.ParserK.Chunked.Generic" + - path: "./benchmark/Streamly/Benchmark/Data/RingArray.hs" + component: "bench:Data.RingArray" + - path: "./benchmark/Streamly/Benchmark/Data/Scanl.hs" + component: "bench:Data.Scanl" + - path: "./benchmark/Streamly/Benchmark/Data/Scanl/Concurrent.hs" + component: "bench:Data.Scanl.Concurrent" + - path: "./benchmark/Streamly/Benchmark/Data/Scanl/Window.hs" + component: "bench:Data.Scanl.Window" + - path: 
"./benchmark/Streamly/Benchmark/Data/Serialize.hs" + component: "bench:Data.Serialize" + - path: "./benchmark/Streamly/Benchmark/Data/Array/SmallArray.hs" + component: "bench:Data.SmallArray" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/" component: "bench:Data.Stream" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Adaptive.hs" + component: "bench:Data.Stream.Adaptive" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/ConcurrentThreadHeavy.hs" + component: "bench:Data.Stream.ConcurrentThreadHeavy" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Exceptions.hs" + component: "bench:Data.Stream.Prelude" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Exceptions.hs" + component: "bench:Data.Stream.Prelude.Exceptions" + - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Rate.hs" + component: "bench:Data.Stream.Rate" - path: "./benchmark/Streamly/Benchmark/Data/StreamK.hs" component: "bench:Data.StreamK" - path: "./benchmark/Streamly/Benchmark/Data/StreamK/FromStream.hs" component: "bench:Data.StreamK.FromStream" - - path: "./benchmark/Streamly/Benchmark/Data/Stream/Prelude/Exceptions.hs" - component: "bench:Data.Stream.Prelude.Exceptions" + - path: "./benchmark/Streamly/Benchmark/Data/StreamK/StreamKAlt.hs" + component: "bench:Data.StreamK.StreamKAlt" + - path: "./benchmark/Streamly/Benchmark/Data/Serialize.hs" + component: "bench:Data.Unbox" + - path: "./benchmark/Streamly/Benchmark/Data/Serialize.hs" + component: "bench:Data.Unbox.Derive.TH" - path: "./benchmark/Streamly/Benchmark/Data/Unfold.hs" component: "bench:Data.Unfold" - path: "./benchmark/Streamly/Benchmark/Data/Unfold/Prelude1.hs" component: "bench:Data.Unfold.Prelude" + - path: "./benchmark/Streamly/Benchmark/FileSystem/DirIO.hs" + component: "bench:FileSystem.DirIO" - path: "./benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs" component: "bench:FileSystem.Handle" - path: 
"./benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs" @@ -104,6 +112,8 @@ cradle: component: "test:Data.Fold.Window" - path: "./test/Streamly/Test/Data/List.hs" component: "test:Data.List" + - path: "./test/Streamly/Test/Data/List.hs" + component: "test:Data.List.Base" - path: "./test/Streamly/Test/Data/MutArray.hs" component: "test:Data.MutArray" - path: "./test/Streamly/Test/Data/Parser.hs" @@ -112,18 +122,36 @@ cradle: component: "test:Data.Parser" - path: "./test/Streamly/Test/Data/ParserK.hs" component: "test:Data.ParserK" + - path: "./test/Streamly/Test/Data/RingArray.hs" + component: "test:Data.RingArray" + - path: "./test/Streamly/Test/Data/Scanl/Concurrent.hs" + component: "test:Data.Scanl.Concurrent" + - path: "./test/Streamly/Test/Data/Serialize.hs" + component: "test:Data.Serialize" + - path: "./test/Streamly/Test/Data/Serialize.hs" + component: "test:Data.Serialize.ENABLE_constructorTagAsString" + - path: "./test/Streamly/Test/Data/SmallArray.hs" + component: "test:Data.SmallArray" - path: "./test/Streamly/Test/Data/Stream.hs" component: "test:Data.Stream" - - path: "./test/Streamly/Test/Data/Stream/Common.hs" - component: "test:Data.Stream.Concurrent" - path: "./test/Streamly/Test/Data/Stream/Concurrent.hs" component: "test:Data.Stream.Concurrent" - path: "./test/Streamly/Test/Data/Stream/Exception.hs" component: "test:Data.Stream.Exception" + - path: "./test/Streamly/Test/Data/Stream/Rate.hs" + component: "test:Data.Stream.Rate" - path: "./test/Streamly/Test/Data/Stream/Time.hs" component: "test:Data.Stream.Time" + - path: "./test/Streamly/Test/Data/Unbox.hs" + component: "test:Data.Serialize.Derive.TH" - path: "./test/Streamly/Test/Data/Unbox.hs" component: "test:Data.Unbox" + - path: "./test/Streamly/Test/Data/Unbox.hs" + component: "test:Data.Unbox.Derive.Generic" + - path: "./test/Streamly/Test/Data/Unbox.hs" + component: "test:Data.Unbox.Derive.TH" + - path: "./test/Streamly/Test/Data/Unbox/TH.hs" + component: "test:Data.Unbox.TH" - 
path: "./test/Streamly/Test/Data/Unfold.hs" component: "test:Data.Unfold" - path: "./test/Streamly/Test/FileSystem/DirIO.hs" @@ -160,6 +188,8 @@ cradle: component: "test:Prelude.Rate" - path: "./test/Streamly/Test/Prelude/Serial.hs" component: "test:Prelude.Serial" + - path: "./test/Streamly/Test/Prelude/Top.hs" + component: "test:Prelude.Top" - path: "./test/Streamly/Test/Prelude/WAsync.hs" component: "test:Prelude.WAsync" - path: "./test/Streamly/Test/Prelude/WSerial.hs" @@ -168,8 +198,12 @@ cradle: component: "test:Prelude.ZipAsync" - path: "./test/Streamly/Test/Prelude/ZipSerial.hs" component: "test:Prelude.ZipSerial" + - path: "./test/Streamly/Test/Serialize/Serializable.hs" + component: "test:Data.Binary" - path: "./test/Streamly/Test/Serialize/Serializable.hs" component: "test:Serialize.Serializable" + - path: "./test/Streamly/Test/Unicode/Char.hs" + component: "test:Unicode.Char" - path: "./test/Streamly/Test/Unicode/Parser.hs" component: "test:Unicode.Parser" - path: "./test/Streamly/Test/Unicode/Stream.hs" diff --git a/src/Streamly/Internal/Data/Fold/Prelude.hs b/src/Streamly/Internal/Data/Fold/Prelude.hs index 05d5a533f4..f6f9b22334 100644 --- a/src/Streamly/Internal/Data/Fold/Prelude.hs +++ b/src/Streamly/Internal/Data/Fold/Prelude.hs @@ -16,12 +16,9 @@ module Streamly.Internal.Data.Fold.Prelude , module Streamly.Internal.Data.Fold.Concurrent -- * Time , module Streamly.Internal.Data.Fold.Time - -- * Deprecated - , module Streamly.Internal.Data.Fold.SVar ) where import Streamly.Internal.Data.Fold.Channel import Streamly.Internal.Data.Fold.Concurrent -import Streamly.Internal.Data.Fold.SVar import Streamly.Internal.Data.Fold.Time diff --git a/src/Streamly/Internal/Data/Unfold/Prelude.hs b/src/Streamly/Internal/Data/Unfold/Prelude.hs index a2f0ad4d19..02a9bb1d3c 100644 --- a/src/Streamly/Internal/Data/Unfold/Prelude.hs +++ b/src/Streamly/Internal/Data/Unfold/Prelude.hs @@ -11,10 +11,7 @@ module Streamly.Internal.Data.Unfold.Prelude ( module 
Streamly.Internal.Data.Unfold.Exception - -- * Deprecated - , module Streamly.Internal.Data.Unfold.SVar ) where import Streamly.Internal.Data.Unfold.Exception -import Streamly.Internal.Data.Unfold.SVar diff --git a/streamly.cabal b/streamly.cabal index 200d77da96..93bc978258 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -79,6 +79,7 @@ extra-source-files: benchmark/Streamly/Benchmark/Data/Fold/*.hs benchmark/Streamly/Benchmark/Data/Fold/Window.hs benchmark/Streamly/Benchmark/Data/MutArray.hs + benchmark/Streamly/Benchmark/Data/Parser/*.hs benchmark/Streamly/Benchmark/Data/RingArray.hs benchmark/Streamly/Benchmark/Data/Scanl/*.hs benchmark/Streamly/Benchmark/Data/Serialize/*.hs @@ -284,7 +285,7 @@ common compile-options -Wall-missed-specialisations if flag(limit-build-mem) - ghc-options: +RTS -M1000M -RTS + ghc-options: +RTS -M600M -RTS if flag(internal-use-unliftio) cpp-options: -DUSE_UNLIFTIO @@ -430,6 +431,15 @@ library , Streamly.Network.Socket , Streamly.Network.Inet.TCP + if !impl(ghcjs) && flag(internal-dev) + other-modules: + Streamly.Internal.System.IOVec.Type + , Streamly.Internal.System.IOVec + , Streamly.Internal.FileSystem.FDIO + , Streamly.Internal.FileSystem.FD + + if flag(internal-dev) + exposed-modules: Streamly.Internal.Data.SmallArray -- Deprecated , Streamly.Data.Array.Foreign , Streamly.Data.Fold.Tee @@ -448,19 +458,35 @@ library , Streamly.Internal.Data.Stream.IsStream - if !impl(ghcjs) && flag(internal-dev) - other-modules: - Streamly.Internal.System.IOVec.Type - , Streamly.Internal.System.IOVec - , Streamly.Internal.FileSystem.FDIO - , Streamly.Internal.FileSystem.FD - - if flag(internal-dev) - exposed-modules: Streamly.Internal.Data.SmallArray -- Exposed modules show up on hackage irrespective of the flag, so keep -- it hidden. 
- other-modules: Streamly.Data.SmallArray - , Streamly.Internal.Data.SmallArray.Type + other-modules: + Streamly.Data.SmallArray + , Streamly.Internal.Data.SmallArray.Type + -- Deprecated + , Streamly.Internal.Data.Unfold.SVar + , Streamly.Internal.Data.Stream.Common + , Streamly.Internal.Data.Stream.IsStream.Type + , Streamly.Internal.Data.Stream.IsStream.Generate + , Streamly.Internal.Data.Stream.IsStream.Eliminate + , Streamly.Internal.Data.Stream.IsStream.Transform + , Streamly.Internal.Data.Stream.IsStream.Expand + , Streamly.Internal.Data.Stream.IsStream.Reduce + , Streamly.Internal.Data.Stream.IsStream.Exception + , Streamly.Internal.Data.Stream.IsStream.Lift + , Streamly.Internal.Data.Stream.IsStream.Top + , Streamly.Internal.Data.Stream.IsStream.Combinators + , Streamly.Internal.Data.Stream.IsStream.Common + , Streamly.Internal.Data.Stream.IsStream.Enumeration + + , Streamly.Internal.Data.Stream.SVar.Generate + , Streamly.Internal.Data.Stream.SVar.Eliminate + + , Streamly.Internal.Data.Fold.SVar + + , Streamly.Internal.Data.SVar.Worker + , Streamly.Internal.Data.SVar.Dispatch + , Streamly.Internal.Data.SVar.Pull if os(windows) exposed-modules: Streamly.Internal.FileSystem.Event.Windows @@ -499,31 +525,6 @@ library , Streamly.Internal.Data.Scanl.Concurrent , Streamly.Internal.Data.Unfold.Exception - , Streamly.Internal.Data.Unfold.SVar - - -- Deprecated - , Streamly.Internal.Data.Stream.Common - , Streamly.Internal.Data.Stream.IsStream.Type - , Streamly.Internal.Data.Stream.IsStream.Generate - , Streamly.Internal.Data.Stream.IsStream.Eliminate - , Streamly.Internal.Data.Stream.IsStream.Transform - , Streamly.Internal.Data.Stream.IsStream.Expand - , Streamly.Internal.Data.Stream.IsStream.Reduce - , Streamly.Internal.Data.Stream.IsStream.Exception - , Streamly.Internal.Data.Stream.IsStream.Lift - , Streamly.Internal.Data.Stream.IsStream.Top - , Streamly.Internal.Data.Stream.IsStream.Combinators - , Streamly.Internal.Data.Stream.IsStream.Common - , 
Streamly.Internal.Data.Stream.IsStream.Enumeration - - , Streamly.Internal.Data.Stream.SVar.Generate - , Streamly.Internal.Data.Stream.SVar.Eliminate - - , Streamly.Internal.Data.Fold.SVar - - , Streamly.Internal.Data.SVar.Worker - , Streamly.Internal.Data.SVar.Dispatch - , Streamly.Internal.Data.SVar.Pull build-depends: -- Core libraries shipped with ghc, the min and max @@ -535,7 +536,6 @@ library base >= 4.12 && < 4.22 , fusion-plugin-types >= 0.1 && < 0.2 , containers >= 0.6.0 && < 0.8 - , deepseq >= 1.4.4 && < 1.6 , exceptions >= 0.8.0 && < 0.11 , mtl >= 2.2.2 && < 2.4 , transformers >= 0.5.5 && < 0.7 @@ -572,6 +572,7 @@ library if flag(internal-dev) build-depends: primitive >= 0.5.4 && < 0.9 + , deepseq >= 1.4.4 && < 1.6 -- For FileSystem.Event module if os(linux) diff --git a/targets/Targets.hs b/targets/Targets.hs index c4ecc24d25..596420c1e5 100644 --- a/targets/Targets.hs +++ b/targets/Targets.hs @@ -107,6 +107,12 @@ targets = , "noTest" ] ) + , ("Data.Scanl.Concurrent", + [ "infinite_grp" + , "fold_parser_grp" + , "noTest" + ] + ) , ("Data.Scanl.Window", [ "infinite_grp" , "fold_parser_grp" @@ -229,6 +235,13 @@ targets = , "streamly_core_grp" ] ) + , ("Data.Unfold.Prelude", + [ "infinite_grp" + , "serial_stream_grp" + , "noTest" + ] + ) + , ("FileSystem.DirIO", []) , ("FileSystem.Event", [ "noBench" ] @@ -253,7 +266,6 @@ targets = ) #endif , ("FileSystem.Handle", []) - , ("FileSystem.DirIO", []) , ("Network.Inet.TCP", ["noBench"]) , ("Network.Socket", ["noBench"]) , ("Unicode.Char", ["testDevOnly"]) diff --git a/test/Streamly/Test/Unicode/Stream.hs b/test/Streamly/Test/Unicode/Stream.hs index c36de806de..1d61ad49af 100644 --- a/test/Streamly/Test/Unicode/Stream.hs +++ b/test/Streamly/Test/Unicode/Stream.hs @@ -56,8 +56,10 @@ genUnicode = listOf arbitraryUnicodeChar genWord8List :: Gen [Word8] genWord8List = listOf arbitrary +{- genListOfW8List :: Gen [[Word8]] genListOfW8List = listOf (listOf arbitrary) +-} propDecodeEncodeId' :: Property 
propDecodeEncodeId' = @@ -78,6 +80,7 @@ propDecodeEncodeUtf16Id encoder decoder = chrs <- run $ Stream.toList $ decoder wrds assertEq chrs list +{- propMkEvenW8Chunks :: Property propMkEvenW8Chunks = forAll genListOfW8List $ \list -> @@ -93,6 +96,7 @@ propMkEvenW8Chunks = if (odd (length concatedList)) then assertEq concatedList1 (init concatedList) else assertEq concatedList1 concatedList +-} -- XXX need to use invalid characters propDecodeEncodeId :: Property @@ -229,7 +233,7 @@ main = H.hspec (propDecodeEncodeUtf16Id IUS.encodeUtf16le' IUS.decodeUtf16le') prop "decodeUtf16le . encodeUtf16le == id" (propDecodeEncodeUtf16Id IUS.encodeUtf16le' IUS.decodeUtf16le) - prop "mkEvenW8Chunks" propMkEvenW8Chunks + -- prop "mkEvenW8Chunks" propMkEvenW8Chunks H.describe "Latin1 - Encoding / Decoding" $ do prop "ASCII to Latin1" propASCIIToLatin1 diff --git a/test/streamly-tests.cabal b/test/streamly-tests.cabal index 7dbe887a7f..f6052aafee 100644 --- a/test/streamly-tests.cabal +++ b/test/streamly-tests.cabal @@ -189,7 +189,7 @@ library exposed-modules: Streamly.Test.Common Streamly.Test.Parser.Common - if !flag(use-streamly-core) + if !flag(use-streamly-core) && flag(dev) exposed-modules: Streamly.Test.Prelude.Common if flag(limit-build-mem) ghc-options: +RTS -M1500M -RTS @@ -547,6 +547,8 @@ test-suite Prelude ghc-options: -main-is Streamly.Test.Prelude.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Ahead import: test-options @@ -555,6 +557,8 @@ test-suite Prelude.Ahead ghc-options: -main-is Streamly.Test.Prelude.Ahead.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Async import: test-options @@ -563,6 +567,8 @@ test-suite Prelude.Async ghc-options: -main-is Streamly.Test.Prelude.Async.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Concurrent import: test-options @@ -573,6 +579,8 @@ test-suite 
Prelude.Concurrent ghc-options: +RTS -M2000M -RTS if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Fold import: test-options @@ -581,6 +589,8 @@ test-suite Prelude.Fold ghc-options: -main-is Streamly.Test.Prelude.Fold.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Parallel import: test-options @@ -589,6 +599,8 @@ test-suite Prelude.Parallel ghc-options: -main-is Streamly.Test.Prelude.Parallel.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Rate import:always-optimized @@ -611,6 +623,8 @@ test-suite Prelude.Serial ghc-options: +RTS -M1500M -RTS if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.Top import: test-options @@ -618,6 +632,8 @@ test-suite Prelude.Top main-is: Streamly/Test/Prelude/Top.hs if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.WAsync import: test-options @@ -626,6 +642,8 @@ test-suite Prelude.WAsync ghc-options: -main-is Streamly.Test.Prelude.WAsync.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.WSerial import: test-options @@ -634,6 +652,8 @@ test-suite Prelude.WSerial ghc-options: -main-is Streamly.Test.Prelude.WSerial.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.ZipAsync import: test-options @@ -644,6 +664,8 @@ test-suite Prelude.ZipAsync ghc-options: +RTS -M750M -RTS if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False test-suite Prelude.ZipSerial import: test-options @@ -652,3 +674,5 @@ test-suite Prelude.ZipSerial ghc-options: -main-is Streamly.Test.Prelude.ZipSerial.main if flag(use-streamly-core) buildable: False + if !flag(dev) + buildable: False