Skip to content

Commit 01606bd

Browse files
committed
scanl/scanr should replace invalid Char in the initial value
1 parent dbb4249 commit 01606bd

File tree

3 files changed

+83
-10
lines changed

3 files changed

+83
-10
lines changed

src/Data/Text.hs

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, o
254254
import qualified Data.Text.Internal.Fusion as S
255255
import qualified Data.Text.Internal.Fusion.Common as S
256256
import Data.Text.Encoding (decodeUtf8', encodeUtf8Builder)
257-
import Data.Text.Internal.Fusion (stream, reverseStream, unstream)
257+
import Data.Text.Internal.Fusion (stream, unstream)
258258
import Data.Text.Internal.Private (span_)
259259
import Data.Text.Internal (Text(..), StrictText, empty, firstf, mul, safe, text, append, pack)
260260
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
@@ -1210,8 +1210,32 @@ minimum = foldl1' min
12101210
--
12111211
-- @'last' ('scanl' f z xs) = 'foldl' f z xs@
12121212
scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
1213-
scanl f z t = unstream (S.scanl g z (stream t))
1214-
where g a b = safe (f a b)
1213+
scanl f c0 = go
1214+
where
1215+
go (Text src o l) = runST $ do
1216+
let l' = l + 4
1217+
c0' = safe c0
1218+
marr <- A.new l'
1219+
d' <- unsafeWrite marr 0 c0'
1220+
outer marr l' o d' c0'
1221+
where
1222+
outer :: forall s. A.MArray s -> Int -> Int -> Int -> Char -> ST s Text
1223+
outer !dst !dstLen = inner
1224+
where
1225+
inner !srcOff !dstOff !c
1226+
| srcOff >= l + o = do
1227+
A.shrinkM dst dstOff
1228+
arr <- A.unsafeFreeze dst
1229+
pure $ Text arr 0 dstOff
1230+
| dstOff + 4 > dstLen = do
1231+
let !dstLen' = dstLen + (l + o) - srcOff + 4
1232+
dst' <- A.resizeM dst dstLen'
1233+
outer dst' dstLen' srcOff dstOff c
1234+
| otherwise = do
1235+
let !(Iter c' d) = iterArray src srcOff
1236+
c'' = safe $ f c c'
1237+
d' <- unsafeWrite dst dstOff c''
1238+
inner (srcOff + d) (dstOff + d') c''
12151239
{-# INLINE scanl #-}
12161240

12171241
-- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
@@ -1228,8 +1252,37 @@ scanl1 f t | null t = empty
12281252
--
12291253
-- > scanr f v == reverse . scanl (flip f) v . reverse
12301254
scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
1231-
scanr f z = S.reverse . S.reverseScanr g z . reverseStream
1232-
where g a b = safe (f a b)
1255+
scanr f c0 = go
1256+
where
1257+
go (Text src o l) = runST $ do
1258+
let l' = l + 4
1259+
c0' = safe c0
1260+
!d' = utf8Length c0'
1261+
marr <- A.new l'
1262+
_ <- unsafeWrite marr (l' - d') c0'
1263+
outer marr (l + o - 1) (l' - d' - 1) c0'
1264+
where
1265+
outer :: forall s. A.MArray s -> Int -> Int -> Char -> ST s Text
1266+
outer !dst = inner
1267+
where
1268+
inner !srcOff !dstOff !c
1269+
| srcOff < o = do
1270+
dstLen <- A.getSizeofMArray dst
1271+
arr <- A.unsafeFreeze dst
1272+
pure $ Text arr (dstOff + 1) (dstLen - dstOff - 1)
1273+
| dstOff < 3 = do
1274+
dstLen <- A.getSizeofMArray dst
1275+
let !dstLen' = dstLen + (srcOff - o) + 4
1276+
dst' <- A.new dstLen'
1277+
A.copyM dst' (dstLen' - dstLen) dst 0 dstLen
1278+
outer dst' srcOff (dstOff + dstLen' - dstLen) c
1279+
| otherwise = do
1280+
let !(Iter c' d) = reverseIterArray src srcOff
1281+
c'' = safe $ f c' c
1282+
!d' = utf8Length c''
1283+
dstOff' = dstOff - d'
1284+
_ <- unsafeWrite dst (dstOff' + 1) c''
1285+
inner (srcOff + d) dstOff' c''
12331286
{-# INLINE scanr #-}
12341287

12351288
-- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting

src/Data/Text/Lazy.hs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -972,8 +972,12 @@ isAscii = foldrChunks (\chnk acc -> T.isAscii chnk && acc) True
972972
--
973973
-- > last (scanl f z xs) == foldl f z xs.
974974
scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
975-
scanl f z t = unstream (S.scanl g z (stream t))
976-
where g a b = safe (f a b)
975+
scanl f z t = cons z $ P.snd $
976+
mapAccumL (\acc c -> let c' = f acc c in (c', c')) (safe z) t
977+
-- This is a bit suboptimal: we could have used
978+
-- Data.Text.scanl for the first chunk and mapAccumL
979+
-- for subsequent ones, but but I doubt anyone cares
980+
-- about the performance of 'scanl' much.
977981
{-# INLINE scanl #-}
978982

979983
-- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
@@ -991,8 +995,10 @@ scanl1 f t0 = case uncons t0 of
991995
--
992996
-- > scanr f v == reverse . scanl (flip f) v . reverse
993997
scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
994-
scanr f v = reverse . scanl g v . reverse
995-
where g a b = safe (f b a)
998+
scanr f z t = (`snoc` z) $ P.snd $
999+
mapAccumR (\acc c -> let c' = f c acc in (c', c')) (safe z) t
1000+
-- See the comment for 'scanl' above.
1001+
{-# INLINE scanr #-}
9961002

9971003
-- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting
9981004
-- value argument. Performs replacement on invalid scalar values.

tests/Tests/Properties/Folds.hs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,15 @@ tl_scanr (applyFun2 -> f) z = L.scanr f z `eqP` (unpackS . TL.scanr f z)
140140
t_scanr1 (applyFun2 -> f) = L.scanr1 f `eqP` (unpackS . T.scanr1 f)
141141
tl_scanr1 (applyFun2 -> f) = L.scanr1 f `eqP` (unpackS . TL.scanr1 f)
142142

143+
t_scanl_is_safe = let c = '\55296' in
144+
T.scanl undefined c mempty === T.singleton c
145+
tl_scanl_is_safe = let c = '\55296' in
146+
TL.scanl undefined c mempty === TL.singleton c
147+
t_scanr_is_safe = let c = '\55296' in
148+
T.scanr undefined c mempty === T.singleton c
149+
tl_scanr_is_safe = let c = '\55296' in
150+
TL.scanr undefined c mempty === TL.singleton c
151+
143152
t_mapAccumL_char c t =
144153
snd (T.mapAccumL (const (const (0 :: Int, c))) 0 t) === T.replicate (T.length t) (T.singleton c)
145154
t_mapAccumL (applyFun2 -> f) z = L.mapAccumL f z `eqP` (second unpackS . T.mapAccumL f z)
@@ -303,7 +312,12 @@ testFolds =
303312
testProperty "t_scanr" t_scanr,
304313
testProperty "tl_scanr" tl_scanr,
305314
testProperty "t_scanr1" t_scanr1,
306-
testProperty "tl_scanr1" tl_scanr1
315+
testProperty "tl_scanr1" tl_scanr1,
316+
317+
testProperty "t_scanl_is_safe" t_scanl_is_safe,
318+
testProperty "tl_scanl_is_safe" tl_scanl_is_safe,
319+
testProperty "t_scanr_is_safe" t_scanr_is_safe,
320+
testProperty "tl_scanr_is_safe" tl_scanr_is_safe
307321
],
308322

309323
testGroup "mapAccum" [

0 commit comments

Comments
 (0)