Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions scripts/CaseMapping.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Data.List (stripPrefix)
import Data.Maybe (fromJust)
import System.Environment
import System.IO

Expand All @@ -21,6 +23,7 @@ main = do
h <- openFile oname WriteMode
let comments = map ("--" ++) $
take 2 (cfComments cfs) ++ take 2 (scComments scs)
version = parseVersion (cfComments cfs)
mapM_ (hPutStrLn h) $
["-- AUTOMATICALLY GENERATED - DO NOT EDIT"
,"-- Generated by scripts/CaseMapping.hs"] ++
Expand All @@ -31,6 +34,9 @@ main = do
,"module Data.Text.Internal.Fusion.CaseMapping where"
,"import GHC.Int"
,"import GHC.Exts"
,"import Data.Version (Version, makeVersion)"
,"unicodeVersion :: Version"
,"unicodeVersion = makeVersion " ++ version
,"unI64 :: Int64 -> _ {- unboxed Int64 -}"
,"unI64 (I64# n) = n"
,""]
Expand All @@ -39,3 +45,14 @@ main = do
mapM_ (hPutStrLn h) (mapSC "title" title toTitle scs)
mapM_ (hPutStrLn h) (mapCF cfs)
hClose h

-- Parse version from CaseFolding comments
-- and render it as a list (an argument of makeVersion)
parseVersion :: [String] -> String
parseVersion comments = fromJust $ do
line' : _ <- pure comments
line'' <- stripPrefix " CaseFolding-" line'
let (v1, line1) = span isDigit line''
(v2, line2) = span isDigit (drop 1 line1)
(v3, _) = span isDigit (drop 1 line2)
pure $ "[" ++ v1 ++ ", " ++ v2 ++ ", " ++ v3 ++ "]"
3 changes: 3 additions & 0 deletions src/Data/Text/Internal/Fusion/CaseMapping.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
module Data.Text.Internal.Fusion.CaseMapping where
import GHC.Int
import GHC.Exts
import Data.Version (Version, makeVersion)
unicodeVersion :: Version
unicodeVersion = makeVersion [17, 0, 0]
unI64 :: Int64 -> _ {- unboxed Int64 -}
unI64 (I64# n) = n

Expand Down
34 changes: 15 additions & 19 deletions tests/Tests/Properties/Text.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-- | Tests for operations that don't fit in the other @Test.Properties.*@ modules.

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}

Expand Down Expand Up @@ -32,6 +32,10 @@ import qualified Data.Text.Internal.Lazy.Search as S (indices)
import qualified Data.Text.Internal.Search as T (indices)
import qualified Data.Text.Lazy as TL
import qualified Tests.SlowFunctions as Slow
#if MIN_VERSION_base(4, 15, 0)
import qualified GHC.Unicode as G (unicodeVersion)
import qualified Data.Text.Internal.Fusion.CaseMapping as T (unicodeVersion)
#endif

t_pack_unpack = (T.unpack . T.pack) `eq` id
tl_pack_unpack = (TL.unpack . TL.pack) `eq` id
Expand Down Expand Up @@ -109,10 +113,8 @@ sf_toCaseFold_length (applyFun -> p) xs =
t_toCaseFold_length t = T.length (T.toCaseFold t) >= T.length t

tl_toCaseFold_length t = TL.length (TL.toCaseFold t) >= TL.length t
#if MIN_VERSION_base(4,16,0)
t_toCaseFold_char c = c `notElem` (toCaseFoldExceptions ++ cherokeeLower ++ cherokeeUpper) ==>
T.toCaseFold (T.singleton c) === T.singleton (C.toLower c)
#endif

-- | Baseline generated with GHC 9.2 + text-1.2.5.0,
t_toCaseFold_exceptions = T.unpack (T.toCaseFold (T.pack toCaseFoldExceptions)) === "\956ssi\775\700nsj\780\953\953\776\769\965\776\769\963\946\952\966\960\954\961\949\1381\1410\5104\5105\5106\5107\5108\5109\1074\1076\1086\1089\1090\1090\1098\1123\42571h\817t\776w\778y\778a\702\7777ss\965\787\965\787\768\965\787\769\965\787\834\7936\953\7937\953\7938\953\7939\953\7940\953\7941\953\7942\953\7943\953\7936\953\7937\953\7938\953\7939\953\7940\953\7941\953\7942\953\7943\953\7968\953\7969\953\7970\953\7971\953\7972\953\7973\953\7974\953\7975\953\7968\953\7969\953\7970\953\7971\953\7972\953\7973\953\7974\953\7975\953\8032\953\8033\953\8034\953\8035\953\8036\953\8037\953\8038\953\8039\953\8032\953\8033\953\8034\953\8035\953\8036\953\8037\953\8038\953\8039\953\8048\953\945\953\940\953\945\834\945\834\953\945\953\953\8052\953\951\953\942\953\951\834\951\834\953\951\953\953\776\768\953\776\769\953\834\953\776\834\965\776\768\965\776\769\961\787\965\834\965\776\834\8060\953\969\953\974\953\969\834\969\834\953\969\953fffiflffifflstst\1396\1398\1396\1381\1396\1387\1406\1398\1396\1389"
Expand All @@ -133,21 +135,17 @@ t_toLower_lower t = p (T.toLower t) >= p t
where p = T.length . T.filter isLower
tl_toLower_lower t = p (TL.toLower t) >= p t
where p = TL.length . TL.filter isLower
#if MIN_VERSION_base(4,13,0)
t_toLower_char c = c /= '\304' ==>
T.toLower (T.singleton c) === T.singleton (C.toLower c)
#endif
t_toLower_dotted_i = T.unpack (T.toLower (T.singleton '\304')) === "i\775"

t_toUpper_length t = T.length (T.toUpper t) >= T.length t
t_toUpper_upper t = p (T.toUpper t) >= p t
where p = T.length . T.filter isUpper
tl_toUpper_upper t = p (TL.toUpper t) >= p t
where p = TL.length . TL.filter isUpper
#if MIN_VERSION_base(4,13,0)
t_toUpper_char c = c `notElem` toUpperExceptions ==>
T.toUpper (T.singleton c) === T.singleton (C.toUpper c)
#endif

-- | Baseline generated with GHC 9.2 + text-1.2.5.0,
t_toUpper_exceptions = T.unpack (T.toUpper (T.pack toUpperExceptions)) === "SS\700NJ\780\921\776\769\933\776\769\1333\1362H\817T\776W\778Y\778A\702\933\787\933\787\768\933\787\769\933\787\834\7944\921\7945\921\7946\921\7947\921\7948\921\7949\921\7950\921\7951\921\7944\921\7945\921\7946\921\7947\921\7948\921\7949\921\7950\921\7951\921\7976\921\7977\921\7978\921\7979\921\7980\921\7981\921\7982\921\7983\921\7976\921\7977\921\7978\921\7979\921\7980\921\7981\921\7982\921\7983\921\8040\921\8041\921\8042\921\8043\921\8044\921\8045\921\8046\921\8047\921\8040\921\8041\921\8042\921\8043\921\8044\921\8045\921\8046\921\8047\921\8122\921\913\921\902\921\913\834\913\834\921\913\921\8138\921\919\921\905\921\919\834\919\834\921\919\921\921\776\768\921\776\769\921\834\921\776\834\933\776\768\933\776\769\929\787\933\834\933\776\834\8186\921\937\921\911\921\937\834\937\834\921\937\921FFFIFLFFIFFLSTST\1348\1350\1348\1333\1348\1339\1358\1350\1348\1341"
Expand All @@ -173,10 +171,8 @@ t_toTitle_1stNotLower = and . notLow . T.toTitle . T.filter stable . T.filter (n
-- Georgian text does not have a concept of title case
-- https://en.wikipedia.org/wiki/Georgian_Extended
isGeorgian c = c >= '\4256' && c < '\4352'
#if MIN_VERSION_base(4,13,0)
t_toTitle_char c = c `notElem` toTitleExceptions ==>
T.toTitle (T.singleton c) === T.singleton (C.toUpper c)
#endif

-- | Baseline generated with GHC 9.2 + text-1.2.5.0,
t_toTitle_exceptions = T.unpack (T.concatMap (T.toTitle . T.singleton) (T.pack toTitleExceptions)) === "Ss\700N\453\453\453\456\456\456\459\459\459J\780\498\498\498\921\776\769\933\776\769\1333\1410\4304\4305\4306\4307\4308\4309\4310\4311\4312\4313\4314\4315\4316\4317\4318\4319\4320\4321\4322\4323\4324\4325\4326\4327\4328\4329\4330\4331\4332\4333\4334\4335\4336\4337\4338\4339\4340\4341\4342\4343\4344\4345\4346\4349\4350\4351H\817T\776W\778Y\778A\702\933\787\933\787\768\933\787\769\933\787\834\8122\837\902\837\913\834\913\834\837\8138\837\905\837\919\834\919\834\837\921\776\768\921\776\769\921\834\921\776\834\933\776\768\933\776\769\929\787\933\834\933\776\834\8186\837\911\837\937\834\937\834\837FfFiFlFfiFflStSt\1348\1398\1348\1381\1348\1387\1358\1398\1348\1389"
Expand Down Expand Up @@ -393,9 +389,6 @@ testText =
testProperty "sf_toCaseFold_length" sf_toCaseFold_length,
testProperty "t_toCaseFold_length" t_toCaseFold_length,
testProperty "tl_toCaseFold_length" tl_toCaseFold_length,
#if MIN_VERSION_base(4,16,0)
testProperty "t_toCaseFold_char" t_toCaseFold_char,
#endif
testProperty "t_toCaseFold_exceptions" t_toCaseFold_exceptions,
testProperty "t_toCaseFold_cherokeeLower" t_toCaseFold_cherokeeLower,
testProperty "t_toCaseFold_cherokeeUpper" t_toCaseFold_cherokeeUpper,
Expand All @@ -414,13 +407,6 @@ testText =
testProperty "t_toTitle_1stNotLower" t_toTitle_1stNotLower,
testProperty "t_toTitle_exceptions" t_toTitle_exceptions,

#if MIN_VERSION_base(4,13,0)
-- Requires base compliant with Unicode 12.0
testProperty "t_toLower_char" t_toLower_char,
testProperty "t_toUpper_char" t_toUpper_char,
testProperty "t_toTitle_char" t_toTitle_char,
#endif

testProperty "t_toUpper_idempotent" t_toUpper_idempotent,
testProperty "t_toLower_idempotent" t_toLower_idempotent,
testProperty "t_toCaseFold_idempotent" t_toCaseFold_idempotent,
Expand All @@ -431,6 +417,16 @@ testText =
testProperty "ascii_toCaseFold" ascii_toCaseFold
],

#if MIN_VERSION_base(4, 15, 0)
-- Requires matching version of Unicode in base and text
testGroup "char case conversion" $ if T.unicodeVersion == G.unicodeVersion then [
Copy link
Contributor

@phadej phadej Dec 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, it's very easy to get into state where those tests are not run at all. Any upgrade's to Unicode version used by text will cause these tests to not be run, as GHC is (usually) behind.

And you kind of want these tests to be run when Unicode stuff is changed...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a downside. But on the other hand having sporadical CI failures and maintaining t_{toUpper,toTitle}_exceptions is not confidence inducing either.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That downside is why I took this long to decide to do this eventually. On the other hand, these tests will still run when CI is updated with the right GHC version. And when they do fail that will most probably mean that we need to update the list of exceptions in the test suite rather than an actual bug in the library.

testProperty "t_toCaseFold_char" t_toCaseFold_char,
testProperty "t_toLower_char" t_toLower_char,
testProperty "t_toUpper_char" t_toUpper_char,
testProperty "t_toTitle_char" t_toTitle_char
] else [],
#endif

testGroup "justification" [
testProperty "s_justifyLeft" s_justifyLeft,
testProperty "s_justifyLeft_s" s_justifyLeft_s,
Expand Down