{-# LANGUAGE BangPatterns,CPP #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE ViewPatterns #-}
module Data.Text.Lazy.Encoding
(
decodeLatin1
, decodeUtf8'
, decodeUtf8With
, decodeUtf16LEWith
, decodeUtf16BEWith
, decodeUtf32LEWith
, decodeUtf32BEWith
, decodeASCII
, decodeUtf8
, decodeUtf16LE
, decodeUtf16BE
, decodeUtf32LE
, decodeUtf32BE
, encodeUtf8
, encodeUtf16LE
, encodeUtf16BE
, encodeUtf32LE
, encodeUtf32BE
, encodeUtf8Builder
, encodeUtf8BuilderEscaped
) where
import Control.Exception (evaluate, try)
import Data.Monoid (Monoid(..))
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
import Data.Word (Word8)
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Internal.Encoding as TE
import qualified Data.Text.Internal.Lazy.Encoding.Fusion as E
import qualified Data.Text.Internal.Lazy.Fusion as F
import qualified Data.Text.Internal.StrictBuilder as SB
import Data.Text.Unsafe (unsafeDupablePerformIO)
-- | Decode a lazy 'B.ByteString' chunk by chunk with the strict
-- 'TE.decodeASCII'.
--
-- The input is split into its strict chunks with 'B.toChunks'; each chunk is
-- decoded independently and prepended to the result with 'chunk', starting
-- from 'empty'. The right fold keeps the result lazy in the input chunks.
decodeASCII :: B.ByteString -> Text
decodeASCII = foldr (chunk . TE.decodeASCII) empty . B.toChunks
-- | Decode a lazy 'B.ByteString' chunk by chunk with the strict
-- 'TE.decodeLatin1'.
--
-- The input is split into its strict chunks with 'B.toChunks'; each chunk is
-- decoded independently and prepended to the result with 'chunk', starting
-- from 'empty'. The right fold keeps the result lazy in the input chunks.
decodeLatin1 :: B.ByteString -> Text
decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
-- | Decode a lazy 'B.ByteString' as UTF-8, one strict chunk at a time,
-- passing the supplied 'OnDecodeError' handler to the underlying decoder.
--
-- The decoder state returned for one chunk is fed into the decoding of the
-- next chunk (presumably so that a code point split across a chunk boundary
-- is still decoded correctly -- confirm against 'TE.decodeUtf8With2').
-- When the input is exhausted, the final state is handed to
-- 'TE.skipIncomplete' together with the handler.
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
decodeUtf8With onErr = loop TE.startUtf8State
  where
    -- Prepend the text accumulated in a builder to the rest of the output,
    -- skipping builders of length 0 so no chunk is emitted for them.
    chunkb builder t
      | SB.sbLength builder == 0 = t
      | otherwise                = Chunk (TE.strictBuilderToText builder) t

    -- Decode one input chunk, then continue lazily with the remaining
    -- chunks using the updated decoder state. The second component of the
    -- decoder's result is not used here.
    loop s (B.Chunk b bs) =
      case TE.decodeUtf8With2 onErr msg s b of
        (builder, _, s') -> chunkb builder (loop s' bs)
    -- End of input: emit whatever 'TE.skipIncomplete' produces for the
    -- final decoder state.
    loop s B.Empty = chunkb (TE.skipIncomplete onErr msg s) Empty

    -- Description passed to the decoder alongside the error handler.
    msg :: String
    msg = "Data.Text.Internal.Encoding: Invalid UTF-8 stream"
-- | Decode a lazy 'B.ByteString' as UTF-8 using the 'strictDecode' error
-- handler.
--
-- This is 'decodeUtf8With' specialised to 'strictDecode'; how invalid input
-- is reported is therefore determined entirely by that handler.
decodeUtf8 :: B.ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
-- | Decode a lazy 'B.ByteString' as UTF-8, returning a 'UnicodeException'
-- as a 'Left' value instead of letting it propagate.
--
-- The text is produced by 'decodeUtf8' and its whole chunk spine is forced
-- inside 'try', so an exception raised while producing any chunk is caught
-- here rather than escaping later when the result is consumed. As a
-- consequence the entire input is decoded before a result is returned.
decodeUtf8' :: B.ByteString -> Either UnicodeException Text
decodeUtf8' bs = unsafeDupablePerformIO $ do
    let t = decodeUtf8 bs
    -- Walk every chunk before handing the text back, so decoding has
    -- finished while the exception handler is still installed.
    try (evaluate (rnf t `seq` t))
  where
    -- Traverse the chunk list to its end; the chunk payloads are ignored.
    rnf Empty        = ()
    rnf (Chunk _ ts) = rnf ts
{-# INLINE decodeUtf8' #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString', chunk by chunk, using the
-- strict 'TE.encodeUtf8'.
--
-- Each text chunk becomes one 'B.Chunk' of the output, built directly with
-- the lazy ByteString constructors and terminated by 'B.Empty'.
encodeUtf8 :: Text -> B.ByteString
encodeUtf8 = foldrChunks (B.Chunk . TE.encodeUtf8) B.Empty
-- | Encode lazy 'Text' to a 'B.Builder', chunk by chunk, using the strict
-- 'TE.encodeUtf8Builder'.
--
-- The per-chunk builders are combined with 'mappend' in chunk order,
-- ending with 'mempty'.
encodeUtf8Builder :: Text -> B.Builder
encodeUtf8Builder =
    foldrChunks (\c b -> TE.encodeUtf8Builder c `mappend` b) mempty
-- | Encode lazy 'Text' to a 'B.Builder', chunk by chunk, using the strict
-- 'TE.encodeUtf8BuilderEscaped' with the given 'BP.BoundedPrim'.
--
-- The same primitive is passed to the encoder for every chunk; the
-- per-chunk builders are combined with 'mappend' in chunk order, ending
-- with 'mempty'.
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
encodeUtf8BuilderEscaped prim =
    foldrChunks (\c b -> TE.encodeUtf8BuilderEscaped prim c `mappend` b) mempty
{-# INLINE encodeUtf8BuilderEscaped #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-16, passing the
-- supplied 'OnDecodeError' handler to the decoder.
--
-- The bytes are turned into a character stream by 'E.streamUtf16LE' and the
-- stream is materialised as lazy 'Text' by 'F.unstream'.
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-16 using the
-- 'strictDecode' error handler.
--
-- This is 'decodeUtf16LEWith' specialised to 'strictDecode'.
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-16, passing the
-- supplied 'OnDecodeError' handler to the decoder.
--
-- The bytes are turned into a character stream by 'E.streamUtf16BE' and the
-- stream is materialised as lazy 'Text' by 'F.unstream'.
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-16 using the
-- 'strictDecode' error handler.
--
-- This is 'decodeUtf16BEWith' specialised to 'strictDecode'.
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString', chunk by chunk, using the
-- strict 'TE.encodeUtf16LE'.
--
-- The encoded chunks are collected into a list in chunk order and joined
-- with 'B.fromChunks'.
encodeUtf16LE :: Text -> B.ByteString
encodeUtf16LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16LE) [] txt)
{-# INLINE encodeUtf16LE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString', chunk by chunk, using the
-- strict 'TE.encodeUtf16BE'.
--
-- The encoded chunks are collected into a list in chunk order and joined
-- with 'B.fromChunks'.
encodeUtf16BE :: Text -> B.ByteString
encodeUtf16BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16BE) [] txt)
{-# INLINE encodeUtf16BE #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-32, passing the
-- supplied 'OnDecodeError' handler to the decoder.
--
-- The bytes are turned into a character stream by 'E.streamUtf32LE' and the
-- stream is materialised as lazy 'Text' by 'F.unstream'.
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-32 using the
-- 'strictDecode' error handler.
--
-- This is 'decodeUtf32LEWith' specialised to 'strictDecode'.
decodeUtf32LE :: B.ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-32, passing the
-- supplied 'OnDecodeError' handler to the decoder.
--
-- The bytes are turned into a character stream by 'E.streamUtf32BE' and the
-- stream is materialised as lazy 'Text' by 'F.unstream'.
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-32 using the
-- 'strictDecode' error handler.
--
-- This is 'decodeUtf32BEWith' specialised to 'strictDecode'.
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString', chunk by chunk, using the
-- strict 'TE.encodeUtf32LE'.
--
-- The encoded chunks are collected into a list in chunk order and joined
-- with 'B.fromChunks'.
encodeUtf32LE :: Text -> B.ByteString
encodeUtf32LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32LE) [] txt)
{-# INLINE encodeUtf32LE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString', chunk by chunk, using the
-- strict 'TE.encodeUtf32BE'.
--
-- The encoded chunks are collected into a list in chunk order and joined
-- with 'B.fromChunks'.
encodeUtf32BE :: Text -> B.ByteString
encodeUtf32BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32BE) [] txt)
{-# INLINE encodeUtf32BE #-}