{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.Text.Internal.Validate.Native
( isValidUtf8ByteStringHaskell
, isValidUtf8ByteArrayHaskell
) where
import Data.Array.Byte (ByteArray(ByteArray))
import Data.ByteString (ByteString)
import GHC.Exts (ByteArray#,Int(I#),indexWord8Array#)
import GHC.Word (Word8(W8#))
import Data.Text.Internal.Encoding.Utf8 (CodePoint(..),DecoderResult(..),utf8DecodeStart,utf8DecodeContinue)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
isValidUtf8ByteStringHaskell :: ByteString -> Bool
isValidUtf8ByteStringHaskell :: ByteString -> Bool
isValidUtf8ByteStringHaskell ByteString
bs = Int -> Bool
start Int
0
where
start :: Int -> Bool
start Int
ix
| Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
B.length ByteString
bs = Bool
True
| Bool
otherwise = case Word8 -> DecoderResult
utf8DecodeStart (ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs Int
ix) of
Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Reject{} -> Bool
False
Incomplete DecoderState
st CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st
step :: Int -> DecoderState -> Bool
step Int
ix DecoderState
st
| Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ByteString -> Int
B.length ByteString
bs = Bool
False
| Bool
otherwise = case Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue (ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs Int
ix) DecoderState
st (Int -> CodePoint
CodePoint Int
0) of
Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Reject{} -> Bool
False
Incomplete DecoderState
st' CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st'
isValidUtf8ByteArrayHaskell ::
ByteArray
-> Int
-> Int
-> Bool
isValidUtf8ByteArrayHaskell :: ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArrayHaskell (ByteArray ByteArray#
b) !Int
off !Int
len = Int -> Bool
start Int
off
where
indexWord8 :: ByteArray# -> Int -> Word8
indexWord8 :: ByteArray# -> Int -> Word8
indexWord8 !ByteArray#
x (I# Int#
i) = Word8# -> Word8
W8# (ByteArray# -> Int# -> Word8#
indexWord8Array# ByteArray#
x Int#
i)
start :: Int -> Bool
start Int
ix
| Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len = Bool
True
| Bool
otherwise = case Word8 -> DecoderResult
utf8DecodeStart (ByteArray# -> Int -> Word8
indexWord8 ByteArray#
b Int
ix) of
Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Reject{} -> Bool
False
Incomplete DecoderState
st CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st
step :: Int -> DecoderState -> Bool
step Int
ix DecoderState
st
| Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len = Bool
False
| Bool
otherwise = case Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue (ByteArray# -> Int -> Word8
indexWord8 ByteArray#
b Int
ix) DecoderState
st (Int -> CodePoint
CodePoint Int
0) of
Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Reject{} -> Bool
False
Incomplete DecoderState
st' CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st'