{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE UnliftedFFITypes #-}
module Data.Text.Utf16.Lines
( I.TextLines
, I.fromText
, I.toText
, I.null
, I.lines
, I.lengthInLines
, I.splitAtLine
, length
, splitAt
, Position(..)
, lengthAsPosition
, splitAtPosition
) where
import Prelude ((+), (-), seq)
import Control.DeepSeq (NFData, rnf)
import Data.Bool (otherwise)
import Data.Eq (Eq, (==))
import Data.Function ((.), ($))
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Ord (Ord, (<=), (>), (>=))
import Data.Semigroup (Semigroup(..))
import qualified Data.Text.Array as TA
import Data.Text.Internal (Text(..))
import qualified Data.Text.Lines.Internal as I
import qualified Data.Vector.Unboxed as U
import Data.Word (Word)
import Text.Show (Show)
#if MIN_VERSION_text(2,0,0)
import Prelude (fromIntegral)
import Foreign.C.Types (CSize(..))
import GHC.Exts (ByteArray#)
import System.IO (IO)
import System.IO.Unsafe (unsafeDupablePerformIO)
import System.Posix.Types (CSsize(..))
#else
import Data.Bool ((&&))
import Data.Ord ((<))
#endif
#ifdef DEBUG
import GHC.Stack (HasCallStack)
#else
#define HasCallStack ()
#endif
lengthTextUtf16 :: Text -> Word
#if MIN_VERSION_text(2,0,0)
lengthTextUtf16 :: Text -> Word
lengthTextUtf16 (Text (TA.ByteArray ByteArray#
arr) Int
off Int
len) = CSsize -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSsize -> Word) -> CSsize -> Word
forall a b. (a -> b) -> a -> b
$ IO CSsize -> CSsize
forall a. IO a -> a
unsafeDupablePerformIO (IO CSsize -> CSsize) -> IO CSsize -> CSsize
forall a b. (a -> b) -> a -> b
$
ByteArray# -> CSize -> CSize -> IO CSsize
lengthUtf8AsUtf16 ByteArray#
arr (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
off) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
foreign import ccall unsafe "_hs_text_lines_length_utf8_as_utf16" lengthUtf8AsUtf16
:: ByteArray# -> CSize -> CSize -> IO CSsize
#else
lengthTextUtf16 (Text _ _ len) = I.intToWord len
#endif
length :: I.TextLines -> Word
length :: TextLines -> Word
length = Text -> Word
lengthTextUtf16 (Text -> Word) -> (TextLines -> Text) -> TextLines -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextLines -> Text
I.toText
data Position = Position
{ Position -> Word
posLine :: !Word
, Position -> Word
posColumn :: !Word
} deriving (Position -> Position -> Bool
(Position -> Position -> Bool)
-> (Position -> Position -> Bool) -> Eq Position
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Position -> Position -> Bool
== :: Position -> Position -> Bool
$c/= :: Position -> Position -> Bool
/= :: Position -> Position -> Bool
Eq, Eq Position
Eq Position =>
(Position -> Position -> Ordering)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Position)
-> (Position -> Position -> Position)
-> Ord Position
Position -> Position -> Bool
Position -> Position -> Ordering
Position -> Position -> Position
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Position -> Position -> Ordering
compare :: Position -> Position -> Ordering
$c< :: Position -> Position -> Bool
< :: Position -> Position -> Bool
$c<= :: Position -> Position -> Bool
<= :: Position -> Position -> Bool
$c> :: Position -> Position -> Bool
> :: Position -> Position -> Bool
$c>= :: Position -> Position -> Bool
>= :: Position -> Position -> Bool
$cmax :: Position -> Position -> Position
max :: Position -> Position -> Position
$cmin :: Position -> Position -> Position
min :: Position -> Position -> Position
Ord, Int -> Position -> ShowS
[Position] -> ShowS
Position -> String
(Int -> Position -> ShowS)
-> (Position -> String) -> ([Position] -> ShowS) -> Show Position
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Position -> ShowS
showsPrec :: Int -> Position -> ShowS
$cshow :: Position -> String
show :: Position -> String
$cshowList :: [Position] -> ShowS
showList :: [Position] -> ShowS
Show)
instance NFData Position where
rnf :: Position -> ()
rnf = (Position -> () -> ()
forall a b. a -> b -> b
`seq` ())
instance Semigroup Position where
Position Word
l1 Word
c1 <> :: Position -> Position -> Position
<> Position Word
l2 Word
c2 =
Word -> Word -> Position
Position (Word
l1 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
l2) (if Word
l2 Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0 then Word
c1 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
c2 else Word
c2)
instance Monoid Position where
mempty :: Position
mempty = Word -> Word -> Position
Position Word
0 Word
0
mappend :: Position -> Position -> Position
mappend = Position -> Position -> Position
forall a. Semigroup a => a -> a -> a
(<>)
lengthAsPosition
:: I.TextLines
-> Position
lengthAsPosition :: TextLines -> Position
lengthAsPosition (I.TextLines (Text ByteArray
arr Int
off Int
len) Vector Int
nls) = Position
{ posLine :: Word
posLine = Int -> Word
I.intToWord (Int -> Word) -> Int -> Word
forall a b. (a -> b) -> a -> b
$ Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
nls
, posColumn :: Word
posColumn = Text -> Word
lengthTextUtf16 (Text -> Word) -> Text -> Word
forall a b. (a -> b) -> a -> b
$ ByteArray -> Int -> Int -> Text
Text ByteArray
arr Int
nl (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nl)
}
where
nl :: Int
nl = if Vector Int -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector Int
nls then Int
off else Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.last Vector Int
nls Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
splitTextAtUtf16Index :: Word -> Text -> Maybe (Text, Text)
splitTextAtUtf16Index :: Word -> Text -> Maybe (Text, Text)
splitTextAtUtf16Index Word
k t :: Text
t@(Text ByteArray
arr Int
off Int
len)
| Word
k Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0 = (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (ByteArray -> Int -> Int -> Text
Text ByteArray
arr Int
off Int
0, Text
t)
| Word
k Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Word
I.intToWord Int
len = (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
t, Text
forall a. Monoid a => a
mempty)
#if MIN_VERSION_text(2,0,0)
| Int
o Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (ByteArray -> Int -> Int -> Text
Text ByteArray
arr Int
off Int
o, ByteArray -> Int -> Int -> Text
Text ByteArray
arr (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
o) (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
o))
| Bool
otherwise = Maybe (Text, Text)
forall a. Maybe a
Nothing
where
!(TA.ByteArray ByteArray#
arr#) = ByteArray
arr
o :: Int
o = CSsize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSsize -> Int) -> CSsize -> Int
forall a b. (a -> b) -> a -> b
$ IO CSsize -> CSsize
forall a. IO a -> a
unsafeDupablePerformIO (IO CSsize -> CSsize) -> IO CSsize -> CSsize
forall a b. (a -> b) -> a -> b
$
ByteArray# -> CSize -> CSize -> CSize -> IO CSsize
takeUtf8AsUtf16 ByteArray#
arr# (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
off) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) (Word -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
k)
foreign import ccall unsafe "_hs_text_lines_take_utf8_as_utf16" takeUtf8AsUtf16
:: ByteArray# -> CSize -> CSize -> CSize -> IO CSsize
#else
| otherwise = if c >= 0xdc00 && c < 0xe000 then Nothing else Just
(Text arr off k', Text arr (off + k') (len - k'))
where
k' = I.wordToInt k
c = TA.unsafeIndex arr (off + k')
#endif
splitAtPosition
:: HasCallStack
=> Position
-> I.TextLines
-> Maybe (I.TextLines, I.TextLines)
splitAtPosition :: Position -> TextLines -> Maybe (TextLines, TextLines)
splitAtPosition (Position Word
line Word
column) (I.TextLines (Text ByteArray
arr Int
off Int
len) Vector Int
nls) =
case Word -> Text -> Maybe (Text, Text)
splitTextAtUtf16Index Word
column Text
tx of
Maybe (Text, Text)
Nothing -> Maybe (TextLines, TextLines)
forall a. Maybe a
Nothing
Just (Text ByteArray
_ Int
off' Int
len', Text
tz) -> let n :: Int
n = Vector Int -> Int -> Int
forall a. (Ord a, Unbox a) => Vector a -> a -> Int
I.binarySearch Vector Int
nls (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len') in (TextLines, TextLines) -> Maybe (TextLines, TextLines)
forall a. a -> Maybe a
Just
( Text -> Vector Int -> TextLines
I.textLines (ByteArray -> Int -> Int -> Text
Text ByteArray
arr Int
off (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off)) (Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Vector a -> Vector a
U.take Int
n Vector Int
nls)
, Text -> Vector Int -> TextLines
I.textLines Text
tz (Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop Int
n Vector Int
nls))
where
arrLen :: Int
arrLen = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
nl :: Int
nl
| Word
line Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0 = Int
off
| Word
line Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Word
I.intToWord (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
nls) = Int
arrLen
| Bool
otherwise = Vector Int
nls Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.! (Word -> Int
I.wordToInt Word
line Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
tx :: Text
tx = ByteArray -> Int -> Int -> Text
Text ByteArray
arr Int
nl (Int
arrLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nl)
splitAt :: HasCallStack => Word -> I.TextLines -> Maybe (I.TextLines, I.TextLines)
splitAt :: Word -> TextLines -> Maybe (TextLines, TextLines)
splitAt = Position -> TextLines -> Maybe (TextLines, TextLines)
splitAtPosition (Position -> TextLines -> Maybe (TextLines, TextLines))
-> (Word -> Position)
-> Word
-> TextLines
-> Maybe (TextLines, TextLines)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word -> Word -> Position
Position Word
0