{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}
module Data.Text.Internal.Lazy.Search
(
indices
) where
import Data.Bits (unsafeShiftL, (.|.), (.&.))
import qualified Data.Text.Array as A
import Data.Int (Int64)
import Data.Word (Word8, Word64)
import qualified Data.Text.Internal as T
import qualified Data.Text as T (concat, isPrefixOf)
import Data.Text.Internal.ArrayUtils (memchr)
import Data.Text.Internal.Fusion.Types (PairS(..))
import Data.Text.Internal.Lazy (Text(..), foldrChunks)
indices :: Text
-> Text
-> [Int64]
indices :: Text -> Text -> [Int64]
indices Text
needle
| Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int64] -> Text -> [Int64]
forall a b. a -> b -> a
const []
| Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Word8 -> Int64 -> Text -> [Int64]
indicesOne (Array -> Int -> Word8
A.unsafeIndex Array
narr Int
noff) Int64
0
| Bool
otherwise = Int64 -> Int -> Text -> [Int64]
advance Int64
0 Int
0
where
T.Text Array
narr Int
noff Int
nlen = [Text] -> Text
T.concat ((Text -> [Text] -> [Text]) -> [Text] -> Text -> [Text]
forall a. (Text -> a -> a) -> a -> Text -> a
foldrChunks (:) [] Text
needle)
advance :: Int64 -> Int -> Text -> [Int64]
advance !Int64
_ !Int
_ Text
Empty = []
advance !(Int64
g :: Int64) !(Int
i :: Int) xxs :: Text
xxs@(Chunk x :: Text
x@(T.Text xarr :: Array
xarr@(A.ByteArray ByteArray#
xarr#) Int
xoff Int
l) Text
xs)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l = Int64 -> Int -> Text -> [Int64]
advance Int64
g (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) Text
xs
| Int -> Text -> Text -> Bool
lackingHay (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen) Text
x Text
xs = []
| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
z Bool -> Bool -> Bool
&& Bool
candidateMatch = Int64
g Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
: Int64 -> Int -> Text -> [Int64]
advance (Int64
g Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
intToInt64 Int
nlen) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen) Text
xxs
| Bool
otherwise = Int64 -> Int -> Text -> [Int64]
advance (Int64
g Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
intToInt64 Int
delta) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
delta) Text
xxs
where
c :: Word8
c = Text -> Int -> Word8
index Text
xxs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlast)
delta :: Int
delta | Bool
nextInPattern = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
z = Int
skip Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
| Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen = case
ByteArray# -> Int -> Int -> Word8 -> Int
memchr ByteArray#
xarr# (Int
xoff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen) (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nlen) Word8
z of
-1 -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nlen)
Int
s -> Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
| Bool
otherwise = Int
1
nextInPattern :: Bool
nextInPattern = Word64
mask Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word8 -> Word64
swizzle (Text -> Int -> Word8
index Text
xxs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen)) Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0
candidateMatch :: Bool
candidateMatch
| Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
l = Array -> Int -> Array -> Int -> Int -> Bool
A.equal Array
narr Int
noff Array
xarr (Int
xoff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) Int
nlen
| Bool
otherwise = Array -> Int -> Array -> Int -> Int -> Bool
A.equal Array
narr Int
noff Array
xarr (Int
xoff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Bool -> Bool -> Bool
&&
Array -> Int -> Int -> Text
T.Text Array
narr (Int
noff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) (Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) Text -> Text -> Bool
`isPrefixOf` Text
xs
nlast :: Int
nlast = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
z :: Word8
z = Array -> Int -> Word8
A.unsafeIndex Array
narr (Int
noff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
(Word64
mask :: Word64) :*: Int
skip = Int -> Int -> Word64 -> Int -> PairS Word64 Int
buildTable Int
0 Int
0 Word64
0 (Int
nlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2)
swizzle :: Word8 -> Word64
swizzle :: Word8 -> Word64
swizzle Word8
w = Word64
1 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` (Word8 -> Int
word8ToInt Word8
w Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
buildTable :: Int -> Int -> Word64 -> Int -> PairS Word64 Int
buildTable !Int
g !Int
i !Word64
msk !Int
skp
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlast = (Word64
msk Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word8 -> Word64
swizzle Word8
z) Word64 -> Int -> PairS Word64 Int
forall a b. a -> b -> PairS a b
:*: Int
skp
| Bool
otherwise = Int -> Int -> Word64 -> Int -> PairS Word64 Int
buildTable (Int
gInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Word64
msk' Int
skp'
where c :: Word8
c = Array -> Int -> Word8
A.unsafeIndex Array
narr (Int
noffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
msk' :: Word64
msk' = Word64
msk Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word8 -> Word64
swizzle Word8
c
skp' :: Int
skp' | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
z = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
g Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
| Bool
otherwise = Int
skp
lackingHay :: Int -> T.Text -> Text -> Bool
lackingHay :: Int -> Text -> Text -> Bool
lackingHay Int
q (T.Text Array
_ Int
_ Int
l) Text
ps = Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
q Bool -> Bool -> Bool
&& case Text
ps of
Text
Empty -> Bool
True
Chunk Text
r Text
rs -> Int -> Text -> Text -> Bool
lackingHay (Int
q Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) Text
r Text
rs
index :: Text -> Int -> Word8
index :: Text -> Int -> Word8
index Text
Empty !Int
_ = Word8
0
index (Chunk (T.Text Array
arr Int
off Int
len) Text
xs) !Int
i
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i)
| Bool
otherwise = Text -> Int -> Word8
index Text
xs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len)
indicesOne :: Word8 -> Int64 -> Text -> [Int64]
indicesOne :: Word8 -> Int64 -> Text -> [Int64]
indicesOne Word8
c = Int64 -> Text -> [Int64]
chunk
where
chunk :: Int64 -> Text -> [Int64]
chunk :: Int64 -> Text -> [Int64]
chunk !Int64
_ Text
Empty = []
chunk !Int64
i (Chunk (T.Text Array
oarr Int
ooff Int
olen) Text
os) = Int -> [Int64]
go Int
0
where
go :: Int -> [Int64]
go Int
h | Int
h Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
olen = Int64 -> Text -> [Int64]
chunk (Int64
iInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int -> Int64
intToInt64 Int
olen) Text
os
| Word8
on Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
c = Int64
i Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
intToInt64 Int
h Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
: Int -> [Int64]
go (Int
hInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
| Bool
otherwise = Int -> [Int64]
go (Int
hInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
where on :: Word8
on = Array -> Int -> Word8
A.unsafeIndex Array
oarr (Int
ooffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
h)
isPrefixOf :: T.Text -> Text -> Bool
isPrefixOf :: Text -> Text -> Bool
isPrefixOf (T.Text Array
_ Int
_ Int
xlen) Text
Empty = Int
xlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
isPrefixOf x :: Text
x@(T.Text Array
xarr Int
xoff Int
xlen) (Chunk y :: Text
y@(T.Text Array
_ Int
_ Int
ylen) Text
ys)
| Int
xlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
ylen = Text
x Text -> Text -> Bool
`T.isPrefixOf` Text
y
| Bool
otherwise = Text
y Text -> Text -> Bool
`T.isPrefixOf` Text
x Bool -> Bool -> Bool
&& Array -> Int -> Int -> Text
T.Text Array
xarr (Int
xoff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ylen) (Int
xlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ylen) Text -> Text -> Bool
`isPrefixOf` Text
ys
intToInt64 :: Int -> Int64
intToInt64 :: Int -> Int64
intToInt64 = Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral
word8ToInt :: Word8 -> Int
word8ToInt :: Word8 -> Int
word8ToInt = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral