module Data.TCache.IndexText(
indexText
, indexList
, contains
, containsElem
, allElemsOf) where
import Data.TCache
import Data.TCache.IndexQuery
import Data.TCache.Defs
import qualified Data.Text.Lazy as T
import Data.Typeable
import qualified Data.Map as M
import Data.Maybe
import Data.Bits
import System.Mem.StableName
import Data.List((\\))
import GHC.Conc(unsafeIOToSTM)
import Control.Concurrent(forkIO)
import Data.Char
import Control.Concurrent(threadDelay)
import Data.ByteString.Lazy.Char8(pack, unpack)
import Control.Monad
import System.IO.Unsafe
data IndexText= IndexText
{ fieldType :: !String
, lastDoc :: Int
, mapDocKeyInt :: M.Map String Int
, mapIntDocKey :: M.Map Int String
, mapTextInteger :: M.Map T.Text Integer
} deriving (Typeable)
instance Show IndexText where
show (IndexText t a b c d)= show (t,a,b,c,d)
instance Read IndexText where
readsPrec n str= [(IndexText t a b c d, str2)| ((t,a,b,c,d),str2) <- readsPrec n str]
instance Serializable IndexText where
serialize= pack . show
deserialize= read . unpack
setPersist= const Nothing
instance Indexable IndexText where
key (IndexText v _ _ _ _)= "indextext-" ++ v
instance IResource IndexText where
keyResource = key
writeResource =defWriteResource
readResourceByKey = defReadResourceByKey
delResource = defDelResource
readInitDBRef v x= do
mv <- readDBRef x
case mv of
Nothing -> writeDBRef x v >> return v
Just v -> return v
add ref t key w = op ref t setBit w key
del ref t key w = op ref t clearBit w key
op refIndex t set ws key = do
mindex <- readDBRef refIndex
let mindex'= process mindex ws
writeDBRef refIndex $ fromJust mindex'
where
process mindex []= mindex
process mindex (w:ws)=
case mindex of
Nothing -> process (Just $ IndexText t 0 (M.singleton key 0) (M.singleton 0 key) (M.singleton w 1)) ws
Just (IndexText t n mapSI mapIS map) -> do
let (docLocation,n', mapSI',mapIS')= case M.lookup key mapSI of
Nothing -> let n'= n+1 in (n', n'
, M.insert key n' mapSI
, M.insert n' key mapIS)
Just m -> (m,n, mapSI,mapIS)
case M.lookup w map of
Nothing ->
process (Just $ IndexText t n' mapSI' mapIS' (M.insert w (set 0 docLocation) map)) ws
Just integer ->
process (Just $ IndexText t n' mapSI' mapIS' $ M.insert w (set integer docLocation) map) ws
indexText
:: (IResource a, Typeable a, Typeable b)
=> (a -> b)
-> (b -> T.Text)
-> IO ()
indexText sel convert= do
addTrigger (indext sel (words1 . convert))
let [t1,t2]= typeRepArgs $! typeOf sel
t= show t1 ++ show t2
let proto = IndexText t 0 M.empty M.empty M.empty
withResources [proto] $ init proto
where
init proto [Nothing] = [proto]
init _ [Just _] = []
indexList
:: (IResource a, Typeable a, Typeable b)
=> (a -> b)
-> (b -> [T.Text])
-> IO ()
indexList sel convert= do
addTrigger (indext sel convert)
let [t1,t2]= typeRepArgs $! typeOf sel
t= show t1 ++ show t2
let proto= IndexText t 0 M.empty M.empty M.empty
withResources [proto] $ init proto
where
init proto [Nothing] = [proto]
init _ [Just _]= []
indext :: (IResource a, Typeable a,Typeable b)
=> (a -> b) -> (b -> [T.Text]) -> DBRef a -> Maybe a -> STM()
indext sel convert dbref mreg= f1
where
f= forkIO (atomically f1) >> return()
f1= do
moldreg <- readDBRef dbref
case ( moldreg, mreg) of
(Nothing, Just reg) -> add refIndex t (keyResource reg) . convert $ sel reg
(Just oldreg, Nothing) -> del refIndex t (keyResource oldreg) . convert $ sel oldreg
(Just oldreg, Just reg) -> do
st <- unsafeIOToSTM $ makeStableName $ sel oldreg
st' <- unsafeIOToSTM $ makeStableName $ sel reg
if st== st'
then return ()
else do
let key= keyResource reg
let wrds = convert $ sel oldreg
let wrds'= convert $ sel reg
let new= wrds' \\ wrds
let old= wrds \\ wrds'
when(not $ null old) $ del refIndex t key old
when(not $ null new) $ add refIndex t key new
return()
where
[t1,t2]= typeRepArgs $! typeOf sel
t= show t1 ++ show t2
refIndex= getDBRef . key $ IndexText t u u u u where u= undefined
containsElem :: (IResource a, Typeable a, Typeable b) => (a -> b) -> String -> STM [DBRef a]
containsElem sel wstr = do
let w= T.pack wstr
let [t1, t2]= typeRepArgs $! typeOf sel
let t= show t1 ++ show t2
let u= undefined
mr <- withSTMResources [IndexText t u u u u]
$ \[r] -> resources{toReturn= r}
case mr of
Nothing -> do
let fields= show $ typeOf sel
error $ "the index for "++ fields ++" do not exist. At main, use \"Data.TCache.IdexQuery.index\" to start indexing this field"
Just (IndexText t n _ mmapIntString map1) ->
case M.lookup w map1 of
Nothing -> return []
Just integer -> do
let mns=map (\n ->case testBit integer n of True -> Just n; _ -> Nothing) [0..n]
let wordsr = catMaybes $ map (\n -> M.lookup n mmapIntString) $ catMaybes mns
return $ map getDBRef wordsr
allElemsOf :: (IResource a, Typeable a, Typeable b) => (a -> b) -> STM [T.Text]
allElemsOf sel = do
let [t1, t2]= typeRepArgs $! typeOf sel
let t= show t1 ++ show t2
let u= undefined
mr <- withSTMResources [IndexText t u u u u]
$ \[r] -> resources{toReturn= r}
case mr of
Nothing -> return []
Just (IndexText t n _ _ map) -> return $ M.keys map
words1= filter filterWordt . T.split (\c -> isSeparator c || c=='\n' || isPunctuation c )
contains
:: (IResource a, Typeable a, Typeable b)
=>( a -> b)
-> String
-> STM [DBRef a]
contains sel str= case words str of
[] -> return []
[w] -> containsElem sel w
ws -> do
let rs = map (containsElem sel) $ filter filterWord ws
foldl (.&&.) (head rs) (tail rs)
filterWordt w= T.length w >2 || or (map (\c -> isUpper c || isDigit c) (T.unpack w))
filterWord w= length w >2 || or (map (\c -> isUpper c || isDigit c) w)