{-# LANGUAGE NamedFieldPuns, RecordWildCards #-}
module Data.SearchEngine.Types (
SearchEngine(..),
SearchConfig(..),
SearchRankParameters(..),
BM25F.FeatureFunction(..),
initSearchEngine,
cacheBM25Context,
NoFeatures,
noFeatures,
SearchIndex, Term, TermId,
DocIdSet, DocId,
DocTermIds, DocFeatVals,
invariant,
) where
import Data.SearchEngine.SearchIndex (SearchIndex, Term, TermId)
import qualified Data.SearchEngine.SearchIndex as SI
import Data.SearchEngine.DocIdSet (DocIdSet, DocId)
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import Data.SearchEngine.DocFeatVals (DocFeatVals)
import Data.SearchEngine.DocTermIds (DocTermIds)
import qualified Data.SearchEngine.BM25F as BM25F
import Data.Ix
import Data.Array.Unboxed
-- | The document-specific functions supplied by the user of the search
-- engine. The record is parameterised over the document type @doc@, the
-- document key type @key@, the type of document fields @field@ and the type
-- of non-term features @feature@.
--
data SearchConfig doc key field feature = SearchConfig {
       -- | Obtain the key of a document.
       documentKey          :: doc -> key,

       -- | Produce the list of 'Term's of a document for a given field.
       --
       -- NOTE(review): the name of this field was missing from the text
       -- under review (only its type was present); it has been restored as
       -- @extractDocumentTerms@ -- confirm against the callers.
       extractDocumentTerms :: doc -> field -> [Term],

       -- | Map a 'Term' to another 'Term', given a field. Judging by the
       -- name this is applied to the terms of a search query -- TODO confirm
       -- against the query code.
       transformQueryTerm   :: Term -> field -> Term,

       -- | Obtain the numeric value of a given feature for a document.
       documentFeatureValue :: doc -> feature -> Float
     }
-- | Tunable ranking parameters of the search engine.
--
-- The first five fields are copied verbatim into the 'BM25F.Context' by
-- 'cacheBM25Context'. The four limit fields are not consulted anywhere in
-- this module.
--
data SearchRankParameters field feature = SearchRankParameters {
       -- | Passed on as 'BM25F.paramK1'.
       paramK1                         :: !Float,

       -- | Per-field value, passed on as 'BM25F.paramB'.
       paramB                          :: field -> Float,

       -- | Per-field weight, passed on as 'BM25F.fieldWeight'.
       paramFieldWeights               :: field -> Float,

       -- | Per-feature weight, passed on as 'BM25F.featureWeight'.
       paramFeatureWeights             :: feature -> Float,

       -- | Per-feature function, passed on as 'BM25F.featureFunction'.
       paramFeatureFunctions           :: feature -> BM25F.FeatureFunction,

       -- | Presumably a soft cap on the size of a query result set
       -- -- TODO confirm against the query code.
       paramResultsetSoftLimit         :: !Int,

       -- | Presumably a hard cap on the size of a query result set
       -- -- TODO confirm against the query code.
       paramResultsetHardLimit         :: !Int,

       -- | Presumably a cap applied before auto-suggest filtering
       -- -- TODO confirm against the auto-suggest code.
       paramAutosuggestPrefilterLimit  :: !Int,

       -- | Presumably a cap applied after auto-suggest filtering
       -- -- TODO confirm against the auto-suggest code.
       paramAutosuggestPostfilterLimit :: !Int
     }
-- | The complete state of a search engine: the index itself, the user's
-- configuration and ranking parameters, plus two cached values derived from
-- them.
--
data SearchEngine doc key field feature = SearchEngine {
       -- | The underlying search index.
       searchIndex      :: !(SearchIndex      key field feature),

       -- | The document-specific functions supplied by the user.
       searchConfig     :: !(SearchConfig doc key field feature),

       -- | The ranking parameters supplied by the user.
       searchRankParams :: !(SearchRankParameters field feature),

       -- | Per-field totals. 'cacheBM25Context' divides each entry by the
       -- document count of the index to obtain the average field length.
       sumFieldLengths  :: !(UArray field Int),

       -- | Cached ranking context, (re)built by 'cacheBM25Context'.
       --
       -- This field is deliberately lazy: 'initSearchEngine' stores a
       -- bottom placeholder here, which 'cacheBM25Context' overwrites
       -- without ever forcing it. Making the field strict would break that.
       bm25Context      :: BM25F.Context TermId field feature
     }
-- | Internal sanity check of a 'SearchEngine'. It delegates entirely to the
-- invariant of the underlying search index ('SI.invariant'); the other
-- fields of the engine are not inspected.
--
invariant :: (Ord key, Ix field, Bounded field) =>
             SearchEngine doc key field feature -> Bool
invariant SearchEngine{searchIndex} =
    SI.invariant searchIndex
-- | Create an initial 'SearchEngine' from a configuration and a set of
-- ranking parameters.
--
-- The resulting engine has an empty search index ('SI.emptySearchIndex') and
-- a 'sumFieldLengths' array that covers every field from 'minBound' to
-- 'maxBound', with each entry set to zero. The ranking context is then
-- filled in by 'cacheBM25Context'.
--
initSearchEngine :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
                    SearchConfig doc key field feature ->
                    SearchRankParameters field feature ->
                    SearchEngine doc key field feature
initSearchEngine config params =
    cacheBM25Context
      SearchEngine {
        searchIndex      = SI.emptySearchIndex,
        searchConfig     = config,
        searchRankParams = params,
        -- listArray only consumes as many zeros as there are fields.
        sumFieldLengths  = listArray (minBound, maxBound) (repeat 0),
        -- Placeholder only: the field is lazy and cacheBM25Context replaces
        -- it immediately without forcing it. Should that ever change, fail
        -- with a message that points back here.
        bm25Context      = error "initSearchEngine: bm25Context forced before cacheBM25Context"
      }
-- | Rebuild the cached 'bm25Context' of a 'SearchEngine' from its current
-- search index, field-length totals and ranking parameters.
--
-- All other fields of the engine are returned unchanged. The previous value
-- of 'bm25Context' is discarded without being inspected.
--
cacheBM25Context :: Ix field =>
                    SearchEngine doc key field feature ->
                    SearchEngine doc key field feature
cacheBM25Context
    se@SearchEngine {
      -- RecordWildCards brings the param* fields used below into scope.
      searchRankParams = SearchRankParameters{..},
      searchIndex,
      sumFieldLengths
    }
  = se { bm25Context = bm25Context' }
  where
    -- Number of documents in the index; used both as the total and as the
    -- denominator of the average field length.
    numDocs = SI.docCount searchIndex

    bm25Context' = BM25F.Context {
      BM25F.numDocsTotal    = numDocs,
      -- Average length of field f over all documents. For an index with
      -- zero documents this is a Float division by zero, which yields
      -- NaN or Infinity and does not throw an exception.
      BM25F.avgFieldLength  = \f -> fromIntegral (sumFieldLengths ! f)
                                  / fromIntegral numDocs,
      -- Size of the document set that the index associates with a term.
      BM25F.numDocsWithTerm = DocIdSet.size . SI.lookupTermId searchIndex,
      BM25F.paramK1         = paramK1,
      BM25F.paramB          = paramB,
      BM25F.fieldWeight     = paramFieldWeights,
      BM25F.featureWeight   = paramFeatureWeights,
      BM25F.featureFunction = paramFeatureFunctions
    }
-- | A type with a single constructor, for use as the @feature@ type
-- parameter. Its 'Ix' instance reports an empty index range, so it appears
-- intended for search engines that use no non-term features -- confirm
-- against the call sites.
--
data NoFeatures = NoFeatures
  deriving (Eq, Ord, Bounded, Show)
-- | An 'Ix' instance under which every range is empty: 'range' yields no
-- indices, 'inRange' is always 'False', and 'index' always returns @-1@
-- regardless of its arguments. This deliberately differs from what a
-- derived instance for a single-constructor type would do (a one-element
-- range).
--
instance Ix NoFeatures where
  range   _   = []
  inRange _ _ = False
  index   _ _ = -1
-- | A function out of 'NoFeatures' into any result type. It ignores its
-- argument and calls 'error' if it is ever applied and its result forced.
--
-- Presumably meant as a filler for the feature-indexed fields (for example
-- 'paramFeatureWeights' or 'documentFeatureValue') when the feature type is
-- 'NoFeatures' -- TODO confirm against the call sites.
--
noFeatures :: NoFeatures -> a
noFeatures _ = error "noFeatures"