{-# LANGUAGE BangPatterns, NamedFieldPuns, RecordWildCards,
ScopedTypeVariables #-}
module Data.SearchEngine.Autosuggest (
queryAutosuggest,
ResultsFilter(..),
queryAutosuggestPredicate,
queryAutosuggestMatchingDocuments
) where
import Data.SearchEngine.Types
import Data.SearchEngine.Query (ResultsFilter(..))
import qualified Data.SearchEngine.Query as Query
import qualified Data.SearchEngine.SearchIndex as SI
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import qualified Data.SearchEngine.DocTermIds as DocTermIds
import qualified Data.SearchEngine.BM25F as BM25F
import Data.Ix
import Data.Ord
import Data.List
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.IntSet as IntSet
import qualified Data.Vector.Unboxed as Vec
-- | Run an auto-suggest (auto-completion) query.
--
-- @precedingTerms@ are the complete terms entered so far and @partialTerm@
-- is the term still being typed. The result is a pair of
--
-- * candidate completion terms paired with a score, and
-- * matching document keys paired with a score,
--
-- each sorted by descending score (see 'sortResults'). Documents are
-- restricted using the supplied 'ResultsFilter'.
--
-- The query runs as a pipeline of small steps, read bottom-to-top in the
-- composition below. When the number of candidate documents exceeds the
-- pre-filter or post-filter limit, the intermediate data is replaced by
-- empty values and the remaining steps run on those.
queryAutosuggest :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
                    SearchEngine doc key field feature ->
                    ResultsFilter key ->
                    [Term] -> Term -> ([(Term, Float)], [(key, Float)])
queryAutosuggest se resultsFilter precedingTerms partialTerm =
      step_external
    . step_rank
    . step_scoreDs
    . step_scoreTs
    . step_cache
    . step_postfilterlimit
    . step_filter
    . step_prefilterlimit
    . step_process
    $ step_prep precedingTerms partialTerm
  where
    -- Build the internal query from the external terms.
    step_prep pre_ts t = mkAutosuggestQuery se pre_ts t

    -- Work out the completion term ids, the overall candidate doc set and
    -- the per-term doc sets; the preceding term ids are passed along
    -- unchanged for the later scoring step.
    step_process (ts, ds, pre_ts) = (ts', ds', tdss', pre_ts)
      where
        (tdss', ts', ds') = processAutosuggestQuery se (ts, ds, pre_ts)

    -- Give up early (before any per-document lookups) when the candidate
    -- doc set is larger than the pre-filter limit.
    step_prefilterlimit args@(_, ds, _, _)
      | withinPrefilterLimit se ds = args
      | otherwise                  = ([], DocIdSet.empty, [], [])

    -- Apply the caller's results filter; this also swaps the doc id set for
    -- a list of doc ids paired with their looked-up document info.
    step_filter (ts, ds, tdss, pre_ts) = (ts, ds_info, tdss, pre_ts)
      where
        ds_info = filterAutosuggestQuery se resultsFilter ds

    -- Give up (before scoring) when too many documents survive the filter.
    step_postfilterlimit args@(_, ds_info, _, _)
      | withinPostfilterLimit se ds_info = args
      | otherwise                        = ([], [], [], [])

    -- Compute the per-document scoring information once, so that both
    -- scoring steps below can reuse it.
    step_cache (ts, ds_info, tdss, pre_ts) = (ds_info', tdss)
      where
        ds_info' = cacheDocScoringInfo se ts ds_info pre_ts

    -- Score the completion terms.
    step_scoreTs (ds_info, tdss) = (ds_info, tdss, ts_scored)
      where
        ts_scored = scoreAutosuggestQueryCompletions tdss ds_info

    -- Score the documents; this step is also given the completion scores.
    step_scoreDs (ds_info, tdss, ts_scored) = (ts_scored, ds_scored)
      where
        ds_scored = scoreAutosuggestQueryResults tdss ds_info ts_scored

    -- Order both result lists by descending score.
    step_rank = sortResults

    -- Translate internal term ids and doc ids to 'Term's and doc keys.
    step_external = convertIdsToExternal se
-- | The keys of all documents selected by an auto-suggest query, without
-- any filtering, limiting beyond what 'processAutosuggestQuery' applies,
-- scoring or ranking.
--
-- The keys are produced in the order in which 'DocIdSet.toList' enumerates
-- the matching doc ids.
queryAutosuggestMatchingDocuments :: (Ix field, Bounded field, Ord key) =>
                                     SearchEngine doc key field feature ->
                                     [Term] -> Term -> [key]
queryAutosuggestMatchingDocuments se@SearchEngine{searchIndex}
                                  precedingTerms partialTerm =
    map (SI.getDocKey searchIndex) (DocIdSet.toList matchingDocs)
  where
    -- Only the combined doc set (third component) is needed here.
    (_, _, matchingDocs) =
      processAutosuggestQuery se
        (mkAutosuggestQuery se precedingTerms partialTerm)
-- | Build a predicate that tells whether the document with a given key is
-- selected by an auto-suggest query.
--
-- The matching doc set is bound outside the returned function, so applying
-- this to the engine and terms once and then testing many keys reuses the
-- same set. Keys for which 'SI.lookupDocKeyDocId' yields 'Nothing' are
-- reported as not matching.
queryAutosuggestPredicate :: (Ix field, Bounded field, Ord key) =>
                             SearchEngine doc key field feature ->
                             [Term] -> Term -> (key -> Bool)
queryAutosuggestPredicate se@SearchEngine{searchIndex}
                          precedingTerms partialTerm =
    let (_, _, ds) =
          processAutosuggestQuery se
            (mkAutosuggestQuery se precedingTerms partialTerm)
     in \key -> maybe False
                      (`DocIdSet.member` ds)
                      (SI.lookupDocKeyDocId searchIndex key)
-- | Whether a doc id set is small enough to be worth processing: its size
-- must not exceed the engine's 'paramAutosuggestPrefilterLimit'.
withinPrefilterLimit :: SearchEngine doc key field feature ->
                        DocIdSet -> Bool
withinPrefilterLimit SearchEngine{searchRankParams} ds =
    DocIdSet.size ds <= paramAutosuggestPrefilterLimit searchRankParams
-- | Whether a list (of filtered documents) is short enough to be worth
-- scoring: its length must not exceed the engine's
-- 'paramAutosuggestPostfilterLimit'.
--
-- The answer is the same as @length ds_info <= limit@, but at most
-- @limit + 1@ list cells are inspected, so an over-long (lazily produced)
-- list is not forced to the end just to be rejected.
withinPostfilterLimit :: SearchEngine doc key field feature ->
                         [a] -> Bool
withinPostfilterLimit SearchEngine{searchRankParams} ds_info =
    -- 'drop' treats a negative count like zero, whereas no length is ever
    -- <= a negative limit; the explicit guard keeps that case 'False'.
    limit >= 0 && null (drop limit ds_info)
  where
    limit = paramAutosuggestPostfilterLimit searchRankParams
-- | Sort both lists of a pair by the second component of their elements,
-- largest first.
--
-- The sort is stable: elements with equal second components keep their
-- original relative order.
sortResults :: (Ord av, Ord bv) => ([(a,av)], [(b,bv)]) -> ([(a,av)], [(b,bv)])
sortResults (xs, ys) =
    ( sortBySndDescending xs
    , sortBySndDescending ys )
  where
    -- The signature is required: this point-free binding is used at two
    -- different element types.
    sortBySndDescending :: Ord v => [(x,v)] -> [(x,v)]
    sortBySndDescending = sortBy (comparing (Down . snd))
-- | Replace internal identifiers by their external counterparts: each
-- 'TermId' by the 'Term' from 'SI.getTerm' and each 'DocId' by the document
-- key from 'SI.getDocKey'. The attached values and the list order are left
-- unchanged.
convertIdsToExternal :: SearchEngine doc key field feature ->
                        ([(TermId, v)], [(DocId, v)]) -> ([(Term, v)], [(key, v)])
convertIdsToExternal SearchEngine{searchIndex} (termids, docids) =
    ( [ (SI.getTerm   searchIndex termid, s) | (termid, s) <- termids ]
    , [ (SI.getDocKey searchIndex docid,  s) | (docid,  s) <- docids  ]
    )
-- | The prepared form of an auto-suggest query, as built by
-- 'mkAutosuggestQuery': the candidate completion terms mapped to their
-- document sets, the (optional) document set derived from the preceding
-- terms, and the term ids of the preceding terms.
type AutosuggestQuery = (Map.Map TermId DocIdSet, Maybe DocIdSet, [TermId])
-- | Turn the external query (complete preceding terms plus the partially
-- typed term) into an 'AutosuggestQuery'.
--
-- * The completion terms are the union of the 'SI.lookupTermsByPrefix'
--   results for every expansion of the partial term produced by
--   'Query.expandTransformedQueryTerm'.
--
-- * The preceding terms are expanded the same way and looked up with
--   'SI.lookupTerm'; expansions with no lookup result are dropped. The
--   found term ids are returned, and their doc sets are combined by the
--   local @carefulUnions@.
--
-- With no preceding terms at all, the doc-hit component is 'Nothing' and
-- the term id list is empty.
mkAutosuggestQuery :: (Ix field, Bounded field) =>
                      SearchEngine doc key field feature ->
                      [Term] -> Term -> AutosuggestQuery
mkAutosuggestQuery se@SearchEngine{ searchIndex }
                   precedingTerms partialTerm =
    (completionTerms, precedingDocHits, precedingTerms')
  where
    -- 'Map.unions' is left-biased, so if the same term id is produced by
    -- more than one expansion the entry from the earliest one is kept.
    completionTerms =
      Map.unions
        [ Map.fromList (SI.lookupTermsByPrefix searchIndex partialTerm')
        | partialTerm' <- Query.expandTransformedQueryTerm se partialTerm
        ]

    -- 'fmap' on a pair maps over the second component only, i.e. the
    -- term ids pass through and the doc sets are combined.
    (precedingTerms', precedingDocHits)
      | null precedingTerms = ([], Nothing)
      | otherwise           = fmap carefulUnions
                                   (lookupRawResults precedingTerms)

    -- Combine the doc sets of the preceding terms:
    --
    -- * no doc sets at all: 'Just' the empty set;
    -- * every doc set is over the pre-filter limit: 'Nothing', which
    --   'processAutosuggestQuery' treats as "do not restrict";
    -- * otherwise: the union of only those sets within the limit.
    carefulUnions :: [DocIdSet] -> Maybe DocIdSet
    carefulUnions dss
      | null dss  = Just DocIdSet.empty
      | null dss' = Nothing
      | otherwise = Just (DocIdSet.unions dss')
      where
        dss' = filter (withinPrefilterLimit se) dss

    -- Look up every expansion of every given term, in order, keeping only
    -- the successful lookups, and split the results into ids and doc sets.
    lookupRawResults :: [Term] -> ([TermId], [DocIdSet])
    lookupRawResults ts =
      unzip $
        mapMaybe (SI.lookupTerm searchIndex)
                 (concatMap (Query.expandTransformedQueryTerm se) ts)
-- | Narrow an 'AutosuggestQuery' down to the completions that can actually
-- occur together with the preceding terms.
--
-- Returns, in this order:
--
-- 1. each remaining completion term id paired with its doc set (restricted
--    to the preceding-term doc hits when those are present),
-- 2. just the remaining completion term ids, and
-- 3. the union of all those doc sets.
--
-- Completion terms whose restricted doc set is empty are dropped. If any
-- remaining doc set exceeds the pre-filter limit, all three components are
-- returned empty. The preceding term ids in the query are not used here.
processAutosuggestQuery :: SearchEngine doc key field feature ->
                           AutosuggestQuery ->
                           ([(TermId, DocIdSet)], [TermId], DocIdSet)
processAutosuggestQuery se (completionTerms, precedingDocHits, _)
  | all (withinPrefilterLimit se) docSets =
      ( completionTermAndDocSets
      , completionTerms'
      , allTermDocSet
      )
  | otherwise = ([], [], DocIdSet.empty)
  where
    completionTermAndDocSets :: [(TermId, DocIdSet)]
    completionTermAndDocSets =
      [ (t, ds_t')
      | (t, ds_t) <- Map.toList completionTerms
      , let ds_t' = case precedingDocHits of
                      -- restrict to docs hit by the preceding terms
                      Just ds -> ds `DocIdSet.intersection` ds_t
                      -- no restriction available: keep the set as is
                      Nothing -> ds_t
      , not (DocIdSet.null ds_t')
      ]

    completionTerms' :: [TermId]
    docSets          :: [DocIdSet]
    (completionTerms', docSets) = unzip completionTermAndDocSets

    allTermDocSet :: DocIdSet
    allTermDocSet = DocIdSet.unions docSets
-- | Look up the document info ('SI.lookupDocId') for every doc id in the
-- set and keep only the documents accepted by the 'ResultsFilter'.
--
-- The result pairs each kept doc id with its looked-up info, in the order
-- given by 'DocIdSet.toList'.
filterAutosuggestQuery :: SearchEngine doc key field feature ->
                          ResultsFilter key ->
                          DocIdSet ->
                          [(DocId, (key, DocTermIds field, DocFeatVals feature))]
filterAutosuggestQuery SearchEngine{ searchIndex } resultsFilter ds =
  case resultsFilter of
    -- No filtering: every document is kept.
    NoFilter ->
      [ (docid, doc)
      | docid <- DocIdSet.toList ds
      , let doc = SI.lookupDocId searchIndex docid ]

    -- The predicate is asked about each document key individually.
    FilterPredicate predicate ->
      [ (docid, doc)
      | docid <- DocIdSet.toList ds
      , let doc@(k,_,_) = SI.lookupDocId searchIndex docid
      , predicate k ]

    -- The predicate is called once with all keys and answers positionally;
    -- matching on 'True' keeps the accepted documents. 'zip3' stops at the
    -- shortest list, so documents beyond the end of a shorter answer list
    -- are dropped.
    FilterBulkPredicate bulkPredicate ->
      [ (docid, doc)
      | let docids = DocIdSet.toList ds
            docinf = map (SI.lookupDocId searchIndex) docids
            keep   = bulkPredicate [ k | (k,_,_) <- docinf ]
      , (docid, doc, True) <- zip3 docids docinf keep ]
-- | Per-document score; 'cacheDocScoringInfo' fills it with the result of
-- 'Query.relevanceScore' for the preceding terms.
type DocImportance = Float
-- | Per-document map from term id to a 'Float' score; 'cacheDocScoringInfo'
-- fills it with the result of @relevanceBreakdown@.
type TermRelevanceBreakdown = Map.Map TermId Float
-- | Build a per-document scoring cache.
--
-- For every @(docid, docinfo)@ pair in @allTermDocInfo@ the resulting map
-- holds, under @docid@:
--
-- * the document's 'Query.relevanceScore' with respect to @precedingTerms@;
--
-- * the 'relevanceBreakdown' of the document restricted to
--   @completionTerms@.
--
-- Note the argument order: the completion terms come /before/ the document
-- list, the preceding terms /after/ it.
cacheDocScoringInfo :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
                       SearchEngine doc key field feature ->
                       [TermId] ->
                       [(DocId, (key, DocTermIds field, DocFeatVals feature))] ->
                       [TermId] ->
                       Map.Map DocId (DocImportance, TermRelevanceBreakdown)
cacheDocScoringInfo se completionTerms allTermDocInfo precedingTerms =
    Map.fromList (map scoreDoc allTermDocInfo)
  where
    -- Score one document; the document key is not needed for scoring.
    scoreDoc (docid, (_dockey, doctermids, docfeatvals)) =
        (docid, (importance, breakdown))
      where
        importance = Query.relevanceScore se precedingTerms
                                          doctermids docfeatvals
        breakdown  = relevanceBreakdown se doctermids docfeatvals
                                        completionTerms
-- | Work out how much each of the given terms contributes to a document's
-- total term score.
--
-- Every term of the document is scored with 'BM25F.scoreTermsBulk'. The
-- result maps each term of @ts@ that occurs in the document to its score
-- divided by the sum of the scores of /all/ the document's terms. Terms of
-- @ts@ that the document does not contain are absent from the map.
--
-- NOTE(review): when the document has terms but their scores sum to zero the
-- division produces NaN or an infinity; confirm whether callers can hit that.
relevanceBreakdown :: forall doc key field feature.
                      (Ix field, Bounded field, Ix feature, Bounded feature) =>
                      SearchEngine doc key field feature ->
                      DocTermIds field -> DocFeatVals feature ->
                      [TermId] -> TermRelevanceBreakdown
relevanceBreakdown SearchEngine{ bm25Context } doctermids docfeatvals ts =
    Map.fromList
      [ (t, share)
      | (i, score) <- zip [0 ..] (Vec.toList termScores)
      , let t = term i
      , fromEnum t `IntSet.member` wanted
      , let !share = score / total
      ]
  where
    -- Scoring function with everything that depends only on the document
    -- (not on the individual term) already applied.
    scoreOf :: TermId -> (field -> Int) -> Float
    scoreOf = BM25F.scoreTermsBulk bm25Context
                (Query.indexDocToBM25Doc doctermids docfeatvals)

    -- Index-addressable view of the document's terms and per-field counts.
    term  :: Int -> TermId
    count :: Int -> field -> Int
    (!numTerms, term, count) = DocTermIds.denseTable doctermids

    -- One score per term of the document, in dense-table order.
    termScores :: Vec.Vector Float
    !termScores = Vec.generate numTerms (\i -> scoreOf (term i) (count i))

    -- Normalisation constant: the score total over all terms of the document.
    !total  = Vec.sum termScores

    -- The terms of interest, as an IntSet for fast membership tests.
    !wanted = IntSet.fromList (map fromEnum ts)
-- | Score each candidate completion term.
--
-- A candidate's score is the sum, over the documents in its doc set, of the
-- document's importance multiplied by the candidate term's relevance within
-- that document. Both numbers are read from the scoring cache
-- @allTermDocInfo@.
--
-- Documents missing from the cache are skipped. Likewise, a document whose
-- relevance breakdown has no entry for the candidate term contributes
-- nothing, rather than aborting with a partial-lookup error.
--
-- The result has one entry per input candidate, in the same order.
scoreAutosuggestQueryCompletions :: [(TermId, DocIdSet)]
                                 -> Map.Map DocId (Float, Map.Map TermId Float)
                                 -> [(TermId, Float)]
scoreAutosuggestQueryCompletions completionTermAndDocSets allTermDocInfo =
    [ (t, candidateScore t ds_t)
    | (t, ds_t) <- completionTermAndDocSets ]
  where
    candidateScore :: TermId -> DocIdSet -> Float
    candidateScore t ds_t =
      sum [ docImportance * termRelevance
          | docid <- DocIdSet.toList ds_t
          , Just (docImportance, termRelevances)
              <- [Map.lookup docid allTermDocInfo]
            -- Total lookup: a missing term drops the document instead of
            -- crashing the whole query.
          , Just termRelevance <- [Map.lookup t termRelevances]
          ]
-- | Score the documents matched by the candidate completions.
--
-- Each completion contributes @docImportance * score_t@ to every document in
-- its doc set that is present in the scoring cache @allTermDocInfo@; the
-- contributions for a document are summed across completions.
--
-- The completions and their scores are paired /positionally/ (the 'TermId's
-- on both sides are ignored), so @scoredCandidates@ must be in the same order
-- as @completionTermAndDocSets@.
scoreAutosuggestQueryResults :: [(TermId, DocIdSet)] ->
                                Map.Map DocId (Float, Map.Map TermId Float) ->
                                [(TermId, Float)] ->
                                [(DocId, Float)]
scoreAutosuggestQueryResults completionTermAndDocSets allTermDocInfo
                             scoredCandidates =
    Map.toList (Map.fromListWith (+) contributions)
  where
    -- All (document, partial score) pairs, completion by completion.
    contributions =
      concat (zipWith perCompletion completionTermAndDocSets scoredCandidates)

    -- Contributions of a single completion to each cached document in its set.
    perCompletion (_, ds_t) (_, score_t) =
      [ (docid, docImportance * score_t)
      | docid <- DocIdSet.toList ds_t
      , Just (docImportance, _) <- [Map.lookup docid allTermDocInfo]
      ]