{-# LANGUAGE RecordWildCards, BangPatterns, ScopedTypeVariables #-}
module Data.SearchEngine.BM25F (
score,
Context(..),
FeatureFunction(..),
Doc(..),
scoreTermsBulk,
Explanation(..),
explain,
) where
import Data.Ix
import Data.Array.Unboxed
data Context term field feature = Context {
forall term field feature. Context term field feature -> Int
numDocsTotal :: !Int,
forall term field feature.
Context term field feature -> field -> Float
avgFieldLength :: field -> Float,
forall term field feature.
Context term field feature -> term -> Int
numDocsWithTerm :: term -> Int,
forall term field feature. Context term field feature -> Float
paramK1 :: !Float,
forall term field feature.
Context term field feature -> field -> Float
paramB :: field -> Float,
forall term field feature.
Context term field feature -> field -> Float
fieldWeight :: field -> Float,
forall term field feature.
Context term field feature -> feature -> Float
featureWeight :: feature -> Float,
forall term field feature.
Context term field feature -> feature -> FeatureFunction
featureFunction :: feature -> FeatureFunction
}
data Doc term field feature = Doc {
forall term field feature. Doc term field feature -> field -> Int
docFieldLength :: field -> Int,
forall term field feature.
Doc term field feature -> field -> term -> Int
docFieldTermFrequency :: field -> term -> Int,
forall term field feature.
Doc term field feature -> feature -> Float
docFeatureValue :: feature -> Float
}
score :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature ->
Doc term field feature -> [term] -> Float
score :: forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> [term] -> Float
score Context term field feature
ctx Doc term field feature
doc [term]
terms =
[Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((term -> Float) -> [term] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc) [term]
terms)
Float -> Float -> Float
forall a. Num a => a -> a -> a
+ [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((feature -> Float) -> [feature] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (Context term field feature
-> Doc term field feature -> feature -> Float
forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc) [feature]
features)
where
features :: [feature]
features = (feature, feature) -> [feature]
forall a. Ix a => (a, a) -> [a]
range (feature
forall a. Bounded a => a
minBound, feature
forall a. Bounded a => a
maxBound)
weightedTermScore :: (Ix field, Bounded field) =>
Context term field feature ->
Doc term field feature -> term -> Float
weightedTermScore :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc term
t =
Context term field feature -> term -> Float
forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
tf'
Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
k1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
tf')
where
tf' :: Float
tf' = Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedDocTermFrequency Context term field feature
ctx Doc term field feature
doc term
t
k1 :: Float
k1 = Context term field feature -> Float
forall term field feature. Context term field feature -> Float
paramK1 Context term field feature
ctx
weightIDF :: Context term field feature -> term -> Float
weightIDF :: forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t =
Float -> Float
forall a. Floating a => a -> a
log ((Float
n Float -> Float -> Float
forall a. Num a => a -> a -> a
- Float
n_t Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
0.5) Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
n_t Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
0.5))
where
n :: Float
n = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Context term field feature -> Int
forall term field feature. Context term field feature -> Int
numDocsTotal Context term field feature
ctx)
n_t :: Float
n_t = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Context term field feature -> term -> Int
forall term field feature.
Context term field feature -> term -> Int
numDocsWithTerm Context term field feature
ctx term
t)
weightedDocTermFrequency :: (Ix field, Bounded field) =>
Context term field feature ->
Doc term field feature -> term -> Float
weightedDocTermFrequency :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedDocTermFrequency Context term field feature
ctx Doc term field feature
doc term
t =
[Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ Float
w_f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
tf_f Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
_B_f
| field
field <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
, let w_f :: Float
w_f = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
field
tf_f :: Float
tf_f = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Doc term field feature -> field -> term -> Int
forall term field feature.
Doc term field feature -> field -> term -> Int
docFieldTermFrequency Doc term field feature
doc field
field term
t)
_B_f :: Float
_B_f = Context term field feature
-> Doc term field feature -> field -> Float
forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field
, Bool -> Bool
not (Float -> Bool
forall a. RealFloat a => a -> Bool
isNaN Float
_B_f)
]
lengthNorm :: Context term field feature ->
Doc term field feature -> field -> Float
lengthNorm :: forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field =
(Float
1Float -> Float -> Float
forall a. Num a => a -> a -> a
-Float
b_f) Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
b_f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
sl_f Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
avgsl_f
where
b_f :: Float
b_f = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
paramB Context term field feature
ctx field
field
sl_f :: Float
sl_f = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Doc term field feature -> field -> Int
forall term field feature. Doc term field feature -> field -> Int
docFieldLength Doc term field feature
doc field
field)
avgsl_f :: Float
avgsl_f = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
avgFieldLength Context term field feature
ctx field
field
weightedNonTermScore :: (Ix feature, Bounded feature) =>
Context term field feature ->
Doc term field feature -> feature -> Float
weightedNonTermScore :: forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc feature
feature =
Float
w_f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float -> Float
_V_f Float
f_f
where
w_f :: Float
w_f = Context term field feature -> feature -> Float
forall term field feature.
Context term field feature -> feature -> Float
featureWeight Context term field feature
ctx feature
feature
_V_f :: Float -> Float
_V_f = FeatureFunction -> Float -> Float
applyFeatureFunction (Context term field feature -> feature -> FeatureFunction
forall term field feature.
Context term field feature -> feature -> FeatureFunction
featureFunction Context term field feature
ctx feature
feature)
f_f :: Float
f_f = Doc term field feature -> feature -> Float
forall term field feature.
Doc term field feature -> feature -> Float
docFeatureValue Doc term field feature
doc feature
feature
data FeatureFunction
= LogarithmicFunction Float
| RationalFunction Float
| SigmoidFunction Float Float
applyFeatureFunction :: FeatureFunction -> (Float -> Float)
applyFeatureFunction :: FeatureFunction -> Float -> Float
applyFeatureFunction (LogarithmicFunction Float
p1) = \Float
fi -> Float -> Float
forall a. Floating a => a -> a
log (Float
p1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
fi)
applyFeatureFunction (RationalFunction Float
p1) = \Float
fi -> Float
fi Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
p1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
fi)
applyFeatureFunction (SigmoidFunction Float
p1 Float
p2) = \Float
fi -> Float
1 Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
p1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float -> Float
forall a. Floating a => a -> a
exp (-Float
fi Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
p2))
scoreTermsBulk :: forall field term feature. (Ix field, Bounded field) =>
Context term field feature ->
Doc term field feature ->
(term -> (field -> Int) -> Float)
scoreTermsBulk :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> (field -> Int) -> Float
scoreTermsBulk Context term field feature
ctx Doc term field feature
doc =
\term
t field -> Int
tFreq ->
let !tf' :: Float
tf' = [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ UArray field Float
wUArray field Float -> field -> Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
tf_f Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ UArray field Float
_BUArray field Float -> field -> Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f
| field
f <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
, let tf_f :: Float
tf_f = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (field -> Int
tFreq field
f)
_B_f :: Float
_B_f = UArray field Float
_BUArray field Float -> field -> Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f
, Bool -> Bool
not (Float -> Bool
forall a. RealFloat a => a -> Bool
isNaN Float
_B_f)
]
in Context term field feature -> term -> Float
forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
tf'
Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
k1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
tf')
where
!k1 :: Float
k1 = Context term field feature -> Float
forall term field feature. Context term field feature -> Float
paramK1 Context term field feature
ctx
w, _B :: UArray field Float
!w :: UArray field Float
w = (field, field) -> [(field, Float)] -> UArray field Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
[ (field
field, Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
field)
| field
field <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound) ]
!_B :: UArray field Float
_B = (field, field) -> [(field, Float)] -> UArray field Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
[ (field
field, Context term field feature
-> Doc term field feature -> field -> Float
forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field)
| field
field <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound) ]
data Explanation field feature term = Explanation {
forall field feature term. Explanation field feature term -> Float
overallScore :: Float,
forall field feature term.
Explanation field feature term -> [(term, Float)]
termScores :: [(term, Float)],
forall field feature term.
Explanation field feature term -> [(feature, Float)]
nonTermScores :: [(feature, Float)],
forall field feature term.
Explanation field feature term -> [(term, [(field, Float)])]
termFieldScores :: [(term, [(field, Float)])]
}
deriving Int -> Explanation field feature term -> ShowS
[Explanation field feature term] -> ShowS
Explanation field feature term -> String
(Int -> Explanation field feature term -> ShowS)
-> (Explanation field feature term -> String)
-> ([Explanation field feature term] -> ShowS)
-> Show (Explanation field feature term)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall field feature term.
(Show term, Show feature, Show field) =>
Int -> Explanation field feature term -> ShowS
forall field feature term.
(Show term, Show feature, Show field) =>
[Explanation field feature term] -> ShowS
forall field feature term.
(Show term, Show feature, Show field) =>
Explanation field feature term -> String
$cshowsPrec :: forall field feature term.
(Show term, Show feature, Show field) =>
Int -> Explanation field feature term -> ShowS
showsPrec :: Int -> Explanation field feature term -> ShowS
$cshow :: forall field feature term.
(Show term, Show feature, Show field) =>
Explanation field feature term -> String
show :: Explanation field feature term -> String
$cshowList :: forall field feature term.
(Show term, Show feature, Show field) =>
[Explanation field feature term] -> ShowS
showList :: [Explanation field feature term] -> ShowS
Show
instance Functor (Explanation field feature) where
fmap :: forall a b.
(a -> b)
-> Explanation field feature a -> Explanation field feature b
fmap a -> b
f e :: Explanation field feature a
e@Explanation{Float
[(feature, Float)]
[(a, Float)]
[(a, [(field, Float)])]
termScores :: forall field feature term.
Explanation field feature term -> [(term, Float)]
overallScore :: forall field feature term. Explanation field feature term -> Float
termFieldScores :: forall field feature term.
Explanation field feature term -> [(term, [(field, Float)])]
nonTermScores :: forall field feature term.
Explanation field feature term -> [(feature, Float)]
overallScore :: Float
termScores :: [(a, Float)]
nonTermScores :: [(feature, Float)]
termFieldScores :: [(a, [(field, Float)])]
..} =
Explanation field feature a
e {
termScores = [ (f t, s) | (t, s) <- termScores ],
termFieldScores = [ (f t, fs) | (t, fs) <- termFieldScores ]
}
explain :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature ->
Doc term field feature -> [term] -> Explanation field feature term
explain :: forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature
-> [term]
-> Explanation field feature term
explain Context term field feature
ctx Doc term field feature
doc [term]
ts =
Explanation {Float
[(feature, Float)]
[(term, Float)]
[(term, [(field, Float)])]
termScores :: [(term, Float)]
overallScore :: Float
termFieldScores :: [(term, [(field, Float)])]
nonTermScores :: [(feature, Float)]
overallScore :: Float
termScores :: [(term, Float)]
nonTermScores :: [(feature, Float)]
termFieldScores :: [(term, [(field, Float)])]
..}
where
overallScore :: Float
overallScore = [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((term, Float) -> Float) -> [(term, Float)] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (term, Float) -> Float
forall a b. (a, b) -> b
snd [(term, Float)]
termScores)
Float -> Float -> Float
forall a. Num a => a -> a -> a
+ [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((feature, Float) -> Float) -> [(feature, Float)] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (feature, Float) -> Float
forall a b. (a, b) -> b
snd [(feature, Float)]
nonTermScores)
termScores :: [(term, Float)]
termScores = [ (term
t, Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc term
t) | term
t <- [term]
ts ]
nonTermScores :: [(feature, Float)]
nonTermScores = [ (feature
feature, Context term field feature
-> Doc term field feature -> feature -> Float
forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc feature
feature)
| feature
feature <- (feature, feature) -> [feature]
forall a. Ix a => (a, a) -> [a]
range (feature
forall a. Bounded a => a
minBound, feature
forall a. Bounded a => a
maxBound) ]
termFieldScores :: [(term, [(field, Float)])]
termFieldScores =
[ (term
t, [(field, Float)]
fieldScores)
| term
t <- [term]
ts
, let fieldScores :: [(field, Float)]
fieldScores =
[ (field
f, Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx' Doc term field feature
doc term
t)
| field
f <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
, let ctx' :: Context term field feature
ctx' = Context term field feature
ctx { fieldWeight = fieldWeightOnly f }
]
]
fieldWeightOnly :: a -> field -> Float
fieldWeightOnly a
f field
f' | a -> field -> Bool
forall {a} {a}.
(Ix a, Ix a, Bounded a, Bounded a) =>
a -> a -> Bool
sameField a
f field
f' = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
f'
| Bool
otherwise = Float
0
sameField :: a -> a -> Bool
sameField a
f a
f' = (a, a) -> a -> Int
forall a. Ix a => (a, a) -> a -> Int
index (a
forall a. Bounded a => a
minBound, a
forall a. Bounded a => a
maxBound) a
f
Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== (a, a) -> a -> Int
forall a. Ix a => (a, a) -> a -> Int
index (a
forall a. Bounded a => a
minBound, a
forall a. Bounded a => a
maxBound) a
f'