{-# Language OverloadedStrings #-}
{-# Language ScopedTypeVariables #-}
module Text.HTML.Scalpel.Search
(
Query,
buildDuckduckgoURL,
buildGoogleURL,
url2Text,
SearchResult (..),
duckduckgoScraper,
googleScraper
)
where
import Text.HTML.Scalpel.Core
import qualified Data.Maybe as M
import qualified Data.Text as T
import qualified Text.StringLike as SL
import qualified Text.URI as U
buildDuckduckgoURL :: Query -> U.URI
buildDuckduckgoURL :: Query -> URI
buildDuckduckgoURL Query
q =
URI
baseURI { uriRegName :: Maybe String
U.uriRegName = forall a. a -> Maybe a
Just String
"html.duckduckgo.com",
uriPath :: String
U.uriPath = String
"html",
uriQuery :: Maybe String
U.uriQuery = forall a. a -> Maybe a
Just String
query }
where
query :: String
query = [(String, String)] -> String
U.pairsToQuery [(String
"q", Query -> String
T.unpack Query
q)]
buildGoogleURL :: Query -> U.URI
buildGoogleURL :: Query -> URI
buildGoogleURL Query
q =
URI
baseURI { uriRegName :: Maybe String
U.uriRegName = forall a. a -> Maybe a
Just String
"www.google.com",
uriPath :: String
U.uriPath = String
"search",
uriQuery :: Maybe String
U.uriQuery = forall a. a -> Maybe a
Just String
query }
where
query :: String
query = [(String, String)] -> String
U.pairsToQuery [(String
"q", Query -> String
T.unpack Query
q),
(String
"no_redirect", String
"1"),
(String
"skip_disambig", String
"1"),
(String
"s", String
"0")]
url2Text :: U.URI -> T.Text
url2Text :: URI -> Query
url2Text URI
u = String -> Query
T.pack forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show URI
u
googleScraper :: (Show str, SL.StringLike str, Monad m) =>
ScraperT str m SearchResult
googleScraper :: forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
ScraperT str m SearchResult
googleScraper = forall str (m :: * -> *).
(StringLike str, Monad m) =>
(TagName, String)
-> (TagName, String)
-> ScraperT str m str
-> (URI -> Maybe URI)
-> ScraperT str m SearchResult
searchScraper (TagName
"div", String
"ZINbbc") (TagName
"h3", String
"zBAuLc")
(forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"href" Selector
"a") URI -> Maybe URI
extractGoogleUrl
duckduckgoScraper :: (SL.StringLike str, Monad m) =>
ScraperT str m SearchResult
duckduckgoScraper :: forall str (m :: * -> *).
(StringLike str, Monad m) =>
ScraperT str m SearchResult
duckduckgoScraper =
forall str (m :: * -> *).
(StringLike str, Monad m) =>
(TagName, String)
-> (TagName, String)
-> ScraperT str m str
-> (URI -> Maybe URI)
-> ScraperT str m SearchResult
searchScraper (TagName
"div", String
"web-result") (TagName
"h2", String
"result__title")
ScraperT str m str
us forall a. a -> Maybe a
Just
where
us :: ScraperT str m str
us = forall a b. (StringLike a, StringLike b) => a -> b
SL.castString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall str (m :: * -> *).
(StringLike str, Monad m) =>
TagName -> String -> ScraperT str m Query
tagClassText TagName
"h2" String
"result__title"
type Query = T.Text
type Title = T.Text
newtype SearchResult = SearchResult { SearchResult -> [(Query, URI)]
unwrapSR :: [(Title, U.URI)] }
deriving (Int -> SearchResult -> ShowS
[SearchResult] -> ShowS
SearchResult -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SearchResult] -> ShowS
$cshowList :: [SearchResult] -> ShowS
show :: SearchResult -> String
$cshow :: SearchResult -> String
showsPrec :: Int -> SearchResult -> ShowS
$cshowsPrec :: Int -> SearchResult -> ShowS
Show, SearchResult -> SearchResult -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SearchResult -> SearchResult -> Bool
$c/= :: SearchResult -> SearchResult -> Bool
== :: SearchResult -> SearchResult -> Bool
$c== :: SearchResult -> SearchResult -> Bool
Eq)
baseURI :: U.URI
baseURI :: URI
baseURI = U.URI {
uriScheme :: Maybe String
U.uriScheme = forall a. a -> Maybe a
Just String
"https",
uriUserInfo :: Maybe String
U.uriUserInfo = forall a. Maybe a
Nothing,
uriRegName :: Maybe String
U.uriRegName = forall a. Maybe a
Nothing,
uriPort :: Maybe Integer
U.uriPort = forall a. Maybe a
Nothing,
uriPath :: String
U.uriPath = String
"",
uriQuery :: Maybe String
U.uriQuery = forall a. Maybe a
Nothing,
uriFragment :: Maybe String
U.uriFragment = forall a. Maybe a
Nothing }
searchScraper :: (SL.StringLike str, Monad m) =>
(TagName, String) -> (TagName, String) ->
ScraperT str m str -> (U.URI -> Maybe U.URI) ->
ScraperT str m SearchResult
searchScraper :: forall str (m :: * -> *).
(StringLike str, Monad m) =>
(TagName, String)
-> (TagName, String)
-> ScraperT str m str
-> (URI -> Maybe URI)
-> ScraperT str m SearchResult
searchScraper (TagName
et, String
ec) (TagName
tt, String
tc) ScraperT str m str
ts URI -> Maybe URI
cf = do
[(Query, Maybe URI)]
es <- forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots (TagName
et TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
ec]) ScraperT str m (Query, Maybe URI)
entry
let es' :: [(Query, URI)]
es' = forall a b. (a -> Maybe b) -> [a] -> [b]
M.mapMaybe forall {m :: * -> *} {a} {a} {b}.
(Monad m, StringLike a, StringLike a) =>
(a, m b) -> m (a, b)
filFun [(Query, Maybe URI)]
es
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Query, URI)] -> SearchResult
SearchResult [(Query, URI)]
es')
where
entry :: ScraperT str m (Query, Maybe URI)
entry = (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall str (m :: * -> *).
(StringLike str, Monad m) =>
TagName -> String -> ScraperT str m Query
tagClassText TagName
tt String
tc
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall str (m :: * -> *).
(StringLike str, Monad m) =>
ScraperT str m str
-> (URI -> Maybe URI) -> ScraperT str m (Maybe URI)
url ScraperT str m str
ts URI -> Maybe URI
cf
filFun :: (a, m b) -> m (a, b)
filFun (a
a, m b
mb) = m b
mb forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \b
b -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. (StringLike a, StringLike b) => a -> b
SL.castString a
a, b
b)
tagClassText :: (SL.StringLike str, Monad m) =>
TagName -> String -> ScraperT str m T.Text
tagClassText :: forall str (m :: * -> *).
(StringLike str, Monad m) =>
TagName -> String -> ScraperT str m Query
tagClassText TagName
tt String
tc = forall a b. (StringLike a, StringLike b) => a -> b
SL.castString forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => str -> String
normString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text (TagName
tt TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
tc])
url :: (SL.StringLike str, Monad m) =>
ScraperT str m str -> (U.URI -> Maybe U.URI) ->
ScraperT str m (Maybe U.URI)
url :: forall str (m :: * -> *).
(StringLike str, Monad m) =>
ScraperT str m str
-> (URI -> Maybe URI) -> ScraperT str m (Maybe URI)
url ScraperT str m str
ts URI -> Maybe URI
cf = do
String
surl <- forall str. StringLike str => str -> String
normString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ScraperT str m str
ts
let r :: Maybe URI
r = String -> Maybe URI
U.parseURI String
surl forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= URI -> Maybe URI
cf
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe URI
r
extractGoogleUrl :: U.URI -> Maybe U.URI
URI
u
| String
up forall a. Eq a => a -> a -> Bool
/= String
"/url" Bool -> Bool -> Bool
||
forall a. Maybe a -> Bool
M.isNothing Maybe String
q = forall a. Maybe a
Nothing
| Just String
u2 <- Maybe String
q = String -> Maybe URI
U.parseURI String
u2
| Bool
otherwise = forall a. Maybe a
Nothing
where
up :: String
up = URI -> String
U.uriPath URI
u
q :: Maybe String
q = (String -> [(String, String)]
U.queryToPairs forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> URI -> Maybe String
U.uriQuery URI
u) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
"q"
normString :: SL.StringLike str => str -> String
normString :: forall str. StringLike str => str -> String
normString str
cs = [String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (StringLike a, StringLike b) => a -> b
SL.castString forall a b. (a -> b) -> a -> b
$ str
cs