{-# LANGUAGE PatternGuards #-}

-- | URL Utility Functions

module Lambdabot.Util.Browser
    ( urlPageTitle
    , browseLB
    ) where

import Codec.Binary.UTF8.String
import Control.Applicative
import Control.Monad.Trans
import Lambdabot.Config
import Lambdabot.Config.Reference
import Lambdabot.Monad
import Lambdabot.Util (limitStr)
import Network.Browser
import Network.HTTP
import Network.URI
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Data.Char (toLower)
import Data.List (isPrefixOf)

-- | Run a browser action with some standardized settings
browseLB :: MonadLB m => BrowserAction conn a -> m a
browseLB :: BrowserAction conn a -> m a
browseLB BrowserAction conn a
act = LB a -> m a
forall (m :: * -> *) a. MonadLB m => LB a -> m a
lb (LB a -> m a) -> LB a -> m a
forall a b. (a -> b) -> a -> b
$ do
    Proxy
proxy' <- Config Proxy -> LB Proxy
forall (m :: * -> *) a. MonadConfig m => Config a -> m a
getConfig Config Proxy
proxy
    IO a -> LB a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO a -> LB a)
-> (BrowserAction conn a -> IO a) -> BrowserAction conn a -> LB a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BrowserAction conn a -> IO a
forall conn a. BrowserAction conn a -> IO a
browse (BrowserAction conn a -> LB a) -> BrowserAction conn a -> LB a
forall a b. (a -> b) -> a -> b
$ do
        (String -> IO ()) -> BrowserAction conn ()
forall t. (String -> IO ()) -> BrowserAction t ()
setOutHandler (IO () -> String -> IO ()
forall a b. a -> b -> a
const (() -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()))
        (String -> IO ()) -> BrowserAction conn ()
forall t. (String -> IO ()) -> BrowserAction t ()
setErrHandler (IO () -> String -> IO ()
forall a b. a -> b -> a
const (() -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()))

        Bool -> BrowserAction conn ()
forall t. Bool -> BrowserAction t ()
setAllowRedirects Bool
True
        Maybe Int -> BrowserAction conn ()
forall t. Maybe Int -> BrowserAction t ()
setMaxRedirects (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
5)
        Proxy -> BrowserAction conn ()
forall t. Proxy -> BrowserAction t ()
setProxy Proxy
proxy'
        BrowserAction conn a
act

-- | Limit the maximum title length to prevent jokers from spamming
-- the channel with specially crafted HTML pages.
maxTitleLength :: Int
maxTitleLength :: Int
maxTitleLength = Int
80

-- | Fetches a page title suitable for display.  Ideally, other
-- plugins should make use of this function if the result is to be
-- displayed in an IRC channel because it ensures that a consistent
-- look is used (and also lets the URL plugin effectively ignore
-- contextual URLs that might be generated by another instance of
-- lambdabot; the URL plugin matches on 'urlTitlePrompt').
urlPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
urlPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
urlPageTitle = (Maybe String -> Maybe String)
-> BrowserAction (HandleStream String) (Maybe String)
-> BrowserAction (HandleStream String) (Maybe String)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((String -> String) -> Maybe String -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> String -> String
limitStr Int
maxTitleLength)) (BrowserAction (HandleStream String) (Maybe String)
 -> BrowserAction (HandleStream String) (Maybe String))
-> (String -> BrowserAction (HandleStream String) (Maybe String))
-> String
-> BrowserAction (HandleStream String) (Maybe String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle

-- | Fetches a page title for the specified URL.  This function should
-- only be used by other plugins if and only if the result is not to
-- be displayed in an IRC channel.  Instead, use 'urlPageTitle'.
rawPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle String
url = BrowserAction (HandleStream String) (Maybe String)
-> BrowserAction (HandleStream String) (Maybe String)
forall (m :: * -> *) a. Monad m => m (Maybe a) -> m (Maybe a)
checkHTTPS (BrowserAction (HandleStream String) (Maybe String)
 -> BrowserAction (HandleStream String) (Maybe String))
-> BrowserAction (HandleStream String) (Maybe String)
-> BrowserAction (HandleStream String) (Maybe String)
forall a b. (a -> b) -> a -> b
$ do
    (URI
_, Response String
result) <- Request String
-> BrowserAction (HandleStream String) (URI, Response String)
forall ty.
HStream ty =>
Request ty -> BrowserAction (HandleStream ty) (URI, Response ty)
request (String -> Request String
getRequest ((Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'#') String
url))
    case Response String -> ResponseCode
forall a. Response a -> ResponseCode
rspCode Response String
result of
        (Int
2,Int
0,Int
0)   -> do
            case (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
';') (String -> String) -> Maybe String -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HeaderName -> [Header] -> Maybe String
lookupHeader HeaderName
HdrContentType (Response String -> [Header]
forall a. Response a -> [Header]
rspHeaders Response String
result) of
                Just String
"text/html"       -> Maybe String -> BrowserAction (HandleStream String) (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe String
 -> BrowserAction (HandleStream String) (Maybe String))
-> Maybe String
-> BrowserAction (HandleStream String) (Maybe String)
forall a b. (a -> b) -> a -> b
$ String -> Maybe String
extractTitle (Response String -> String
forall a. Response a -> a
rspBody Response String
result)
                Just String
"application/pdf" -> String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle (String -> String
googleCacheURL String
url)
                Maybe String
_                      -> Maybe String -> BrowserAction (HandleStream String) (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe String
 -> BrowserAction (HandleStream String) (Maybe String))
-> Maybe String
-> BrowserAction (HandleStream String) (Maybe String)
forall a b. (a -> b) -> a -> b
$ Maybe String
forall a. Maybe a
Nothing
        ResponseCode
_         -> Maybe String -> BrowserAction (HandleStream String) (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing
    
    where googleCacheURL :: String -> String
googleCacheURL = (String
gURLString -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
escapeURIString (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
False)
          gURL :: String
gURL = String
"http://www.google.com/search?hl=en&q=cache:"
          checkHTTPS :: m (Maybe a) -> m (Maybe a)
checkHTTPS m (Maybe a)
act | String
"https:" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
url = Maybe a -> m (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
                         | Bool
otherwise = m (Maybe a)
act

-- | Given a server response (list of Strings), return the text in
-- between the title HTML element, only if it is text/html content.
-- Now supports all(?) HTML entities thanks to TagSoup.
extractTitle :: String -> Maybe String
extractTitle :: String -> Maybe String
extractTitle = [Tag String] -> Maybe String
content ([Tag String] -> Maybe String)
-> (String -> [Tag String]) -> String -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [Tag String]
tags (String -> [Tag String])
-> (String -> String) -> String -> [Tag String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
decodeString where
    tags :: String -> [Tag String]
tags = [Tag String] -> [Tag String]
closing ([Tag String] -> [Tag String])
-> (String -> [Tag String]) -> String -> [Tag String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag String] -> [Tag String]
opening ([Tag String] -> [Tag String])
-> (String -> [Tag String]) -> String -> [Tag String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag String] -> [Tag String]
forall str. StringLike str => [Tag str] -> [Tag str]
canonicalizeTags ([Tag String] -> [Tag String])
-> (String -> [Tag String]) -> String -> [Tag String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [Tag String]
forall str. StringLike str => str -> [Tag str]
parseTags
    opening :: [Tag String] -> [Tag String]
opening = (Tag String -> Bool) -> [Tag String] -> [Tag String]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Bool -> Bool
not (Bool -> Bool) -> (Tag String -> Bool) -> Tag String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ([Attribute String] -> Bool) -> Tag String -> Bool
forall str.
Eq str =>
str -> ([Attribute str] -> Bool) -> Tag str -> Bool
tagOpenLit String
"title" (Bool -> [Attribute String] -> Bool
forall a b. a -> b -> a
const Bool
True))
    closing :: [Tag String] -> [Tag String]
closing = (Tag String -> Bool) -> [Tag String] -> [Tag String]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Tag String -> Bool) -> Tag String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Tag String -> Bool
forall str. Eq str => str -> Tag str -> Bool
tagCloseLit String
"title")

    content :: [Tag String] -> Maybe String
content = String -> Maybe String
maybeText (String -> Maybe String)
-> ([Tag String] -> String) -> [Tag String] -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
format (String -> String)
-> ([Tag String] -> String) -> [Tag String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag String] -> String
forall str. StringLike str => [Tag str] -> str
innerText
    format :: String -> String
format = [String] -> String
unwords ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words
    maybeText :: String -> Maybe String
maybeText [] = Maybe String
forall a. Maybe a
Nothing
    maybeText String
t  = String -> Maybe String
forall a. a -> Maybe a
Just (String -> String
encodeString String
t)