module Network.URL
( URL(..), URLType(..), Host(..), Protocol(..)
, secure, secure_prot
, exportURL, importURL, exportHost
, add_param
, decString, encString
, ok_host, ok_url, ok_param, ok_path
, exportParams, importParams
) where
import Data.Char (isAlpha, isAscii, isDigit)
import Data.List (intersperse)
import Data.Word (Word8)
import Numeric (readHex, showHex)
import qualified Codec.Binary.UTF8.String as UTF8
data Host = Host { protocol :: Protocol
, host :: String
, port :: Maybe Integer
} deriving (Eq,Ord,Show)
data Protocol = HTTP Bool | FTP Bool | RawProt String deriving (Eq,Ord,Show)
secure_prot :: Protocol -> Bool
secure_prot (HTTP s) = s
secure_prot (FTP s) = s
secure_prot (RawProt _) = False
secure :: Host -> Bool
secure = secure_prot . protocol
data URLType = Absolute Host
| HostRelative
| PathRelative
deriving (Eq, Ord, Show)
data URL = URL
{ url_type :: URLType
, url_path :: String
, url_params :: [(String,String)]
} deriving (Eq,Ord,Show)
add_param :: URL -> (String,String) -> URL
add_param url x = url { url_params = x : url_params url }
importURL :: String -> Maybe URL
importURL cs0 =
do (ho,cs5) <- front cs0
(pa,cs6) <- the_path cs5
as <- the_args cs6
return URL { url_type = ho, url_path = pa, url_params = as }
where
front ('/':cs) = return (HostRelative,cs)
front cs =
case the_prot cs of
Just (pr,cs1) ->
do let (ho,cs2) = the_host cs1
(po,cs3) <- the_port cs2
cs4 <- case cs3 of
[] -> return []
'/':cs5 -> return cs5
_ -> Nothing
return (Absolute Host { protocol = pr
, host = ho
, port = po
}, cs4)
_ -> return (PathRelative,cs)
the_prot :: String -> Maybe (Protocol, String)
the_prot urlStr = case break (':' ==) urlStr of
(as@(_:_), ':' : '/' : '/' : bs) -> Just (prot, bs)
where prot = case as of
"https" -> HTTP True
"http" -> HTTP False
"ftps" -> FTP True
"ftp" -> FTP False
_ -> RawProt as
_ -> Nothing
the_host = span ok_host
the_port (':':cs) = case span isDigit cs of
([],_) -> Nothing
(xs,ds) -> Just (Just (read xs),ds)
the_port cs5 = return (Nothing, cs5)
the_path cs = do let (as,bs) = break end_path cs
s <- decString False as
return (s,bs)
where end_path c = c == '#' || c == '?'
the_args ('?' : cs) = importParams cs
the_args _ = return []
importParams :: String -> Maybe [(String,String)]
importParams [] = return []
importParams ds = mapM a_param (breaks ('&'==) ds)
where
a_param cs = do let (as,bs) = break ('=' ==) cs
k <- decString True as
v <- case bs of
"" -> return ""
_:xs -> decString True xs
return (k,v)
exportHost :: Host -> String
exportHost absol = the_prot ++ "://" ++ host absol ++ the_port
where the_prot = exportProt (protocol absol)
the_port = maybe "" (\x -> ':' : show x) (port absol)
exportProt :: Protocol -> String
exportProt prot = case prot of
HTTP True -> "https"
HTTP False -> "http"
FTP True -> "ftps"
FTP False -> "ftp"
RawProt s -> s
exportURL :: URL -> String
exportURL url = absol ++ the_path ++ the_params
where
absol = case url_type url of
Absolute hst -> exportHost hst ++ "/"
HostRelative -> "/"
PathRelative -> ""
the_path = encString False ok_path (url_path url)
the_params = case url_params url of
[] -> ""
xs -> '?' : exportParams xs
exportParams :: [(String,String)] -> String
exportParams ps = concat (intersperse "&" $ map a_param ps)
where
a_param (k,mv) = encString True ok_param k ++
case mv of
"" -> ""
v -> '=' : encString True ok_param v
encString :: Bool -> (Char -> Bool) -> String -> String
encString pl p ys = foldr enc1 [] ys
where enc1 ' ' xs | pl = '+' : xs
enc1 x xs = if p x then x : xs else encChar x ++ xs
encChar :: Char -> String
encChar c = concatMap encByte (UTF8.encode [c])
encByte :: Word8 -> String
encByte b = '%' : case showHex b "" of
d@[_] -> '0' : d
d -> d
decString :: Bool -> String -> Maybe String
decString b = fmap UTF8.decode . decStrBytes b
decStrBytes :: Bool -> String -> Maybe [Word8]
decStrBytes _ [] = Just []
decStrBytes p ('%' : cs) = do (n,cs1) <- decByte cs
fmap (n:) (decStrBytes p cs1)
decStrBytes p (c : cs) = let b = if p && c == '+'
then 32
else fromIntegral (fromEnum c)
in (b :) `fmap` decStrBytes p cs
decByte :: String -> Maybe (Word8,String)
decByte (x : y : cs) = case readHex [x,y] of
[(n,"")] -> Just (n,cs)
_ -> Nothing
decByte _ = Nothing
ok_host :: Char -> Bool
ok_host c = isDigit c || isAlphaASCII c || c == '.' || c == '-'
ok_param :: Char -> Bool
ok_param c = ok_host c || c `elem` "~;:@$_!*'(),"
ok_path :: Char -> Bool
ok_path c = ok_param c || c `elem` "/=&"
ok_url :: Char -> Bool
ok_url c = isDigit c || isAlphaASCII c || c `elem` ".-;:@$_!*'(),/=&?~+"
isAlphaASCII :: Char -> Bool
isAlphaASCII x = isAscii x && isAlpha x
breaks :: (a -> Bool) -> [a] -> [[a]]
breaks p xs = case break p xs of
(as,[]) -> [as]
(as,_:bs) -> as : breaks p bs