module Network.URI
(
URI(..)
, URIAuth(..)
, nullURI
, parseURI
, parseURIReference
, parseRelativeReference
, parseAbsoluteURI
, isURI
, isURIReference
, isRelativeReference
, isAbsoluteURI
, isIPv6address
, isIPv4address
, relativeTo
, nonStrictRelativeTo
, relativeFrom
, uriToString
, isReserved, isUnreserved
, isAllowedInURI, isUnescapedInURI
, escapeURIChar
, escapeURIString
, unEscapeString
, normalizeCase
, normalizeEscape
, normalizePathSegments
, parseabsoluteURI
, escapeString
, reserved, unreserved
, scheme, authority, path, query, fragment
) where
import Text.ParserCombinators.Parsec
( GenParser, ParseError
, parse, (<|>), (<?>), try
, option, many, many1, count, notFollowedBy
, char, satisfy, oneOf, string, eof
, unexpected
)
import Control.Monad (MonadPlus(..))
import Data.Char (ord, chr, isHexDigit, toLower, toUpper, digitToInt)
import Debug.Trace (trace)
import Numeric (showIntAtBase)
#ifdef __GLASGOW_HASKELL__
import Data.Typeable (Typeable)
# if MIN_VERSION_base(4,0,0)
import Data.Data (Data)
# else
import Data.Generics (Data)
# endif
#else
import Data.Typeable (Typeable(..), TyCon, mkTyCon, mkTyConApp)
#endif
data URI = URI
{ uriScheme :: String
, uriAuthority :: Maybe URIAuth
, uriPath :: String
, uriQuery :: String
, uriFragment :: String
} deriving (Eq
#ifdef __GLASGOW_HASKELL__
, Typeable, Data
#endif
)
#ifndef __GLASGOW_HASKELL__
uriTc :: TyCon
uriTc = mkTyCon "URI"
instance Typeable URI where
typeOf _ = mkTyConApp uriTc []
#endif
data URIAuth = URIAuth
{ uriUserInfo :: String
, uriRegName :: String
, uriPort :: String
} deriving (Eq
#ifdef __GLASGOW_HASKELL__
, Typeable, Data
#endif
)
#ifndef __GLASGOW_HASKELL__
uriAuthTc :: TyCon
uriAuthTc = mkTyCon "URIAuth"
instance Typeable URIAuth where
typeOf _ = mkTyConApp uriAuthTc []
#endif
nullURI :: URI
nullURI = URI
{ uriScheme = ""
, uriAuthority = Nothing
, uriPath = ""
, uriQuery = ""
, uriFragment = ""
}
instance Show URI where
showsPrec _ = uriToString defaultUserInfoMap
defaultUserInfoMap :: String -> String
defaultUserInfoMap uinf = user++newpass
where
(user,pass) = break (==':') uinf
newpass = if null pass || (pass == "@")
|| (pass == ":@")
then pass
else ":...@"
testDefaultUserInfoMap :: [Bool]
testDefaultUserInfoMap =
[ defaultUserInfoMap "" == ""
, defaultUserInfoMap "@" == "@"
, defaultUserInfoMap "user@" == "user@"
, defaultUserInfoMap "user:@" == "user:@"
, defaultUserInfoMap "user:anonymous@" == "user:...@"
, defaultUserInfoMap "user:pass@" == "user:...@"
, defaultUserInfoMap "user:pass" == "user:...@"
, defaultUserInfoMap "user:anonymous" == "user:...@"
]
parseURI :: String -> Maybe URI
parseURI = parseURIAny uri
parseURIReference :: String -> Maybe URI
parseURIReference = parseURIAny uriReference
parseRelativeReference :: String -> Maybe URI
parseRelativeReference = parseURIAny relativeRef
parseAbsoluteURI :: String -> Maybe URI
parseAbsoluteURI = parseURIAny absoluteURI
isURI :: String -> Bool
isURI = isValidParse uri
isURIReference :: String -> Bool
isURIReference = isValidParse uriReference
isRelativeReference :: String -> Bool
isRelativeReference = isValidParse relativeRef
isAbsoluteURI :: String -> Bool
isAbsoluteURI = isValidParse absoluteURI
isIPv6address :: String -> Bool
isIPv6address = isValidParse ipv6address
isIPv4address :: String -> Bool
isIPv4address = isValidParse ipv4address
testURIReference :: String -> String
testURIReference uristr = show (parseAll uriReference "" uristr)
parseURIAny :: URIParser URI -> String -> Maybe URI
parseURIAny parser uristr = case parseAll parser "" uristr of
Left _ -> Nothing
Right u -> Just u
isValidParse :: URIParser a -> String -> Bool
isValidParse parser uristr = case parseAll parser "" uristr of
Left _ -> False
Right _ -> True
parseAll :: URIParser a -> String -> String -> Either ParseError a
parseAll parser filename uristr = parse newparser filename uristr
where
newparser =
do { res <- parser
; eof
; return res
}
type URIParser a = GenParser Char () a
escaped :: URIParser String
escaped =
do { char '%'
; h1 <- hexDigitChar
; h2 <- hexDigitChar
; return $ ['%',h1,h2]
}
isReserved :: Char -> Bool
isReserved c = isGenDelims c || isSubDelims c
isGenDelims :: Char -> Bool
isGenDelims c = c `elem` ":/?#[]@"
isSubDelims :: Char -> Bool
isSubDelims c = c `elem` "!$&'()*+,;="
genDelims :: URIParser String
genDelims = do { c <- satisfy isGenDelims ; return [c] }
subDelims :: URIParser String
subDelims = do { c <- satisfy isSubDelims ; return [c] }
isUnreserved :: Char -> Bool
isUnreserved c = isAlphaNumChar c || (c `elem` "-_.~")
unreservedChar :: URIParser String
unreservedChar = do { c <- satisfy isUnreserved ; return [c] }
uri :: URIParser URI
uri =
do { us <- try uscheme
; (ua,up) <- hierPart
; uq <- option "" ( do { char '?' ; uquery } )
; uf <- option "" ( do { char '#' ; ufragment } )
; return $ URI
{ uriScheme = us
, uriAuthority = ua
, uriPath = up
, uriQuery = uq
, uriFragment = uf
}
}
hierPart :: URIParser ((Maybe URIAuth),String)
hierPart =
do { try (string "//")
; ua <- uauthority
; up <- pathAbEmpty
; return (ua,up)
}
<|> do { up <- pathAbs
; return (Nothing,up)
}
<|> do { up <- pathRootLess
; return (Nothing,up)
}
<|> do { return (Nothing,"")
}
uscheme :: URIParser String
uscheme =
do { s <- oneThenMany alphaChar (satisfy isSchemeChar)
; char ':'
; return $ s++":"
}
uauthority :: URIParser (Maybe URIAuth)
uauthority =
do { uu <- option "" (try userinfo)
; uh <- host
; up <- option "" port
; return $ Just $ URIAuth
{ uriUserInfo = uu
, uriRegName = uh
, uriPort = up
}
}
userinfo :: URIParser String
userinfo =
do { uu <- many (uchar ";:&=+$,")
; char '@'
; return (concat uu ++"@")
}
host :: URIParser String
host = ipLiteral <|> try ipv4address <|> regName
ipLiteral :: URIParser String
ipLiteral =
do { char '['
; ua <- ( ipv6address <|> ipvFuture )
; char ']'
; return $ "[" ++ ua ++ "]"
}
<?> "IP address literal"
ipvFuture :: URIParser String
ipvFuture =
do { char 'v'
; h <- hexDigitChar
; char '.'
; a <- many1 (satisfy isIpvFutureChar)
; return $ 'c':h:'.':a
}
isIpvFutureChar :: Char -> Bool
isIpvFutureChar c = isUnreserved c || isSubDelims c || (c==';')
ipv6address :: URIParser String
ipv6address =
try ( do
{ a2 <- count 6 h4c
; a3 <- ls32
; return $ concat a2 ++ a3
} )
<|> try ( do
{ string "::"
; a2 <- count 5 h4c
; a3 <- ls32
; return $ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 0
; string "::"
; a2 <- count 4 h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 1
; string "::"
; a2 <- count 3 h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 2
; string "::"
; a2 <- count 2 h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ concat a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 3
; string "::"
; a2 <- h4c
; a3 <- ls32
; return $ a1 ++ "::" ++ a2 ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 4
; string "::"
; a3 <- ls32
; return $ a1 ++ "::" ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 5
; string "::"
; a3 <- h4
; return $ a1 ++ "::" ++ a3
} )
<|> try ( do
{ a1 <- opt_n_h4c_h4 6
; string "::"
; return $ a1 ++ "::"
} )
<?> "IPv6 address"
opt_n_h4c_h4 :: Int -> URIParser String
opt_n_h4c_h4 n = option "" $
do { a1 <- countMinMax 0 n h4c
; a2 <- h4
; return $ concat a1 ++ a2
}
ls32 :: URIParser String
ls32 = try ( do
{ a1 <- h4c
; a2 <- h4
; return (a1++a2)
} )
<|> ipv4address
h4c :: URIParser String
h4c = try $
do { a1 <- h4
; char ':'
; notFollowedBy (char ':')
; return $ a1 ++ ":"
}
h4 :: URIParser String
h4 = countMinMax 1 4 hexDigitChar
ipv4address :: URIParser String
ipv4address =
do { a1 <- decOctet ; char '.'
; a2 <- decOctet ; char '.'
; a3 <- decOctet ; char '.'
; a4 <- decOctet
; notFollowedBy regName
; return $ a1++"."++a2++"."++a3++"."++a4
}
<?> "IPv4 Address"
decOctet :: URIParser String
decOctet =
do { a1 <- countMinMax 1 3 digitChar
; if (read a1 :: Integer) > 255 then
fail "Decimal octet value too large"
else
return a1
}
regName :: URIParser String
regName =
do { ss <- countMinMax 0 255 ( unreservedChar <|> escaped <|> subDelims )
; return $ concat ss
}
<?> "Registered name"
port :: URIParser String
port =
do { char ':'
; p <- many digitChar
; return (':':p)
}
pathAbEmpty :: URIParser String
pathAbEmpty =
do { ss <- many slashSegment
; return $ concat ss
}
pathAbs :: URIParser String
pathAbs =
do { char '/'
; ss <- option "" pathRootLess
; return $ '/':ss
}
pathNoScheme :: URIParser String
pathNoScheme =
do { s1 <- segmentNzc
; ss <- many slashSegment
; return $ concat (s1:ss)
}
pathRootLess :: URIParser String
pathRootLess =
do { s1 <- segmentNz
; ss <- many slashSegment
; return $ concat (s1:ss)
}
slashSegment :: URIParser String
slashSegment =
do { char '/'
; s <- segment
; return ('/':s)
}
segment :: URIParser String
segment =
do { ps <- many pchar
; return $ concat ps
}
segmentNz :: URIParser String
segmentNz =
do { ps <- many1 pchar
; return $ concat ps
}
segmentNzc :: URIParser String
segmentNzc =
do { ps <- many1 (uchar "@")
; return $ concat ps
}
pchar :: URIParser String
pchar = uchar ":@"
uchar :: String -> URIParser String
uchar extras =
unreservedChar
<|> escaped
<|> subDelims
<|> do { c <- oneOf extras ; return [c] }
uquery :: URIParser String
uquery =
do { ss <- many $ uchar (":@"++"/?")
; return $ '?':concat ss
}
ufragment :: URIParser String
ufragment =
do { ss <- many $ uchar (":@"++"/?")
; return $ '#':concat ss
}
uriReference :: URIParser URI
uriReference = uri <|> relativeRef
relativeRef :: URIParser URI
relativeRef =
do { notMatching uscheme
; (ua,up) <- relativePart
; uq <- option "" ( do { char '?' ; uquery } )
; uf <- option "" ( do { char '#' ; ufragment } )
; return $ URI
{ uriScheme = ""
, uriAuthority = ua
, uriPath = up
, uriQuery = uq
, uriFragment = uf
}
}
relativePart :: URIParser ((Maybe URIAuth),String)
relativePart =
do { try (string "//")
; ua <- uauthority
; up <- pathAbEmpty
; return (ua,up)
}
<|> do { up <- pathAbs
; return (Nothing,up)
}
<|> do { up <- pathNoScheme
; return (Nothing,up)
}
<|> do { return (Nothing,"")
}
absoluteURI :: URIParser URI
absoluteURI =
do { us <- uscheme
; (ua,up) <- hierPart
; uq <- option "" ( do { char '?' ; uquery } )
; return $ URI
{ uriScheme = us
, uriAuthority = ua
, uriPath = up
, uriQuery = uq
, uriFragment = ""
}
}
isAlphaChar :: Char -> Bool
isAlphaChar c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
isDigitChar :: Char -> Bool
isDigitChar c = (c >= '0' && c <= '9')
isAlphaNumChar :: Char -> Bool
isAlphaNumChar c = isAlphaChar c || isDigitChar c
isHexDigitChar :: Char -> Bool
isHexDigitChar c = isHexDigit c
isSchemeChar :: Char -> Bool
isSchemeChar c = (isAlphaNumChar c) || (c `elem` "+-.")
alphaChar :: URIParser Char
alphaChar = satisfy isAlphaChar
digitChar :: URIParser Char
digitChar = satisfy isDigitChar
alphaNumChar :: URIParser Char
alphaNumChar = satisfy isAlphaNumChar
hexDigitChar :: URIParser Char
hexDigitChar = satisfy isHexDigitChar
oneThenMany :: GenParser t s a -> GenParser t s a -> GenParser t s [a]
oneThenMany p1 pr =
do { a1 <- p1
; ar <- many pr
; return (a1:ar)
}
countMinMax :: Int -> Int -> GenParser t s a -> GenParser t s [a]
countMinMax m n p | m > 0 =
do { a1 <- p
; ar <- countMinMax (m1) (n1) p
; return (a1:ar)
}
countMinMax _ n _ | n <= 0 = return []
countMinMax _ n p = option [] $
do { a1 <- p
; ar <- countMinMax 0 (n1) p
; return (a1:ar)
}
notMatching :: Show a => GenParser tok st a -> GenParser tok st ()
notMatching p = do { a <- try p ; unexpected (show a) } <|> return ()
uriToString :: (String->String) -> URI -> ShowS
uriToString userinfomap URI { uriScheme=myscheme
, uriAuthority=myauthority
, uriPath=mypath
, uriQuery=myquery
, uriFragment=myfragment
} =
(myscheme++) . (uriAuthToString userinfomap myauthority)
. (mypath++) . (myquery++) . (myfragment++)
uriAuthToString :: (String->String) -> (Maybe URIAuth) -> ShowS
uriAuthToString _ Nothing = id
uriAuthToString userinfomap
(Just URIAuth { uriUserInfo = myuinfo
, uriRegName = myregname
, uriPort = myport
} ) =
("//"++) . (if null myuinfo then id else ((userinfomap myuinfo)++))
. (myregname++)
. (myport++)
isAllowedInURI :: Char -> Bool
isAllowedInURI c = isReserved c || isUnreserved c || c == '%'
isUnescapedInURI :: Char -> Bool
isUnescapedInURI c = isReserved c || isUnreserved c
escapeURIChar :: (Char->Bool) -> Char -> String
escapeURIChar p c
| p c = [c]
| otherwise = '%' : myShowHex (ord c) ""
where
myShowHex :: Int -> ShowS
myShowHex n r = case showIntAtBase 16 (toChrHex) n r of
[] -> "00"
[x] -> ['0',x]
cs -> cs
toChrHex d
| d < 10 = chr (ord '0' + fromIntegral d)
| otherwise = chr (ord 'A' + fromIntegral (d 10))
escapeURIString
:: (Char->Bool)
-> String
-> String
escapeURIString p s = concatMap (escapeURIChar p) s
unEscapeString :: String -> String
unEscapeString [] = ""
unEscapeString ('%':x1:x2:s) | isHexDigit x1 && isHexDigit x2 =
chr (digitToInt x1 * 16 + digitToInt x2) : unEscapeString s
unEscapeString (c:s) = c : unEscapeString s
nonStrictRelativeTo :: URI -> URI -> Maybe URI
nonStrictRelativeTo ref base = relativeTo ref' base
where
ref' = if uriScheme ref == uriScheme base
then ref { uriScheme="" }
else ref
isDefined :: ( MonadPlus m, Eq (m a) ) => m a -> Bool
isDefined a = a /= mzero
relativeTo :: URI -> URI -> Maybe URI
relativeTo ref base
| isDefined ( uriScheme ref ) =
just_segments ref
| isDefined ( uriAuthority ref ) =
just_segments ref { uriScheme = uriScheme base }
| isDefined ( uriPath ref ) =
if (head (uriPath ref) == '/') then
just_segments ref
{ uriScheme = uriScheme base
, uriAuthority = uriAuthority base
}
else
just_segments ref
{ uriScheme = uriScheme base
, uriAuthority = uriAuthority base
, uriPath = mergePaths base ref
}
| isDefined ( uriQuery ref ) =
just_segments ref
{ uriScheme = uriScheme base
, uriAuthority = uriAuthority base
, uriPath = uriPath base
}
| otherwise =
just_segments ref
{ uriScheme = uriScheme base
, uriAuthority = uriAuthority base
, uriPath = uriPath base
, uriQuery = uriQuery base
}
where
just_segments u =
Just $ u { uriPath = removeDotSegments (uriPath u) }
mergePaths b r
| isDefined (uriAuthority b) && null pb = '/':pr
| otherwise = dropLast pb ++ pr
where
pb = uriPath b
pr = uriPath r
dropLast = fst . splitLast
removeDotSegments :: String -> String
removeDotSegments ('/':ps) = '/':elimDots ps []
removeDotSegments ps = elimDots ps []
elimDots :: String -> [String] -> String
elimDots [] [] = ""
elimDots [] rs = concat (reverse rs)
elimDots ( '.':'/':ps) rs = elimDots ps rs
elimDots ( '.':[] ) rs = elimDots [] rs
elimDots ( '.':'.':'/':ps) rs = elimDots ps (drop 1 rs)
elimDots ( '.':'.':[] ) rs = elimDots [] (drop 1 rs)
elimDots ps rs = elimDots ps1 (r:rs)
where
(r,ps1) = nextSegment ps
nextSegment :: String -> (String,String)
nextSegment ps =
case break (=='/') ps of
(r,'/':ps1) -> (r++"/",ps1)
(r,_) -> (r,[])
splitLast :: String -> (String,String)
splitLast p = (reverse revpath,reverse revname)
where
(revname,revpath) = break (=='/') $ reverse p
relativeFrom :: URI -> URI -> URI
relativeFrom uabs base
| diff uriScheme uabs base = uabs
| diff uriAuthority uabs base = uabs { uriScheme = "" }
| diff uriPath uabs base = uabs
{ uriScheme = ""
, uriAuthority = Nothing
, uriPath = relPathFrom (removeBodyDotSegments $ uriPath uabs)
(removeBodyDotSegments $ uriPath base)
}
| diff uriQuery uabs base = uabs
{ uriScheme = ""
, uriAuthority = Nothing
, uriPath = ""
}
| otherwise = uabs
{ uriScheme = ""
, uriAuthority = Nothing
, uriPath = ""
, uriQuery = ""
}
where
diff :: Eq b => (a -> b) -> a -> a -> Bool
diff sel u1 u2 = sel u1 /= sel u2
removeBodyDotSegments p = removeDotSegments p1 ++ p2
where
(p1,p2) = splitLast p
relPathFrom :: String -> String -> String
relPathFrom [] _ = "/"
relPathFrom pabs [] = pabs
relPathFrom pabs base =
if sa1 == sb1
then if (sa1 == "/")
then if (sa2 == sb2)
then relPathFrom1 ra2 rb2
else pabs
else relPathFrom1 ra1 rb1
else pabs
where
(sa1,ra1) = nextSegment pabs
(sb1,rb1) = nextSegment base
(sa2,ra2) = nextSegment ra1
(sb2,rb2) = nextSegment rb1
relPathFrom1 :: String -> String -> String
relPathFrom1 pabs base = relName
where
(sa,na) = splitLast pabs
(sb,nb) = splitLast base
rp = relSegsFrom sa sb
relName = if null rp then
if (na == nb) then ""
else if protect na then "./"++na
else na
else
rp++na
protect s = null s || ':' `elem` s
relSegsFrom :: String -> String -> String
relSegsFrom [] [] = ""
relSegsFrom sabs base =
if sa1 == sb1
then relSegsFrom ra1 rb1
else difSegsFrom sabs base
where
(sa1,ra1) = nextSegment sabs
(sb1,rb1) = nextSegment base
difSegsFrom :: String -> String -> String
difSegsFrom sabs "" = sabs
difSegsFrom sabs base = difSegsFrom ("../"++sabs) (snd $ nextSegment base)
normalizeCase :: String -> String
normalizeCase uristr = ncScheme uristr
where
ncScheme (':':cs) = ':':ncEscape cs
ncScheme (c:cs) | isSchemeChar c = toLower c:ncScheme cs
ncScheme _ = ncEscape uristr
ncEscape ('%':h1:h2:cs) = '%':toUpper h1:toUpper h2:ncEscape cs
ncEscape (c:cs) = c:ncEscape cs
ncEscape [] = []
normalizeEscape :: String -> String
normalizeEscape ('%':h1:h2:cs)
| isHexDigit h1 && isHexDigit h2 && isUnreserved escval =
escval:normalizeEscape cs
where
escval = chr (digitToInt h1*16+digitToInt h2)
normalizeEscape (c:cs) = c:normalizeEscape cs
normalizeEscape [] = []
normalizePathSegments :: String -> String
normalizePathSegments uristr = normstr juri
where
juri = parseURI uristr
normstr Nothing = uristr
normstr (Just u) = show (normuri u)
normuri u = u { uriPath = removeDotSegments (uriPath u) }
traceShow :: Show a => String -> a -> a
traceShow msg x = trace (msg ++ show x) x
traceVal :: Show a => String -> a -> b -> b
traceVal msg x y = trace (msg ++ show x) y
parseabsoluteURI :: String -> Maybe URI
parseabsoluteURI = parseAbsoluteURI
escapeString :: String -> (Char->Bool) -> String
escapeString = flip escapeURIString
reserved :: Char -> Bool
reserved = isReserved
unreserved :: Char -> Bool
unreserved = isUnreserved
scheme :: URI -> String
scheme = orNull init . uriScheme
authority :: URI -> String
authority = dropss . ($"") . uriAuthToString id . uriAuthority
where
dropss ('/':'/':s) = s
dropss s = s
path :: URI -> String
path = uriPath
query :: URI -> String
query = orNull tail . uriQuery
fragment :: URI -> String
fragment = orNull tail . uriFragment
orNull :: ([a]->[a]) -> [a] -> [a]
orNull _ [] = []
orNull f as = f as