Copyright | (c) Soostone Inc., 2014-2015 Michael Xavier, 2014-2015 |
---|---|
License | BSD3 |
Maintainer | michael.xavier@soostone.com |
Stability | experimental |
Safe Haskell | None |
Language | Haskell2010 |
URI.ByteString aims to be an RFC3986 compliant URI parser that uses efficient ByteStrings for parsing and representing the data. This module provides a URI datatype as well as a parser and serializer.
Note that this library is an early release and may have issues. It is currently being used in production and no issues have been encountered, however. Please report any issues encountered to the issue tracker.
This module also provides analogs to Lens over the various types in this library. These are written in a generic way to avoid a dependency on any particular lens library. You should be able to use these with a number of packages including lens and lens-family-core.
- newtype Scheme = Scheme {}
- newtype Host = Host {
- hostBS :: ByteString
- newtype Port = Port {
- portNumber :: Int
- data Authority = Authority {}
- data UserInfo = UserInfo {}
- newtype Query = Query {
- queryPairs :: [(ByteString, ByteString)]
- data URIRef a where
- URI :: {
- uriScheme :: Scheme
- uriAuthority :: Maybe Authority
- uriPath :: ByteString
- uriQuery :: Query
- uriFragment :: Maybe ByteString
- RelativeRef :: { } -> URIRef Relative
- URI :: {
- data Absolute
- data Relative
- data SchemaError
- data URIParseError
- data URIParserOptions = URIParserOptions {
- upoValidQueryChar :: Word8 -> Bool
- strictURIParserOptions :: URIParserOptions
- laxURIParserOptions :: URIParserOptions
- data URINormalizationOptions = URINormalizationOptions {}
- noNormalization :: URINormalizationOptions
- rfc3986Normalization :: URINormalizationOptions
- httpNormalization :: URINormalizationOptions
- aggressiveNormalization :: URINormalizationOptions
- httpDefaultPorts :: Map Scheme Port
- toAbsolute :: Scheme -> URIRef a -> URIRef Absolute
- parseURI :: URIParserOptions -> ByteString -> Either URIParseError (URIRef Absolute)
- parseRelativeRef :: URIParserOptions -> ByteString -> Either URIParseError (URIRef Relative)
- uriParser :: URIParserOptions -> Parser (URIRef Absolute)
- relativeRefParser :: URIParserOptions -> Parser (URIRef Relative)
- serializeURIRef :: URIRef a -> Builder
- serializeURIRef' :: URIRef a -> ByteString
- normalizeURIRef :: URINormalizationOptions -> URIRef a -> Builder
- normalizeURIRef' :: URINormalizationOptions -> URIRef a -> ByteString
- urlDecode :: Bool -> ByteString -> ByteString
- urlDecodeQuery :: ByteString -> ByteString
- urlEncodeQuery :: ByteString -> Builder
- urlEncodePath :: ByteString -> Builder
- urlEncode :: [Word8] -> ByteString -> Builder
- schemeBSL :: Lens' Scheme ByteString
- hostBSL :: Lens' Host ByteString
- portNumberL :: Lens' Port Int
- authorityUserInfoL :: Lens' Authority (Maybe UserInfo)
- authorityHostL :: Lens' Authority Host
- authorityPortL :: Lens' Authority (Maybe Port)
- uiUsernameL :: Lens' UserInfo ByteString
- uiPasswordL :: Lens' UserInfo ByteString
- queryPairsL :: Lens' Query [(ByteString, ByteString)]
- uriSchemeL :: Lens' (URIRef Absolute) Scheme
- authorityL :: Lens' (URIRef a) (Maybe Authority)
- pathL :: Lens' (URIRef a) ByteString
- queryL :: Lens' (URIRef a) Query
- fragmentL :: Lens' (URIRef a) (Maybe ByteString)
- upoValidQueryCharL :: Lens' URIParserOptions (Word8 -> Bool)
- type URI = URIRef Absolute
- type RelativeRef = URIRef Relative
- serializeURI :: URIRef Absolute -> Builder
- serializeURI' :: URIRef Absolute -> ByteString
- serializeRelativeRef :: URIRef Relative -> Builder
- serializeRelativeRef' :: URIRef Relative -> ByteString
- uriAuthorityL :: Lens' URI (Maybe Authority)
- uriPathL :: Lens' URI ByteString
- uriQueryL :: Lens' URI Query
- uriFragmentL :: Lens' URI (Maybe ByteString)
- rrAuthorityL :: Lens' RelativeRef (Maybe Authority)
- rrPathL :: Lens' RelativeRef ByteString
- rrQueryL :: Lens' RelativeRef Query
- rrFragmentL :: Lens' RelativeRef (Maybe ByteString)
URI-related types
Required first component to referring to a specification for the remainder of the URI's components, e.g. "http" or "https"
Host | |
|
While some libraries have chosen to limit this to a Word16, the spec only specifies that the string be comprised of digits.
Port | |
|
Query | |
|
Note: URI fragment does not include the #
URI :: Scheme -> Maybe Authority -> ByteString -> Query -> Maybe ByteString -> URIRef Absolute | |
| |
RelativeRef :: Maybe Authority -> ByteString -> Query -> Maybe ByteString -> URIRef Relative | |
|
data SchemaError Source
URI Parser Types
NonAlphaLeading | Scheme must start with an alphabet character |
InvalidChars | Subsequent characters in the schema were invalid |
MissingColon | Schemas must be followed by a colon |
data URIParseError Source
MalformedScheme SchemaError | |
MalformedUserInfo | |
MalformedQuery | |
MalformedFragment | |
MalformedHost | |
MalformedPort | |
MalformedPath | |
OtherError String | Catchall for unpredictable errors |
data URIParserOptions Source
Options for the parser. You will probably want to use either "strictURIParserOptions" or "laxURIParserOptions"
strictURIParserOptions :: URIParserOptions Source
Strict URI Parser config. Follows RFC3986 as-specified. Use this if you can be certain that your URIs are properly encoded or if you want parsing to fail if they deviate from the spec at all.
laxURIParserOptions :: URIParserOptions Source
Lax URI Parser config. Use this if you you want to handle common deviations from the spec gracefully.
- Allows non-encoded [ and ] in query string
data URINormalizationOptions Source
URINormalizationOptions | |
|
noNormalization :: URINormalizationOptions Source
All normalization options disabled
rfc3986Normalization :: URINormalizationOptions Source
Only normalizations deemed appropriate for all protocols by RFC3986 enabled, namely:
- Downcase Scheme
- Downcase Host
- Remove Dot Segments
httpNormalization :: URINormalizationOptions Source
The same as rfc3986Normalization
but with additional enabled
features if you're working with HTTP URIs:
- Drop Default Port (with
httpDefaultPorts
) - Drop Extra Slashes
aggressiveNormalization :: URINormalizationOptions Source
All options enabled
httpDefaultPorts :: Map Scheme Port Source
The set of known default ports to schemes. Currently only
contains http/80 and https/443. Feel free to extend it if needed
with unoDefaultPorts
.
Operations
toAbsolute :: Scheme -> URIRef a -> URIRef Absolute Source
toAbsolute scheme ref
converts ref
to an absolute URI.
If ref
is already absolute, then it is unchanged.
Parsing
parseURI :: URIParserOptions -> ByteString -> Either URIParseError (URIRef Absolute) Source
Parse a strict ByteString into a URI or an error.
Example:
>>>
parseURI strictURIParserOptions "http://www.example.org/foo?bar=baz#quux"
Right (URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar","baz")]}, uriFragment = Just "quux"})
>>>
parseURI strictURIParserOptions "$$$$://badurl.example.org"
Left (MalformedScheme NonAlphaLeading)
There are some urls that you'll encounter which defy the spec, such as those with square brackets in the query string. If you must be able to parse those, you can use "laxURIParserOptions" or specify your own
>>>
parseURI strictURIParserOptions "http://www.example.org/foo?bar[]=baz"
Left MalformedQuery
>>>
parseURI laxURIParserOptions "http://www.example.org/foo?bar[]=baz"
Right (URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar[]","baz")]}, uriFragment = Nothing})
>>>
let myLaxOptions = URIParserOptions { upoValidQueryChar = liftA2 (||) (upoValidQueryChar strictURIParserOptions) (inClass "[]")}
>>>
parseURI myLaxOptions "http://www.example.org/foo?bar[]=baz"
Right (URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar[]","baz")]}, uriFragment = Nothing})
parseRelativeRef :: URIParserOptions -> ByteString -> Either URIParseError (URIRef Relative) Source
Like parseURI
, but do not parse scheme.
uriParser :: URIParserOptions -> Parser (URIRef Absolute) Source
Underlying attoparsec parser. Useful for composing with your own parsers.
relativeRefParser :: URIParserOptions -> Parser (URIRef Relative) Source
Underlying attoparsec parser. Useful for composing with your own parsers.
Serializing
serializeURIRef :: URIRef a -> Builder Source
URI Serializer
Serialize a URI reference into a Builder
.
Example of serializing + converting to a lazy Data.ByteString.Lazy.ByteString:
>>>
BB.toLazyByteString $ serializeURIRef $ URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar","baz")]}, uriFragment = Just "quux"}
"http://www.example.org/foo?bar=baz#quux"
serializeURIRef' :: URIRef a -> ByteString Source
Like serializeURIRef
, with conversion into a strict ByteString
.
Normalized Serialization
normalizeURIRef :: URINormalizationOptions -> URIRef a -> Builder Source
Similar to serializeURIRef
but performs configurable degrees of
URI normalization. If your goal is the fastest serialization speed
possible, serializeURIRef
will be fine. If you intend on
comparing URIs (say for caching purposes), you'll want to use this.
Low level utility functions
:: Bool | Whether to decode |
-> ByteString | |
-> ByteString |
This function was extracted from the http-types
package. The
license can be found in licenseshttp-typesLICENSE
urlDecodeQuery :: ByteString -> ByteString Source
ByteString Utilities
Decoding specifically for the query string, which decodes + as
space. Shorthand for urlDecode True
urlEncodeQuery :: ByteString -> Builder Source
Encode a ByteString for use in the query section of a URL
urlEncodePath :: ByteString -> Builder Source
Encode a ByteString for use in the path section of a URL
urlEncode :: [Word8] -> ByteString -> Builder Source
Percent-encoding for URLs. Specify a list of additional unreserved characters to permit.
Lenses
Lenses over Scheme
schemeBSL :: Lens' Scheme ByteString Source
Lenses over Host
hostBSL :: Lens' Host ByteString Source
Lenses over Port
portNumberL :: Lens' Port Int Source
Lenses over Authority
authorityUserInfoL :: Lens' Authority (Maybe UserInfo) Source
authorityHostL :: Lens' Authority Host Source
authorityPortL :: Lens' Authority (Maybe Port) Source
Lenses over UserInfo
uiUsernameL :: Lens' UserInfo ByteString Source
uiPasswordL :: Lens' UserInfo ByteString Source
Lenses over Query
queryPairsL :: Lens' Query [(ByteString, ByteString)] Source
Lenses over URIRef
uriSchemeL :: Lens' (URIRef Absolute) Scheme Source
authorityL :: Lens' (URIRef a) (Maybe Authority) Source
pathL :: Lens' (URIRef a) ByteString Source
fragmentL :: Lens' (URIRef a) (Maybe ByteString) Source
Lenses over URIParserOptions
upoValidQueryCharL :: Lens' URIParserOptions (Word8 -> Bool) Source
Deprecated
type RelativeRef = URIRef Relative Source
serializeURI :: URIRef Absolute -> Builder Source
Deprecated: Use serializeURIRef
instead
Serialize a URI into a Builder.
serializeURI' :: URIRef Absolute -> ByteString Source
Deprecated: Use serializeURIRef'
instead
Like serializeURI
, with conversion into a strict ByteString
.
serializeRelativeRef :: URIRef Relative -> Builder Source
Deprecated: Use serializeURIRef
instead
Like serializeURI
, but do not render scheme.
serializeRelativeRef' :: URIRef Relative -> ByteString Source
Deprecated: Use serializeURIRef'
instead
Like serializeRelativeRef
, with conversion into a strict ByteString
.
uriAuthorityL :: Lens' URI (Maybe Authority) Source
Deprecated: Use authorityL
instead
uriPathL :: Lens' URI ByteString Source
Deprecated: Use pathL
instead
uriFragmentL :: Lens' URI (Maybe ByteString) Source
Deprecated: Use fragmentL
instead
rrAuthorityL :: Lens' RelativeRef (Maybe Authority) Source
Deprecated: Use authorityL
instead
rrPathL :: Lens' RelativeRef ByteString Source
Deprecated: Use pathL
instead
rrQueryL :: Lens' RelativeRef Query Source
Deprecated: Use queryL
instead
rrFragmentL :: Lens' RelativeRef (Maybe ByteString) Source
Deprecated: Use fragmentL
instead