Copyright | (c) 2004 Graham Klyne |
---|---|
License | BSD-style (see end of this file) |
Maintainer | Graham Klyne <gk@ninebynine.org> |
Stability | provisional |
Portability | portable |
Safe Haskell | Safe |
Language | Haskell98 |
This module defines functions for handling URIs. It presents substantially the same interface as the older GHC Network.URI module, but is implemented using Parsec rather than a Regex library that is not available with Hugs. The internal representation of URI has been changed so that URI strings are more completely preserved when round-tripping to a URI value and back.
In addition, four methods are provided for parsing different
kinds of URI string (as noted in RFC3986):
parseURI
,
parseURIReference
,
parseRelativeReference
and
parseAbsoluteURI
.
Further, four methods are provided for classifying different
kinds of URI string (as noted in RFC3986):
isURI
,
isURIReference
,
isRelativeReference
and
isAbsoluteURI
.
The long-standing official reference for URI handling was RFC2396 [1], as updated by RFC 2732 [2], but this was replaced by a new specification, RFC3986 [3] in January 2005. This latter specification has been used as the primary reference for constructing the URI parser implemented here, and it is intended that there is a direct relationship between the syntax definition in that document and this parser implementation.
RFC 1808 [4] contains a number of test cases for relative URI handling.
Dan Connolly's Python module uripath.py
[5] also contains useful details
and test cases.
Some of the code has been copied from the previous GHC implementation, but the parser is replaced with one that performs more complete syntax checking of the URI itself, according to RFC3986 [3].
References
Synopsis
- data URI = URI {}
- data URIAuth = URIAuth {
- uriUserInfo :: String
- uriRegName :: String
- uriPort :: String
- nullURI :: URI
- nullURIAuth :: URIAuth
- rectify :: URI -> URI
- rectifyAuth :: URIAuth -> URIAuth
- parseURI :: String -> Maybe URI
- parseURIReference :: String -> Maybe URI
- parseRelativeReference :: String -> Maybe URI
- parseAbsoluteURI :: String -> Maybe URI
- isURI :: String -> Bool
- isURIReference :: String -> Bool
- isRelativeReference :: String -> Bool
- isAbsoluteURI :: String -> Bool
- isIPv6address :: String -> Bool
- isIPv4address :: String -> Bool
- uriIsAbsolute :: URI -> Bool
- uriIsRelative :: URI -> Bool
- relativeTo :: URI -> URI -> URI
- nonStrictRelativeTo :: URI -> URI -> URI
- relativeFrom :: URI -> URI -> URI
- uriToString :: (String -> String) -> URI -> ShowS
- uriAuthToString :: (String -> String) -> Maybe URIAuth -> ShowS
- isReserved :: Char -> Bool
- isUnreserved :: Char -> Bool
- isAllowedInURI :: Char -> Bool
- isUnescapedInURI :: Char -> Bool
- isUnescapedInURIComponent :: Char -> Bool
- escapeURIChar :: (Char -> Bool) -> Char -> String
- escapeURIString :: (Char -> Bool) -> String -> String
- unEscapeString :: String -> String
- pathSegments :: URI -> [String]
- normalizeCase :: String -> String
- normalizeEscape :: String -> String
- normalizePathSegments :: String -> String
- parseabsoluteURI :: String -> Maybe URI
- escapeString :: String -> (Char -> Bool) -> String
- reserved :: Char -> Bool
- unreserved :: Char -> Bool
- scheme :: URI -> String
- authority :: URI -> String
- path :: URI -> String
- query :: URI -> String
- fragment :: URI -> String
The URI type
Represents a general universal resource identifier using its component parts.
For example, for the URI
foo://anonymous@www.haskell.org:42/ghc?query#frag
the components are:
Instances
Data URI Source # | |||||
Defined in Network.URI gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> URI -> c URI # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c URI # dataTypeOf :: URI -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c URI) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c URI) # gmapT :: (forall b. Data b => b -> b) -> URI -> URI # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> URI -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> URI -> r # gmapQ :: (forall d. Data d => d -> u) -> URI -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> URI -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> URI -> m URI # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> URI -> m URI # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> URI -> m URI # | |||||
Generic URI Source # | |||||
Defined in Network.URI
| |||||
Show URI Source # | |||||
NFData URI Source # | |||||
Defined in Network.URI | |||||
Eq URI Source # | |||||
Ord URI Source # | |||||
Lift URI Source # | |||||
type Rep URI Source # | |||||
Defined in Network.URI type Rep URI = D1 ('MetaData "URI" "Network.URI" "network-uri-2.6.4.2-HuL39KtH3iU67M4oxVzqS" 'False) (C1 ('MetaCons "URI" 'PrefixI 'True) ((S1 ('MetaSel ('Just "uriScheme") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String) :*: S1 ('MetaSel ('Just "uriAuthority") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 (Maybe URIAuth))) :*: (S1 ('MetaSel ('Just "uriPath") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String) :*: (S1 ('MetaSel ('Just "uriQuery") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String) :*: S1 ('MetaSel ('Just "uriFragment") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String))))) |
Type for authority value within a URI
URIAuth | |
|
Instances
Data URIAuth Source # | |||||
Defined in Network.URI gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> URIAuth -> c URIAuth # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c URIAuth # toConstr :: URIAuth -> Constr # dataTypeOf :: URIAuth -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c URIAuth) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c URIAuth) # gmapT :: (forall b. Data b => b -> b) -> URIAuth -> URIAuth # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> URIAuth -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> URIAuth -> r # gmapQ :: (forall d. Data d => d -> u) -> URIAuth -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> URIAuth -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> URIAuth -> m URIAuth # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> URIAuth -> m URIAuth # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> URIAuth -> m URIAuth # | |||||
Generic URIAuth Source # | |||||
Defined in Network.URI
| |||||
Show URIAuth Source # | |||||
NFData URIAuth Source # | |||||
Defined in Network.URI | |||||
Eq URIAuth Source # | |||||
Ord URIAuth Source # | |||||
Lift URIAuth Source # | |||||
type Rep URIAuth Source # | |||||
Defined in Network.URI type Rep URIAuth = D1 ('MetaData "URIAuth" "Network.URI" "network-uri-2.6.4.2-HuL39KtH3iU67M4oxVzqS" 'False) (C1 ('MetaCons "URIAuth" 'PrefixI 'True) (S1 ('MetaSel ('Just "uriUserInfo") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String) :*: (S1 ('MetaSel ('Just "uriRegName") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String) :*: S1 ('MetaSel ('Just "uriPort") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String)))) |
nullURIAuth :: URIAuth Source #
Blank URIAuth.
rectify :: URI -> URI Source #
Given a URI in "nonstandard" form (lacking required separator characters), return one that is standard.
rectifyAuth :: URIAuth -> URIAuth Source #
Given a URIAuth in "nonstandard" form (lacking required separator characters), return one that is standard.
Parsing
parseURI :: String -> Maybe URI Source #
Turn a string containing a URI into a URI
.
Returns Nothing
if the string is not a valid URI;
(an absolute URI with optional fragment identifier).
NOTE: this is different from the previous network.URI,
whose parseURI
function works like parseURIReference
in this module.
Test for strings containing various kinds of URI
isURI :: String -> Bool Source #
Test if string contains a valid URI (an absolute URI with optional fragment identifier).
isURIReference :: String -> Bool Source #
Test if string contains a valid URI reference (an absolute or relative URI with optional fragment identifier).
isRelativeReference :: String -> Bool Source #
Test if string contains a valid relative URI (a relative URI with optional fragment identifier).
isAbsoluteURI :: String -> Bool Source #
Test if string contains a valid absolute URI (an absolute URI without a fragment identifier).
isIPv6address :: String -> Bool Source #
Test if string contains a valid IPv6 address
isIPv4address :: String -> Bool Source #
Test if string contains a valid IPv4 address
Predicates
uriIsAbsolute :: URI -> Bool Source #
uriIsRelative :: URI -> Bool Source #
Relative URIs
relativeFrom :: URI -> URI -> URI Source #
Returns a new URI
which represents the relative location of
the first URI
with respect to the second URI
. Thus, the
values supplied are expected to be absolute URIs, and the result
returned may be a relative URI.
Example:
"http://example.com/Root/sub1/name2#frag" `relativeFrom` "http://example.com/Root/sub2/name2#frag" == "../sub1/name2#frag"
There is no single correct implementation of this function, but any acceptable implementation must satisfy the following:
(uabs `relativeFrom` ubase) `relativeTo` ubase == uabs
For any valid absolute URI. (cf. http://lists.w3.org/Archives/Public/uri/2003Jan/0008.html http://lists.w3.org/Archives/Public/uri/2003Jan/0005.html)
Operations on URI strings
Support for putting strings into URI-friendly escaped format and getting them back again. This can't be done transparently in all cases, because certain characters have different meanings in different kinds of URI. The URI spec [3], section 2.4, indicates that all URI components should be escaped before they are assembled as a URI: "Once produced, a URI is always in its percent-encoded form"
uriToString :: (String -> String) -> URI -> ShowS Source #
Turn a URI
into a string.
Uses a supplied function to map the userinfo part of the URI.
The Show instance for URI uses a mapping that hides any password
that may be present in the URI. Use this function with argument id
to preserve the password in the formatted output.
isReserved :: Char -> Bool Source #
Returns True
if the character is a "reserved" character in a
URI. To include a literal instance of one of these characters in a
component of a URI, it must be escaped.
isUnreserved :: Char -> Bool Source #
Returns True
if the character is an "unreserved" character in
a URI. These characters do not need to be escaped in a URI. The
only characters allowed in a URI are either "reserved",
"unreserved", or an escape sequence (%
followed by two hex digits).
isUnescapedInURI :: Char -> Bool Source #
Returns True
if the character is allowed unescaped in a URI.
>>>
escapeURIString isUnescapedInURI "http://haskell.org:80?some_param=true&other_param=їґ"
"http://haskell.org:80?some_param=true&other_param=%D1%97%D2%91"
isUnescapedInURIComponent :: Char -> Bool Source #
Returns True
if the character is allowed unescaped in a URI component.
>>>
escapeURIString isUnescapedInURIComponent "http://haskell.org:80?some_param=true&other_param=їґ"
"http%3A%2F%2Fhaskell.org%3A80%3Fsome_param%3Dtrue%26other_param%3D%D1%97%D2%91"
escapeURIChar :: (Char -> Bool) -> Char -> String Source #
Escape character if supplied predicate is not satisfied, otherwise return character as singleton string.
:: (Char -> Bool) | a predicate which returns |
-> String | the string to process |
-> String | the resulting URI string |
Can be used to make a string valid for use in a URI.
unEscapeString :: String -> String Source #
Turns all instances of escaped characters in the string back into literal characters.
pathSegments :: URI -> [String] Source #
Returns the segments of the path component. E.g., pathSegments $ parseURI "http://example.org/foo/bar/baz" == ["foo", "bar", "baz"]
URI Normalization functions
normalizeCase :: String -> String Source #
Case normalization; cf. RFC3986 section 6.2.2.1 NOTE: authority case normalization is not performed
normalizeEscape :: String -> String Source #
Encoding normalization; cf. RFC3986 section 6.2.2.2
normalizePathSegments :: String -> String Source #
Path segment normalization; cf. RFC3986 section 6.2.2.3
Deprecated functions
escapeString :: String -> (Char -> Bool) -> String Source #
Deprecated: use escapeURIString, and note the flipped arguments
unreserved :: Char -> Bool Source #
Deprecated: use isUnreserved