module Addy.Internal.Parser
( Mode (..),
Atom (..),
parse,
parseWithMode,
nameAddr,
addrSpec,
localPartP,
domainP,
displayNameP,
word,
atom,
dotAtom,
dotAtomLh,
dotAtomRh,
quoted,
quotedLh,
cfws,
)
where
import Addy.Internal.Char
import Addy.Internal.Types
import Addy.Internal.Validation
import Data.Attoparsec.Text ((<?>))
import qualified Data.Attoparsec.Text as Atto
import Data.Foldable
import qualified Data.Text as Text
import qualified Net.IP as IP
import qualified Net.IPv4 as IP4
import qualified Net.IPv6 as IP6
import qualified Validation
data Mode
=
Strict
|
Lenient
deriving (Eq, Show)
data Atom = Atom (Maybe CommentContent) Text (Maybe CommentContent)
deriving (Show)
instance Semigroup Atom where
(<>) (Atom x0 y0 z0) (Atom x1 y1 z1) =
Atom (x0 <> x1) (y0 <> y1) (z0 <> z1)
instance Monoid Atom where
mempty = Atom Nothing mempty Nothing
atomJoin :: Foldable t => Char -> t Atom -> Atom
atomJoin sep as
| null as = mempty
| otherwise = foldr1 go as
where
go :: Atom -> Atom -> Atom
go (Atom c0 t0 c1) (Atom c2 t1 c3) =
Atom (go' c0 c2) (t0 <> one sep <> t1) (go' c1 c3)
go' :: Maybe CommentContent -> Maybe CommentContent -> Maybe CommentContent
go' (Just x) (Just y) = Just (x <> CC (one ' ') <> y)
go' x y = x <|> y
parse :: Mode -> Atto.Parser EmailAddr
parse m = cleanComments <$> (nameAddr m <|> addrSpec m)
where
cleanComments :: EmailAddr -> EmailAddr
cleanComments addr@EmailAddr {_comments} =
addr
{ _comments =
filter
( \(Comment _ (CC t)) -> not $ Text.null (Text.strip t)
)
_comments
}
parseWithMode :: Mode -> Text -> Either (NonEmpty Error) EmailAddr
parseWithMode mode text = do
addr <-
first (toText >>> ParserFailedError >>> one) $
Atto.parseOnly
( parse mode <* (Atto.endOfInput <?> "unparsed input")
)
text
case validateEmailAddr addr of
Validation.Success ea -> Right ea
Validation.Failure es -> Left es
nameAddr :: Mode -> Atto.Parser EmailAddr
nameAddr mode = do
dp <- optional (displayNameP mode)
c0 <- optional (cfws mode)
_ <- Atto.char '<'
(c1, lp) <- localPartP mode <* Atto.char '@'
(dn, c2) <- domainP mode
_ <- Atto.char '>'
c3 <- optional (cfws mode)
let (dpc0, dpt, dpc1) = case dp of
Nothing -> (Nothing, Nothing, Nothing)
Just (Atom x y z) -> (x, Just (DP y), z)
pure $
EmailAddr
{ _displayName = dpt,
_localPart = lp,
_domain = dn,
_comments =
catMaybes
[ Comment BeforeDisplayName <$> dpc0,
Comment AfterDisplayName <$> dpc1,
Comment AfterDisplayName <$> c0,
Comment BeforeLocalPart <$> c1,
Comment AfterDomain <$> c2,
Comment AfterAddress <$> c3
]
}
addrSpec :: Mode -> Atto.Parser EmailAddr
addrSpec mode = do
(c0, lp) <- localPartP mode <* Atto.char '@'
(dn, c1) <- domainP mode
pure $
EmailAddr
{ _displayName = Nothing,
_localPart = lp,
_domain = dn,
_comments =
catMaybes
[ Comment BeforeLocalPart <$> c0,
Comment AfterDomain <$> c1
]
}
localPartP :: Mode -> Atto.Parser (Maybe CommentContent, LocalPart)
localPartP mode = go <?> "local part"
where
go =
case mode of
Strict -> do
Atom c0 t c1 <- (dotAtomLh <* thenAt) <|> (quotedLh <* thenAt)
pure (c0 <> c1, LP t)
Lenient -> do
Atom c0 t c1 <-
Atto.choice
[ dotAtom mode <* thenAt,
obsLocalPart <* thenAt,
quoted mode <* thenAt
]
pure (c0 <> c1, LP t)
obsLocalPart :: Atto.Parser Atom
obsLocalPart = do
t0 <- word mode
ts <- many (Atto.char '.' *> word mode)
pure (atomJoin '.' (t0 : ts))
thenAt :: Atto.Parser ()
thenAt =
Atto.peekChar'
>>= bool empty pass . (== '@')
domainP :: Mode -> Atto.Parser (Domain, Maybe CommentContent)
domainP mode = go <?> "domain name"
where
go =
case mode of
Strict ->
domainNameP
<|> domainLiteralP (fws mode $> CC (one ' '))
Lenient ->
obsDomainP
<|> domainNameP
<|> domainLiteralP (cfws mode)
domainNameP :: Atto.Parser (Domain, Maybe CommentContent)
domainNameP = do
Atom c0 t c1 <- dotAtomRh
pure (Domain $ DN t, c0 <> c1)
domainLiteralP ::
Atto.Parser CommentContent ->
Atto.Parser (Domain, Maybe CommentContent)
domainLiteralP lh = do
c0 <- optional lh
t <- addressLiteral mode
c1 <- optional (cfws mode)
pure (DomainLiteral t, c0 <> c1)
obsDomainP :: Atto.Parser (Domain, Maybe CommentContent)
obsDomainP = do
t0 <- atom mode
ts <- many (Atto.char '.' *> atom mode)
let Atom c0 t c1 = atomJoin '.' (t0 : ts)
pure (Domain $ DN t, c0 <> c1)
displayNameP :: Mode -> Atto.Parser Atom
displayNameP mode =
case mode of
Strict -> phrase
Lenient -> phrase <|> obsPhrase
where
phrase = (<?> "display name") $ do
w0 <- word Strict
ws <- many (word Strict)
pure (atomJoin ' ' (w0 : ws))
obsPhrase = (<?> "obsolete display name") $ do
w0 <- word mode
ws <-
many
( word mode
<|> Atom Nothing <$> (Atto.char '.' <&> one) <*> pure Nothing
<|> Atom <$> (cfws mode <&> Just) <*> pure mempty <*> pure Nothing
)
pure (atomJoin ' ' (w0 : ws))
word :: Mode -> Atto.Parser Atom
word mode = atom mode <|> quoted mode
atom :: Mode -> Atto.Parser Atom
atom mode =
Atom
<$> optional (cfws mode)
<*> atextP
<*> optional (cfws mode)
dotAtom' ::
Atto.Parser CommentContent ->
Atto.Parser CommentContent ->
Atto.Parser Atom
dotAtom' lh rh = do
c0 <- optional lh
t0 <- atextP
ts <- many (Atto.char '.' *> atextP)
c1 <- optional rh
pure (Atom c0 (Text.intercalate "." (t0 : ts)) c1)
dotAtom :: Mode -> Atto.Parser Atom
dotAtom mode = dotAtom' (cfws mode) (cfws mode)
dotAtomLh :: Atto.Parser Atom
dotAtomLh =
dotAtom'
(cfws Strict)
(CC <$> (fws Strict $> one ' '))
dotAtomRh :: Atto.Parser Atom
dotAtomRh =
dotAtom'
(CC <$> (fws Strict $> one ' '))
(cfws Strict)
atextP :: Atto.Parser Text
atextP = Atto.takeWhile1 atext
quoted :: Mode -> Atto.Parser Atom
quoted mode = quoted' (cfws mode) (cfws mode) mode
quoted' ::
Atto.Parser CommentContent ->
Atto.Parser CommentContent ->
Mode ->
Atto.Parser Atom
quoted' lh rh mode = (<?> "quoted content") $ do
c0 <- optional lh
_ <- Atto.char '"'
t <- Atto.many1 ((<>) <$> fws' <*> qcontent)
w <- fws'
_ <- Atto.char '"'
c1 <- optional rh
pure (Atom c0 (mconcat t <> w) c1)
where
qcontent :: Atto.Parser Text
qcontent = qtextP <|> quotedPairP mode
qtextP :: Atto.Parser Text
qtextP = case mode of
Strict -> Atto.takeWhile1 qtext
Lenient ->
Atto.takeWhile1 (\c -> qtext c || qtextObs c)
<&> Text.filter (not . qtextObs)
fws' = (fws mode $> one ' ') <|> pure mempty
quotedLh :: Atto.Parser Atom
quotedLh =
quoted'
(cfws Strict)
(CC <$> (fws Strict $> one ' '))
Strict
quotedPairP :: Mode -> Atto.Parser Text
quotedPairP mode = go <?> "quoted char"
where
go = Atto.char '\\' *> allowed
allowed = case mode of
Strict ->
Atto.satisfy quotedPair
<&> one
Lenient ->
Atto.satisfy (\c -> quotedPair c || quotedPairObs c)
<&> (one >>> Text.filter (not . quotedPairObs))
cfws :: Mode -> Atto.Parser CommentContent
cfws mode =
(<?> "comment or space")
(cfws' <|> (CC <$> fws mode))
where
cfws' :: Atto.Parser CommentContent
cfws' = do
cs <- Atto.many1 (fws' *> comment) <* fws'
pure (CC $ mconcat cs)
comment :: Atto.Parser Text
comment = do
_ <- Atto.char '('
ts <- many (fws' *> (mconcat <$> Atto.many1 ccontent)) <* fws'
_ <- Atto.char ')'
pure (Text.intercalate " " ts)
ccontent :: Atto.Parser Text
ccontent = ctextP <|> quotedPairP mode <|> comment
ctextP :: Atto.Parser Text
ctextP = case mode of
Strict ->
Atto.takeWhile1 ctext
Lenient ->
Atto.takeWhile1 (\c -> ctext c || ctextObs c)
<&> Text.filter (not . ctextObs)
fws' :: Atto.Parser ()
fws' = void (optional (fws mode))
fws :: Mode -> Atto.Parser Text
fws = \case
Strict -> do
w0 <- (Atto.takeWhile wsp <* crlf) <|> pure Text.empty
w1 <- Atto.takeWhile1 wsp
pure (w0 <> w1)
Lenient -> do
w0 <- Atto.takeWhile1 wsp
ws <- many (crlf *> Atto.takeWhile1 wsp)
pure (w0 <> mconcat ws)
where
crlf = Atto.string "\r\n"
addressLiteral :: Mode -> Atto.Parser AddressLiteral
addressLiteral mode =
(<?> "address literal")
$ Atto.choice
$ map
wrap
[ IpAddressLiteral . IP.fromIPv6 <$> (Atto.string "IPv6:" *> IP6.parser),
TaggedAddressLiteral <$> tag <*> (Atto.char ':' *> lit),
IpAddressLiteral . IP.fromIPv4 <$> IP4.parser,
AddressLiteral <$> lit
]
where
wrap :: Atto.Parser a -> Atto.Parser a
wrap p =
Atto.char '['
*> p
<* optional (fws mode)
<* Atto.char ']'
tag :: Atto.Parser AddressTag
tag = Atto.takeWhile1 (\c -> c /= ':' && dtext c) <&> AT
lit :: Atto.Parser Literal
lit =
Lit . mconcat
<$> many
( do
f0 <- fws'
ts <- dtextP
f1 <- fws'
pure (f0 <> ts <> f1)
)
dtextP :: Atto.Parser Text
dtextP =
case mode of
Strict ->
Atto.takeWhile1 dtext
Lenient ->
mconcat
<$> Atto.many1
( ( Atto.takeWhile1 (\c -> dtext c || obsNoWsCtl c)
<&> Text.filter (not . obsNoWsCtl)
)
<|> (quotedPairP mode $> one '-')
)
fws' :: Atto.Parser Text
fws' = fws mode $> one ' ' <|> pure mempty