{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Brok.Parser.Links
( links
, url
) where
import ClassyPrelude
import Data.Attoparsec.Text
import Data.List (nub)
import Brok.Parser.Attoparsec
import Brok.Types.URL (URL)
-- | Result of scanning one position of the input: 'Just' a URL when one was
-- recognised there, 'Nothing' when the position only held a character to skip.
type Token = Maybe URL
-- | Punctuation characters accepted, in addition to letters and digits, in the
-- part of a URL that precedes the query string.
preQueryChars :: String
preQueryChars = "-._~:/#%@"
-- | Punctuation characters accepted inside a query string: everything allowed
-- before the query, plus the extra characters listed here.
queryBodyChars :: String
queryBodyChars = preQueryChars ++ "!$&*+,;="
-- | Match a single character that is a digit, a letter, or one of the
-- explicitly supplied extra characters (tried in that order).
chars :: String -> Parser Char
chars extras = choice (digit : letter : map char extras)
-- | Run the given parser between a matching pair of round brackets or, failing
-- that, a matching pair of square brackets.
--
-- NOTE(review): 'surround' comes from "Brok.Parser.Attoparsec" (not visible
-- here); whether the delimiters are kept in the result depends on it.
parens :: Parser Text -> Parser Text
parens parser = wrapped '(' ')' <|> wrapped '[' ']'
  where
    wrapped open close = surround open close parser
-- | Parse one or more URL segments and concatenate them into a single 'Text'.
--
-- The argument lists the punctuation characters permitted (on top of letters
-- and digits) within a segment.
part :: String -> Parser Text
part allowed = concat <$> many1 segment
  where
    -- A segment is either a bracketed group, whose contents are parsed
    -- recursively with the same character set, or a run of permitted
    -- characters. 'manyChars' lives in "Brok.Parser.Attoparsec" (not visible
    -- here) -- presumably it collects the matched characters into 'Text'.
    segment = parens (part allowed) <|> manyChars (chars allowed)
-- | Parse a query string: a literal @?@ followed by a 'part' built from
-- 'queryBodyChars'. The leading @?@ is included in the result.
query :: Parser Text
query = do
    marker <- string "?"
    body <- part queryBodyChars
    pure (marker <> body)
-- | Parse a URL: the literal @http@, whatever 'chopt' yields for @s@, the
-- literal @:\/\/@, a 'part' built from 'preQueryChars', and an optional
-- 'query' (empty text when absent).
--
-- NOTE(review): 'chopt' and 'concat5' come from "Brok.Parser.Attoparsec" (not
-- visible here); presumably an optional character and a five-way text
-- concatenation respectively.
url :: Parser Text
url = do
    scheme <- string "http"
    secure <- chopt 's'
    separator <- string "://"
    location <- part preQueryChars
    queryString <- option "" query
    pure (concat5 scheme secure separator location queryString)
-- | Consume any single character and report that no URL was found there.
noise :: Parser Token
noise = Nothing <$ anyChar
-- | Scan the input one token at a time (at least one token is required),
-- keeping every URL recognised and discarding duplicates.
urls :: Parser [URL]
urls = fmap (nub . catMaybes) (many1 token)
  where
    -- Prefer a URL at the current position; otherwise skip one character.
    token = (Just <$> url) <|> noise
-- | Extract the distinct URLs found in a piece of text.
--
-- Empty input is answered directly with an empty list, because the 'urls'
-- parser requires at least one token. If parsing fails, the attoparsec error
-- message is returned in 'Left'.
links :: Text -> Either Text [URL]
links "" = Right []
-- 'parseOnly' reports failures as a 'String'; 'pack' converts it verbatim,
-- whereas 'tshow' would wrap the message in quotes and escape its contents.
links content = first pack $ parseOnly urls content