{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.OPML
   Copyright   : Copyright (C) 2013-2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Conversion of OPML to 'Pandoc' document.
-}

module Text.Pandoc.Readers.OPML ( readOPML ) where
import Control.Monad.State.Strict
import Data.Char (toUpper)
import Data.Default
import Data.Generics
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.Pandoc.Shared (crFilter, blocksToInlines')
import Text.XML.Light

type OPML m = StateT OPMLState m

data OPMLState = OPMLState{
                        OPMLState -> Int
opmlSectionLevel :: Int
                      , OPMLState -> Inlines
opmlDocTitle     :: Inlines
                      , OPMLState -> [Inlines]
opmlDocAuthors   :: [Inlines]
                      , OPMLState -> Inlines
opmlDocDate      :: Inlines
                      , OPMLState -> ReaderOptions
opmlOptions      :: ReaderOptions
                      } deriving Int -> OPMLState -> ShowS
[OPMLState] -> ShowS
OPMLState -> String
(Int -> OPMLState -> ShowS)
-> (OPMLState -> String)
-> ([OPMLState] -> ShowS)
-> Show OPMLState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OPMLState] -> ShowS
$cshowList :: [OPMLState] -> ShowS
show :: OPMLState -> String
$cshow :: OPMLState -> String
showsPrec :: Int -> OPMLState -> ShowS
$cshowsPrec :: Int -> OPMLState -> ShowS
Show

instance Default OPMLState where
  def :: OPMLState
def = OPMLState :: Int
-> Inlines -> [Inlines] -> Inlines -> ReaderOptions -> OPMLState
OPMLState{ opmlSectionLevel :: Int
opmlSectionLevel = Int
0
                 , opmlDocTitle :: Inlines
opmlDocTitle = Inlines
forall a. Monoid a => a
mempty
                 , opmlDocAuthors :: [Inlines]
opmlDocAuthors = []
                 , opmlDocDate :: Inlines
opmlDocDate = Inlines
forall a. Monoid a => a
mempty
                 , opmlOptions :: ReaderOptions
opmlOptions = ReaderOptions
forall a. Default a => a
def
                 }

readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML :: ReaderOptions -> Text -> m Pandoc
readOPML ReaderOptions
opts Text
inp  = do
  ([Blocks]
bs, OPMLState
st') <- StateT OPMLState m [Blocks] -> OPMLState -> m ([Blocks], OPMLState)
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT
                 ((Content -> StateT OPMLState m Blocks)
-> [Content] -> StateT OPMLState m [Blocks]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Content -> StateT OPMLState m Blocks
forall (m :: * -> *). PandocMonad m => Content -> OPML m Blocks
parseBlock ([Content] -> StateT OPMLState m [Blocks])
-> [Content] -> StateT OPMLState m [Blocks]
forall a b. (a -> b) -> a -> b
$ [Content] -> [Content]
normalizeTree ([Content] -> [Content]) -> [Content] -> [Content]
forall a b. (a -> b) -> a -> b
$
                    String -> [Content]
forall s. XmlSource s => s -> [Content]
parseXML (Text -> String
T.unpack (Text -> Text
crFilter Text
inp))) OPMLState
forall a. Default a => a
def{ opmlOptions :: ReaderOptions
opmlOptions = ReaderOptions
opts }
  Pandoc -> m Pandoc
forall (m :: * -> *) a. Monad m => a -> m a
return (Pandoc -> m Pandoc) -> Pandoc -> m Pandoc
forall a b. (a -> b) -> a -> b
$
    Inlines -> Pandoc -> Pandoc
setTitle (OPMLState -> Inlines
opmlDocTitle OPMLState
st') (Pandoc -> Pandoc) -> Pandoc -> Pandoc
forall a b. (a -> b) -> a -> b
$
    [Inlines] -> Pandoc -> Pandoc
setAuthors (OPMLState -> [Inlines]
opmlDocAuthors OPMLState
st') (Pandoc -> Pandoc) -> Pandoc -> Pandoc
forall a b. (a -> b) -> a -> b
$
    Inlines -> Pandoc -> Pandoc
setDate (OPMLState -> Inlines
opmlDocDate OPMLState
st') (Pandoc -> Pandoc) -> Pandoc -> Pandoc
forall a b. (a -> b) -> a -> b
$
    Blocks -> Pandoc
doc (Blocks -> Pandoc) -> Blocks -> Pandoc
forall a b. (a -> b) -> a -> b
$ [Blocks] -> Blocks
forall a. Monoid a => [a] -> a
mconcat [Blocks]
bs

-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
normalizeTree :: [Content] -> [Content]
normalizeTree = (forall a. Data a => a -> a) -> forall a. Data a => a -> a
everywhere (([Content] -> [Content]) -> a -> a
forall a b. (Typeable a, Typeable b) => (b -> b) -> a -> a
mkT [Content] -> [Content]
go)
  where go :: [Content] -> [Content]
        go :: [Content] -> [Content]
go (Text (CData CDataKind
CDataRaw String
_ Maybe Line
_):[Content]
xs) = [Content]
xs
        go (Text (CData CDataKind
CDataText String
s1 Maybe Line
z):Text (CData CDataKind
CDataText String
s2 Maybe Line
_):[Content]
xs) =
           CData -> Content
Text (CDataKind -> String -> Maybe Line -> CData
CData CDataKind
CDataText (String
s1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s2) Maybe Line
z)Content -> [Content] -> [Content]
forall a. a -> [a] -> [a]
:[Content]
xs
        go (Text (CData CDataKind
CDataText String
s1 Maybe Line
z):CRef String
r:[Content]
xs) =
           CData -> Content
Text (CDataKind -> String -> Maybe Line -> CData
CData CDataKind
CDataText (String
s1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
convertEntity String
r) Maybe Line
z)Content -> [Content] -> [Content]
forall a. a -> [a] -> [a]
:[Content]
xs
        go (CRef String
r:Text (CData CDataKind
CDataText String
s1 Maybe Line
z):[Content]
xs) =
             CData -> Content
Text (CDataKind -> String -> Maybe Line -> CData
CData CDataKind
CDataText (ShowS
convertEntity String
r String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s1) Maybe Line
z)Content -> [Content] -> [Content]
forall a. a -> [a] -> [a]
:[Content]
xs
        go (CRef String
r1:CRef String
r2:[Content]
xs) =
             CData -> Content
Text (CDataKind -> String -> Maybe Line -> CData
CData CDataKind
CDataText (ShowS
convertEntity String
r1 String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
convertEntity String
r2) Maybe Line
forall a. Maybe a
Nothing)Content -> [Content] -> [Content]
forall a. a -> [a] -> [a]
:[Content]
xs
        go [Content]
xs = [Content]
xs

convertEntity :: String -> String
convertEntity :: ShowS
convertEntity String
e = String -> Maybe String -> String
forall a. a -> Maybe a -> a
Data.Maybe.fromMaybe ((Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toUpper String
e) (String -> Maybe String
lookupEntity String
e)

-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> Text
attrValue :: String -> Element -> Text
attrValue String
attr Element
elt =
  Text -> (String -> Text) -> Maybe String -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" String -> Text
T.pack ((QName -> Bool) -> [Attr] -> Maybe String
lookupAttrBy (\QName
x -> QName -> String
qName QName
x String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
attr) (Element -> [Attr]
elAttribs Element
elt))

textContent :: Element -> Text
textContent :: Element -> Text
textContent = String -> Text
T.pack (String -> Text) -> (Element -> String) -> Element -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> String
strContent

-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
-- exceptT = either throwError return

asHtml :: PandocMonad m => Text -> OPML m Inlines
asHtml :: Text -> OPML m Inlines
asHtml Text
s = do
  ReaderOptions
opts <- (OPMLState -> ReaderOptions) -> StateT OPMLState m ReaderOptions
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets OPMLState -> ReaderOptions
opmlOptions
  Pandoc Meta
_ [Block]
bs <- ReaderOptions -> Text -> StateT OPMLState m Pandoc
forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> Text -> m Pandoc
readHtml ReaderOptions
forall a. Default a => a
def{ readerExtensions :: Extensions
readerExtensions = ReaderOptions -> Extensions
readerExtensions ReaderOptions
opts } Text
s
  Inlines -> OPML m Inlines
forall (m :: * -> *) a. Monad m => a -> m a
return (Inlines -> OPML m Inlines) -> Inlines -> OPML m Inlines
forall a b. (a -> b) -> a -> b
$ [Block] -> Inlines
blocksToInlines' [Block]
bs

asMarkdown :: PandocMonad m => Text -> OPML m Blocks
asMarkdown :: Text -> OPML m Blocks
asMarkdown Text
s = do
  ReaderOptions
opts <- (OPMLState -> ReaderOptions) -> StateT OPMLState m ReaderOptions
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets OPMLState -> ReaderOptions
opmlOptions
  Pandoc Meta
_ [Block]
bs <- ReaderOptions -> Text -> StateT OPMLState m Pandoc
forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> Text -> m Pandoc
readMarkdown ReaderOptions
forall a. Default a => a
def{ readerExtensions :: Extensions
readerExtensions = ReaderOptions -> Extensions
readerExtensions ReaderOptions
opts } Text
s
  Blocks -> OPML m Blocks
forall (m :: * -> *) a. Monad m => a -> m a
return (Blocks -> OPML m Blocks) -> Blocks -> OPML m Blocks
forall a b. (a -> b) -> a -> b
$ [Block] -> Blocks
forall a. [a] -> Many a
fromList [Block]
bs

getBlocks :: PandocMonad m => Element -> OPML m Blocks
getBlocks :: Element -> OPML m Blocks
getBlocks Element
e =  [Blocks] -> Blocks
forall a. Monoid a => [a] -> a
mconcat ([Blocks] -> Blocks)
-> StateT OPMLState m [Blocks] -> OPML m Blocks
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Content -> OPML m Blocks)
-> [Content] -> StateT OPMLState m [Blocks]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Content -> OPML m Blocks
forall (m :: * -> *). PandocMonad m => Content -> OPML m Blocks
parseBlock (Element -> [Content]
elContent Element
e)

parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock :: Content -> OPML m Blocks
parseBlock (Elem Element
e) =
  case QName -> String
qName (Element -> QName
elName Element
e) of
        String
"ownerName"    -> Blocks
forall a. Monoid a => a
mempty Blocks -> StateT OPMLState m () -> OPML m Blocks
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\OPMLState
st ->
                              OPMLState
st{opmlDocAuthors :: [Inlines]
opmlDocAuthors = [Text -> Inlines
text (Text -> Inlines) -> Text -> Inlines
forall a b. (a -> b) -> a -> b
$ Element -> Text
textContent Element
e]})
        String
"dateModified" -> Blocks
forall a. Monoid a => a
mempty Blocks -> StateT OPMLState m () -> OPML m Blocks
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\OPMLState
st ->
                              OPMLState
st{opmlDocDate :: Inlines
opmlDocDate = Text -> Inlines
text (Text -> Inlines) -> Text -> Inlines
forall a b. (a -> b) -> a -> b
$ Element -> Text
textContent Element
e})
        String
"title"        -> Blocks
forall a. Monoid a => a
mempty Blocks -> StateT OPMLState m () -> OPML m Blocks
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (\OPMLState
st ->
                              OPMLState
st{opmlDocTitle :: Inlines
opmlDocTitle = Text -> Inlines
text (Text -> Inlines) -> Text -> Inlines
forall a b. (a -> b) -> a -> b
$ Element -> Text
textContent Element
e})
        String
"outline" -> (OPMLState -> Int) -> StateT OPMLState m Int
forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets OPMLState -> Int
opmlSectionLevel StateT OPMLState m Int -> (Int -> OPML m Blocks) -> OPML m Blocks
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> OPML m Blocks
forall (m :: * -> *).
PandocMonad m =>
Int -> StateT OPMLState m Blocks
sect (Int -> OPML m Blocks) -> (Int -> Int) -> Int -> OPML m Blocks
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        String
"?xml"  -> Blocks -> OPML m Blocks
forall (m :: * -> *) a. Monad m => a -> m a
return Blocks
forall a. Monoid a => a
mempty
        String
_       -> Element -> OPML m Blocks
forall (m :: * -> *). PandocMonad m => Element -> OPML m Blocks
getBlocks Element
e
   where sect :: Int -> StateT OPMLState m Blocks
sect Int
n = do Inlines
headerText <- Text -> OPML m Inlines
forall (m :: * -> *). PandocMonad m => Text -> OPML m Inlines
asHtml (Text -> OPML m Inlines) -> Text -> OPML m Inlines
forall a b. (a -> b) -> a -> b
$ String -> Element -> Text
attrValue String
"text" Element
e
                     Blocks
noteBlocks <- Text -> StateT OPMLState m Blocks
forall (m :: * -> *). PandocMonad m => Text -> OPML m Blocks
asMarkdown (Text -> StateT OPMLState m Blocks)
-> Text -> StateT OPMLState m Blocks
forall a b. (a -> b) -> a -> b
$ String -> Element -> Text
attrValue String
"_note" Element
e
                     (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((OPMLState -> OPMLState) -> StateT OPMLState m ())
-> (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall a b. (a -> b) -> a -> b
$ \OPMLState
st -> OPMLState
st{ opmlSectionLevel :: Int
opmlSectionLevel = Int
n }
                     Blocks
bs <- Element -> StateT OPMLState m Blocks
forall (m :: * -> *). PandocMonad m => Element -> OPML m Blocks
getBlocks Element
e
                     (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((OPMLState -> OPMLState) -> StateT OPMLState m ())
-> (OPMLState -> OPMLState) -> StateT OPMLState m ()
forall a b. (a -> b) -> a -> b
$ \OPMLState
st -> OPMLState
st{ opmlSectionLevel :: Int
opmlSectionLevel = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 }
                     let headerText' :: Inlines
headerText' = case Text -> Text
T.toUpper (String -> Element -> Text
attrValue String
"type" Element
e) of
                                             Text
"LINK"  -> Text -> Text -> Inlines -> Inlines
link
                                               (String -> Element -> Text
attrValue String
"url" Element
e) Text
"" Inlines
headerText
                                             Text
_ -> Inlines
headerText
                     Blocks -> StateT OPMLState m Blocks
forall (m :: * -> *) a. Monad m => a -> m a
return (Blocks -> StateT OPMLState m Blocks)
-> Blocks -> StateT OPMLState m Blocks
forall a b. (a -> b) -> a -> b
$ Int -> Inlines -> Blocks
header Int
n Inlines
headerText' Blocks -> Blocks -> Blocks
forall a. Semigroup a => a -> a -> a
<> Blocks
noteBlocks Blocks -> Blocks -> Blocks
forall a. Semigroup a => a -> a -> a
<> Blocks
bs
parseBlock Content
_ = Blocks -> OPML m Blocks
forall (m :: * -> *) a. Monad m => a -> m a
return Blocks
forall a. Monoid a => a
mempty