{-# LANGUAGE OverloadedStrings #-}
module PDF.DocumentStructure
( parseTrailer
, expandObjStm
, rootRef
, contentsStream
, rawStreamByRef
, findKids
, findPages
, findDict
, findDictByRef
, findDictOfType
, findObjFromDict
, findObjFromDictWithRef
, findObjsByRef
, findObjs
, findTrailer
, rawStream
) where
import Data.Char (chr)
import Data.List (find)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.ByteString.Builder as B
import qualified Data.Text as T
import Data.Maybe (fromMaybe)
import Numeric (readDec)
import Data.Attoparsec.ByteString.Char8 hiding (take)
import Data.Attoparsec.Combinator
import Control.Applicative
import Codec.Compression.Zlib (decompress)
import Debug.Trace
import PDF.Definition
import PDF.Object
import PDF.ContentStream (parseStream, parseColorSpace)
import PDF.Cmap (parseCMap)
import qualified PDF.OpenType as OpenType
import qualified PDF.CFF as CFF
import qualified PDF.Type1 as Type1
spaces :: Parser ()
spaces = Parser ()
skipSpace
oneOf :: String -> Parser Char
oneOf = (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char)
-> (String -> Char -> Bool) -> String -> Parser Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char -> Bool
inClass
noneOf :: String -> Parser Char
noneOf = (Char -> Bool) -> Parser Char
satisfy ((Char -> Bool) -> Parser Char)
-> (String -> Char -> Bool) -> String -> Parser Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char -> Bool
notInClass
findObjs :: BS.ByteString -> [PDFBS]
findObjs :: ByteString -> [PDFBS]
findObjs ByteString
contents = case Parser [PDFBS] -> ByteString -> Either String [PDFBS]
forall a. Parser a -> ByteString -> Either String a
parseOnly (Parser ByteString PDFBS -> Parser [PDFBS]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser ByteString PDFBS
pdfObj) ByteString
contents of
Left String
err -> []
Right [PDFBS]
rlt -> [PDFBS]
rlt
findXref :: BS.ByteString -> String
findXref :: ByteString -> String
findXref ByteString
contents = case Parser String -> ByteString -> Either String String
forall a. Parser a -> ByteString -> Either String a
parseOnly (Parser String
xref) ByteString
contents of
Left String
err -> []
Right String
rlt -> String
rlt
findObjsByRef :: Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef :: Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
x [PDFObj]
pdfobjs = case (PDFObj -> Bool) -> [PDFObj] -> Maybe PDFObj
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Maybe Int -> PDFObj -> Bool
forall a b. Eq a => Maybe a -> (a, b) -> Bool
isRefObj (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x)) [PDFObj]
pdfobjs of
Just (Int
_,[Obj]
objs) -> [Obj] -> Maybe [Obj]
forall a. a -> Maybe a
Just [Obj]
objs
Maybe PDFObj
Nothing -> Maybe [Obj]
forall a. Maybe a
Nothing
where
isRefObj :: Maybe a -> (a, b) -> Bool
isRefObj (Just a
x) (a
y, b
objs) = if a
xa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
y then Bool
True else Bool
False
isRefObj Maybe a
_ (a, b)
_ = Bool
False
findObjFromDictWithRef :: Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef :: Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
ref String
name [PDFObj]
objs = case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
ref [PDFObj]
objs of
Just Dict
d -> Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
name
Maybe Dict
Nothing -> Maybe Obj
forall a. Maybe a
Nothing
findObjFromDict :: Dict -> String -> Maybe Obj
findObjFromDict :: Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
name = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
isName Dict
d of
Just (Obj
_, Obj
o) -> Obj -> Maybe Obj
forall a. a -> Maybe a
Just Obj
o
Maybe (Obj, Obj)
otherwise -> Maybe Obj
forall a. Maybe a
Nothing
where isName :: (Obj, b) -> Bool
isName (PdfName String
n, b
_) = if String
name String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
n then Bool
True else Bool
False
isName (Obj, b)
_ = Bool
False
findDictByRef :: Int -> [PDFObj] -> Maybe Dict
findDictByRef :: Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
ref [PDFObj]
objs = case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
ref [PDFObj]
objs of
Just [Obj]
os -> [Obj] -> Maybe Dict
findDict [Obj]
os
Maybe [Obj]
Nothing -> Maybe Dict
forall a. Maybe a
Nothing
findDictOfType :: String -> [Obj] -> Maybe Dict
findDictOfType :: String -> [Obj] -> Maybe Dict
findDictOfType String
typename [Obj]
objs = case [Obj] -> Maybe Dict
findDict [Obj]
objs of
Just Dict
d -> if Dict -> Bool
forall (t :: * -> *). Foldable t => t (Obj, Obj) -> Bool
isType Dict
d then Dict -> Maybe Dict
forall a. a -> Maybe a
Just Dict
d else Maybe Dict
forall a. Maybe a
Nothing
Maybe Dict
Nothing -> Maybe Dict
forall a. Maybe a
Nothing
where
isType :: t (Obj, Obj) -> Bool
isType t (Obj, Obj)
dict = (String -> Obj
PdfName String
"/Type",String -> Obj
PdfName String
typename) (Obj, Obj) -> t (Obj, Obj) -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t (Obj, Obj)
dict
findDict :: [Obj] -> Maybe Dict
findDict :: [Obj] -> Maybe Dict
findDict [Obj]
objs = case (Obj -> Bool) -> [Obj] -> Maybe Obj
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Obj -> Bool
isDict [Obj]
objs of
Just (PdfDict Dict
d) -> Dict -> Maybe Dict
forall a. a -> Maybe a
Just Dict
d
Maybe Obj
otherwise -> Maybe Dict
forall a. Maybe a
Nothing
where
isDict :: Obj -> Bool
isDict :: Obj -> Bool
isDict (PdfDict Dict
d) = Bool
True
isDict Obj
_ = Bool
False
findPages :: Dict -> Maybe Int
findPages :: Dict -> Maybe Int
findPages Dict
dict = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
isPagesRef Dict
dict of
Just (Obj
_, ObjRef Int
x) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x
Maybe (Obj, Obj)
Nothing -> Maybe Int
forall a. Maybe a
Nothing
where
isPagesRef :: (Obj, Obj) -> Bool
isPagesRef (PdfName String
"/Pages", ObjRef Int
x) = Bool
True
isPagesRef (Obj
_,Obj
_) = Bool
False
findKids :: Dict -> Maybe [Int]
findKids :: Dict -> Maybe [Int]
findKids Dict
dict = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
isKidsRefs Dict
dict of
Just (Obj
_, PdfArray [Obj]
arr) -> [Int] -> Maybe [Int]
forall a. a -> Maybe a
Just ([Obj] -> [Int]
parseRefsArray [Obj]
arr)
Maybe (Obj, Obj)
Nothing -> Maybe [Int]
forall a. Maybe a
Nothing
where
isKidsRefs :: (Obj, Obj) -> Bool
isKidsRefs (PdfName String
"/Kids", PdfArray [Obj]
x) = Bool
True
isKidsRefs (Obj
_,Obj
_) = Bool
False
contentsStream :: Dict -> PSR -> [PDFObj] -> PDFStream
contentsStream :: Dict -> PSR -> [PDFObj] -> PDFStream
contentsStream Dict
dict PSR
st [PDFObj]
objs = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
contents Dict
dict of
Just (PdfName String
"/Contents", PdfArray [Obj]
arr) -> [Obj] -> PDFStream
getContentArray [Obj]
arr
Just (PdfName String
"/Contents", ObjRef Int
r) ->
case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
r [PDFObj]
objs of
Just [PdfArray [Obj]
arr] -> [Obj] -> PDFStream
getContentArray [Obj]
arr
Just [Obj]
_ -> Int -> PDFStream
getContent Int
r
Maybe [Obj]
Nothing -> String -> PDFStream
forall a. HasCallStack => String -> a
error String
"No content to be shown"
Maybe (Obj, Obj)
Nothing -> String -> PDFStream
forall a. HasCallStack => String -> a
error String
"No content to be shown"
where
contents :: (Obj, b) -> Bool
contents (PdfName String
"/Contents", b
_) = Bool
True
contents (Obj, b)
_ = Bool
False
getContentArray :: [Obj] -> PDFStream
getContentArray [Obj]
arr = Dict -> PSR -> [PDFObj] -> PDFStream -> PDFStream
parseContentStream Dict
dict PSR
st [PDFObj]
objs (PDFStream -> PDFStream) -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$
[PDFStream] -> PDFStream
BSL.concat ([PDFStream] -> PDFStream) -> [PDFStream] -> PDFStream
forall a b. (a -> b) -> a -> b
$ (Int -> PDFStream) -> [Int] -> [PDFStream]
forall a b. (a -> b) -> [a] -> [b]
map ([PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs) ([Obj] -> [Int]
parseRefsArray [Obj]
arr)
getContent :: Int -> PDFStream
getContent Int
r = Dict -> PSR -> [PDFObj] -> PDFStream -> PDFStream
parseContentStream Dict
dict PSR
st [PDFObj]
objs (PDFStream -> PDFStream) -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
r
parseContentStream :: Dict -> PSR -> [PDFObj] -> BSL.ByteString -> PDFStream
parseContentStream :: Dict -> PSR -> [PDFObj] -> PDFStream -> PDFStream
parseContentStream Dict
dict PSR
st [PDFObj]
objs PDFStream
s =
PSR -> PDFStream -> PDFStream
parseStream (PSR
st {fontmaps :: [(String, Encoding)]
fontmaps=[(String, Encoding)]
fontdict, cmaps :: [(String, CMap)]
cmaps=[(String, CMap)]
cmap}) PDFStream
s
where fontdict :: [(String, Encoding)]
fontdict = Dict -> [PDFObj] -> [(String, Encoding)]
findFontEncoding Dict
dict [PDFObj]
objs
cmap :: [(String, CMap)]
cmap = Dict -> [PDFObj] -> [(String, CMap)]
findCMap Dict
dict [PDFObj]
objs
rawStreamByRef :: [PDFObj] -> Int -> BSL.ByteString
rawStreamByRef :: [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
pdfobjs Int
x = case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
x [PDFObj]
pdfobjs of
Just [Obj]
objs -> [Obj] -> PDFStream
rawStream [Obj]
objs
Maybe [Obj]
Nothing -> String -> PDFStream
forall a. HasCallStack => String -> a
error String
"No object with stream to be shown"
rawStream :: [Obj] -> BSL.ByteString
rawStream :: [Obj] -> PDFStream
rawStream [Obj]
objs = case (Obj -> Bool) -> [Obj] -> Maybe Obj
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Obj -> Bool
isStream [Obj]
objs of
Just (PdfStream PDFStream
strm) -> Dict -> PDFStream -> PDFStream
rawStream' (Dict -> Maybe Dict -> Dict
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe Dict -> Dict) -> Maybe Dict -> Dict
forall a b. (a -> b) -> a -> b
$ [Obj] -> Maybe Dict
findDict [Obj]
objs) PDFStream
strm
Maybe Obj
Nothing -> String -> PDFStream
BSL.pack (String -> PDFStream) -> String -> PDFStream
forall a b. (a -> b) -> a -> b
$ [Obj] -> String
forall a. Show a => a -> String
show [Obj]
objs
where
isStream :: Obj -> Bool
isStream (PdfStream PDFStream
s) = Bool
True
isStream Obj
_ = Bool
False
rawStream' :: Dict -> BSL.ByteString -> BSL.ByteString
rawStream' :: Dict -> PDFStream -> PDFStream
rawStream' Dict
d PDFStream
s = Dict -> PDFStream -> PDFStream
forall (t :: * -> *).
Foldable t =>
t (Obj, Obj) -> PDFStream -> PDFStream
streamFilter Dict
d PDFStream
s
streamFilter :: t (Obj, Obj) -> PDFStream -> PDFStream
streamFilter t (Obj, Obj)
d = case ((Obj, Obj) -> Bool) -> t (Obj, Obj) -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
withFilter t (Obj, Obj)
d of
Just (PdfName String
"/Filter", PdfName String
"/FlateDecode")
-> PDFStream -> PDFStream
decompress
Just (PdfName String
"/Filter", PdfName String
f)
-> String -> PDFStream -> PDFStream
forall a. HasCallStack => String -> a
error (String -> PDFStream -> PDFStream)
-> String -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$ String
"Unknown Stream Compression: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
f
Just (Obj, Obj)
_ -> String -> PDFStream -> PDFStream
forall a. HasCallStack => String -> a
error (String -> PDFStream -> PDFStream)
-> String -> PDFStream -> PDFStream
forall a b. (a -> b) -> a -> b
$ String
"No Stream Compression Filter."
Maybe (Obj, Obj)
Nothing -> PDFStream -> PDFStream
forall a. a -> a
id
withFilter :: (Obj, b) -> Bool
withFilter (PdfName String
"/Filter", b
_) = Bool
True
withFilter (Obj, b)
_ = Bool
False
contentsColorSpace :: Dict -> PSR -> [PDFObj] -> [T.Text]
contentsColorSpace :: Dict -> PSR -> [PDFObj] -> [Text]
contentsColorSpace Dict
dict PSR
st [PDFObj]
objs = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
contents Dict
dict of
Just (PdfName String
"/Contents", PdfArray [Obj]
arr) -> [[Text]] -> [Text]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Text]] -> [Text]) -> [[Text]] -> [Text]
forall a b. (a -> b) -> a -> b
$ (Int -> [Text]) -> [Int] -> [[Text]]
forall a b. (a -> b) -> [a] -> [b]
map (PSR -> PDFStream -> [Text]
parseColorSpace (PSR
st {xcolorspaces :: [String]
xcolorspaces=[String]
xobjcs}) (PDFStream -> [Text]) -> (Int -> PDFStream) -> Int -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs) ([Obj] -> [Int]
parseRefsArray [Obj]
arr)
Just (PdfName String
"/Contents", ObjRef Int
x) -> PSR -> PDFStream -> [Text]
parseColorSpace (PSR
st {xcolorspaces :: [String]
xcolorspaces=[String]
xobjcs}) (PDFStream -> [Text]) -> PDFStream -> [Text]
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
x
Maybe (Obj, Obj)
Nothing -> String -> [Text]
forall a. HasCallStack => String -> a
error String
"No content to be shown"
where
contents :: (Obj, b) -> Bool
contents (PdfName String
"/Contents", b
_) = Bool
True
contents (Obj, b)
_ = Bool
False
xobjcs :: [String]
xobjcs = Dict -> [PDFObj] -> [String]
findXObjectColorSpace Dict
dict [PDFObj]
objs
findXObjectColorSpace :: Dict -> [PDFObj] -> [String]
findXObjectColorSpace Dict
d [PDFObj]
os = Dict -> [PDFObj] -> [String]
xobjColorSpaceMap (Dict -> [PDFObj] -> Dict
findXObject Dict
d [PDFObj]
os) [PDFObj]
os
xobjColorSpaceMap :: Dict -> [PDFObj] -> [String]
xobjColorSpaceMap Dict
dict [PDFObj]
objs = ((Obj, Obj) -> String) -> Dict -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Obj, Obj) -> String
pairwise Dict
dict
where
pairwise :: (Obj, Obj) -> String
pairwise (PdfName String
n, ObjRef Int
r) = Int -> [PDFObj] -> String
xobjColorSpace Int
r [PDFObj]
objs
pairwise (Obj, Obj)
x = String
""
findXObject :: Dict -> [PDFObj] -> Dict
findXObject Dict
dict [PDFObj]
objs = case Dict -> [PDFObj] -> Maybe Dict
findResourcesDict Dict
dict [PDFObj]
objs of
Just Dict
d -> case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/XObject" of
Just (PdfDict Dict
d) -> Dict
d
Maybe Obj
otherwise -> []
Maybe Dict
Nothing -> []
xobjColorSpace :: Int -> [PDFObj] -> String
xobjColorSpace :: Int -> [PDFObj] -> String
xobjColorSpace Int
x [PDFObj]
objs = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/ColorSpace" [PDFObj]
objs of
Just (PdfName String
cs) -> String
cs
Maybe Obj
otherwise -> String
""
parseTrailer :: BS.ByteString -> Maybe Dict
parseTrailer :: ByteString -> Maybe Dict
parseTrailer ByteString
bs = case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.breakEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') ByteString
bs of
(ByteString
source, ByteString
eofLine)
| ByteString
"%%EOF" ByteString -> ByteString -> Bool
`BS.isPrefixOf` ByteString
eofLine
-> Dict -> Maybe Dict
forall a. a -> Maybe a
Just (ByteString -> Dict
parseCRDict (ByteString -> Dict) -> ByteString -> Dict
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop (ByteString -> Int
forall p. (Eq p, Num p) => ByteString -> p
getOffset ByteString
source) ByteString
bs)
| ByteString
source ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"" -> Maybe Dict
forall a. Maybe a
Nothing
| Bool
otherwise -> ByteString -> Maybe Dict
parseTrailer (ByteString -> ByteString
BS.init ByteString
bs)
getOffset :: ByteString -> p
getOffset ByteString
bs = case (Char -> Bool) -> ByteString -> (ByteString, ByteString)
BS.breakEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') (ByteString -> ByteString
BS.init ByteString
bs) of
(ByteString
_, ByteString
nstr) -> case ReadS p
forall a. (Eq a, Num a) => ReadS a
readDec ReadS p -> ReadS p
forall a b. (a -> b) -> a -> b
$ ByteString -> String
BS.unpack ByteString
nstr of
[(p
n,String
_)] -> p
n
[(p, String)]
_ -> String -> p
forall a. HasCallStack => String -> a
error String
"Could not find Offset"
parseCRDict :: BS.ByteString -> Dict
parseCRDict :: ByteString -> Dict
parseCRDict ByteString
rlt = case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
crdict ByteString
rlt of
Left String
err -> String -> Dict
forall a. HasCallStack => String -> a
error (String -> Dict) -> String -> Dict
forall a b. (a -> b) -> a -> b
$ ByteString -> String
forall a. Show a => a -> String
show (Int -> ByteString -> ByteString
BS.take Int
100 ByteString
rlt)
Right (PdfDict Dict
dict) -> Dict
dict
Right Obj
_ -> String -> Dict
forall a. HasCallStack => String -> a
error String
"Could not find Cross-Reference dictionary"
where
crdict :: Parser Obj
crdict :: Parser Obj
crdict = do
Parser ()
spaces
(Parser () -> Parser ()
forall i a. Parser i a -> Parser i a
try Parser ()
skipCRtable Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
skipCRstream)
Obj
d <- Parser Obj
pdfdictionary Parser Obj -> Parser () -> Parser Obj
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
spaces
Obj -> Parser Obj
forall (m :: * -> *) a. Monad m => a -> m a
return Obj
d
skipCRtable :: Parser ()
skipCRtable = ((Parser Char -> Parser ByteString ByteString -> Parser String
forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill Parser Char
anyChar (Parser ByteString ByteString -> Parser ByteString ByteString
forall i a. Parser i a -> Parser i a
try (Parser ByteString ByteString -> Parser ByteString ByteString)
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
string ByteString
"trailer")) Parser String -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
spaces)
skipCRstream :: Parser ()
skipCRstream = (Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit Parser String -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
spaces Parser () -> Parser Char -> Parser Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Char
digit Parser Char
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString -> Parser ByteString ByteString
string ByteString
" obj" Parser ByteString ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
spaces)
rootRef :: BS.ByteString -> Maybe Int
rootRef :: ByteString -> Maybe Int
rootRef ByteString
bs = case ByteString -> Maybe Dict
parseTrailer ByteString
bs of
Just Dict
dict -> ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
isRootRef Dict
dict
Maybe Dict
Nothing -> ByteString -> Maybe Int
rootRefFromCRStream ByteString
bs
rootRefFromCRStream :: BS.ByteString -> Maybe Int
rootRefFromCRStream :: ByteString -> Maybe Int
rootRefFromCRStream ByteString
bs =
let offset :: Int
offset = (String -> Int
forall a. Read a => String -> a
read (String -> Int) -> (ByteString -> String) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
BS.unpack (ByteString -> String)
-> (ByteString -> ByteString) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> ByteString
forall a. [a] -> a
head ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [ByteString] -> [ByteString]
forall a. Int -> [a] -> [a]
drop Int
1 ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
BS.lines (ByteString -> Int) -> ByteString -> Int
forall a b. (a -> b) -> a -> b
$ (String -> ByteString -> ByteString
forall a. String -> a -> a
trace (ByteString -> String
forall a. Show a => a -> String
show ByteString
bs) ByteString
bs)) :: Int
crstrm :: ByteString
crstrm = PDFBS -> ByteString
forall a b. (a, b) -> b
snd (PDFBS -> ByteString)
-> (ByteString -> PDFBS) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PDFBS] -> PDFBS
forall a. [a] -> a
head ([PDFBS] -> PDFBS)
-> (ByteString -> [PDFBS]) -> ByteString -> PDFBS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [PDFBS]
findObjs (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop Int
offset ByteString
bs
crdict :: Dict
crdict = ByteString -> Dict
parseCRDict ByteString
crstrm
in ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
isRootRef (Dict -> Maybe Int) -> Dict -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Dict
crdict
isRootRef :: (Obj, Obj) -> Bool
isRootRef (PdfName String
"/Root", ObjRef Int
x) = Bool
True
isRootRef (Obj
_,Obj
_) = Bool
False
findRefs :: ((Obj,Obj) -> Bool) -> Dict -> Maybe Int
findRefs :: ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
pred Dict
dict = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
pred Dict
dict of
Just (Obj
_, ObjRef Int
x) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x
Maybe (Obj, Obj)
Nothing -> Maybe Int
forall a. Maybe a
Nothing
findTrailer :: ByteString -> Dict
findTrailer ByteString
bs = do
case ByteString -> Maybe Dict
parseTrailer ByteString
bs of
Just Dict
d -> Dict
d
Maybe Dict
Nothing -> []
infoRef :: ByteString -> Maybe Int
infoRef ByteString
bs = case ByteString -> Maybe Dict
parseTrailer ByteString
bs of
Just Dict
dict -> ((Obj, Obj) -> Bool) -> Dict -> Maybe Int
findRefs (Obj, Obj) -> Bool
isInfoRef Dict
dict
Maybe Dict
Nothing -> String -> Maybe Int
forall a. HasCallStack => String -> a
error String
"No ref for info"
isInfoRef :: (Obj, Obj) -> Bool
isInfoRef (PdfName String
"/Info", ObjRef Int
x) = Bool
True
isInfoRef (Obj
_,Obj
_) = Bool
False
expandObjStm :: [PDFObj] -> [PDFObj]
expandObjStm :: [PDFObj] -> [PDFObj]
expandObjStm [PDFObj]
os = [[PDFObj]] -> [PDFObj]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[PDFObj]] -> [PDFObj]) -> [[PDFObj]] -> [PDFObj]
forall a b. (a -> b) -> a -> b
$ (PDFObj -> [PDFObj]) -> [PDFObj] -> [[PDFObj]]
forall a b. (a -> b) -> [a] -> [b]
map PDFObj -> [PDFObj]
objStm [PDFObj]
os
objStm :: PDFObj -> [PDFObj]
objStm :: PDFObj -> [PDFObj]
objStm (Int
n, [Obj]
obj) = case String -> [Obj] -> Maybe Dict
findDictOfType String
"/ObjStm" [Obj]
obj of
Maybe Dict
Nothing -> [(Int
n,[Obj]
obj)]
Just Dict
_ -> Int -> ByteString -> [PDFObj]
forall p. p -> ByteString -> [PDFObj]
pdfObjStm Int
n (ByteString -> [PDFObj]) -> ByteString -> [PDFObj]
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [Obj] -> PDFStream
rawStream [Obj]
obj
refOffset :: Parser ([(Int, Int)], String)
refOffset :: Parser ([(Int, Int)], String)
refOffset = Parser ()
spaces Parser ()
-> Parser ([(Int, Int)], String) -> Parser ([(Int, Int)], String)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((,)
([(Int, Int)] -> String -> ([(Int, Int)], String))
-> Parser ByteString [(Int, Int)]
-> Parser ByteString (String -> ([(Int, Int)], String))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString (Int, Int) -> Parser ByteString [(Int, Int)]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 ((\String
r String
o -> (String -> Int
forall a. Read a => String -> a
read String
r :: Int, String -> Int
forall a. Read a => String -> a
read String
o :: Int))
(String -> String -> (Int, Int))
-> Parser String -> Parser ByteString (String -> (Int, Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit Parser String -> Parser () -> Parser String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
spaces)
Parser ByteString (String -> (Int, Int))
-> Parser String -> Parser ByteString (Int, Int)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
digit Parser String -> Parser () -> Parser String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
spaces))
Parser ByteString (String -> ([(Int, Int)], String))
-> Parser String -> Parser ([(Int, Int)], String)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char -> Parser String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Char
anyChar)
pdfObjStm :: p -> ByteString -> [PDFObj]
pdfObjStm p
n ByteString
s =
let ([(Int, Int)]
location, String
objstr) = case Parser ([(Int, Int)], String)
-> ByteString -> Either String ([(Int, Int)], String)
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser ([(Int, Int)], String)
refOffset ByteString
s of
Right ([(Int, Int)], String)
val -> ([(Int, Int)], String)
val
Left String
err -> String -> ([(Int, Int)], String)
forall a. HasCallStack => String -> a
error (String -> ([(Int, Int)], String))
-> String -> ([(Int, Int)], String)
forall a b. (a -> b) -> a -> b
$ String
"Failed to parse Object Stream: "
in ((Int, Int) -> PDFObj) -> [(Int, Int)] -> [PDFObj]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
r,Int
o) -> (Int
r, ByteString -> [Obj]
parseDict (ByteString -> [Obj]) -> ByteString -> [Obj]
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BS.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
o String
objstr)) [(Int, Int)]
location
where parseDict :: ByteString -> [Obj]
parseDict ByteString
s' = case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
pdfdictionary ByteString
s' of
Right Obj
obj -> [Obj
obj]
Left String
_ -> case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
pdfarray ByteString
s' of
Right Obj
obj -> [Obj
obj]
Left String
_ -> case Parser Obj -> ByteString -> Either String Obj
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Obj
pdfletters ByteString
s' of
Right Obj
obj -> [Obj
obj]
Left String
err -> String -> [Obj]
forall a. HasCallStack => String -> a
error (String -> [Obj]) -> String -> [Obj]
forall a b. (a -> b) -> a -> b
$ (String -> String
forall a. Show a => a -> String
show String
err) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
":\n Failed to parse obj around; \n"
String -> String -> String
forall a. [a] -> [a] -> [a]
++ (ByteString -> String
forall a. Show a => a -> String
show (ByteString -> String) -> ByteString -> String
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.take Int
100 ByteString
s')
findFontEncoding :: Dict -> [PDFObj] -> [(String, Encoding)]
findFontEncoding Dict
d [PDFObj]
os = Dict -> [PDFObj] -> [(String, Encoding)]
findEncoding (Dict -> [PDFObj] -> Dict
fontObjs Dict
d [PDFObj]
os) [PDFObj]
os
findEncoding :: Dict -> [PDFObj] -> [(String, Encoding)]
findEncoding :: Dict -> [PDFObj] -> [(String, Encoding)]
findEncoding Dict
dict [PDFObj]
objs = ((Obj, Obj) -> (String, Encoding)) -> Dict -> [(String, Encoding)]
forall a b. (a -> b) -> [a] -> [b]
map (Obj, Obj) -> (String, Encoding)
pairwise Dict
dict
where
pairwise :: (Obj, Obj) -> (String, Encoding)
pairwise (PdfName String
n, ObjRef Int
r) = (String
n, Int -> [PDFObj] -> Encoding
encoding Int
r [PDFObj]
objs)
pairwise (Obj, Obj)
x = (String
"", Encoding
NullMap)
fontObjs :: Dict -> [PDFObj] -> Dict
fontObjs :: Dict -> [PDFObj] -> Dict
fontObjs Dict
dict [PDFObj]
objs = case Dict -> [PDFObj] -> Maybe Dict
findResourcesDict Dict
dict [PDFObj]
objs of
Just Dict
d -> case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Font" of
Just (PdfDict Dict
d') -> Dict
d'
Just (ObjRef Int
x) -> case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
x [PDFObj]
objs of
Just Dict
d' -> Dict
d'
Maybe Dict
otherwise -> String -> Dict
forall a. HasCallStack => String -> a
error String
"cannot find /Font dictionary"
Maybe Obj
otherwise -> String -> Dict -> Dict
forall a. String -> a -> a
trace (Dict -> String
forall a. Show a => a -> String
show Dict
d) (Dict -> Dict) -> Dict -> Dict
forall a b. (a -> b) -> a -> b
$ []
Maybe Dict
Nothing -> []
findResourcesDict :: Dict -> [PDFObj] -> Maybe Dict
findResourcesDict :: Dict -> [PDFObj] -> Maybe Dict
findResourcesDict Dict
dict [PDFObj]
objs = case ((Obj, Obj) -> Bool) -> Dict -> Maybe (Obj, Obj)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (Obj, Obj) -> Bool
forall b. (Obj, b) -> Bool
resources Dict
dict of
Just (Obj
_, ObjRef Int
x) -> Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
x [PDFObj]
objs
Just (Obj
_, PdfDict Dict
d) -> Dict -> Maybe Dict
forall a. a -> Maybe a
Just Dict
d
Maybe (Obj, Obj)
otherwise -> String -> Maybe Dict
forall a. HasCallStack => String -> a
error (Dict -> String
forall a. Show a => a -> String
show Dict
dict)
where
resources :: (Obj, b) -> Bool
resources (PdfName String
"/Resources", b
_) = Bool
True
resources (Obj, b)
_ = Bool
False
encoding :: Int -> [PDFObj] -> Encoding
encoding :: Int -> [PDFObj] -> Encoding
encoding Int
x [PDFObj]
objs = case Maybe Obj
subtype of
Just (PdfName String
"/Type0") -> case Maybe Obj
encoding of
Just (PdfName String
"/Identity-H") -> [Encoding] -> Encoding
forall a. [a] -> a
head ([Encoding] -> Encoding) -> [Encoding] -> Encoding
forall a b. (a -> b) -> a -> b
$ [Obj] -> [Encoding]
cidSysInfo [Obj]
descendantFonts
Just (PdfName String
s) -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Unknown Encoding " String -> String -> String
forall a. [a] -> [a] -> [a]
++ (String -> String
forall a. Show a => a -> String
show String
s) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" for a Type0 font. Check " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
x
Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Something wrong with a Type0 font. Check " String -> String -> String
forall a. [a] -> [a] -> [a]
++ (Int -> String
forall a. Show a => a -> String
show Int
x)
Just (PdfName String
"/Type1") -> case Maybe Obj
encoding of
Just (ObjRef Int
r) -> case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
r String
"/Differences" [PDFObj]
objs of
Just (PdfArray [Obj]
arr) -> [Obj] -> Encoding
charDiff [Obj]
arr
Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error String
"No /Differences"
Just (PdfDict Dict
d) -> case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Differences" of
Just (PdfArray [Obj]
arr) -> [Obj] -> Encoding
charDiff [Obj]
arr
Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error String
"No /Differences"
Just (PdfName String
"/MacRomanEncoding") -> Encoding
NullMap
Just (PdfName String
"/MacExpertEncoding") -> Encoding
NullMap
Just (PdfName String
"/WinAnsiEncoding") -> Encoding
NullMap
Maybe Obj
_ -> case Dict -> String -> Maybe Obj
findObjFromDict (Int -> Dict
fontDescriptor' Int
x) String
"/FontFile3" of
Just (ObjRef Int
fontfile) ->
ByteString -> Encoding
CFF.encoding (ByteString -> Encoding) -> ByteString -> Encoding
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
fontfile
Maybe Obj
_ -> case Dict -> String -> Maybe Obj
findObjFromDict (Int -> Dict
fontDescriptor' Int
x) String
"/FontFile" of
Just (ObjRef Int
fontfile) ->
ByteString -> Encoding
Type1.encoding (ByteString -> Encoding) -> ByteString -> Encoding
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
fontfile
Maybe Obj
_ -> Encoding
NullMap
Just (PdfName String
"/Type2") -> Encoding
NullMap
Just (PdfName String
"/Type3") -> Encoding
NullMap
Maybe Obj
_ -> Encoding
NullMap
where
subtype :: Maybe Obj
subtype = String -> Maybe Obj
get String
"/Subtype"
encoding :: Maybe Obj
encoding = String -> Maybe Obj
get String
"/Encoding"
toUnicode :: Maybe Obj
toUnicode = String -> Maybe Obj
get String
"/ToUnicode"
get :: String -> Maybe Obj
get String
s = Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
s [PDFObj]
objs
descendantFonts :: [Obj]
descendantFonts :: [Obj]
descendantFonts = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/DescendantFonts" [PDFObj]
objs of
Just (PdfArray [Obj]
dfrs) -> [Obj]
dfrs
Just (ObjRef Int
r) -> case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
r [PDFObj]
objs of
Just [(PdfArray [Obj]
dfrs)] -> [Obj]
dfrs
Maybe [Obj]
_ -> String -> [Obj]
forall a. HasCallStack => String -> a
error (String -> [Obj]) -> String -> [Obj]
forall a b. (a -> b) -> a -> b
$ String
"Can not find /DescendantFonts entries in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r
Maybe Obj
_ -> String -> [Obj]
forall a. HasCallStack => String -> a
error (String -> [Obj]) -> String -> [Obj]
forall a b. (a -> b) -> a -> b
$ String
"Can not find /DescendantFonts itself in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
x
cidSysInfo :: [Obj] -> [Encoding]
cidSysInfo :: [Obj] -> [Encoding]
cidSysInfo [] = []
cidSysInfo ((ObjRef Int
r):[Obj]
rs) = (Int -> Encoding
cidSysInfo' Int
r)Encoding -> [Encoding] -> [Encoding]
forall a. a -> [a] -> [a]
:([Obj] -> [Encoding]
cidSysInfo [Obj]
rs)
cidSysInfo' :: Int -> Encoding
cidSysInfo' Int
dfr = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
dfr String
"/CIDSystemInfo" [PDFObj]
objs of
Just (PdfDict Dict
dict) -> Dict -> Encoding
getCIDSystemInfo Dict
dict
Just (ObjRef Int
r) -> case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
r [PDFObj]
objs of
Just Dict
dict -> Dict -> Encoding
getCIDSystemInfo Dict
dict
Maybe Dict
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Can not find /CIDSystemInfo entries in" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r
Maybe Obj
_ -> String -> Encoding
forall a. HasCallStack => String -> a
error (String -> Encoding) -> String -> Encoding
forall a b. (a -> b) -> a -> b
$ String
"Can not find /CidSystemInfo itself " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
dfr
fontDescriptor :: [Obj] -> [Dict]
fontDescriptor :: [Obj] -> [Dict]
fontDescriptor [] = []
fontDescriptor ((ObjRef Int
r):[Obj]
rs) = (Int -> Dict
fontDescriptor' Int
r)Dict -> [Dict] -> [Dict]
forall a. a -> [a] -> [a]
:([Obj] -> [Dict]
fontDescriptor [Obj]
rs)
fontDescriptor' :: Int -> Dict
fontDescriptor' :: Int -> Dict
fontDescriptor' Int
fdr = case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
fdr String
"/FontDescriptor" [PDFObj]
objs of
Just (ObjRef Int
r) -> case Int -> [PDFObj] -> Maybe Dict
findDictByRef Int
r [PDFObj]
objs of
Just Dict
dict -> Dict
dict
Maybe Dict
_ -> String -> Dict
forall a. HasCallStack => String -> a
error (String -> Dict) -> String -> Dict
forall a b. (a -> b) -> a -> b
$ String
"No /FontDescriptor entries in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r
Maybe Obj
_ -> String -> Dict
forall a. HasCallStack => String -> a
error (String -> Dict) -> String -> Dict
forall a b. (a -> b) -> a -> b
$ String
"Can not find /FontDescriptor itself in " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
fdr
getCIDSystemInfo :: Dict -> Encoding
getCIDSystemInfo Dict
d =
let registry :: String
registry = case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Registry" of
Just (PdfText String
r) -> String
r
Maybe Obj
otherwise -> String -> String
forall a. HasCallStack => String -> a
error String
"Can not find /Registry"
ordering :: String
ordering = case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Ordering" of
Just (PdfText String
o) -> String
o
Maybe Obj
othserwise -> String -> String
forall a. HasCallStack => String -> a
error String
"Can not find /Ordering"
supplement :: Double
supplement = case Dict -> String -> Maybe Obj
findObjFromDict Dict
d String
"/Supplement" of
Just (PdfNumber Double
s) -> Double
s
Maybe Obj
otherwise -> String -> Double
forall a. HasCallStack => String -> a
error String
"Can not find /Supprement"
cmap :: String
cmap = String
registry String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"-" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
ordering
in if String
cmap String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"Adobe-Japan1"
then String -> Encoding
CIDmap String
cmap
else String -> Encoding
WithCharSet String
""
charDiff :: [Obj] -> Encoding
charDiff :: [Obj] -> Encoding
charDiff [Obj]
objs = [(Char, String)] -> Encoding
Encoding ([(Char, String)] -> Encoding) -> [(Char, String)] -> Encoding
forall a b. (a -> b) -> a -> b
$ [Obj] -> Int -> [(Char, String)]
charmap [Obj]
objs Int
0
where charmap :: [Obj] -> Int -> [(Char, String)]
charmap (PdfNumber Double
x : PdfName String
n : [Obj]
xs) Int
i =
if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate Double
x then
(Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate Double
x, String
n) (Char, String) -> [(Char, String)] -> [(Char, String)]
forall a. a -> [a] -> [a]
: ([Obj] -> Int -> [(Char, String)]
charmap [Obj]
xs (Int -> [(Char, String)]) -> Int -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a a. (RealFrac a, Integral a) => a -> a
incr Double
x)
else
(Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Int
i, String
n) (Char, String) -> [(Char, String)] -> [(Char, String)]
forall a. a -> [a] -> [a]
: ([Obj] -> Int -> [(Char, String)]
charmap [Obj]
xs (Int -> [(Char, String)]) -> Int -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
charmap (PdfName String
n : [Obj]
xs) Int
i = (Int -> Char
chr Int
i, String
n) (Char, String) -> [(Char, String)] -> [(Char, String)]
forall a. a -> [a] -> [a]
: ([Obj] -> Int -> [(Char, String)]
charmap [Obj]
xs (Int -> [(Char, String)]) -> Int -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
charmap [] Int
i = []
incr :: a -> a
incr a
x = (a -> a
forall a b. (RealFrac a, Integral b) => a -> b
truncate a
x) a -> a -> a
forall a. Num a => a -> a -> a
+ a
1
findCMap :: Dict -> [PDFObj] -> [(String, CMap)]
findCMap :: Dict -> [PDFObj] -> [(String, CMap)]
findCMap Dict
d [PDFObj]
objs = ((Obj, Obj) -> (String, CMap)) -> Dict -> [(String, CMap)]
forall a b. (a -> b) -> [a] -> [b]
map (Obj, Obj) -> (String, CMap)
pairwise (Dict -> [PDFObj] -> Dict
fontObjs Dict
d [PDFObj]
objs)
where
pairwise :: (Obj, Obj) -> (String, CMap)
pairwise (PdfName String
n, ObjRef Int
r) = (String
n, Int -> [PDFObj] -> CMap
toUnicode Int
r [PDFObj]
objs)
pairwise (Obj, Obj)
x = (String
"", [])
toUnicode :: Int -> [PDFObj] -> CMap
toUnicode :: Int -> [PDFObj] -> CMap
toUnicode Int
x [PDFObj]
objs =
case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/ToUnicode" [PDFObj]
objs of
Just (ObjRef Int
ref) ->
PDFStream -> CMap
parseCMap (PDFStream -> CMap) -> PDFStream -> CMap
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
ref
Maybe Obj
otherwise -> Int -> [PDFObj] -> CMap
noToUnicode Int
x [PDFObj]
objs
noToUnicode :: Int -> [PDFObj] -> CMap
noToUnicode Int
x [PDFObj]
objs =
case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
x String
"/DescendantFonts" [PDFObj]
objs of
Just (ObjRef Int
ref) ->
case Int -> [PDFObj] -> Maybe [Obj]
findObjsByRef Int
ref [PDFObj]
objs of
Just [(PdfArray ((ObjRef Int
subref):[Obj]
_))] ->
case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
subref String
"/FontDescriptor" [PDFObj]
objs of
Just (ObjRef Int
desc) ->
case Int -> String -> [PDFObj] -> Maybe Obj
findObjFromDictWithRef Int
desc String
"/FontFile2" [PDFObj]
objs of
Just (ObjRef Int
fontfile) ->
ByteString -> CMap
OpenType.cmap (ByteString -> CMap) -> ByteString -> CMap
forall a b. (a -> b) -> a -> b
$ PDFStream -> ByteString
BSL.toStrict (PDFStream -> ByteString) -> PDFStream -> ByteString
forall a b. (a -> b) -> a -> b
$ [PDFObj] -> Int -> PDFStream
rawStreamByRef [PDFObj]
objs Int
fontfile
Maybe Obj
otherwise -> []
Maybe Obj
otherwise -> []
Maybe [Obj]
otherwise -> []
Maybe Obj
otherwise -> []