module Text.Hastily.SubtitleFileTypes.Srt.Srt
(
getSrtFilesInDir,
parseFile,
srtParser
) where
import Control.Applicative ((<*))
import qualified Data.ByteString as BS
import Data.Char
import Data.String.Conversions
import qualified Data.Text as DT
import Data.Text.Encoding
import Data.Text.Encoding.Error
import Text.Hastily.Types
import System.Directory.Tree
import System.FilePath
import System.IO
import Text.Parsec ((<|>))
import qualified Text.Parsec as Parsec
getSrtFilesInDir :: FilePath -> IO [FilePath]
getSrtFilesInDir destination = do
anchored_dir_tree <- build destination
let file_dt = filter isfile $ flattenDir $ filterDir (\x -> case x of (File name a) -> (takeExtension name) == ".srt";Dir _ _ -> True;_->False) (dirTree anchored_dir_tree)
return $ fmap (\(File name file_path) -> file_path) file_dt
where
isfile x = case x of
File _ _ -> True
_ -> False
parseFile :: MovieInfo -> FilePath -> IO Subtitle
parseFile movie_info file_path = do
putStrLn $ "Parsing subtitles from " ++ file_path
handle <- openBinaryFile file_path ReadMode
doParse handle
where
doParse handle = do
b_string <- BS.hGetContents handle
let string = DT.dropWhile (/= '1') $ decodeUtf8With ignore b_string
case Parsec.parse srtParser "(mainparser)" string of
Right subtitles -> case subtitles of
[] -> do
putStrLn $ "WARNING : No subtitles could be read from file " ++ file_path
return $ Subtitle movie_info file_path []
all@(x:xs) -> do
putStrLn $ "Read " ++ (show $ length all) ++ " dialogues."
return $ Subtitle movie_info file_path subtitles
Left err -> do
print err
return $ Subtitle movie_info file_path []
srtParser :: Parsec.Parsec DT.Text () [SubtitleDialog]
srtParser = Parsec.many $ Parsec.try chunk_parser
where
makeDigest dialog = DT.filter isAlphaNum $ DT.toLower dialog
newLine = (Parsec.string "\n") <|> (Parsec.string "\r\n")
separator = Parsec.count 2 newLine
separatorOrEof = do
separator
chunk_parser = do
let time_parser = Parsec.many (Parsec.digit <|> (Parsec.char ':') <|> (Parsec.char ','))
index <- (Parsec.many Parsec.digit)
newLine
start_time <- time_parser
Parsec.many1 $ Parsec.char ' '
Parsec.string "-->"
Parsec.many1 $ Parsec.char ' '
end_time <- time_parser
newLine
dialog <- Parsec.manyTill Parsec.anyChar (Parsec.try separatorOrEof)
return $ SubtitleDialog (cs start_time) (cs end_time) (cs dialog) (makeDigest $ cs dialog)