{-# OPTIONS_GHC -threaded -rtsopts #-}
{-# OPTIONS_HADDOCK show-extensions #-}
{-# LANGUAGE BangPatterns, NoImplicitPrelude #-}

-- |
-- Module      :  Phladiprelio.General.Parsing
-- Copyright   :  (c) Oleksandr Zhabenko 2021-2024
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  oleksandr.zhabenko@yahoo.com
--
-- The additional parsing library functions for the PhLADiPreLiO both old and new variants.
-- Is taken from the Phonetic.Languages.Parsing module from the
-- @phonetic-languages-simplified-examples-array@ package to reduce dependencies in general case.
-- 

module Phladiprelio.General.Parsing (
  -- * Predicates
  isClosingCurlyBracket
  , isSlash
  , isOpeningCurlyBracket
  , variations
  -- * Transformations
  , breakGroupOfStrings
  , breakInSlashes
  , combineVariants
  , combineHeadsWithNexts
  , transformToVariations
  -- * Files processment for specifications
  , readLangSpecs 
  , innerProcessmentSimple
  , argsProcessment
) where

import GHC.Base
import GHC.List
import Phladiprelio.General.PrepareText
import System.Environment (getArgs)
import System.IO (FilePath, readFile)
import Data.List (sort,lines,unwords)
import GHC.Arr
import Phladiprelio.General.Base
import Phladiprelio.General.Syllables
import Text.Read (readMaybe,read)
import Data.Maybe (fromMaybe)
import Phladiprelio.General.SpecificationsRead

isClosingCurlyBracket :: String -> Bool
isClosingCurlyBracket :: String -> Bool
isClosingCurlyBracket = (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"}")
{-# INLINE isClosingCurlyBracket #-}

isSlash :: String -> Bool
isSlash :: String -> Bool
isSlash (Char
x:String
xs)
 | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/' = Bool
False
 | String -> Bool
forall a. [a] -> Bool
null String
xs = Bool
True
 | Bool
otherwise = Bool
False
isSlash String
_ = Bool
False
{-# INLINE isSlash #-}

isOpeningCurlyBracket :: String -> Bool
isOpeningCurlyBracket :: String -> Bool
isOpeningCurlyBracket = (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"{")
{-# INLINE isOpeningCurlyBracket #-}

breakGroupOfStrings :: [String] -> (([String],[[String]]),[String])
breakGroupOfStrings :: [String] -> (([String], [[String]]), [String])
breakGroupOfStrings ![String]
xss = (([String]
tss,[String] -> [[String]] -> [[String]]
breakInSlashes [String]
uss []), Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1 [String]
zss)
  where (![String]
yss,![String]
zss) = (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break String -> Bool
isClosingCurlyBracket [String]
xss
        (![String]
tss,![String]
uss) = (\([String]
t1,[String]
t2) -> ([String]
t1,Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1 [String]
t2)) (([String], [String]) -> ([String], [String]))
-> ([String] -> ([String], [String]))
-> [String]
-> ([String], [String])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break String -> Bool
isOpeningCurlyBracket ([String] -> ([String], [String]))
-> [String] -> ([String], [String])
forall a b. (a -> b) -> a -> b
$ [String]
yss
{-# INLINE breakGroupOfStrings #-}

breakInSlashes :: [String] -> [[String]] -> [[String]]
breakInSlashes :: [String] -> [[String]] -> [[String]]
breakInSlashes ![String]
wss ![[String]]
usss
 | [String] -> Bool
forall a. [a] -> Bool
null [String]
lss = [String]
kss [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [[String]]
usss
 | Bool
otherwise = [String] -> [[String]] -> [[String]]
breakInSlashes (Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1 [String]
lss) ([String]
kss [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [[String]]
usss)
  where (![String]
kss,![String]
lss) = (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break String -> Bool
isSlash [String]
wss

combineVariants :: ([String],[[String]]) -> [[String]]
combineVariants :: ([String], [[String]]) -> [[String]]
combineVariants (![String]
xss, (![String]
yss:[[String]]
ysss)) = ([String]
xss [String] -> [String] -> [String]
forall a. Monoid a => a -> a -> a
`mappend` [String]
yss) [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: ([String], [[String]]) -> [[String]]
combineVariants ([String]
xss, [[String]]
ysss)
combineVariants ([String], [[String]])
_ = []

combineHeadsWithNexts :: [[String]] -> [String] -> [[String]]
combineHeadsWithNexts :: [[String]] -> [String] -> [[String]]
combineHeadsWithNexts ![[String]]
xsss ![String]
yss
 | [String] -> Bool
forall a. [a] -> Bool
null [String]
yss = [[String]]
xsss
 | Bool
otherwise = [[String]] -> [String] -> [[String]]
combineHeadsWithNexts [[String]
xss [String] -> [String] -> [String]
forall a. Monoid a => a -> a -> a
`mappend` [String]
zss | [String]
xss <- [[String]]
xsss, [String]
zss <- [[String]]
zsss] [String]
uss
     where (!([String], [[String]])
t,![String]
uss) = [String] -> (([String], [[String]]), [String])
breakGroupOfStrings [String]
yss
           !zsss :: [[String]]
zsss = ([String], [[String]]) -> [[String]]
combineVariants ([String], [[String]])
t

transformToVariations :: [String] -> [[String]]
transformToVariations :: [String] -> [[String]]
transformToVariations ![String]
yss
 | [String] -> Bool
forall a. [a] -> Bool
null [String]
yss = []
 | Bool
otherwise = [[String]] -> [String] -> [[String]]
combineHeadsWithNexts [[String]]
xsss [String]
tss
  where (!([String], [[String]])
y,![String]
tss) = [String] -> (([String], [[String]]), [String])
breakGroupOfStrings [String]
yss
        !xsss :: [[String]]
xsss = ([String], [[String]]) -> [[String]]
combineVariants ([String], [[String]])
y
{-# INLINE transformToVariations #-}

variations :: [String] -> Bool
variations :: [String] -> Bool
variations [String]
xss 
 | (String -> Bool) -> [String] -> Bool
forall a. (a -> Bool) -> [a] -> Bool
any String -> Bool
isSlash [String]
xss = if (String -> Bool) -> [String] -> Bool
forall a. (a -> Bool) -> [a] -> Bool
any String -> Bool
isOpeningCurlyBracket [String]
xss Bool -> Bool -> Bool
&& (String -> Bool) -> [String] -> Bool
forall a. (a -> Bool) -> [a] -> Bool
any String -> Bool
isClosingCurlyBracket [String]
xss then Bool
True else Bool
False
 | Bool
otherwise = Bool
False
{-# INLINE variations #-}

innerProcessmentSimple
  :: String -- ^ Must be a valid 'GWritingSystemPRPLX' specifications 'String' representation only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
  -> String -- ^ Must be a 'String' with the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
  -> String -- ^ Must be a 'String' with the 'SegmentRulesG' specifications only;
  -> String -- ^ Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
  -> String -- ^ Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
  -> (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification, SegmentRulesG, String, String, Concatenations, Concatenations, String)
innerProcessmentSimple :: String
-> String
-> String
-> String
-> String
-> (GWritingSystemPRPLX, [(Char, Char)],
    CharPhoneticClassification, SegmentRulesG, String, String,
    [[String]], [[String]], String)
innerProcessmentSimple String
gwrsCnts String
controlConts String
segmentData String
concatenationsFileP String
concatenationsFileA =
 let [[String]
allophonesGs, [String]
charClfs, [String]
jss, [String]
vss, [String]
wss] = Char -> [String] -> [[String]]
groupBetweenChars Char
'~' ([String] -> [[String]])
-> (String -> [String]) -> String -> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines (String -> [[String]]) -> String -> [[String]]
forall a b. (a -> b) -> a -> b
$ String
controlConts
     wrs :: GWritingSystemPRPLX
wrs = Char -> String -> GWritingSystemPRPLX
getGWritingSystem Char
'~' String
gwrsCnts
     ks :: [(Char, Char)]
ks = [(Char, Char)] -> [(Char, Char)]
forall a. Ord a => [a] -> [a]
sort ([(Char, Char)] -> [(Char, Char)])
-> (Maybe [(Char, Char)] -> [(Char, Char)])
-> Maybe [(Char, Char)]
-> [(Char, Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Char, Char)] -> Maybe [(Char, Char)] -> [(Char, Char)]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [(Char, Char)] -> [(Char, Char)])
-> Maybe [(Char, Char)] -> [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ (String -> Maybe [(Char, Char)]
forall a. Read a => String -> Maybe a
readMaybe ([String] -> String
unwords [String]
allophonesGs)::Maybe [(Char, Char)])
     arr :: CharPhoneticClassification
arr = String -> CharPhoneticClassification
forall a. Read a => String -> a
read ([String] -> String
unwords [String]
charClfs)::Array Int PRS -- The 'Array' must be previously sorted in the ascending order.
     gs :: SegmentRulesG
gs = String -> SegmentRulesG
forall a. Read a => String -> a
read String
segmentData::SegmentRulesG
     ysss :: [[String]]
ysss = [[String]] -> [[String]]
sort2Concat ([[String]] -> [[String]])
-> (Maybe [[String]] -> [[String]])
-> Maybe [[String]]
-> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[String]] -> Maybe [[String]] -> [[String]]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [[String]] -> [[String]]) -> Maybe [[String]] -> [[String]]
forall a b. (a -> b) -> a -> b
$ (String -> Maybe [[String]]
forall a. Read a => String -> Maybe a
readMaybe String
concatenationsFileP::Maybe [[String]])
     zzzsss :: [[String]]
zzzsss = [[String]] -> [[String]]
sort2Concat ([[String]] -> [[String]])
-> (Maybe [[String]] -> [[String]])
-> Maybe [[String]]
-> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[String]] -> Maybe [[String]] -> [[String]]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [[String]] -> [[String]]) -> Maybe [[String]] -> [[String]]
forall a b. (a -> b) -> a -> b
$ (String -> Maybe [[String]]
forall a. Read a => String -> Maybe a
readMaybe String
concatenationsFileA::Maybe [[String]])
     js :: String
js = [String] -> String
forall a. [[a]] -> [a]
concat [String]
jss
     vs :: String
vs = [String] -> String
forall a. [[a]] -> [a]
concat [String]
vss
     ws :: String
ws = String -> String
forall a. Ord a => [a] -> [a]
sort (String -> String) -> ([String] -> String) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
forall a. [[a]] -> [a]
concat ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String]
wss
       in (GWritingSystemPRPLX
wrs, [(Char, Char)]
ks, CharPhoneticClassification
arr, SegmentRulesG
gs, String
js, String
vs, [[String]]
ysss, [[String]]
zzzsss, String
ws)
{-# INLINE innerProcessmentSimple #-}

{-| -}
argsProcessment
 :: FilePath -- ^ With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
 -> FilePath -- ^ With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
 -> FilePath -- ^ With the 'SegmentRulesG' specifications only;
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
 -> IO [String]
argsProcessment :: String -> String -> String -> String -> String -> IO [String]
argsProcessment String
fileGWrSys String
controlFile String
segmentRulesFile String
concatenationsFileP String
concatenationsFileA = (String -> IO String) -> [String] -> IO [String]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM String -> IO String
readFile [String
controlFile, String
fileGWrSys, String
segmentRulesFile, String
concatenationsFileP, String
concatenationsFileA]
{-# INLINE argsProcessment #-}

-- | The function that is mostly intended to be used by the end user. Reads the specifications from
-- the5 given files and returns the data that can be used further for generalized PhLADiPreLiO.
readLangSpecs 
 :: FilePath -- ^ With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
 -> FilePath -- ^ With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
 -> FilePath -- ^ With the 'SegmentRulesG' specifications only;
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
 -> IO (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification, SegmentRulesG, String, String, Concatenations, Concatenations, String)
readLangSpecs :: String
-> String
-> String
-> String
-> String
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
readLangSpecs  String
fileGWrSys String
controlFile String
segmentRulesFile String
concatenationsFileP String
concatenationsFileA = 
 String -> String -> String -> String -> String -> IO [String]
argsProcessment String
fileGWrSys String
controlFile String
segmentRulesFile String
concatenationsFileP String
concatenationsFileA IO [String]
-> ([String]
    -> IO
         (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
          SegmentRulesG, String, String, [[String]], [[String]], String))
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \[String]
xss -> let [String
controlConts, String
gwrsCnts, String
segmentData, String
concatenationsFileP1, String
concatenationsFileA1] = [String]
xss in (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
 SegmentRulesG, String, String, [[String]], [[String]], String)
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
  SegmentRulesG, String, String, [[String]], [[String]], String)
 -> IO
      (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
       SegmentRulesG, String, String, [[String]], [[String]], String))
-> (GWritingSystemPRPLX, [(Char, Char)],
    CharPhoneticClassification, SegmentRulesG, String, String,
    [[String]], [[String]], String)
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
forall a b. (a -> b) -> a -> b
$ String
-> String
-> String
-> String
-> String
-> (GWritingSystemPRPLX, [(Char, Char)],
    CharPhoneticClassification, SegmentRulesG, String, String,
    [[String]], [[String]], String)
innerProcessmentSimple String
gwrsCnts String
controlConts String
segmentData String
concatenationsFileP1 String
concatenationsFileA1 
{-# INLINE readLangSpecs #-}