{-# LANGUAGE CPP #-}
{-# LANGUAGE PatternGuards #-}
module System.FilePath.Glob.Base
( Token(..), Pattern(..)
, CompOptions(..), MatchOptions(..)
, compDefault, compPosix, matchDefault, matchPosix
, decompile
, compile
, compileWith, tryCompileWith
, tokenize
, optimize
, liftP, tokToLower
, isLiteral
) where
import Control.Arrow (first)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Control.Monad.Trans.Writer.Strict (Writer, runWriter, tell)
import Control.Exception (assert)
import Data.Char (isDigit, isAlpha, toLower)
import Data.List (find, sortBy)
import Data.List.NonEmpty (toList)
import Data.Maybe (fromMaybe)
#if !MIN_VERSION_base(4,8,0)
import Data.Monoid (Monoid, mappend, mempty, mconcat)
#endif
#if MIN_VERSION_base(4,11,0)
import Data.Semigroup (sconcat, stimes)
#else
import Data.Semigroup (Semigroup, (<>), sconcat, stimes)
#endif
import Data.String (IsString(fromString))
import System.FilePath ( pathSeparator, extSeparator
, isExtSeparator, isPathSeparator
)
import System.FilePath.Glob.Utils ( dropLeadingZeroes
, isLeft, fromLeft
, increasingSeq
, addToRange, overlap
)
#if __GLASGOW_HASKELL__
import Text.Read (readPrec, lexP, parens, prec, Lexeme(Ident))
#endif
data Token
= Literal !Char
| ExtSeparator
| PathSeparator
| NonPathSeparator
| CharRange !Bool [Either Char (Char,Char)]
| OpenRange (Maybe String) (Maybe String)
| AnyNonPathSeparator
| AnyDirectory
| LongLiteral !Int String
| Unmatchable
deriving (Eq)
tokToLower :: Token -> Token
tokToLower (Literal c) = Literal (toLower c)
tokToLower (LongLiteral n s) = LongLiteral n (map toLower s)
tokToLower tok = tok
newtype Pattern = Pattern { unPattern :: [Token] } deriving (Eq)
liftP :: ([Token] -> [Token]) -> Pattern -> Pattern
liftP f (Pattern pat) = Pattern (f pat)
instance Show Token where
show (Literal c)
| c `elem` "*?[<" = ['[',c,']']
| otherwise = assert (not $ isPathSeparator c) [c]
show ExtSeparator = [ extSeparator]
show PathSeparator = [pathSeparator]
show NonPathSeparator = "?"
show AnyNonPathSeparator = "*"
show AnyDirectory = "**/"
show (LongLiteral _ s) = concatMap (show . Literal) s
show (OpenRange a b) =
'<' : fromMaybe "" a ++ "-" ++
fromMaybe "" b ++ ">"
show (CharRange b r) =
let f = either (:[]) (\(x,y) -> [x,'-',y])
(caret,exclamation,fs) =
foldr (\c (ca,ex,ss) ->
case c of
Left '^' -> ("^",ex,ss)
Left '!' -> (ca,"!",ss)
_ -> (ca, ex,(f c ++) . ss)
)
("", "", id)
r
(beg,rest) = let s' = fs []
(x,y) = splitAt 1 s'
in if not b && x == "-"
then (y,x)
else (s',"")
in concat [ "["
, if b then "" else "^"
, if b && null beg && not (null caret && null exclamation) then "/" else ""
, beg, caret, exclamation, rest
, "]"
]
show Unmatchable = "[.]"
instance Show Pattern where
showsPrec d p = showParen (d > 10) $
showString "compile " . showsPrec (d+1) (decompile p)
instance Read Pattern where
#if __GLASGOW_HASKELL__
readPrec = parens . prec 10 $ do
Ident "compile" <- lexP
fmap compile readPrec
#else
readsPrec d = readParen (d > 10) $ \r -> do
("compile",string) <- lex r
(xs,rest) <- readsPrec (d+1) string
[(compile xs, rest)]
#endif
instance Semigroup Pattern where
Pattern a <> Pattern b = optimize $ Pattern (a <> b)
sconcat = optimize . Pattern . concatMap unPattern . toList
stimes n (Pattern a) = optimize $ Pattern (stimes n a)
instance Monoid Pattern where
mempty = Pattern []
mappend = (<>)
mconcat = optimize . Pattern . concatMap unPattern
instance IsString Pattern where
fromString = compile
data CompOptions = CompOptions
{ characterClasses :: Bool
, characterRanges :: Bool
, numberRanges :: Bool
, wildcards :: Bool
, recursiveWildcards :: Bool
, pathSepInRanges :: Bool
, errorRecovery :: Bool
} deriving (Show,Read,Eq)
compDefault :: CompOptions
compDefault = CompOptions
{ characterClasses = True
, characterRanges = True
, numberRanges = True
, wildcards = True
, recursiveWildcards = True
, pathSepInRanges = True
, errorRecovery = True
}
compPosix :: CompOptions
compPosix = CompOptions
{ characterClasses = True
, characterRanges = True
, numberRanges = False
, wildcards = True
, recursiveWildcards = False
, pathSepInRanges = False
, errorRecovery = True
}
data MatchOptions = MatchOptions
{ matchDotsImplicitly :: Bool
, ignoreCase :: Bool
, ignoreDotSlash :: Bool
}
matchDefault :: MatchOptions
matchDefault = matchPosix
matchPosix :: MatchOptions
matchPosix = MatchOptions
{ matchDotsImplicitly = False
, ignoreCase = False
, ignoreDotSlash = True
}
decompile :: Pattern -> String
decompile = concatMap show . unPattern
compile :: String -> Pattern
compile = compileWith compDefault
compileWith :: CompOptions -> String -> Pattern
compileWith opts = either error id . tryCompileWith opts
tryCompileWith :: CompOptions -> String -> Either String Pattern
tryCompileWith opts = fmap optimize . tokenize opts
tokenize :: CompOptions -> String -> Either String Pattern
tokenize opts = fmap Pattern . sequence . go
where
err _ c cs | errorRecovery opts = Right (Literal c) : go cs
err s _ _ = [Left s]
go :: String -> [Either String Token]
go [] = []
go ('?':cs) | wcs = Right NonPathSeparator : go cs
go ('*':cs) | wcs =
case cs of
'*':p:xs | rwcs && isPathSeparator p
-> Right AnyDirectory : go xs
_ -> Right AnyNonPathSeparator : go cs
go ('[':cs) | crs = let (range,rest) = charRange opts cs
in case range of
Left s -> err s '[' cs
r -> r : go rest
go ('<':cs) | ors =
let (range, rest) = break (=='>') cs
in if null rest
then err "compile :: unclosed <> in pattern" '<' cs
else case openRange range of
Left s -> err s '<' cs
r -> r : go (tail rest)
go (c:cs)
| isPathSeparator c = Right PathSeparator : go cs
| isExtSeparator c = Right ExtSeparator : go cs
| otherwise = Right (Literal c) : go cs
wcs = wildcards opts
rwcs = recursiveWildcards opts
crs = characterRanges opts
ors = numberRanges opts
openRange :: String -> Either String Token
openRange ['-'] = Right $ OpenRange Nothing Nothing
openRange ('-':s) =
case span isDigit s of
(b,"") -> Right $ OpenRange Nothing (openRangeNum b)
_ -> Left $ "compile :: bad <>, expected number, got " ++ s
openRange s =
case span isDigit s of
(a,"-") -> Right $ OpenRange (openRangeNum a) Nothing
(a,'-':s') ->
case span isDigit s' of
(b,"") -> Right $ OpenRange (openRangeNum a) (openRangeNum b)
_ -> Left $ "compile :: bad <>, expected number, got " ++ s'
_ -> Left $ "compile :: bad <>, expected number followed by - in " ++ s
openRangeNum :: String -> Maybe String
openRangeNum = Just . dropLeadingZeroes
type CharRange = [Either Char (Char,Char)]
charRange :: CompOptions -> String -> (Either String Token, String)
charRange opts zs =
case zs of
y:ys | y `elem` "^!" ->
case ys of
'-':']':xs -> (Right (CharRange False [Left '-']), xs)
'-' :_ -> first (fmap (CharRange True )) (start zs)
xs -> first (fmap (CharRange False)) (start xs)
_ -> first (fmap (CharRange True )) (start zs)
where
start :: String -> (Either String CharRange, String)
start (']':xs) = run $ char ']' xs
start ('-':xs) = run $ char '-' xs
start xs = run $ go xs
run :: ExceptT String (Writer CharRange) String
-> (Either String CharRange, String)
run m = case runWriter.runExceptT $ m of
(Left err, _) -> (Left err, [])
(Right rest, cs) -> (Right cs, rest)
go :: String -> ExceptT String (Writer CharRange) String
go ('[':':':xs) | characterClasses opts = readClass xs
go ( ']':xs) = return xs
go ( c:xs) =
if not (pathSepInRanges opts) && isPathSeparator c
then throwE "compile :: path separator within []"
else char c xs
go [] = throwE "compile :: unclosed [] in pattern"
char :: Char -> String -> ExceptT String (Writer CharRange) String
char c ('-':x:xs) =
if x == ']'
then ltell [Left c, Left '-'] >> return xs
else ltell [Right (c,x)] >> go xs
char c xs = ltell [Left c] >> go xs
readClass :: String -> ExceptT String (Writer CharRange) String
readClass xs = let (name,end) = span isAlpha xs
in case end of
':':']':rest -> charClass name >> go rest
_ -> ltell [Left '[',Left ':'] >> go xs
charClass :: String -> ExceptT String (Writer CharRange) ()
charClass name =
case name of
"alnum" -> ltell [digit,upper,lower]
"alpha" -> ltell [upper,lower]
"blank" -> ltell blanks
"cntrl" -> ltell [Right ('\0','\x1f'), Left '\x7f']
"digit" -> ltell [digit]
"graph" -> ltell [Right ('!','~')]
"lower" -> ltell [lower]
"print" -> ltell [Right (' ','~')]
"punct" -> ltell punct
"space" -> ltell spaces
"upper" -> ltell [upper]
"xdigit" -> ltell [digit, Right ('A','F'), Right ('a','f')]
_ ->
throwE ("compile :: unknown character class '" ++name++ "'")
digit = Right ('0','9')
upper = Right ('A','Z')
lower = Right ('a','z')
punct = map Right [('!','/'), (':','@'), ('[','`'), ('{','~')]
blanks = [Left '\t', Left ' ']
spaces = [Right ('\t','\r'), Left ' ']
ltell = lift . tell
optimize :: Pattern -> Pattern
optimize (Pattern pat) =
Pattern . fin $
case pat of
e : ts | e == ExtSeparator || e == Literal '.' ->
checkUnmatchable (Literal '.' :) (go ts)
_ ->
case go pat of
Literal '.' : _ -> [Unmatchable]
opat -> checkUnmatchable id opat
where
fin [] = []
fin (x:y:xs) | Just x' <- isCharLiteral x, Just y' <- isCharLiteral y =
let (ls,rest) = spanMaybe isCharLiteral xs
in fin $ LongLiteral (length ls + 2)
(foldr (\a -> (a:)) [] (x':y':ls))
: rest
fin (LongLiteral l1 s1 : LongLiteral l2 s2 : xs) =
fin $ LongLiteral (l1+l2) (s1++s2) : xs
fin (LongLiteral l s : Literal c : xs) =
fin $ LongLiteral (l+1) (s++[c]) : xs
fin (LongLiteral 1 s : xs) = Literal (head s) : fin xs
fin (Literal c : LongLiteral l s : xs) =
fin $ LongLiteral (l+1) (c:s) : xs
fin (x:xs) = x : fin xs
go [] = []
go (p@PathSeparator : ExtSeparator : xs) = p : Literal '.' : go xs
go (ExtSeparator : xs) = Literal '.' : go xs
go (p@PathSeparator : x@(CharRange _ _) : xs) =
p : case optimizeCharRange True x of
x'@(CharRange _ _) -> x' : go xs
Literal '.' -> [Unmatchable]
x' -> go (x':xs)
go (x@(CharRange _ _) : xs) =
case optimizeCharRange False x of
x'@(CharRange _ _) -> x' : go xs
x' -> go (x':xs)
go (o@(OpenRange Nothing Nothing) : d : xs) | d == anyDigit =
d : go (o : xs)
go (x:xs) =
case find ((== x) . fst) compressables of
Just (_, f) -> let (compressed,ys) = span (== x) xs
in if null compressed
then x : go ys
else f (length compressed) ++ go (x : ys)
Nothing -> x : go xs
checkUnmatchable f ts = if Unmatchable `elem` ts then [Unmatchable] else f ts
compressables = [ (AnyNonPathSeparator, const [])
, (AnyDirectory, const [])
, (OpenRange Nothing Nothing, \n -> replicate n anyDigit)
]
isCharLiteral (Literal x) = Just x
isCharLiteral _ = Nothing
anyDigit = CharRange True [Right ('0', '9')]
spanMaybe :: (a -> Maybe b) -> [a] -> ([b], [a])
spanMaybe f = go
where
go xs@[] = ([], xs)
go xs@(x : xs') = case f x of
Nothing -> ([], xs)
Just y -> let (ys, zs) = go xs' in (y : ys, zs)
optimizeCharRange :: Bool -> Token -> Token
optimizeCharRange precededBySlash (CharRange b rs) =
fin . stripUnmatchable . go . sortCharRange $ rs
where
fin [Left c] | b = if isPathSeparator c then Unmatchable else Literal c
fin [Right r] | b && r == (minBound,maxBound) = NonPathSeparator
fin x = CharRange b x
stripUnmatchable xs@(_:_:_) | b =
filter (\x -> (not precededBySlash || x /= Left '.') && x /= Left '/') xs
stripUnmatchable xs = xs
go [] = []
go (x@(Left c) : xs) =
case xs of
[] -> [x]
y@(Left d) : ys
| c == d -> go$ Left c : ys
| d == succ c ->
let (ls,rest) = span isLeft xs
(catable,others) = increasingSeq (map fromLeft ls)
range = (c, head catable)
in
if null catable || null (tail catable)
then x : y : go ys
else go$ Right range : map Left others ++ rest
| otherwise -> x : go xs
Right r : ys ->
case addToRange r c of
Just r' -> go$ Right r' : ys
Nothing -> x : go xs
go (x@(Right r) : xs) =
case xs of
[] -> [x]
Left c : ys ->
case addToRange r c of
Just r' -> go$ Right r' : ys
Nothing -> x : go xs
Right r' : ys ->
case overlap r r' of
Just o -> go$ Right o : ys
Nothing -> x : go xs
optimizeCharRange _ _ = error "Glob.optimizeCharRange :: internal error"
sortCharRange :: [Either Char (Char,Char)] -> [Either Char (Char,Char)]
sortCharRange = sortBy cmp
where
cmp (Left a) (Left b) = compare a b
cmp (Left a) (Right (b,_)) = compare a b
cmp (Right (a,_)) (Left b) = compare a b
cmp (Right (a,_)) (Right (b,_)) = compare a b
isLiteral :: Pattern -> Bool
isLiteral = all lit . unPattern
where
lit (Literal _) = True
lit (LongLiteral _ _) = True
lit PathSeparator = True
lit _ = False