-- | The CorePattern module deconstructs the Pattern tree created by
-- ReadRegex.parseRegex and returns a simpler Q/P tree with
-- annotations at each Q node.  This will be converted by the TNFA
-- module into a QNFA finite automata.
--
-- Of particular note, this Pattern to Q/P conversion creates and
-- assigns all the internal Tags that will be used during the matching
-- process, and associates the captures groups with the tags that
-- represent their starting and ending locations and with their
-- immediate parent group.
--
-- Each Maximize and Minimize tag is held as either a preTag or a
-- postTag by one and only one location in the Q/P tree.  The Orbit
-- tags are each held by one and only one Star node.  Tags that stop a
-- Group are also held in perhaps numerous preReset lists.
--
-- The additional nullQ::nullView field of Q records the potentially
-- complex information about what tests and tags must be used if the
-- pattern unQ::P matches 0 zero characters.  There can be redundancy
-- in nullView, which is eliminated by cleanNullView.
--
-- Uses recursive do notation.
--
-- 2009 XXX TODO: we can avoid needing tags in the part of the pattern
-- after the last capturing group (when right-associative).  This is
-- flipped for left-associative where the front of the pattern before
-- the first capturing group needs no tags.  The edge of these regions
-- is subtle: both case needs a Maximize tag.  One ought to be able to
-- check the Pattern: if the root is PConcat then a scan from the end
-- (start) looking for the first with an embedded PGroup can be found
-- and the PGroup free elements can be wrapped in some new PNOTAG
-- semantic indicator.
module Text.Regex.TDFA.CorePattern(Q(..),P(..),WhichTest(..),Wanted(..)
                                  ,TestInfo,OP(..),SetTestInfo(..),NullView
                                  ,patternToQ,cleanNullView,cannotAccept,mustAccept) where

import Control.Monad.RWS {- all -}
import Data.Array.IArray(Array,(!),accumArray,listArray)
import Data.List(sort)
import Data.IntMap.EnumMap2(EnumMap)
import qualified Data.IntMap.EnumMap2 as Map(singleton,null,assocs,keysSet)
--import Data.Maybe(isNothing)
import Data.IntSet.EnumSet2(EnumSet)
import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,isSubsetOf)
import Text.Regex.TDFA.Common {- all -}
import Text.Regex.TDFA.Pattern(Pattern(..),starTrans)
-- import Debug.Trace

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}


--err :: String -> a
--err = common_error "Text.Regex.TDFA.CorePattern"

--debug :: (Show a) => a -> b -> b
--debug _ = id

-- Core Pattern Language
data P = Empty                       -- Could be replaced by (Test Nothing)??
       | Or [Q]
       | Seq Q Q
       | Star { getOrbit :: Maybe Tag -- tag to prioritize the need to keep track of length of each pass though q
              , resetOrbits :: [Tag]  -- child star's orbits to reset (ResetOrbitTask) at all depths
              , firstNull :: Bool     -- Usually True to mean the first pass may match 0 characters
              , unStar :: Q}
       | Test TestInfo               -- Require the test to be true (merge with empty as (Test (Maybe TestInfo)) ??)
       | OneChar Pattern             -- Bring the Pattern element that accepts a character
       | NonEmpty Q                  -- Don't let the Q pattern match nothing
         deriving (Show,Eq)

-- The diagnostics about the pattern.  Note that when unQ is 'Seq' the
-- the preTag and postTag are Nothing but the preReset might have tags
-- from PGroup injecting them.
data Q = Q {nullQ :: NullView                  -- Ordered list of nullable views
           ,takes :: (Position,Maybe Position) -- Range of number of accepted characters
           ,preReset :: [Tag]                  -- Tags to "reset" (ResetGroupStopTask) (Only immediate children for efficiency)
           ,postSet :: [Tag]                   -- Tags to "set" (SetGroupStopTask)
           ,preTag,postTag :: Maybe Tag        -- Tags assigned around this pattern (TagTask)
           ,tagged :: Bool                     -- Whether this node should be tagged -- patternToQ use only
           ,childGroups :: Bool                -- Whether unQ has any PGroups -- patternToQ use only
           ,wants :: Wanted                    -- What kind of continuation is used by this pattern
           ,unQ :: P} deriving (Eq)

type TestInfo = (WhichTest,DoPa)

-- This is newtype'd to allow control over class instances
-- This is a set of WhichTest where each test has associated pattern location information
newtype SetTestInfo = SetTestInfo {getTests :: EnumMap WhichTest (EnumSet DoPa)} deriving (Eq)

instance Monoid SetTestInfo where
  mempty = SetTestInfo mempty
  SetTestInfo x `mappend` SetTestInfo y = SetTestInfo (x `mappend` y)

instance Show SetTestInfo where
  show (SetTestInfo sti) = "SetTestInfo "++show (mapSnd (Set.toList) $ Map.assocs sti)

-- There may be several distinct ways for a subtree to conditionally
-- (i.e. with a Test) or unconditionally accept 0 characters.  These
-- are in the list in order of preference, with most preferred listed
-- first.
type NullView = [(SetTestInfo,TagList)]  -- Ordered list of null views, each is a set of tests and tags

-- During the depth first traversal, children are told about tags by the parent.
-- They may change Apply to Advice and they may generate new tags.
data HandleTag = NoTag             -- No tag at this boundary
               | Advice Tag        -- tag at this boundary, applied at higher level in tree
               | Apply Tag         -- tag at this boundary, may be applied at this node or passed to one child
                 deriving (Show)

-- Nodes in the tree are labeled by the type kind of continuation they
-- prefer to be passed when processing.  This makes it possible to
-- create a smaller number of QNFA states and avoid creating wasteful
-- QNFA states that won't be reachable in the final automata.
--
-- In practice WantsBoth is treated identically to WantsQNFA and
-- WantsBoth could be removed.
data Wanted = WantsQNFA | WantsQT | WantsBoth | WantsEither deriving (Eq,Show)

instance Show Q where
  show = showQ

showQ :: Q -> String
showQ q = "Q { nullQ = "++show (nullQ q)++
        "\n  , takes = "++show (takes q)++
        "\n  , preReset = "++show (preReset q)++
        "\n  , postSet = "++show (postSet q)++
        "\n  , preTag = "++show (preTag q)++
        "\n  , postTag = "++show (postTag q)++
        "\n  , tagged = "++show (tagged q)++
        "\n  , wants = "++show (wants q)++
        "\n  , unQ = "++ indent' (unQ q)++" }"
   where indent' = unlines . (\s -> case s of
                                      [] -> []
                                      (h:t) -> h : (map (spaces ++) t)) . lines . show
         spaces = replicate 10 ' '

-- Smart constructors for NullView
notNull :: NullView
notNull = []

-- Shorthand for combining a preTag and a postTag
-- preTags :: Maybe Tag -> Maybe Tag -> TagList
-- preTags a b = promote a `mappend` promote b
--   where promote = maybe [] (\x -> [(x,PreUpdate TagTask)])

promotePreTag :: HandleTag -> TagList
promotePreTag = maybe [] (\x -> [(x,PreUpdate TagTask)]) . apply

makeEmptyNullView :: HandleTag -> HandleTag -> NullView
makeEmptyNullView a b = [(mempty, promotePreTag a ++ promotePreTag b)]

makeTestNullView ::  TestInfo -> HandleTag -> HandleTag -> NullView
makeTestNullView (w,d) a b = [(SetTestInfo (Map.singleton w (Set.singleton d)), promotePreTag a ++ promotePreTag b)]

tagWrapNullView :: HandleTag -> HandleTag -> NullView -> NullView
tagWrapNullView a b oldNV =
  case (promotePreTag a, promotePreTag b) of
    ([],[]) -> oldNV
    (pre,post) -> do
      (oldTests,oldTasks) <- oldNV
      return (oldTests,pre++oldTasks++post)

-- For PGroup, need to prepend reset tasks before others in nullView
addGroupResetsToNullView :: [Tag] -> Tag -> NullView -> NullView
addGroupResetsToNullView groupResets groupSet nv = [ (test, prepend (append tags) ) | (test,tags) <- nv ]
  where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetGroupStopTask)) $ groupResets
        append = (++[(groupSet,PreUpdate SetGroupStopTask)])

-- For PStar, need to put in the orbit TagTasks
orbitWrapNullView :: Maybe Tag -> [Tag] -> NullView -> NullView
orbitWrapNullView mOrbit orbitResets oldNV =
  case (mOrbit,orbitResets) of
    (Nothing,[]) -> oldNV
    (Nothing,_) -> do (oldTests,oldTasks) <- oldNV
                      return (oldTests,prepend oldTasks)
    (Just o,_) -> do (oldTests,oldTasks) <- oldNV
                     return (oldTests,prepend $ [(o,PreUpdate EnterOrbitTask)] ++ oldTasks ++ [(o,PreUpdate LeaveOrbitTask)])
  where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetOrbitTask)) $ orbitResets

-- The NullViews are ordered, and later test sets that contain the
-- tests from any earlier entry will never be chosen.  This function
-- returns a list with these redundant elements removed.  Note that
-- the first unconditional entry in the list will be the last entry of
-- the returned list since the empty set is a subset of any other set.
cleanNullView :: NullView -> NullView
cleanNullView [] = []
cleanNullView (first@(SetTestInfo sti,_):rest) | Map.null sti = first : []  -- optimization
                                               | otherwise =
  first : cleanNullView (filter (not . (setTI `Set.isSubsetOf`) . Map.keysSet . getTests . fst) rest)
  where setTI = Map.keysSet sti

-- Ordered Sequence of two NullViews: all ordered combinations of tests and tags.
-- Order of <- s1 and <- s2 is deliberately chosen to maintain preference priority
mergeNullViews :: NullView -> NullView -> NullView
mergeNullViews s1 s2 = cleanNullView $ do
  (test1,tag1) <- s1
  (test2,tag2) <- s2
  return (mappend test1 test2,mappend tag1 tag2)
-- mergeNullViews = cleanNullView $ liftM2 (mappend *** mappend)

-- Concatenated two ranges of number of accepted characters
seqTake :: (Int, Maybe Int) -> (Int, Maybe Int) -> (Int, Maybe Int)
seqTake (x1,y1) (x2,y2) = (x1+x2,liftM2 (+) y1 y2)

-- Parallel combination of list of ranges of number of accepted characters
orTakes :: [(Int, Maybe Int)] -> (Int,Maybe Int)
orTakes [] = (0,Just 0)
orTakes ts = let (xs,ys) = unzip ts
             in (minimum xs, foldl1 (liftM2 max) ys)

-- Invariant: apply (toAdvice _ ) == mempty
apply :: HandleTag -> Maybe Tag
apply (Apply tag) = Just tag
apply _ = Nothing
toAdvice :: HandleTag -> HandleTag
toAdvice (Apply tag) = Advice tag
toAdvice s = s
noTag :: HandleTag -> Bool
noTag NoTag = True
noTag _ = False
fromHandleTag :: HandleTag -> Tag
fromHandleTag (Apply tag) = tag
fromHandleTag (Advice tag) = tag
fromHandleTag _ = error "fromHandleTag"

-- Predicates on the range of number of accepted  characters
varies :: Q -> Bool
varies Q {takes = (_,Nothing)} = True
varies Q {takes = (x,Just y)} = x/=y

mustAccept :: Q -> Bool
mustAccept q = (0/=) . fst . takes $ q

canAccept :: Q -> Bool
canAccept q = maybe True (0/=) $ snd . takes $ q

cannotAccept :: Q -> Bool
cannotAccept q = maybe False (0==) $ snd . takes $ q

-- This converts then input Pattern to an analyzed Q structure with
-- the tags assigned.
--
-- The analysis is filled in by a depth first search and the tags are
-- created top down and passed to children.  Thus information flows up
-- from the dfs of the children and simultaneously down in the form of
-- pre and post HandleTag data.  This bidirectional flow is handled
-- declaratively by using the MonadFix (i.e. mdo).
-- 
-- Invariant: A tag should exist in Q in exactly one place (and will
-- be in a preTag,postTag, or getOrbit field).  This is partly because
-- PGroup needs to know the tags are around precisely the expression
-- that it wants to record.  If the same tag were in other branches
-- then this would no longer be true.  The tag may or may not also
-- show up in one or more preReset list or resetOrbits list.
--
-- This invariant is enforced by each node either taking
-- responsibility (apply) for a passed in / created tag or sending it
-- to exactly one child node.  Other child nodes need to receive it
-- via toAdvice.  Leaf nodes are forced to apply any passed tags.
--
-- There is a final "qwin of Q {postTag=ISet.singleton 1}" and an
-- implied initial index tag of 0.
-- 
-- favoring pushing Apply into the child postTag makes PGroup happier

type PM = RWS (Maybe GroupIndex) [Either Tag GroupInfo] ([OP]->[OP],Tag) 
type HHQ = HandleTag  -- m1 : info about left boundaary / preTag
        -> HandleTag  -- m2 : info about right boundary / postTag
        -> PM Q

-- There is no group 0 here, since it is always the whole match and has no parent of its own
makeGroupArray :: GroupIndex -> [GroupInfo] -> Array GroupIndex [GroupInfo]
makeGroupArray maxGroupIndex groups = accumArray (\earlier later -> later:earlier) [] (1,maxGroupIndex) filler
    where filler = map (\gi -> (thisIndex gi,gi)) groups

fromRight :: [Either Tag GroupInfo] -> [GroupInfo]
fromRight [] = []
fromRight ((Right x):xs) = x:fromRight xs
fromRight ((Left _):xs) = fromRight xs

partitionEither :: [Either Tag GroupInfo] -> ([Tag],[GroupInfo])
partitionEither = helper id id where
  helper :: ([Tag]->[Tag]) -> ([GroupInfo]->[GroupInfo]) -> [Either Tag GroupInfo] -> ([Tag],[GroupInfo])
  helper ls rs [] = (ls [],rs [])
  helper ls rs ((Right x):xs) = helper  ls      (rs.(x:)) xs
  helper ls rs ((Left  x):xs) = helper (ls.(x:)) rs       xs

-- Partial function: assumes starTrans has been run on the Pattern
-- Note that the lazy dependency chain for this very zigzag:
--   varies information is sent up the tree
--   handle tags depend on that and sends m1 m2 down the tree
--     makeGroup sends some tags to the writer (Right _)
--     withParent listens to children send group info to writer
--       and lazily looks resetGroupTags from aGroups, the result of all writer (Right _)
--       preReset stores the resetGroupTags result of the lookup in the tree
--     makeOrbit sends some tags to the writer (Left _)
--     withOrbit listens to children send orbit info to writer for resetOrbitTags 
--   nullQ depends m1 m2 and resetOrbitTags and resetGroupTags and is sent up the tree
patternToQ :: CompOption -> (Pattern,(GroupIndex,DoPa)) -> (Q,Array Tag OP,Array GroupIndex [GroupInfo])
patternToQ compOpt (pOrig,(maxGroupIndex,_)) = (tnfa,aTags,aGroups) where
  (tnfa,(tag_dlist,nextTag),groups) = runRWS monad startReader startState
  aTags = listArray (0,pred nextTag) (tag_dlist [])
  aGroups = makeGroupArray maxGroupIndex (fromRight groups)

  -- implicitly inside a PGroup 0 converted into a GroupInfo 0 undefined 0 1
  monad = go (starTrans pOrig) (Advice 0) (Advice 1)
  -- startReader is accessed by getParentIndex and changed by nonCapture and withParent
  startReader :: Maybe GroupIndex
  startReader = Just 0                           -- start inside group 0, capturing enabled
  -- The startState is only acted upon in the "uniq" command
  -- Tag 0 is Minimized and Tag 1 is maximized, next tag has value of 2
  -- This is regardless of right or left associativity
  startState :: ([OP]->[OP],Tag)
  startState = ( (Minimize:) . (Maximize:) , 2)

  -- uniq uses MonadState and always returns an "Apply _" tag
  {-# INLINE uniq #-}
  uniq :: String -> PM HandleTag
  uniq _msg = fmap Apply (uniq' Maximize)
--  uniq _msg = do x <- fmap Apply (uniq' Maximize)
--                trace ('\n':msg ++ " Maximize "++show x) $ return x
--                return x

  ignore :: String -> PM Tag
  ignore _msg = uniq' Ignore
--  ignore _msg = do x <- uniq' Ignore
--                  trace ('\n':msg ++ " Ignore "++show x) $ return x
--                  return x

  {-# NOINLINE uniq' #-}
  uniq' :: OP -> PM Tag
  uniq' newOp = do
    (op,s) <- get                -- generate the next tag with bias newOp
    let op' = op . (newOp:)
        s' = succ s
    put $! (op',s')
    return s

  {-# INLINE makeOrbit #-}
  -- Specialize the monad operations and give more meaningful names
  -- makeOrbit uses MonadState(uniq) and MonadWriter(tell/Left)
  makeOrbit :: PM (Maybe Tag)
  makeOrbit = do x <- uniq' Orbit
--                 trace ('\n':"PStar Orbit "++show x) $ do
                 tell [Left x]
                 return (Just x)

  {-# INLINE withOrbit #-}
  -- withOrbit uses MonadWriter(listens to makeOrbit/Left), collects
  -- children at all depths
  withOrbit :: PM a -> PM (a,[Tag])
  withOrbit = listens childStars
    where childStars x = let (ts,_) = partitionEither x in ts

  {-# INLINE makeGroup #-}
  -- makeGroup usesMonadWriter(tell/Right)
  makeGroup :: GroupInfo -> PM ()
  makeGroup = tell . (:[]) . Right

  {-# INLINE getParentIndex #-}
  -- getParentIndex uses MonadReader(ask)
  getParentIndex :: PM (Maybe GroupIndex)
  getParentIndex = ask

  {-# INLINE nonCapture #-}
  -- nonCapture uses MonadReader(local) to suppress getParentIndex to return Nothing
  nonCapture :: PM  a -> PM a
  nonCapture = local (const Nothing)

  -- withParent uses MonadReader(local) to set getParentIndex to return (Just this)
  -- withParent uses MonadWriter(listens to makeGroup/Right) to return contained group indices (stopTag)
  -- withParent is only safe if getParentIndex has been checked to be not equal to Nothing (see PGroup below)
  -- Note use of laziness: the immediate children's group index is used to look up all copies of the 
  -- group in aGroups, including copies that are not immediate children.
  withParent :: GroupIndex -> PM a -> PM (a,[Tag])
  withParent this = local (const (Just this)) . listens childGroupInfo
    where childGroupInfo x =
            let (_,gs) = partitionEither x
                children :: [GroupIndex]
                children = norep . sort . map thisIndex
                           -- filter to get only immediate children (efficiency)
                           . filter ((this==).parentIndex) $ gs
            in concatMap (map flagTag . (aGroups!)) (this:children)

  -- combineConcat is a partial function: Must not pass in an empty list
  -- Policy choices:
  --  * pass tags to apply to children and have no preTag or postTag here (so none addded to nullQ)
  --  * middle 'mid' tag: give to left/front child as postTag so a Group there might claim it as a stopTag
  --  * if parent is Group then preReset will become non-empty
  combineConcat :: [Pattern] -> HHQ
  combineConcat | rightAssoc compOpt = foldr1 combineSeq . map go
                | otherwise          = foldl1 combineSeq . map go -- libtre default
    where {-# INLINE front'end #-}
          front'end | rightAssoc compOpt = liftM2 (,)
                    | otherwise = flip (liftM2 (flip (,)))
          combineSeq :: HHQ -> HHQ -> HHQ
          combineSeq pFront pEnd = (\ m1 m2 -> mdo
            let bothVary = varies qFront && varies qEnd
            a <- if noTag m1 && bothVary then uniq "combineSeq start" else return m1
            b <- if noTag m2 && bothVary then uniq "combineSeq stop" else return m2
            mid <- case (noTag a,canAccept qFront,noTag b,canAccept qEnd) of
                     (False,False,_,_) -> return (toAdvice a)
                     (_,_,False,False) -> return (toAdvice b)
                     _ -> if tagged qFront || tagged qEnd then uniq "combineSeq mid" else return NoTag
      --      qFront <- pFront a mid
      --      qEnd <- pEnd (toAdvice mid) b
            (qFront,qEnd) <- front'end (pFront a mid) (pEnd (toAdvice mid) b)
            -- XXX: Perhaps a "produces" should be created to compliment "wants",
            -- then "produces qEnd" could be compared to "wants qFront"
            let wanted = if WantsEither == wants qEnd then wants qFront else wants qEnd
            return $ Q { nullQ = mergeNullViews (nullQ qFront) (nullQ qEnd)
                             , takes = seqTake (takes qFront) (takes qEnd)
                             , preReset = [], postSet = [], preTag = Nothing, postTag = Nothing
                             , tagged = bothVary
                             , childGroups = childGroups qFront || childGroups qEnd
                             , wants = wanted
                             , unQ = Seq qFront qEnd }
                                   )
  go :: Pattern -> HHQ
  go pIn m1 m2 =
    let die = error $ "patternToQ cannot handle "++show pIn
        nil = return $ Q {nullQ=makeEmptyNullView m1 m2
                         ,takes=(0,Just 0)
                         ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2
                         ,tagged=False,childGroups=False,wants=WantsEither
                         ,unQ=Empty}
        one = return $ Q {nullQ=notNull
                         ,takes=(1,Just 1)
                         ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2
                         ,tagged=False,childGroups=False,wants=WantsQNFA
                         ,unQ = OneChar pIn}
        test myTest = return $ Q {nullQ=makeTestNullView myTest m1 m2
                                 ,takes=(0,Just 0)
                                 ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2
                                 ,tagged=False,childGroups=False,wants=WantsQT
                                 ,unQ=Test myTest }
        xtra = newSyntax compOpt
    in case pIn of
         PEmpty -> nil
         POr [] -> nil
         POr [branch] -> go branch m1 m2
         POr branches -> mdo
           -- 2009 : The PNonEmpty p as POr [PEmpty,p] takes no branch tracking tag.
           --        I claim this is because only accepting branches need tags,
           --        and the last accepting branch does not need a tag.
           --        Non-accepting possibilities can all commute to the front and
           --        become part of the nullQ.  The accepting bits then need prioritizing.
           --    Does the above require changes in POr handling in TNFA?  Yes.
           --    Have to always use nullQ instead of "recapitulating" it.
           --    Could also create a constant-writing tag instead of many index tags.
           -- Exasperation: This POr recursive mdo is very easy to make loop and lockup the program
           -- if needTags is False then there is no way to disambiguate branches so fewer tags are needed
           let needUniqTags = childGroups ans
           let needTags = varies ans || childGroups ans -- childGroups detects that "abc|a(b)c" needs tags
           a <- if noTag m1 && needTags then uniq "POr start" else return m1 -- whole POr
           b <- if noTag m2 && needTags then uniq "POr stop" else return m2 -- whole POr
           let aAdvice = toAdvice a -- all branches share 'aAdvice'
               bAdvice = toAdvice b -- last branch gets 'bAdvice', others may get own tag
               -- Due to the recursive-do, it seems that I have to put the if needTags into the op'
               newUniq = if needUniqTags then uniq "POr branch" else return bAdvice
--           trace ("\nPOr sub "++show aAdvice++" "++show bAdvice++"needsTags is "++show needTags) $ return ()
           -- The "bs" values are allocated in left-to-right order before the children in "qs"
           -- optimiztion: low priority for last branch is implicit, do not create separate tag here.
           bs <- fmap (++[bAdvice]) $ replicateM (pred $ length branches) newUniq -- 2 <= length ps
           -- create all the child branches in left-to-right order after the "bs"
           qs <- forM (zip branches bs) (\(branch,bTag) ->  (go branch aAdvice bTag))
           let wqs = map wants qs
               wanted = if any (WantsBoth==) wqs then WantsBoth
                          else case (any (WantsQNFA==) wqs,any (WantsQT==) wqs) of
                                 (True,True) -> WantsBoth
                                 (True,False) -> WantsQNFA
                                 (False,True) -> WantsQT
                                 (False,False) -> WantsEither
               nullView = cleanNullView . tagWrapNullView a b . concatMap nullQ $ qs
               -- The nullView computed above takes the nullQ of the branches and combines them.  This
               -- assumes that the pre/post tags of the children are also part of the nullQ values.  So
               -- for consistency, POr must then add its own pre/post tags to its nullQ value.  Note that
               -- concatMap sets the left-to-right preference when choosing the null views.
           let ans = Q { nullQ = nullView
                       , takes = orTakes . map takes $ qs
                       , preReset = [], postSet = []
                       , preTag = apply a, postTag = apply b
                       , tagged = needTags
                       , childGroups = any childGroups qs
                       , wants = wanted
                       , unQ = Or qs }
           return ans
         PConcat [] -> nil -- fatal to pass [] to combineConcat
         PConcat ps -> combineConcat ps m1 m2
         PStar mayFirstBeNull p -> mdo
           let accepts    = canAccept q
               -- if needsOrbit is False then there is no need to disambiguate captures on each orbit
               -- Both checks are useful because (varies q) of True does not imply (childGroups q) of True when under PNonCapture
               needsOrbit = varies q && childGroups q
               -- if needsOrbit then must check start/stop before the Orbit tag
               -- if accepts then must check start/stop of whole pattern
               needsTags  = needsOrbit || accepts       -- important that needsOrbit implies needsTags
           a <- if noTag m1 && needsTags then uniq "PStar start" else return m1
           b <- if noTag m2 && needsTags then uniq "PStar stop" else return m2
           mOrbit <- if needsOrbit then makeOrbit else return Nothing -- any Orbit tag is created after the pre and post tags
--           test1 <- if tagged q then uniq "not-TEST1" Minimize else return NoTag
-- XXX XXX 1.1.5 testing second NoTag replaced with (toAdvice b)
           (q,resetOrbitTags) <- withOrbit (go p NoTag (toAdvice b)) -- all contained orbit tags get listened to (not including this one).
           let nullView | mayFirstBeNull = cleanNullView $ childViews ++ skipView
                        | otherwise = skipView
                 where childViews = tagWrapNullView a b . orbitWrapNullView mOrbit resetOrbitTags $ nullQ q
                       skipView = makeEmptyNullView a b
           return $ Q { nullQ = nullView
                      , takes = (0,if accepts then Nothing else (Just 0))
                      , preReset = [], postSet = []
                      , preTag = apply a, postTag = apply b
                      , tagged = needsTags
                      , childGroups = childGroups q
                      , wants = WantsQT
                      , unQ =Star { getOrbit = mOrbit
                                  , resetOrbits = resetOrbitTags
                                  , firstNull = mayFirstBeNull
                                  , unStar = q } }
         PCarat dopa -> test (Test_BOL,dopa)
         PDollar dopa -> test (Test_EOL,dopa)
         PChar {} -> one
         PDot {} -> one
         PAny {} -> one
         PAnyNot {} -> one
         -- CompOption's newSyntax enables these escaped anchors
         PEscape dopa '`'  | xtra -> test (Test_BOB,dopa)
         PEscape dopa '\'' | xtra -> test (Test_EOB,dopa)
         PEscape dopa '<'  | xtra -> test (Test_BOW,dopa)
         PEscape dopa '>'  | xtra -> test (Test_EOW,dopa)
         PEscape dopa 'b'  | xtra -> test (Test_EdgeWord,dopa)
         PEscape dopa 'B'  | xtra -> test (Test_NotEdgeWord,dopa)
         -- otherwise escape codes are just the escaped character
         PEscape {} -> one

         -- A PGroup node in the Pattern tree does not become a node
         -- in the Q/P tree. A PGroup can share and pass along a
         -- preTag (with Advice) with other branches, but will pass
         -- down an Apply postTag.
         --
         -- If the parent index is Nothing then this is part of a
         -- non-capturing subtree and ignored.  This is a lazy and
         -- efficient alternative to rebuidling the tree with PGroup
         -- Nothing replacing PGroup (Just _).
         --
         -- Guarded by the getParentIndex /= Nothing check is the
         -- withParent command.
         --
         PGroup Nothing p -> go p m1 m2
         PGroup (Just this) p -> do
           mParent <- getParentIndex
           case mParent of
             Nothing -> go p m1 m2 -- just like PGroup Nothing p
             Just parent -> do
               -- 'a' may be Advice or Apply from parent or Apply created here
               a <- if noTag m1 then uniq "PGroup start" else return m1
               b <- if noTag m2 then uniq "PGroup stop" else return m2
               flag <- ignore "PGroup ignore"
{-
               -- 'b' may be Apply from parent or Apply created here
               b <- if isNothing (apply m2) then uniq "PGroup" else return m2
-}
               (q,resetGroupTags) <- withParent this (go p a b)  -- all immediate child groups stop tags get listened to.
               -- 2009: makeGroup performs a tell, why after withParent? I am no longer sure.
               makeGroup (GroupInfo this parent (fromHandleTag a) (fromHandleTag b) flag)
               return $ q { nullQ = addGroupResetsToNullView resetGroupTags flag (nullQ q)
                          , tagged = True
                          , childGroups = True
                          , preReset = resetGroupTags `mappend` (preReset q)
                          , postSet = (postSet q) `mappend` [flag]
                          }

         -- A PNonCapture node in the Pattern tree does not become a
         -- node in the Q/P tree.  It sets the parent to Nothing while
         -- processing the sub-tree.
         PNonCapture p -> nonCapture (go p m1 m2)

         -- these are here for completeness of the case branches, currently starTrans replaces them all
         PPlus {} -> die
         PQuest {} -> die
         PBound {} -> die
         -- PNonEmpty is deprecated, and not produced in Pattern by starTrans anymore
         PNonEmpty {} -> die

{-
Similar to change in WinTags for QT/QNFA:
Change the NullView to use a tasktags instead of wintags since they are all PreUpdate

         -- PNonEmpty means the child pattern p can be skipped by
         -- bypassing the pattern.  This is only used in the case p
         -- can accept 0 and can accept more than zero characters
         -- (thus the assertions, enforcted by CorePattern.starTrans).
         -- The important thing about this case is intercept the
         -- "accept 0" possibility and replace with "skip".
         PNonEmpty p -> mdo
           let needsTags = canAccept q
           a <- if noTag m1 && needsTags then uniq Minimize else return m1
           b <- if noTag m2 && needsTags then uniq Maximize else return m2
           q <- go p (toAdvice a) (toAdvice b)
           when (not needsTags) (err $ "PNonEmpty could not accept characters: "++show (p,pOrig))
           when (mustAccept q) (err $ "patternToQ : PNonEmpty provided with a *mustAccept* pattern: "++show (p,pOrig))
           return $ Q { nullQ = emptyNull (preTags (apply a) (apply b)) -- The meaning of NonEmpty
                      , takes = (0,snd (takes q))                       -- like Or, drop lower bound to 0
                      , preReset = []
                      , preTag = apply a, postTag = apply b             -- own the closing tag so it will not end a PGroup
                      , tagged = needsTags
                      , childGroups = childGroups q
                      , wants = wants q  -- the test case is "x" =~ "(.|$){1,3}"
                      , unQ = NonEmpty q }

-}
{-
emptyNull :: TagList -> NullView
emptyNull tags = (mempty, tags) : []

testNull :: TestInfo -> TagList -> NullView
testNull (w,d) tags = (SetTestInfo (Map.singleton w (Set.singleton d)), tags) : []

-- Prepend tags to nullView
addTagsToNullView :: TagList -> NullView -> NullView
addTagsToNullView [] oldNV = oldNV
addTagsToNullView tags oldNV= do
  (oldTest,oldTags) <- oldNV
  return (oldTest,tags `mappend` oldTags)

-}


-- xxx todo
-- 
-- see of PNonEmpty -> NonEmpty -> TNFA is really smarter than POr about tags