module Bio.Bam.Evan
( fixupFlagAbuse
, fixupBwaFlags
, removeWarts
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude
import qualified Data.ByteString.Char8 as S
fixupFlagAbuse :: BamRec -> BamRec
fixupFlagAbuse b =
(if b_flag b .&. flag_low_quality /= 0 then setQualFlag 'Q' else id) $
(if b_flag b .&. flag_low_complexity /= 0 then setQualFlag 'C' else id) $
b { b_flag = cleaned_flags, b_exts = cleaned_exts }
where
flags' = b_flag b .&. complement (flag_low_quality .|. flag_low_complexity)
cleaned_flags | flags' .&. flagPaired == 0 = flags' .&. complement (flagFirstMate .|. flagSecondMate)
| otherwise = flags'
flag_low_quality = 0x800
flag_low_complexity = 0x1000
is_merged = flags' .&. (flagPaired .|. flagFirstMate .|. flagSecondMate) == flagFirstMate .|. flagSecondMate
is_trimmed = flags' .&. (flagPaired .|. flagFirstMate .|. flagSecondMate) == flagSecondMate
newflags = (if is_merged then eflagMerged else 0) .|. (if is_trimmed then eflagTrimmed else 0)
cleaned_exts = case (lookup "FF" (b_exts b), lookup "XF" (b_exts b)) of
( Just (Int i), _ ) -> updateE "FF" (Int (i .|. newflags)) (b_exts b)
( _, Just (Int i) ) -> updateE "FF" (Int (i .|. newflags)) $ deleteE "XF" (b_exts b)
_ | newflags /= 0 -> updateE "FF" (Int newflags ) (b_exts b)
| otherwise -> b_exts b
fixupBwaFlags :: BamRec -> BamRec
fixupBwaFlags b = b { b_flag = fixPP $ b_flag b .|. if mu then flagMateUnmapped else 0 }
where
mu = and [ isPaired b, not (isUnmapped b)
, isReversed b == isMateReversed b
, b_rname b == b_mrnm b, b_pos b == b_mpos b ]
fixPP f | f .&. (flagUnmapped .|. flagMateUnmapped) == 0 = f
| otherwise = f .&. complement flagProperlyPaired
removeWarts :: BamRec -> BamRec
removeWarts br = br { b_qname = name, b_flag = flags, b_exts = tags }
where
(name, flags, tags) = checkFR $ checkC $ checkSharp (b_qname br, b_flag br, b_exts br)
checkFR (n,f,t) | "F_" `S.isPrefixOf` n = checkC (S.drop 2 n, f .|. flagFirstMate .|. flagPaired, t)
| "R_" `S.isPrefixOf` n = checkC (S.drop 2 n, f .|. flagSecondMate .|. flagPaired, t)
| "M_" `S.isPrefixOf` n = checkC (S.drop 2 n, f, insertE "FF" (Int eflagMerged) t)
| "T_" `S.isPrefixOf` n = checkC (S.drop 2 n, f, insertE "FF" (Int eflagTrimmed) t)
| "/1" `S.isSuffixOf` n = ( rdrop 2 n, f .|. flagFirstMate .|. flagPaired, t)
| "/2" `S.isSuffixOf` n = ( rdrop 2 n, f .|. flagSecondMate .|. flagPaired, t)
| otherwise = ( n, f, t)
checkC (n,f,t) | "C_" `S.isPrefixOf` n = (S.drop 2 n, f, insertE "XP" (Int (-1)) t)
| otherwise = ( n, f, t)
rdrop n s = S.take (S.length s - n) s
checkSharp (n,f,t) = case S.split '#' n of [n',ts] -> (n', f, insertTags ts t)
_ -> ( n, f, t)
insertTags ts t | S.null y = insertE "XI" (Text ts) t
| otherwise = insertE "XI" (Text x) $ insertE "XJ" (Text $ S.tail y) t
where (x,y) = S.break (== ',') ts