-- | Types for RNA secondary structure. Types vary from the simplest array
-- (D1Secondary) to rather complex ones.

module Biobase.Secondary.Diagrams where

import           Control.Applicative
import           Control.Arrow
import           Control.Lens
import           Data.Aeson
import           Data.Binary
import           Data.List ((\\))
import           Data.List (sort,groupBy,sortBy,intersperse)
import           Data.List.Split (splitOn)
import           Data.Serialize
import           Data.Tuple.Select (sel1,sel2)
import           Data.Tuple (swap)
import           Data.Vector.Binary
import           Data.Vector.Serialize
import           GHC.Generics
import qualified Data.Vector.Unboxed as VU
import           Text.Printf
import           Control.DeepSeq

import           Biobase.Primary.Nuc
import           Biobase.Secondary.Basepair



-- | RNA secondary structure with 1-diagrams. Each nucleotide is paired with at
-- most one other nucleotide. A nucleotide with index @k@ in @[0..len-1]@ is
-- paired if @unD1S VU.! k >=0 0@ Unpaired status is @-1@.
--
-- TODO Provide @iso@ between @D1Secondary@ and @RNAss@.

newtype D1Secondary = D1S {D1Secondary -> Vector Int
unD1S :: VU.Vector Int}
  deriving (ReadPrec [D1Secondary]
ReadPrec D1Secondary
Int -> ReadS D1Secondary
ReadS [D1Secondary]
(Int -> ReadS D1Secondary)
-> ReadS [D1Secondary]
-> ReadPrec D1Secondary
-> ReadPrec [D1Secondary]
-> Read D1Secondary
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [D1Secondary]
$creadListPrec :: ReadPrec [D1Secondary]
readPrec :: ReadPrec D1Secondary
$creadPrec :: ReadPrec D1Secondary
readList :: ReadS [D1Secondary]
$creadList :: ReadS [D1Secondary]
readsPrec :: Int -> ReadS D1Secondary
$creadsPrec :: Int -> ReadS D1Secondary
Read,Int -> D1Secondary -> ShowS
[D1Secondary] -> ShowS
D1Secondary -> String
(Int -> D1Secondary -> ShowS)
-> (D1Secondary -> String)
-> ([D1Secondary] -> ShowS)
-> Show D1Secondary
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [D1Secondary] -> ShowS
$cshowList :: [D1Secondary] -> ShowS
show :: D1Secondary -> String
$cshow :: D1Secondary -> String
showsPrec :: Int -> D1Secondary -> ShowS
$cshowsPrec :: Int -> D1Secondary -> ShowS
Show,D1Secondary -> D1Secondary -> Bool
(D1Secondary -> D1Secondary -> Bool)
-> (D1Secondary -> D1Secondary -> Bool) -> Eq D1Secondary
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: D1Secondary -> D1Secondary -> Bool
$c/= :: D1Secondary -> D1Secondary -> Bool
== :: D1Secondary -> D1Secondary -> Bool
$c== :: D1Secondary -> D1Secondary -> Bool
Eq,(forall x. D1Secondary -> Rep D1Secondary x)
-> (forall x. Rep D1Secondary x -> D1Secondary)
-> Generic D1Secondary
forall x. Rep D1Secondary x -> D1Secondary
forall x. D1Secondary -> Rep D1Secondary x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep D1Secondary x -> D1Secondary
$cfrom :: forall x. D1Secondary -> Rep D1Secondary x
Generic,D1Secondary -> ()
(D1Secondary -> ()) -> NFData D1Secondary
forall a. (a -> ()) -> NFData a
rnf :: D1Secondary -> ()
$crnf :: D1Secondary -> ()
NFData)

instance Binary    D1Secondary
instance Serialize D1Secondary
instance FromJSON  D1Secondary
instance ToJSON    D1Secondary

-- RNA secondary structure with 2-diagrams. Each nucleotide is paired with up
-- to two other nucleotides.

newtype D2Secondary = D2S {D2Secondary
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
unD2S :: VU.Vector ( (Int,Edge,CTisomerism), (Int,Edge,CTisomerism) )}
  deriving (ReadPrec [D2Secondary]
ReadPrec D2Secondary
Int -> ReadS D2Secondary
ReadS [D2Secondary]
(Int -> ReadS D2Secondary)
-> ReadS [D2Secondary]
-> ReadPrec D2Secondary
-> ReadPrec [D2Secondary]
-> Read D2Secondary
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [D2Secondary]
$creadListPrec :: ReadPrec [D2Secondary]
readPrec :: ReadPrec D2Secondary
$creadPrec :: ReadPrec D2Secondary
readList :: ReadS [D2Secondary]
$creadList :: ReadS [D2Secondary]
readsPrec :: Int -> ReadS D2Secondary
$creadsPrec :: Int -> ReadS D2Secondary
Read,Int -> D2Secondary -> ShowS
[D2Secondary] -> ShowS
D2Secondary -> String
(Int -> D2Secondary -> ShowS)
-> (D2Secondary -> String)
-> ([D2Secondary] -> ShowS)
-> Show D2Secondary
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [D2Secondary] -> ShowS
$cshowList :: [D2Secondary] -> ShowS
show :: D2Secondary -> String
$cshow :: D2Secondary -> String
showsPrec :: Int -> D2Secondary -> ShowS
$cshowsPrec :: Int -> D2Secondary -> ShowS
Show,D2Secondary -> D2Secondary -> Bool
(D2Secondary -> D2Secondary -> Bool)
-> (D2Secondary -> D2Secondary -> Bool) -> Eq D2Secondary
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: D2Secondary -> D2Secondary -> Bool
$c/= :: D2Secondary -> D2Secondary -> Bool
== :: D2Secondary -> D2Secondary -> Bool
$c== :: D2Secondary -> D2Secondary -> Bool
Eq,(forall x. D2Secondary -> Rep D2Secondary x)
-> (forall x. Rep D2Secondary x -> D2Secondary)
-> Generic D2Secondary
forall x. Rep D2Secondary x -> D2Secondary
forall x. D2Secondary -> Rep D2Secondary x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep D2Secondary x -> D2Secondary
$cfrom :: forall x. D2Secondary -> Rep D2Secondary x
Generic)

instance Binary    D2Secondary
instance Serialize D2Secondary
instance FromJSON  D2Secondary
instance ToJSON    D2Secondary

-- | Conversion to and from 1-diagrams.

class MkD1Secondary a where
  mkD1S :: a -> D1Secondary
  fromD1S :: D1Secondary -> a

-- | Conversion to and from 2-diagrams.

class MkD2Secondary a where
  mkD2S :: a -> D2Secondary
  fromD2S :: D2Secondary -> a



-- * Tree-based representation
--
-- TODO Tree -> d1/2Secondary ?

-- | A secondary-structure tree. Has no notion of pseudoknots.

data SSTree idx a = SSTree   idx a [SSTree idx a]
                  | SSExtern Int a [SSTree idx a]
  deriving (ReadPrec [SSTree idx a]
ReadPrec (SSTree idx a)
Int -> ReadS (SSTree idx a)
ReadS [SSTree idx a]
(Int -> ReadS (SSTree idx a))
-> ReadS [SSTree idx a]
-> ReadPrec (SSTree idx a)
-> ReadPrec [SSTree idx a]
-> Read (SSTree idx a)
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
forall idx a. (Read idx, Read a) => ReadPrec [SSTree idx a]
forall idx a. (Read idx, Read a) => ReadPrec (SSTree idx a)
forall idx a. (Read idx, Read a) => Int -> ReadS (SSTree idx a)
forall idx a. (Read idx, Read a) => ReadS [SSTree idx a]
readListPrec :: ReadPrec [SSTree idx a]
$creadListPrec :: forall idx a. (Read idx, Read a) => ReadPrec [SSTree idx a]
readPrec :: ReadPrec (SSTree idx a)
$creadPrec :: forall idx a. (Read idx, Read a) => ReadPrec (SSTree idx a)
readList :: ReadS [SSTree idx a]
$creadList :: forall idx a. (Read idx, Read a) => ReadS [SSTree idx a]
readsPrec :: Int -> ReadS (SSTree idx a)
$creadsPrec :: forall idx a. (Read idx, Read a) => Int -> ReadS (SSTree idx a)
Read,Int -> SSTree idx a -> ShowS
[SSTree idx a] -> ShowS
SSTree idx a -> String
(Int -> SSTree idx a -> ShowS)
-> (SSTree idx a -> String)
-> ([SSTree idx a] -> ShowS)
-> Show (SSTree idx a)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall idx a. (Show idx, Show a) => Int -> SSTree idx a -> ShowS
forall idx a. (Show idx, Show a) => [SSTree idx a] -> ShowS
forall idx a. (Show idx, Show a) => SSTree idx a -> String
showList :: [SSTree idx a] -> ShowS
$cshowList :: forall idx a. (Show idx, Show a) => [SSTree idx a] -> ShowS
show :: SSTree idx a -> String
$cshow :: forall idx a. (Show idx, Show a) => SSTree idx a -> String
showsPrec :: Int -> SSTree idx a -> ShowS
$cshowsPrec :: forall idx a. (Show idx, Show a) => Int -> SSTree idx a -> ShowS
Show,SSTree idx a -> SSTree idx a -> Bool
(SSTree idx a -> SSTree idx a -> Bool)
-> (SSTree idx a -> SSTree idx a -> Bool) -> Eq (SSTree idx a)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall idx a.
(Eq idx, Eq a) =>
SSTree idx a -> SSTree idx a -> Bool
/= :: SSTree idx a -> SSTree idx a -> Bool
$c/= :: forall idx a.
(Eq idx, Eq a) =>
SSTree idx a -> SSTree idx a -> Bool
== :: SSTree idx a -> SSTree idx a -> Bool
$c== :: forall idx a.
(Eq idx, Eq a) =>
SSTree idx a -> SSTree idx a -> Bool
Eq,(forall x. SSTree idx a -> Rep (SSTree idx a) x)
-> (forall x. Rep (SSTree idx a) x -> SSTree idx a)
-> Generic (SSTree idx a)
forall x. Rep (SSTree idx a) x -> SSTree idx a
forall x. SSTree idx a -> Rep (SSTree idx a) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall idx a x. Rep (SSTree idx a) x -> SSTree idx a
forall idx a x. SSTree idx a -> Rep (SSTree idx a) x
$cto :: forall idx a x. Rep (SSTree idx a) x -> SSTree idx a
$cfrom :: forall idx a x. SSTree idx a -> Rep (SSTree idx a) x
Generic)

-- | Create a tree from (pseudoknot-free [not checked]) 1-diagrams.

d1sTree :: D1Secondary -> SSTree PairIdx ()
d1sTree :: D1Secondary -> SSTree PairIdx ()
d1sTree D1Secondary
s = [PairIdx] -> SSTree PairIdx ()
ext ([PairIdx] -> SSTree PairIdx ()) -> [PairIdx] -> SSTree PairIdx ()
forall a b. (a -> b) -> a -> b
$ [PairIdx] -> [PairIdx]
forall a. Ord a => [a] -> [a]
sort [PairIdx]
ps where
  (Int
len,[PairIdx]
ps) = D1Secondary -> (Int, [PairIdx])
forall a. MkD1Secondary a => D1Secondary -> a
fromD1S D1Secondary
s
  ext :: [PairIdx] -> SSTree PairIdx ()
ext [] = Int -> () -> [SSTree PairIdx ()] -> SSTree PairIdx ()
forall idx a. Int -> a -> [SSTree idx a] -> SSTree idx a
SSExtern Int
len () []
  ext [PairIdx]
xs = Int -> () -> [SSTree PairIdx ()] -> SSTree PairIdx ()
forall idx a. Int -> a -> [SSTree idx a] -> SSTree idx a
SSExtern Int
len () ([SSTree PairIdx ()] -> SSTree PairIdx ())
-> ([[PairIdx]] -> [SSTree PairIdx ()])
-> [[PairIdx]]
-> SSTree PairIdx ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([PairIdx] -> SSTree PairIdx ())
-> [[PairIdx]] -> [SSTree PairIdx ()]
forall a b. (a -> b) -> [a] -> [b]
map [PairIdx] -> SSTree PairIdx ()
forall b. Ord b => [(b, b)] -> SSTree (b, b) ()
tree ([[PairIdx]] -> SSTree PairIdx ())
-> [[PairIdx]] -> SSTree PairIdx ()
forall a b. (a -> b) -> a -> b
$ (PairIdx -> PairIdx -> Bool) -> [PairIdx] -> [[PairIdx]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (\PairIdx
l PairIdx
r -> PairIdx -> Int
forall a b. (a, b) -> b
snd PairIdx
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> PairIdx -> Int
forall a b. (a, b) -> a
fst PairIdx
r) [PairIdx]
xs -- ">=" would be partial allowance for 2-diagrams
  tree :: [(b, b)] -> SSTree (b, b) ()
tree [(b, b)
ij]    = (b, b) -> () -> [SSTree (b, b) ()] -> SSTree (b, b) ()
forall idx a. idx -> a -> [SSTree idx a] -> SSTree idx a
SSTree (b, b)
ij () []
  tree ((b, b)
ij:[(b, b)]
xs) = (b, b) -> () -> [SSTree (b, b) ()] -> SSTree (b, b) ()
forall idx a. idx -> a -> [SSTree idx a] -> SSTree idx a
SSTree (b, b)
ij () ([SSTree (b, b) ()] -> SSTree (b, b) ())
-> ([[(b, b)]] -> [SSTree (b, b) ()])
-> [[(b, b)]]
-> SSTree (b, b) ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([(b, b)] -> SSTree (b, b) ()) -> [[(b, b)]] -> [SSTree (b, b) ()]
forall a b. (a -> b) -> [a] -> [b]
map [(b, b)] -> SSTree (b, b) ()
tree ([[(b, b)]] -> SSTree (b, b) ()) -> [[(b, b)]] -> SSTree (b, b) ()
forall a b. (a -> b) -> a -> b
$ ((b, b) -> (b, b) -> Bool) -> [(b, b)] -> [[(b, b)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (\(b, b)
l (b, b)
r -> (b, b) -> b
forall a b. (a, b) -> b
snd (b, b)
l b -> b -> Bool
forall a. Ord a => a -> a -> Bool
> (b, b) -> b
forall a b. (a, b) -> a
fst (b, b)
r) [(b, b)]
xs

-- | Create a tree from (pseudoknot-free [not checked]) 2-diagrams.

d2sTree :: D2Secondary -> SSTree ExtPairIdx ()
d2sTree :: D2Secondary -> SSTree ExtPairIdx ()
d2sTree D2Secondary
s = [ExtPairIdx] -> SSTree ExtPairIdx ()
ext ([ExtPairIdx] -> SSTree ExtPairIdx ())
-> [ExtPairIdx] -> SSTree ExtPairIdx ()
forall a b. (a -> b) -> a -> b
$ (ExtPairIdx -> ExtPairIdx -> Ordering)
-> [ExtPairIdx] -> [ExtPairIdx]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy ExtPairIdx -> ExtPairIdx -> Ordering
forall a b b b.
(Ord b, Ord a) =>
((a, b), b) -> ((a, b), b) -> Ordering
d2Compare [ExtPairIdx]
ps where
  (Int
len,[ExtPairIdx]
ps) = D2Secondary -> (Int, [ExtPairIdx])
forall a. MkD2Secondary a => D2Secondary -> a
fromD2S D2Secondary
s
  ext :: [ExtPairIdx] -> SSTree ExtPairIdx ()
ext [] = Int -> () -> [SSTree ExtPairIdx ()] -> SSTree ExtPairIdx ()
forall idx a. Int -> a -> [SSTree idx a] -> SSTree idx a
SSExtern Int
len () []
  ext [ExtPairIdx]
xs = Int -> () -> [SSTree ExtPairIdx ()] -> SSTree ExtPairIdx ()
forall idx a. Int -> a -> [SSTree idx a] -> SSTree idx a
SSExtern Int
len () ([SSTree ExtPairIdx ()] -> SSTree ExtPairIdx ())
-> ([ExtPairIdx] -> [SSTree ExtPairIdx ()])
-> [ExtPairIdx]
-> SSTree ExtPairIdx ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([ExtPairIdx] -> SSTree ExtPairIdx ())
-> [[ExtPairIdx]] -> [SSTree ExtPairIdx ()]
forall a b. (a -> b) -> [a] -> [b]
map [ExtPairIdx] -> SSTree ExtPairIdx ()
forall a a b.
(Ord a, Ord a) =>
[((a, a), b)] -> SSTree ((a, a), b) ()
tree ([[ExtPairIdx]] -> [SSTree ExtPairIdx ()])
-> ([ExtPairIdx] -> [[ExtPairIdx]])
-> [ExtPairIdx]
-> [SSTree ExtPairIdx ()]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ExtPairIdx -> ExtPairIdx -> Bool)
-> [ExtPairIdx] -> [[ExtPairIdx]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy ExtPairIdx -> ExtPairIdx -> Bool
forall a a b b.
(Ord a, Ord a) =>
((a, a), b) -> ((a, a), b) -> Bool
d2Grouping ([ExtPairIdx] -> SSTree ExtPairIdx ())
-> [ExtPairIdx] -> SSTree ExtPairIdx ()
forall a b. (a -> b) -> a -> b
$ [ExtPairIdx]
xs
  tree :: [((a, a), b)] -> SSTree ((a, a), b) ()
tree [((a, a), b)
ij]    = ((a, a), b)
-> () -> [SSTree ((a, a), b) ()] -> SSTree ((a, a), b) ()
forall idx a. idx -> a -> [SSTree idx a] -> SSTree idx a
SSTree ((a, a), b)
ij () []
  tree (((a, a), b)
ij:[((a, a), b)]
xs) = ((a, a), b)
-> () -> [SSTree ((a, a), b) ()] -> SSTree ((a, a), b) ()
forall idx a. idx -> a -> [SSTree idx a] -> SSTree idx a
SSTree ((a, a), b)
ij () ([SSTree ((a, a), b) ()] -> SSTree ((a, a), b) ())
-> ([((a, a), b)] -> [SSTree ((a, a), b) ()])
-> [((a, a), b)]
-> SSTree ((a, a), b) ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([((a, a), b)] -> SSTree ((a, a), b) ())
-> [[((a, a), b)]] -> [SSTree ((a, a), b) ()]
forall a b. (a -> b) -> [a] -> [b]
map [((a, a), b)] -> SSTree ((a, a), b) ()
tree ([[((a, a), b)]] -> [SSTree ((a, a), b) ()])
-> ([((a, a), b)] -> [[((a, a), b)]])
-> [((a, a), b)]
-> [SSTree ((a, a), b) ()]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (((a, a), b) -> ((a, a), b) -> Bool)
-> [((a, a), b)] -> [[((a, a), b)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy ((a, a), b) -> ((a, a), b) -> Bool
forall a a b b.
(Ord a, Ord a) =>
((a, a), b) -> ((a, a), b) -> Bool
d2Grouping ([((a, a), b)] -> SSTree ((a, a), b) ())
-> [((a, a), b)] -> SSTree ((a, a), b) ()
forall a b. (a -> b) -> a -> b
$ [((a, a), b)]
xs

d2Compare :: ((a, b), b) -> ((a, b), b) -> Ordering
d2Compare ((a
i,b
j),b
_) ((a
k,b
l),b
_)
  | a
ia -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
k = b -> b -> Ordering
forall a. Ord a => a -> a -> Ordering
compare b
l b
j
  | b
jb -> b -> Bool
forall a. Eq a => a -> a -> Bool
==b
l = a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
i a
k
  | Bool
otherwise = (a, b) -> (a, b) -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (a
i,b
j) (a
k,b
l)

d2Grouping :: ((a, a), b) -> ((a, a), b) -> Bool
d2Grouping ((a
i,a
j),b
_) ((a
k,a
l),b
_) = a
ia -> a -> Bool
forall a. Ord a => a -> a -> Bool
<=a
k Bool -> Bool -> Bool
&& a
ja -> a -> Bool
forall a. Ord a => a -> a -> Bool
>=a
l

-- * Instances for D1S

-- | Conversion between D1S and D2S is lossy in D2S -> D1S

instance MkD1Secondary D2Secondary where
  mkD1S :: D2Secondary -> D1Secondary
mkD1S = D2Secondary -> D1Secondary
forall a. MkD2Secondary a => D2Secondary -> a
fromD2S
  fromD1S :: D1Secondary -> D2Secondary
fromD1S = D1Secondary -> D2Secondary
forall a. MkD2Secondary a => a -> D2Secondary
mkD2S

-- | (Length,List of Pairs)

instance MkD1Secondary (Int,[PairIdx]) where
  mkD1S :: (Int, [PairIdx]) -> D1Secondary
mkD1S (Int
len,[PairIdx]
ps) = let xs :: [PairIdx]
xs = (PairIdx -> [PairIdx]) -> [PairIdx] -> [PairIdx]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\PairIdx
ij -> [PairIdx
ij,PairIdx -> PairIdx
forall a b. (a, b) -> (b, a)
swap PairIdx
ij]) [PairIdx]
ps
                   in Vector Int -> D1Secondary
D1S (Vector Int -> D1Secondary) -> Vector Int -> D1Secondary
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Vector Int
forall a. Unbox a => Int -> a -> Vector a
VU.replicate Int
len (-Int
1) Vector Int -> [PairIdx] -> Vector Int
forall a. Unbox a => Vector a -> [(Int, a)] -> Vector a
VU.// [PairIdx]
xs
  fromD1S :: D1Secondary -> (Int, [PairIdx])
fromD1S (D1S Vector Int
s) = (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
s, (PairIdx -> Bool) -> [PairIdx] -> [PairIdx]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(Int
i,Int
j) -> Int
iInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<Int
j Bool -> Bool -> Bool
&& Int
jInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0) ([PairIdx] -> [PairIdx])
-> (Vector Int -> [PairIdx]) -> Vector Int -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [Int] -> [PairIdx]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] ([Int] -> [PairIdx])
-> (Vector Int -> [Int]) -> Vector Int -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector Int -> [PairIdx]) -> Vector Int -> [PairIdx]
forall a b. (a -> b) -> a -> b
$ Vector Int
s)

-- * Instances for D2S

-- | Conversion between D1S and D2S is lossy in D2S -> D1S
--
-- TODO 'fromD2S' makes me wanna rewrite everything...

instance MkD2Secondary D1Secondary where
  mkD2S :: D1Secondary -> D2Secondary
mkD2S = Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> D2Secondary
D2S (Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
 -> D2Secondary)
-> (D1Secondary
    -> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism)))
-> D1Secondary
-> D2Secondary
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism)))
-> Vector Int
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map (\Int
k -> ((Int
k,Edge
W,CTisomerism
Cis),(-Int
1,Edge
W,CTisomerism
Cis))) (Vector Int
 -> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism)))
-> (D1Secondary -> Vector Int)
-> D1Secondary
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. D1Secondary -> Vector Int
unD1S
  fromD2S :: D2Secondary -> D1Secondary
fromD2S (D2S Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
xs) = Vector Int -> D1Secondary
D1S (Vector Int -> D1Secondary)
-> (Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
    -> Vector Int)
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> D1Secondary
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (((Int, Edge, CTisomerism), (Int, Edge, CTisomerism)) -> Int)
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> Vector Int
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map ((Int, Edge, CTisomerism) -> Int
forall a b. Sel1 a b => a -> b
sel1 ((Int, Edge, CTisomerism) -> Int)
-> (((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
    -> (Int, Edge, CTisomerism))
-> ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> (Int, Edge, CTisomerism)
forall a b. Sel1 a b => a -> b
sel1) (Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
 -> D1Secondary)
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> D1Secondary
forall a b. (a -> b) -> a -> b
$ Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
xs

instance MkD2Secondary (Int,[ExtPairIdx]) where
  mkD2S :: (Int, [ExtPairIdx]) -> D2Secondary
mkD2S (Int
len,[ExtPairIdx]
ps) = let xs :: [(Int, (Int, Edge, CTisomerism))]
xs = (ExtPairIdx -> [(Int, (Int, Edge, CTisomerism))])
-> [ExtPairIdx] -> [(Int, (Int, Edge, CTisomerism))]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\((Int
i,Int
j),(CTisomerism
ct,Edge
e1,Edge
e2)) ->
                                          [ (Int
i, (Int
j,Edge
e1,CTisomerism
ct))
                                          , (Int
j, (Int
i,Edge
e2,CTisomerism
ct))
                                          ]) [ExtPairIdx]
ps
                       f :: (b, b) -> b -> (b, b)
f (b
x,b
y) b
z = if b -> a
forall a b. Sel1 a b => a -> b
sel1 b
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== -a
1 then (b
z,b
y) else (b
x,b
z)
                   in Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> D2Secondary
D2S (Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
 -> D2Secondary)
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> D2Secondary
forall a b. (a -> b) -> a -> b
$ (((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
 -> (Int, Edge, CTisomerism)
 -> ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism)))
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> [(Int, (Int, Edge, CTisomerism))]
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
forall a b.
Unbox a =>
(a -> b -> a) -> Vector a -> [(Int, b)] -> Vector a
VU.accum ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> (Int, Edge, CTisomerism)
-> ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
forall a b. (Eq a, Sel1 b a, Num a) => (b, b) -> b -> (b, b)
f (Int
-> ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
forall a. Unbox a => Int -> a -> Vector a
VU.replicate Int
len ((-Int
1,Edge
W,CTisomerism
Cis),(-Int
1,Edge
W,CTisomerism
Cis))) [(Int, (Int, Edge, CTisomerism))]
xs
  fromD2S :: D2Secondary -> (Int, [ExtPairIdx])
fromD2S (D2S Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
s) = ( Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism)) -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
s
                    , let ([(Int, Edge, CTisomerism)]
xs,[(Int, Edge, CTisomerism)]
ys) = [((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))]
-> ([(Int, Edge, CTisomerism)], [(Int, Edge, CTisomerism)])
forall a b. [(a, b)] -> ([a], [b])
unzip ([((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))]
 -> ([(Int, Edge, CTisomerism)], [(Int, Edge, CTisomerism)]))
-> (Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
    -> [((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))])
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> ([(Int, Edge, CTisomerism)], [(Int, Edge, CTisomerism)])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> [((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
 -> ([(Int, Edge, CTisomerism)], [(Int, Edge, CTisomerism)]))
-> Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> ([(Int, Edge, CTisomerism)], [(Int, Edge, CTisomerism)])
forall a b. (a -> b) -> a -> b
$ Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
s
                          g :: Int -> Int -> Edge
g Int
i Int
j = let z :: ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
z = Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
s Vector ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> Int -> ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i in if (Int, Edge, CTisomerism) -> Int
forall a b. Sel1 a b => a -> b
sel1 (((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> (Int, Edge, CTisomerism)
forall a b. Sel1 a b => a -> b
sel1 ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
z) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
j then (Int, Edge, CTisomerism) -> Edge
forall a b. Sel2 a b => a -> b
sel2 (((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> (Int, Edge, CTisomerism)
forall a b. Sel1 a b => a -> b
sel1 ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
z) else (Int, Edge, CTisomerism) -> Edge
forall a b. Sel2 a b => a -> b
sel2 (((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
-> (Int, Edge, CTisomerism)
forall a b. Sel2 a b => a -> b
sel2 ((Int, Edge, CTisomerism), (Int, Edge, CTisomerism))
z)
                          f :: (Int, (Int, Edge, CTisomerism)) -> ExtPairIdx
f (Int
i,(Int
j,Edge
eI,CTisomerism
ct)) = ((Int
i,Int
j),(CTisomerism
ct,Edge
eI,Int -> Int -> Edge
g Int
j Int
i))
                      in
                      ((Int, (Int, Edge, CTisomerism)) -> ExtPairIdx)
-> [(Int, (Int, Edge, CTisomerism))] -> [ExtPairIdx]
forall a b. (a -> b) -> [a] -> [b]
map (Int, (Int, Edge, CTisomerism)) -> ExtPairIdx
f ([(Int, (Int, Edge, CTisomerism))] -> [ExtPairIdx])
-> ([(Int, (Int, Edge, CTisomerism))]
    -> [(Int, (Int, Edge, CTisomerism))])
-> [(Int, (Int, Edge, CTisomerism))]
-> [ExtPairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, (Int, Edge, CTisomerism)) -> Bool)
-> [(Int, (Int, Edge, CTisomerism))]
-> [(Int, (Int, Edge, CTisomerism))]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(Int
i,(Int
j,Edge
_,CTisomerism
_)) -> Int
iInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<Int
j Bool -> Bool -> Bool
&& Int
jInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0) ([(Int, (Int, Edge, CTisomerism))] -> [ExtPairIdx])
-> [(Int, (Int, Edge, CTisomerism))] -> [ExtPairIdx]
forall a b. (a -> b) -> a -> b
$ [Int]
-> [(Int, Edge, CTisomerism)] -> [(Int, (Int, Edge, CTisomerism))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] [(Int, Edge, CTisomerism)]
xs [(Int, (Int, Edge, CTisomerism))]
-> [(Int, (Int, Edge, CTisomerism))]
-> [(Int, (Int, Edge, CTisomerism))]
forall a. [a] -> [a] -> [a]
++ [Int]
-> [(Int, Edge, CTisomerism)] -> [(Int, (Int, Edge, CTisomerism))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] [(Int, Edge, CTisomerism)]
ys
                    )



-- * Older instances (should still work)

-- | A second primitive generator, requiring dictionary and String. This one
-- generates pairs that are then used by the above instance. The dict is a list
-- of possible brackets: ["()"] being the minimal set.

instance MkD1Secondary ([String],String) where
  mkD1S :: ([String], String) -> D1Secondary
mkD1S ([String]
dict,String
xs) = (Int, [PairIdx]) -> D1Secondary
forall a. MkD1Secondary a => a -> D1Secondary
mkD1S (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
xs,[PairIdx]
ps) where
    ps :: [(Int,Int)]
    ps :: [PairIdx]
ps = [String] -> String -> [PairIdx]
unsafeDotBracket2pairlist [String]
dict String
xs
  fromD1S :: D1Secondary -> ([String], String)
fromD1S (D1S Vector Int
s) = ([String
"()"], (Int -> Int -> Char) -> [Int] -> [Int] -> String
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Int -> Char
forall a. (Num a, Ord a) => a -> a -> Char
f [Int
0..] ([Int] -> String) -> [Int] -> String
forall a b. (a -> b) -> a -> b
$ Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Int
s) where
    f :: a -> a -> Char
f a
k (-1) = Char
'.'
    f a
k a
p
      | a
ka -> a -> Bool
forall a. Ord a => a -> a -> Bool
>a
p = Char
')'
      | Bool
otherwise = Char
'('

-- | Generate Secondary given that we have an unboxed vector of characters

instance MkD1Secondary ([String],VU.Vector Char) where
  mkD1S :: ([String], Vector Char) -> D1Secondary
mkD1S ([String]
dict,Vector Char
xs) = ([String], String) -> D1Secondary
forall a. MkD1Secondary a => a -> D1Secondary
mkD1S ([String]
dict, Vector Char -> String
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Char
xs)
  fromD1S :: D1Secondary -> ([String], Vector Char)
fromD1S D1Secondary
s = let ([String]
dict,String
res) = D1Secondary -> ([String], String)
forall a. MkD1Secondary a => D1Secondary -> a
fromD1S D1Secondary
s in ([String]
dict,String -> Vector Char
forall a. Unbox a => [a] -> Vector a
VU.fromList String
res)

-- | A "fast" instance for getting the pair list of vienna-structures.

instance MkD1Secondary String where
  mkD1S :: String -> D1Secondary
mkD1S String
xs = ([String], String) -> D1Secondary
forall a. MkD1Secondary a => a -> D1Secondary
mkD1S ([String
"()" ::String],String
xs)
  fromD1S :: D1Secondary -> String
fromD1S D1Secondary
s = let ([String]
_::[String],String
res) = D1Secondary -> ([String], String)
forall a. MkD1Secondary a => D1Secondary -> a
fromD1S D1Secondary
s in String
res

instance MkD1Secondary (VU.Vector Char) where
  mkD1S :: Vector Char -> D1Secondary
mkD1S Vector Char
xs = ([String], Vector Char) -> D1Secondary
forall a. MkD1Secondary a => a -> D1Secondary
mkD1S ([String
"()" ::String],Vector Char
xs)
  fromD1S :: D1Secondary -> Vector Char
fromD1S D1Secondary
s = let ([String]
_::[String],Vector Char
res::VU.Vector Char) = D1Secondary -> ([String], Vector Char)
forall a. MkD1Secondary a => D1Secondary -> a
fromD1S D1Secondary
s in Vector Char
res



-- * High-level parsing functionality for secondary structures

-- | Completely canonical structure.
--
-- TODO Check size of hairpins and interior loops?

isCanonicalStructure :: String -> Bool
isCanonicalStructure :: String -> Bool
isCanonicalStructure = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ((Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip (forall a. (Foldable [], Eq a) => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem @[]) String
"().")

-- | Is constraint type structure, i.e. there can also be symbols present
-- that denote up- or downstream pairing.

isConstraintStructure :: String -> Bool
isConstraintStructure :: String -> Bool
isConstraintStructure = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ((Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip (forall a. (Foldable [], Eq a) => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem @[]) String
"().<>{}|")

-- | Take a structural string and split it into its constituents.
--
-- If we decide to /NOT/ depend on @lens@ explicitly, another way to write
-- this is:
--
-- @
-- structures :: forall p f . (Profunctor p, Functor f) => p [String] (f [String]) -> p String (f String)
-- structures = dimap (splitOn "&") (fmap (concat . intersperse "&"))
-- @

structures :: Iso' String [String]
structures :: p [String] (f [String]) -> p String (f String)
structures = (String -> [String])
-> ([String] -> String) -> Iso String String [String] [String]
forall s a b t. (s -> a) -> (b -> t) -> Iso s t a b
iso (String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"&") ([String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String] -> [String]
forall a. a -> [a] -> [a]
intersperse String
"&")

-- | A @fold@ structure is a single structure

foldStructure :: Prism' String String
foldStructure :: p String (f String) -> p String (f String)
foldStructure = ShowS
-> (String -> Either String String)
-> Prism String String String String
forall b t s a. (b -> t) -> (s -> Either t a) -> Prism s t a b
prism ShowS
forall a. a -> a
id String -> Either String String
to where
  to :: String -> Either String String
to String
s = case String
sString -> Getting [String] String [String] -> [String]
forall s a. s -> Getting a s a -> a
^.Getting [String] String [String]
Iso String String [String] [String]
structures of
           [String
t] -> String -> Either String String
forall a b. b -> Either a b
Right String
t
           [String]
_   -> String -> Either String String
forall a b. a -> Either a b
Left  String
s

-- | A @cofold@ structure has exactly two structures split by @&@ (which the
-- prism removes).

cofoldStructure :: Prism' String (String,String)
cofoldStructure :: p (String, String) (f (String, String)) -> p String (f String)
cofoldStructure = ((String, String) -> String)
-> (String -> Either String (String, String))
-> Prism String String (String, String) (String, String)
forall b t s a. (b -> t) -> (s -> Either t a) -> Prism s t a b
prism (String, String) -> String
from String -> Either String (String, String)
to where
  from :: (String, String) -> String
from (String
l,String
r) = String
l String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char
'&' Char -> ShowS
forall a. a -> [a] -> [a]
: String
r
  to :: String -> Either String (String, String)
to   String
s     = case String
sString -> Getting [String] String [String] -> [String]
forall s a. s -> Getting a s a -> a
^.Getting [String] String [String]
Iso String String [String] [String]
structures of
                 [String
l,String
r] -> (String, String) -> Either String (String, String)
forall a b. b -> Either a b
Right (String
l,String
r)
                 [String]
_     -> String -> Either String (String, String)
forall a b. a -> Either a b
Left  String
s

-- * Helper functions

-- | Secondary structure parser which allows pseudoknots, if they use different
-- kinds of brackets.

unsafeDotBracket2pairlist :: [String] -> String -> [(Int,Int)]
unsafeDotBracket2pairlist :: [String] -> String -> [PairIdx]
unsafeDotBracket2pairlist [String]
dict String
xs = [PairIdx] -> [PairIdx]
forall a. Ord a => [a] -> [a]
sort ([PairIdx] -> [PairIdx])
-> ([String] -> [PairIdx]) -> [String] -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> [PairIdx]) -> [String] -> [PairIdx]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (String -> String -> [PairIdx]
f String
xs) ([String] -> [PairIdx]) -> [String] -> [PairIdx]
forall a b. (a -> b) -> a -> b
$ [String]
dict where
  f :: String -> String -> [PairIdx]
f String
xs [Char
l,Char
r] = Int -> [Int] -> String -> [PairIdx]
g Int
0 [] (String -> [PairIdx]) -> ShowS -> String -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map (\Char
x -> if Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
l,Char
r] then Char
x else Char
'.') (String -> [PairIdx]) -> String -> [PairIdx]
forall a b. (a -> b) -> a -> b
$ String
xs where
    g :: Int -> [Int] -> String -> [(Int,Int)]
    g :: Int -> [Int] -> String -> [PairIdx]
g Int
_ [Int]
st [] = []
    g Int
k [Int]
st (Char
'.':String
xs) = Int -> [Int] -> String -> [PairIdx]
g (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Int]
st String
xs
    g Int
k [Int]
sst (Char
x:String
xs)
      | Char
lChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
x = Int -> [Int] -> String -> [PairIdx]
g (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
kInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
sst) String
xs
    g Int
k (Int
s:[Int]
st) (Char
x:String
xs)
      | Char
rChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
x = (Int
s,Int
k) PairIdx -> [PairIdx] -> [PairIdx]
forall a. a -> [a] -> [a]
: Int -> [Int] -> String -> [PairIdx]
g (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Int]
st String
xs
    g Int
a [Int]
b String
c = String -> [PairIdx]
forall a. HasCallStack => String -> a
error (String -> [PairIdx]) -> String -> [PairIdx]
forall a b. (a -> b) -> a -> b
$ (Int, [Int], String) -> String
forall a. Show a => a -> String
show (Int
a,[Int]
b,String
c)

-- | Secondary structure parser with a notion of errors. We either return a
-- @Right@ structure, including flags, or a @Left@ error.

dotBracket2pairlist :: [String] -> String -> Either String ( [(Int,Int)] )
dotBracket2pairlist :: [String] -> String -> Either String [PairIdx]
dotBracket2pairlist [String]
dict String
str = ([[PairIdx]] -> [PairIdx])
-> Either String [[PairIdx]] -> Either String [PairIdx]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([PairIdx] -> [PairIdx]
forall a. Ord a => [a] -> [a]
sort ([PairIdx] -> [PairIdx])
-> ([[PairIdx]] -> [PairIdx]) -> [[PairIdx]] -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[PairIdx]] -> [PairIdx]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat) (Either String [[PairIdx]] -> Either String [PairIdx])
-> ([String] -> Either String [[PairIdx]])
-> [String]
-> Either String [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Either String [PairIdx]] -> Either String [[PairIdx]]
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence ([Either String [PairIdx]] -> Either String [[PairIdx]])
-> ([String] -> [Either String [PairIdx]])
-> [String]
-> Either String [[PairIdx]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Either String [PairIdx])
-> [String] -> [Either String [PairIdx]]
forall a b. (a -> b) -> [a] -> [b]
map (String -> String -> Either String [PairIdx]
f String
str) ([String] -> Either String [PairIdx])
-> [String] -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ [String]
dict where
  f :: String -> String -> Either String [PairIdx]
f String
ys [Char
l,Char
r] = Int -> [Int] -> String -> Either String [PairIdx]
g Int
0 [] (String -> Either String [PairIdx])
-> ShowS -> String -> Either String [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map (\Char
x -> if Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
l,Char
r] then Char
x else Char
'.') (String -> Either String [PairIdx])
-> String -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ String
ys where
    g :: Int -> [Int] -> String -> Either String ( [(Int,Int)] )
    g :: Int -> [Int] -> String -> Either String [PairIdx]
g Int
_ [] [] = [PairIdx] -> Either String [PairIdx]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
    g Int
k [Int]
st (Char
'.':String
xs) = Int -> [Int] -> String -> Either String [PairIdx]
g (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Int]
st String
xs
    g Int
k [Int]
st (Char
x:String
xs) | Char
lChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
x = Int -> [Int] -> String -> Either String [PairIdx]
g (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
kInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
st) String
xs
    g Int
k (Int
s:[Int]
st) (Char
x:String
xs) | Char
rChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
x = ((Int
s,Int
k)PairIdx -> [PairIdx] -> [PairIdx]
forall a. a -> [a] -> [a]
:) ([PairIdx] -> [PairIdx])
-> Either String [PairIdx] -> Either String [PairIdx]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> [Int] -> String -> Either String [PairIdx]
g (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Int]
st String
xs
    g Int
k [] String
xs = String -> Either String [PairIdx]
forall a b. a -> Either a b
Left (String -> Either String [PairIdx])
-> String -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ String -> Int -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"too many closing brackets at position %d: '%s' (dot-bracket: %s)" Int
k String
xs String
str
    g Int
k [Int]
st [] = String -> Either String [PairIdx]
forall a b. a -> Either a b
Left (String -> Either String [PairIdx])
-> String -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ String -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"too many opening brackets, opening bracket(s) at: %s (dot-bracket: %s)" ([Int] -> String
forall a. Show a => a -> String
show ([Int] -> String) -> [Int] -> String
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. [a] -> [a]
reverse [Int]
st) String
str
    g Int
a [Int]
b String
c   = String -> Either String [PairIdx]
forall a b. a -> Either a b
Left (String -> Either String [PairIdx])
-> String -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ String -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"unspecified error: %s (dot-bracket: %s)" ((Int, [Int], String) -> String
forall a. Show a => a -> String
show (Int
a,[Int]
b,String
c)) String
str
  f String
xs lr :: String
lr@(Char
_:Char
_:Char
_:String
_) = String -> Either String [PairIdx]
forall a b. a -> Either a b
Left (String -> Either String [PairIdx])
-> String -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ String -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"unsound dictionary: %s (dot-bracket: %s)" String
lr String
str
  f String
xs String
lr     = String -> Either String [PairIdx]
forall a b. a -> Either a b
Left (String -> Either String [PairIdx])
-> String -> Either String [PairIdx]
forall a b. (a -> b) -> a -> b
$ String -> String -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"unspecified error: dict: %s, input: %s (dot-bracket: %s)" String
lr String
xs String
str

-- | Calculates the distance between two vienna strings.

viennaStringDistance :: Bool -> Bool -> String -> String -> (String,Int)
viennaStringDistance :: Bool -> Bool -> String -> String -> (String, Int)
viennaStringDistance Bool
sPairs Bool
tPairs String
s String
t = (String
t,[PairIdx] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([PairIdx] -> Int) -> [PairIdx] -> Int
forall a b. (a -> b) -> a -> b
$ [PairIdx]
ss[PairIdx] -> [PairIdx] -> [PairIdx]
forall a. [a] -> [a] -> [a]
++[PairIdx]
tt) where
  s' :: [PairIdx]
s' = (String -> [PairIdx])
-> ([PairIdx] -> [PairIdx]) -> Either String [PairIdx] -> [PairIdx]
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> [PairIdx]
forall a. HasCallStack => String -> a
error [PairIdx] -> [PairIdx]
forall a. a -> a
id (Either String [PairIdx] -> [PairIdx])
-> (String -> Either String [PairIdx]) -> String -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String -> Either String [PairIdx]
dotBracket2pairlist [String
"()"] (String -> [PairIdx]) -> String -> [PairIdx]
forall a b. (a -> b) -> a -> b
$ String
s
  t' :: [PairIdx]
t' = (String -> [PairIdx])
-> ([PairIdx] -> [PairIdx]) -> Either String [PairIdx] -> [PairIdx]
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> [PairIdx]
forall a. HasCallStack => String -> a
error [PairIdx] -> [PairIdx]
forall a. a -> a
id (Either String [PairIdx] -> [PairIdx])
-> (String -> Either String [PairIdx]) -> String -> [PairIdx]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String -> Either String [PairIdx]
dotBracket2pairlist [String
"()"] (String -> [PairIdx]) -> String -> [PairIdx]
forall a b. (a -> b) -> a -> b
$ String
t
  ss :: [PairIdx]
ss = if Bool
sPairs then [PairIdx]
s' [PairIdx] -> [PairIdx] -> [PairIdx]
forall a. Eq a => [a] -> [a] -> [a]
\\ [PairIdx]
t' else []
  tt :: [PairIdx]
tt = if Bool
tPairs then [PairIdx]
t' [PairIdx] -> [PairIdx] -> [PairIdx]
forall a. Eq a => [a] -> [a] -> [a]
\\ [PairIdx]
s' else []

-- | Calculate the distance between two 'D1Secondary' structures, that live
-- in the same underlying space. In particular, this probably only works
-- for structures on the same primary sequence.
--
-- This function assumes somewhat dense structures, as it is @O(2n)@ with
-- @n@ the length of the underlying vectors.
--
-- @(i,k)@ vs @(j,l)@
--
-- TODO error out on weird inputs!

d1Distance :: D1Secondary -> D1Secondary -> Int
d1Distance :: D1Secondary -> D1Secondary -> Int
d1Distance (D1S Vector Int
x) (D1S Vector Int
y)
--  | VU.length x /= VU.length y = error "d1Distance called on vectors with differing lengths!"
  | Bool
otherwise = (Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2) (Int -> Int) -> (Vector Int -> Int) -> Vector Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> Int
forall a. (Unbox a, Num a) => Vector a -> a
VU.sum (Vector Int -> Int) -> Vector Int -> Int
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Vector Int -> Vector Int -> Vector Int
forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
VU.zipWith Int -> Int -> Int
forall a p. (Ord a, Num p, Num a) => a -> a -> p
chk (Vector Int
x Vector Int -> Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a -> Vector a
VU.++ Vector Int
xx) (Vector Int
y Vector Int -> Vector Int -> Vector Int
forall a. Unbox a => Vector a -> Vector a -> Vector a
VU.++ Vector Int
yy)
  where xx :: Vector Int
xx = Int -> Int -> Vector Int
forall a. Unbox a => Int -> a -> Vector a
VU.replicate (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
y Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
x) (-Int
2)
        yy :: Vector Int
yy = Int -> Int -> Vector Int
forall a. Unbox a => Int -> a -> Vector a
VU.replicate (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
y) (-Int
2)
        chk :: a -> a -> p
chk a
i a
j | a
ia -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
j             = p
0
                | a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<  a
0 Bool -> Bool -> Bool
&& a
j a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<  a
0 = p
0
                | a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
0 Bool -> Bool -> Bool
&& a
j a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
0 = p
2
                | Bool
otherwise        = p
1
        {-# Inline chk #-}
{-# NoInline d1Distance #-}