Source code for treetime.seqgen

from collections import defaultdict
import numpy as np
from . import config as ttconf
from .seq_utils import seq2array, seq2prof
from .gtr import GTR
from .treeanc import TreeAnc


[docs]class SeqGen(TreeAnc): ''' Evolve sequences along a given tree with a specific GTR model. This class inherits from TreeAnc. '''
[docs] def __init__(self, L, *args, **kwargs): """Instantiate. Mandatory arguments are a the sequence length, tree and GTR model. """ super(SeqGen, self).__init__(seq_len=L, compress=False, **kwargs)
def sample_from_profile(self, p): """returns a sequence sampled from a profile (column wise state probabilities) Parameters ---------- p : np.array sequence profile with dimensions (L,q) Returns ------- np.array (character) sequence as character array array(['A', 'C', 'G',...]) """ cum_p = p.cumsum(axis=1).T prand = self.rng.random(self.seq_len) seq = self.gtr.alphabet[np.argmax(cum_p>prand, axis=0)] return seq def evolve(self, root_seq=None): """Evolve a root sequences along a tree. If no root sequences is provided, one will be sampled from the equilibrium probabilities of the GTR model Parameters ---------- root_seq : numpy character array, optional sequence to be used as the root sequence of the tree. if not given, will sample a sequence from the equilibrium probabilities of the GTR model. """ # set root if not given if root_seq: self.tree.root.ancestral_sequence = seq2array(root_seq) else: if len(self.gtr.Pi.shape)==2: self.tree.root.ancestral_sequence = self.sample_from_profile(self.gtr.Pi.T) else: self.tree.root.ancestral_sequence = self.sample_from_profile(np.repeat([self.gtr.Pi], self.seq_len, axis=0)) # generate sequences in preorder for n in self.tree.get_nonterminals(order='preorder'): profile_p = seq2prof(n.ancestral_sequence, self.gtr.profile_map) for c in n: profile = self.gtr.evolve(profile_p, c.branch_length) c.ancestral_sequence = self.sample_from_profile(profile) self.aln = self.get_aln() def get_aln(self, internal=False): """assemble a multiple sequence alignment from the evolved sequences. Optionally in clude internal sequences Parameters ---------- internal : bool, optional include sequences of internal nodes in the alignment Returns ------- Bio.Align.MultipleSeqAlignment multiple sequence alignment """ from Bio import SeqRecord, Seq from Bio.Align import MultipleSeqAlignment tmp = [] for n in self.tree.find_clades(): if n.is_terminal() or internal: tmp.append(SeqRecord.SeqRecord(id=n.name, name=n.name, description='', seq=Seq.Seq(''.join(n.ancestral_sequence.astype('U'))))) return MultipleSeqAlignment(tmp)