"""Module for the Lifemapper TreeWrapper class.

Todo:
    * Should we provide a method to collapse clades that only have one child?
    * Add a method to remove annotations.
    * Move the label method out of the internal functions.
"""
import os
import dendropy
import numpy as np
from .matrix import Matrix
# .............................
class PhyloTreeKeys:
    """Keys for phylogenetic trees.

    Attributes:
        MTX_IDX (str): The tree attribute indicating the matrix index position
            for a node.
        SQUID (str): The tree attribute indicating a hashed identifier for the
            taxon.
    """

    # The matrix index for this node
    MTX_IDX = 'mx'
    # This is the LM SQUID (species identifier) for the tip
    SQUID = 'squid'
# .............................................................................
class LmTreeException(Exception):
    """Wrapper around the base Exception class for tree related errors."""
# .............................................................................
class TreeWrapper(dendropy.Tree):
    """Dendropy Tree wrapper.

    Dendropy tree wrapper that adds a little functionality and improves
    performance of some functions.
    """

    # ..............................
    @classmethod
    def from_base_tree(cls, tree):
        """Creates a TreeWrapper object from a base dendropy.Tree.

        Args:
            tree (:obj:`Tree`): A base dendropy tree object to wrap into a
                TreeWrapper.

        Returns:
            TreeWrapper: The newly wrapped tree.
        """
        # Serialize to NEXUS and parse it again through this class
        return cls.get(data=tree.as_string('nexus'), schema='nexus')

    # ..............................
    @classmethod
    def from_filename(cls, filename):
        """Creates a TreeWrapper object by loading a file.

        Args:
            filename (:obj:`str`): A file path to a tree file that should be loaded.

        Returns:
            TreeWrapper: The newly loaded tree.

        Raises:
            IOError: Raised if the tree file cannot be loaded based on the file
                extension.
        """
        schemas_by_extension = {'.nex': 'nexus', '.xml': 'nexml', '.tre': 'newick'}
        _, tree_ext = os.path.splitext(filename)
        # Match the extension case-insensitively so '.NEX' is handled like '.nex'
        tree_schema = schemas_by_extension.get(tree_ext.lower())
        if tree_schema is None:
            raise IOError('Cannot handle tree with extension: {}'.format(tree_ext))
        return cls.get(path=filename, schema=tree_schema)

    # ..............................
    def add_node_labels(self, prefix=None, overwrite=False):
        """Add labels to the nodes in the tree.

        Add labels to the unlabeled internal nodes in the tree.  Numbering starts
        at the number of taxon labels in the tree.

        Args:
            prefix (:obj:`str`, optional): If provided, prefix the node labels
                with this string.
            overwrite (:obj:`bool`, optional): Indicates whether existing node
                labels should be overwritten or if they should be maintained.
                Defaults to False.

        Note:
            * This labels nodes the way that R does.
        """
        self._label_tree_nodes(
            self.seed_node, len(self.get_labels()), prefix=prefix, overwrite=overwrite
        )

    # ..............................
    def annotate_tree(
        self,
        annotation_dict,
        annotation_attribute=None,
        label_attribute=None,
        update=False,
    ):
        """Annotates tree tips and nodes.

        Args:
            annotation_dict (:obj:`dict`): A dictionary where the keys
                correspond with the node labels and the value is either, a
                single value, or a dictionary of annotation name keys and
                annotation value values.
            annotation_attribute (:obj:`str` or None, optional): Only used if
                annotation_dict contains single values, this will be the name
                of the annotation added for each node.  Using None or setting
                value to 'label' will change the label of the node.  Defaults
                to None.
            label_attribute (:obj:`str`, optional): Use the value of this
                annotation as the label for the node.  Setting the value to
                'label' or leaving as None will use the label of the node.
                Defaults to None.
            update (:obj:`bool`, optional): If True, update any existing
                annotations with the annotations provided.  Defaults to False.
        """
        label_method = self._get_label_method(label_attribute)

        for node in self.nodes():
            label = label_method(node)
            if label not in annotation_dict:
                continue
            node_annotation = annotation_dict[label]

            # Only the ".items()" probe decides between "mapping" and "single
            # value"; errors raised while annotating must not be mistaken for a
            # non-mapping value.
            try:
                annotation_items = node_annotation.items()
            except AttributeError:
                # Annotation is a single value
                self._annotate_node(
                    node, annotation_attribute, node_annotation, update=update
                )
            else:
                for ann_name, ann_value in annotation_items:
                    self._annotate_node(node, ann_name, ann_value, update=update)

    # ..............................
    def annotate_tree_tips(
        self, attribute_name, annotation_pairs, label_attribute='label', update=False
    ):
        """Annotates the tips of the tree.

        Deprecated:
            Update to use annotate_tree.

        Args:
            attribute_name (:obj:`str`): The name of the annotation attribute to add.
            annotation_pairs (:obj:`dict`): A dictionary of label keys with annotation
                values.
            label_attribute (:obj:`str`, optional): If this is provided, use this
                annotation attribute as the key instead of the label.  Defaults to
                'label'.
            update (:obj:`bool`, optional): Defaults to False.  Indicates if existing
                annotations should be updated.
        """
        label_method = self._get_label_method(label_attribute)

        for taxon in self.taxon_namespace:
            # Look up the new value first: taxa missing from the dictionary are
            # skipped before any of their existing annotations are touched.
            try:
                new_value = annotation_pairs[label_method(taxon)]
            except KeyError:
                continue

            if taxon.annotations.get_value(attribute_name) is not None:
                if not update:
                    # Keep the existing annotation as-is
                    continue
                # Remove existing values before setting the new one
                for ann in taxon.annotations.findall(name=attribute_name):
                    taxon.annotations.remove(ann)
            taxon.annotations.add_new(attribute_name, new_value)

    # ..............................
    def get_annotations(self, annotation_attribute):
        """Gets a list of (label, annotation) pairs.

        Args:
            annotation_attribute (:obj:`str`): The annotation attribute to retrieve.

        Returns:
            list: A list of (taxon label, annotation value) tuples, one for each
                taxon in the tree's taxon namespace.
        """
        return [
            (taxon.label, taxon.annotations.get_value(annotation_attribute))
            for taxon in self.taxon_namespace
        ]

    # ..............................
    def get_distance_matrix(self, label_attribute='label', ordered_labels=None):
        """Gets a Matrix object of phylogenetic distances.

        Get a Matrix object of phylogenetic distances between tips using a
        lower memory footprint.

        Args:
            label_attribute (:obj:`str`, optional): The attribute of the tips to use as
                labels for the matrix.  Defaults to 'label'.
            ordered_labels (:obj:`list` of :obj:`str`, optional): If provided, use this
                order of labels.

        Returns:
            Matrix: A distance matrix from each tip to each of the other tips
                in the tree.
        """
        label_method = self._get_label_method(label_attribute)
        taxa = list(self.taxon_namespace)
        tip_labels = [label_method(taxon) for taxon in taxa]

        # Get list of labels
        if ordered_labels is None:
            ordered_labels = list(tip_labels)
        label_lookup = {label: idx for idx, label in enumerate(ordered_labels)}
        num_labels = len(ordered_labels)
        dist_mtx = np.zeros((num_labels, num_labels), dtype=float)

        # For every taxon, collect the (node id, edge length) pairs on the path
        # from its tip up to the root.  Paths are stored by taxon position so
        # that duplicate or missing labels cannot overwrite each other.
        paths = []
        for taxon in taxa:
            path = []
            node = self.find_node_for_taxon(taxon)
            while node is not None:
                if node.edge_length is not None:
                    path.append((id(node), node.edge_length))
                node = node.parent_node
            paths.append(path)

        num_taxa = len(taxa)
        for i_1 in range(num_taxa - 1):
            idx1 = label_lookup[tip_labels[i_1]]
            path1 = paths[i_1]
            # Set of node ids gives constant-time "is this edge shared" checks
            path1_ids = {node_id for node_id, _ in path1}
            path1_length = sum((length for _, length in path1), 0.0)

            for i_2 in range(i_1, num_taxa):
                idx2 = label_lookup[tip_labels[i_2]]
                # Start with the full tip-to-root length for taxon 1, remove the
                # edges shared with taxon 2 and add the edges unique to taxon 2.
                dist = path1_length
                for node_id, length in paths[i_2]:
                    if node_id in path1_ids:
                        dist -= length
                    else:
                        dist += length
                dist_mtx[idx1, idx2] = dist
                dist_mtx[idx2, idx1] = dist

        return Matrix(dist_mtx, headers={'0': ordered_labels, '1': ordered_labels})

    # ..............................
    def get_distance_matrix_dendropy(
        self, label_attribute='label', ordered_labels=None
    ):
        """Gets a Matrix object of phylogenetic distances between tips.

        Gets the distance matrix between each tip using Dendropy.

        Args:
            label_attribute (:obj:`str`, optional): The attribute of the tips to use as
                labels for the matrix.  Defaults to 'label'.
            ordered_labels (:obj:`list` of :obj:`str`, optional): If provided, use this
                order of labels.

        Note:
            This method may require a significant amount of memory for large trees.
            The `get_distance_matrix` method has a smaller memory footprint and
            works at nearly the same speed.

        Returns:
            Matrix: A distance matrix from each tip to each of the other tips in the
                tree.
        """
        label_method = self._get_label_method(label_attribute)

        # Get list of labels
        if ordered_labels is None:
            ordered_labels = [label_method(taxon) for taxon in self.taxon_namespace]
        label_lookup = {label: idx for idx, label in enumerate(ordered_labels)}
        num_labels = len(ordered_labels)
        dist_mtx = np.zeros((num_labels, num_labels), dtype=float)

        pdm = self.phylogenetic_distance_matrix()

        for taxon1 in self.taxon_namespace:
            idx1 = label_lookup[label_method(taxon1)]
            for taxon2 in self.taxon_namespace:
                idx2 = label_lookup[label_method(taxon2)]
                dist_mtx[idx1, idx2] = pdm.patristic_distance(taxon1, taxon2)

        return Matrix(dist_mtx, headers={'0': ordered_labels, '1': ordered_labels})

    # ..............................
    def get_labels(self):
        """Gets tip labels for a clade.

        Note:
            Bottom-up order (the reverse of the taxon namespace order).

        Returns:
            A list of taxon labels for the taxa in the tree.
        """
        labels = [taxon.label for taxon in self.taxon_namespace]
        labels.reverse()
        return labels

    # ..............................
    def get_variance_covariance_matrix(
        self, label_attribute='label', ordered_labels=None
    ):
        """Gets a Matrix object of variance / co-variance for tips in tree.

        The variance of a tip is its distance from the root and the co-variance
        of two tips is the distance from the root to the node where their
        lineages split.  Nodes with any number of children are supported.

        Args:
            label_attribute (:obj:`str`, optional): The attribute of the tips to use as
                labels for the matrix.  Defaults to 'label'.
            ordered_labels (:obj:`list` of :obj:`str`, optional): If provided, use this
                order of labels.

        Returns:
            Matrix: A matrix of variance / co-variance values for the tips in
                the tree.

        Raises:
            LmTreeException: If the tree does not have branch lengths.
        """
        if not self.has_branch_lengths():
            raise LmTreeException('Cannot create VCV without branch lengths')

        label_method = self._get_label_method(label_attribute)

        # Get list of labels
        if ordered_labels is None:
            ordered_labels = [label_method(taxon) for taxon in self.taxon_namespace]
        label_lookup = {label: idx for idx, label in enumerate(ordered_labels)}
        num_labels = len(ordered_labels)
        vcv = np.zeros((num_labels, num_labels), dtype=float)

        # Every matrix cell is written at exactly one node (the tip itself for
        # the diagonal, the splitting node for a pair of tips), so the visiting
        # order does not affect the result.
        for edge in self.postorder_edge_iter():
            node = edge.head_node
            root_dist = node.distance_from_root()
            if root_dist is None:
                continue

            children = node.child_nodes()
            if not children:
                # Tip: variance is the distance from the root
                idx = label_lookup[label_method(node.taxon)]
                vcv[idx, idx] = root_dist
                continue

            # Matrix indices of the tips found below each child
            child_tips = [
                [label_lookup[label_method(tip.taxon)] for tip in child.leaf_nodes()]
                for child in children
            ]
            # Tips under different children split at this node
            for pos, left_tips in enumerate(child_tips):
                for right_tips in child_tips[pos + 1:]:
                    for left in left_tips:
                        for right in right_tips:
                            vcv[left, right] = vcv[right, left] = root_dist

        return Matrix(vcv, headers={'0': ordered_labels, '1': ordered_labels})

    # ..............................
    def has_branch_lengths(self):
        """Returns a boolean indicating if the entire tree has branch lengths.

        Returns:
            bool: An indication if the tree has branch lengths.
        """
        # Any failure while computing the leaf distances is taken to mean that
        # branch lengths are missing, so the broad catch is deliberate here.
        try:
            self.minmax_leaf_distance_from_root()
        except Exception:
            return False
        return True

    # ..............................
    def has_polytomies(self):
        """Returns boolean indicating if the tree has polytomies.

        Returns:
            bool: An indication if the tree has any polytomies (a node with more
                than two children).
        """
        return any(len(node.child_nodes()) > 2 for node in self.nodes())

    # ..............................
    def is_binary(self):
        """Checks if the tree is binary.

        Returns:
            bool: An indication if the tree is binary.

        Note:
            * Checks that every clade has either zero or two children.
        """
        return all(len(node.child_nodes()) in (0, 2) for node in self.nodes())

    # ..............................
    def is_ultrametric(self, rel_tol=1e-03):
        """Checks if the tree is ultrametric.

        Args:
            rel_tol (:obj:`float`): The relative tolerance to determine if the min and
                max are equal.  We will say they are equal if they are 99.9%.

        Returns:
            bool: Returns true if the distance from the root to each tip is the
                same (within the tolerance interval).

        Note:
            * To be ultrametric, the branch length from root to tip must be
                equal for all tips.
        """
        try:
            min_bl, max_bl = self.minmax_leaf_distance_from_root()
            return bool(np.isclose(min_bl, max_bl, rtol=rel_tol))
        except TypeError:
            # The distances could not be computed or compared
            return False

    # ..............................
    def prune_tips_without_attribute(self, search_attribute=PhyloTreeKeys.MTX_IDX):
        """Prunes the tree of any tips that don't have the specified attribute.

        Args:
            search_attribute (:obj:`str`, optional): The attribute to look for when
                pruning tips in the tree.  Defaults to PhyloTreeKeys.MTX_IDX.
        """
        prune_taxa = [
            taxon
            for taxon in self.taxon_namespace
            if taxon.annotations.get_value(search_attribute) is None
        ]
        self.prune_taxa(prune_taxa)
        self.purge_taxon_namespace()

    # ..............................
    def _annotation_method(self, label_attribute):
        """Use the label attribute as the node label.

        Args:
            label_attribute (:obj:`str`): The annotation to use as the label for the
                nodes in the tree.

        Returns:
            Method: A method for retrieving the label for a taxon.
        """

        def label_method(taxon):
            """Get the label of a taxon.

            Args:
                taxon (:obj:`Taxon`): A taxon object to get the label for.

            Returns:
                str: The value of the label annotation for the taxon.
            """
            return taxon.annotations.get_value(label_attribute)

        return label_method

    # ..............................
    def _annotate_node(
        self, node, annotation_attribute, annotation_value, update=False
    ):
        """Annotates a node with the given value.

        Args:
            node (:obj:`Node`): A node to add an annotation to.
            annotation_attribute (:obj:`str`): The annotation attribute to add.  If
                None or 'label', update the node label.
            annotation_value (:obj:`object`): The value of the annotation.
            update (:obj:`bool`, optional): If True, update existing attribute.
                Defaults to False.
        """
        if annotation_attribute is None or annotation_attribute.lower() == 'label':
            # Prefer the taxon label; fall back to the node label when the node
            # has no taxon to relabel.
            try:
                node.taxon.label = annotation_value
            except AttributeError:
                node.label = annotation_value
            return

        if node.annotations.get_value(annotation_attribute) is not None:
            if not update:
                # Keep the existing annotation as-is
                return
            # Remove existing annotations
            for ann in node.annotations.findall(name=annotation_attribute):
                node.annotations.remove(ann)
        node.annotations.add_new(annotation_attribute, annotation_value)

    # ..............................
    def _get_label_method(self, label_attribute):
        """Gets the function to be used for retrieving labels.

        Args:
            label_attribute (:obj:`str`): An annotation name, 'label', or None used to
                determine which method to use to retrieve the label of a node.

        Returns:
            Function for labeling nodes.
        """
        if label_attribute is None or label_attribute.lower() == 'label':
            return self._label_method
        return self._annotation_method(label_attribute)

    # ..............................
    def _label_tree_nodes(self, node, i, prefix=None, overwrite=False):
        """Private function to do the work when labeling nodes.

        Args:
            node (:obj:`Node`): A node to label.
            i (:obj:`int`): A count of the number of previously labeled nodes.
            prefix (:obj:`str`, optional): A prefix to use when labeling nodes
                resulting in labels like 'prefix_0'.  Defaults to None and no prefix.
            overwrite (:obj:`bool`, optional): Should node labels be overwritten.
                Defaults to False.

        Returns:
            int: The number of nodes already labeled in the tree.

        Note:
            * Walks the clade in pre-order with an explicit stack, so deep trees
                do not run into the interpreter's recursion limit.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            children = current.child_nodes()
            # Only nodes with children are labeled here
            if not children:
                continue
            if current.label is None or overwrite:
                if prefix is not None:
                    current.label = '{}{}'.format(prefix, i)
                else:
                    current.label = str(i)
                i += 1
            # Push children in reverse so the first child is visited first
            pending.extend(reversed(children))
        # Return the current i value
        return i

    # ..............................
    @staticmethod
    def _label_method(node):
        """Use the label of the node or taxon for the label.

        Args:
            node (:obj:`Node`): The node to get the label for.

        Returns:
            str: If the node or the node's taxon has a label, return it.
            None: If the node and its taxon do not have labels.
        """
        # If the node (or taxon) has a label, return it
        if node.label is not None:
            return node.label
        # Try to return the label of the taxon object if it has one
        try:
            return node.taxon.label
        except AttributeError:
            # No taxon (or no label attribute) to read, fall back to None
            return None
# .............................................................................
# Set the module for the classes to be lmpy, so that PhyloTreeKeys and
# TreeWrapper report 'lmpy' as their defining module.
# NOTE(review): LmTreeException is exported below but its __module__ is not
# changed here -- confirm whether that is intentional.
PhyloTreeKeys.__module__ = 'lmpy'
TreeWrapper.__module__ = 'lmpy'
# Names exported by "from <module> import *"
__all__ = ['LmTreeException', 'PhyloTreeKeys', 'TreeWrapper']