from pygfa.dovetail_operations.components.connected import dovetails_nodes_connected_components
from pygfa.dovetail_operations.components.connected import dovetails_nodes_connected_component
from pygfa.dovetail_operations.components.connected import dovetails_connected_components_subgraphs
from pygfa.dovetail_operations.components.biconnected import dovetails_articulation_points
from pygfa.dovetail_operations.linear_paths import *
from pygfa.dovetail_operations.simple_paths import *
from ..operations import nodes_connected_component
def dovetails_remove_small_components(gfa_, min_length):
    """Remove all the connected components whose total sequence
    length is less than min_length.

    For each connected component the `slen` values of its nodes are
    summed; nodes whose length is missing or not a number contribute
    nothing to the sum. If the sum is less than `min_length` every node
    of the component is removed from the graph.

    :param gfa_: The graph to prune. It is modified in place.
    :param min_length: An integer describing the required length
        to keep a connected component.
    :raises ValueError: If min_length is negative.

    :note:
        When connected components are computed only dovetail overlaps
        edges are considered.
    """
    if min_length < 0:
        raise ValueError("min_length must be >= 0")

    # Collect the nodes first and remove them only after the scan is
    # over: the components iterator may still be walking the graph, and
    # mutating the graph underneath it is unsafe. Components are
    # disjoint, so deferring the removal does not change the result.
    to_remove = []
    for conn_comp in dovetails_nodes_connected_components(gfa_):
        length = 0
        for nid in conn_comp:
            node_ = gfa_.node(nid)
            try:
                length += node_['slen']
            except (TypeError, KeyError):
                # Unknown length (no node data, no 'slen' key, or a
                # non numeric value): counted as zero on purpose.
                pass
        if length < min_length:
            to_remove.extend(conn_comp)

    for nid in to_remove:
        gfa_.remove_node(nid)
def dovetails_remove_dead_ends(\
                                gfa_, \
                                min_length, \
                                safe_remove=False):
    """Remove all the nodes whose (left degree, right degree) pair is
    one of (0,0), (0,1), (1,0) and whose sequence length is less
    than the given length.

    Nodes that are articulation points are never removed, so that
    removing a node doesn't split its connected component in two.

    :param gfa_: The graph to prune. It is modified in place.
    :param min_length: An integer; candidate nodes whose length is
        less than this value are removed.
    :param safe_remove: If set the operation doesn't remove nodes
        where is not possible to obtain the length value (the `slen`
        field is missing or is None). If not set, a node without the
        `slen` field is always removed, while a node whose `slen` is
        None is treated as having length 0.
    :raises ValueError: If min_length is negative.

    :note:
        Using the right and left degree, only dovetails overlaps
        are considered.
    """
    if min_length < 0:
        raise ValueError("min_length must be >= 0")

    art_points = set(dovetails_articulation_points(gfa_))
    dead_end_degrees = ((0, 0), (0, 1), (1, 0))

    # Removal is deferred until the iteration over the nodes is done.
    to_remove = set()
    for nid, node_ in gfa_.nodes_iter(data=True):
        degrees = (gfa_.left_degree(nid), gfa_.right_degree(nid))
        if degrees not in dead_end_degrees or nid in art_points:
            continue

        try:
            length = node_['slen']
        except KeyError:
            # No length field at all: removed unless the caller asked
            # for the safe behaviour.
            if not safe_remove:
                to_remove.add(nid)
            continue

        if length is None:
            # The length value cannot be obtained: honour safe_remove
            # as documented, otherwise fall back to a length of zero.
            if safe_remove:
                continue
            length = 0

        if length < min_length:
            to_remove.add(nid)

    for nid in to_remove:
        gfa_.remove_node(nid)