Source code for pygfa.graph_element.parser.segment

import re

from pygfa.graph_element.parser import line, field_validator as fv

def is_segmentv1(line_repr):
    """Check whether a GFA line probably belongs to a GFA1 Segment.

    This is a best-effort predicate: malformed input (too few columns,
    an object without the expected attributes, ...) yields ``False``
    instead of raising.

    :param line_repr: A string or a Line that is supposed to
        represent an S line.
    :returns: ``True`` if `line_repr` looks like a GFA1 segment,
        ``False`` otherwise.
    """
    try:
        if isinstance(line_repr, str):
            fields = line_repr.split("\t")
            sequence_regexp = \
                fv.DATASTRING_VALIDATION_REGEXP[fv.GFA1_SEQUENCE]
            # In a GFA1 S line the third column holds the sequence.
            return fields[0] == 'S' \
                and re.fullmatch(sequence_regexp, fields[2]) is not None
        return line_repr.type == 'S' \
            and line_repr.fields['name'] is not None
    except Exception:
        # Any lookup/attribute failure means "not a segment".  Catching
        # Exception (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return False
def is_segmentv2(line_repr):
    """Check whether a string or line probably belongs to a GFA2 Segment.

    This is a best-effort predicate: malformed input (too few columns,
    an object without the expected attributes, ...) yields ``False``
    instead of raising.

    :param line_repr: A string or a Line that is supposed to
        represent an S line.
    :returns: ``True`` if `line_repr` looks like a GFA2 segment,
        ``False`` otherwise.
    """
    try:
        if isinstance(line_repr, str):
            fields = line_repr.split("\t")
            # The third column is checked against the GFA2 position
            # pattern; this is what tells a GFA2 S line apart from a
            # GFA1 one in this module.
            # NOTE(review): SegmentV2.REQUIRED_FIELDS validates this
            # column with GFA2_INT -- confirm GFA2_POSITION is intended.
            position_regexp = \
                fv.DATASTRING_VALIDATION_REGEXP[fv.GFA2_POSITION]
            return fields[0] == 'S' \
                and re.fullmatch(position_regexp, fields[2]) is not None
        return line_repr.type == 'S' \
            and line_repr.fields['sid'] is not None
    except Exception:
        # Any lookup/attribute failure means "not a segment".  Catching
        # Exception (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return False
class SegmentV1(line.Line):
    """A GFA1 Segment line."""

    # Mandatory columns, in the order they appear on the line, mapped to
    # the validator used for each of them.
    REQUIRED_FIELDS = {
        'name': fv.GFA1_NAME,
        'sequence': fv.GFA1_SEQUENCE,
    }

    # Optional tags declared for this line type, mapped to their
    # validator.
    PREDEFINED_OPTFIELDS = {
        'LN': fv.TYPE_i,
        'RC': fv.TYPE_i,
        'FC': fv.TYPE_i,
        'KC': fv.TYPE_i,
        'SH': fv.HEX_BYTE_ARRAY,
        'UR': fv.TYPE_Z,
    }

    def __init__(self):
        super().__init__('S')

    @classmethod
    def from_string(cls, string):
        """Extract the segment fields from the string.

        The string may start with the ``S`` record type or may contain
        only the tab-separated fields of the segment.

        :param string: The tab-separated representation of the segment.
        :returns: A new segment populated with the required fields
            followed by every optional field found on the line.
        :raises line.InvalidLineError: If the string is empty (or only
            whitespace) or holds fewer columns than `REQUIRED_FIELDS`.
            Errors raised by `fv.validate` or
            `line.OptField.from_string` are propagated unchanged.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        fields = string.split('\t')
        if fields[0] == 'S':
            fields = fields[1:]

        if len(fields) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError("The minimum number of field for "
                                        + "SegmentV1 line is not reached.")

        # Instantiate through cls so that subclasses get an instance of
        # their own type from this alternate constructor.
        segment = cls()

        # Every field is validated before any of them is attached to
        # the segment.
        sfields = [
            line.Field('name',
                       fv.validate(fields[0], cls.REQUIRED_FIELDS['name'])),
            line.Field('sequence',
                       fv.validate(fields[1],
                                   cls.REQUIRED_FIELDS['sequence'])),
        ]
        sfields.extend(line.OptField.from_string(optfield)
                       for optfield in fields[2:])

        for field in sfields:
            segment.add_field(field)
        return segment
class SegmentV2(line.Line):
    """A GFA2 Segment line."""

    # Mandatory columns, in the order they appear on the line, mapped to
    # the validator used for each of them.
    REQUIRED_FIELDS = {
        'sid': fv.GFA2_ID,
        'slen': fv.GFA2_INT,
        'sequence': fv.GFA2_SEQUENCE,
    }

    def __init__(self):
        super().__init__('S')

    @classmethod
    def from_string(cls, string):
        """Extract the segment fields from the string.

        The string may start with the ``S`` record type or may contain
        only the tab-separated fields of the segment.

        :param string: The tab-separated representation of the segment.
        :returns: A new segment populated with the required fields
            followed by every optional field found on the line.
        :raises line.InvalidLineError: If the string is empty (or only
            whitespace) or holds fewer columns than `REQUIRED_FIELDS`.
            Errors raised by `fv.validate` or
            `line.OptField.from_string` are propagated unchanged.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        fields = string.split('\t')
        if fields[0] == 'S':
            fields = fields[1:]

        if len(fields) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError("The minimum number of field for "
                                        + "SegmentV2 line is not reached.")

        # Instantiate through cls so that subclasses get an instance of
        # their own type from this alternate constructor.
        segment = cls()

        # Every field is validated before any of them is attached to
        # the segment.
        sfields = [
            line.Field('sid',
                       fv.validate(fields[0], cls.REQUIRED_FIELDS['sid'])),
            line.Field('slen',
                       fv.validate(fields[1], cls.REQUIRED_FIELDS['slen'])),
            line.Field('sequence',
                       fv.validate(fields[2],
                                   cls.REQUIRED_FIELDS['sequence'])),
        ]
        sfields.extend(line.OptField.from_string(optfield)
                       for optfield in fields[3:])

        for field in sfields:
            segment.add_field(field)
        return segment
# Script entry point: nothing is executed when this module is run directly.
if __name__ == '__main__': # pragma: no cover
    pass