# Source code for pygfa.graph_element.parser.segment

import re

from pygfa.graph_element.parser import line, field_validator as fv

def is_segmentv1(line_repr):
    """Check whether a GFA line probably is a Segment of GFA version 1.

    A string is accepted when, once split on tabs, its first field is
    ``S`` and its third field fully matches the GFA1 sequence pattern.
    Any other object is accepted when its ``type`` attribute equals
    ``'S'`` and its ``fields`` mapping holds a non-``None`` ``'name'``
    entry.

    :param line_repr: A string or a Line that is supposed to
        represent an S line.
    :returns: True when the line looks like a GFA1 segment, False
        otherwise. False is also returned when `line_repr` cannot be
        inspected (too few fields, missing attribute or key, ...).
    """
    try:
        if isinstance(line_repr, str):
            fields = line_repr.split("\t")
            pattern = fv.DATASTRING_VALIDATION_REGEXP[fv.GFA1_SEQUENCE]
            # fields[2] raises IndexError on short lines; that is
            # reported as "not a segment" by the handler below.
            matched = re.fullmatch(pattern, fields[2]) is not None
            return matched and fields[0] == 'S'
        return line_repr.type == 'S' \
            and line_repr.fields['name'] is not None
    except Exception:
        # This predicate is best-effort and must never raise on malformed
        # input. Unlike a bare ``except``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return False


def is_segmentv2(line_repr):
    """Check whether a GFA line probably is a Segment of GFA version 2.

    A string is accepted when, once split on tabs, its first field is
    ``S`` and its third field fully matches the GFA2 position pattern.
    Any other object is accepted when its ``type`` attribute equals
    ``'S'`` and its ``fields`` mapping holds a non-``None`` ``'sid'``
    entry.

    :param line_repr: A string or a Line that is supposed to represent
        an S line.
    :returns: True when the line looks like a GFA2 segment, False
        otherwise. False is also returned when `line_repr` cannot be
        inspected (too few fields, missing attribute or key, ...).
    """
    try:
        if isinstance(line_repr, str):
            fields = line_repr.split("\t")
            # NOTE(review): going by SegmentV2.REQUIRED_FIELDS order the
            # third tab field is ``slen``; it is tested against the
            # position pattern rather than GFA2_INT -- confirm intended.
            pattern = fv.DATASTRING_VALIDATION_REGEXP[fv.GFA2_POSITION]
            # fields[2] raises IndexError on short lines; that is
            # reported as "not a segment" by the handler below.
            matched = re.fullmatch(pattern, fields[2]) is not None
            return matched and fields[0] == 'S'
        return line_repr.type == 'S' \
            and line_repr.fields['sid'] is not None
    except Exception:
        # This predicate is best-effort and must never raise on malformed
        # input. Unlike a bare ``except``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return False

class SegmentV1(line.Line):
    """A GFA1 Segment line.

    Instances are created with the line type ``'S'`` and are normally
    built from text through :meth:`from_string`.
    """

    # Required positional fields, in the order they appear on the line,
    # mapped to the validator key handed to ``fv.validate``.
    REQUIRED_FIELDS = {
        'name': fv.GFA1_NAME,
        'sequence': fv.GFA1_SEQUENCE,
    }

    # Optional tags mapped to a field type. Not read inside this class;
    # presumably consulted by line.Line -- TODO confirm.
    PREDEFINED_OPTFIELDS = {
        'LN': fv.TYPE_i,
        'RC': fv.TYPE_i,
        'FC': fv.TYPE_i,
        'KC': fv.TYPE_i,
        'SH': fv.HEX_BYTE_ARRAY,
        'UR': fv.TYPE_Z,
    }

    def __init__(self):
        super().__init__('S')

    @classmethod
    def from_string(cls, string):
        """Extract the segment fields from the string.

        The string may start with the ``S`` record type or may contain
        only the tab-separated fields of the segment.

        :param string: The tab-separated text of a GFA1 segment line.
        :returns: A new instance of `cls` holding the ``name`` and
            ``sequence`` fields followed by every optional field found
            after them.
        :raises line.InvalidLineError: If `string` is empty or only
            whitespace, or holds fewer fields than `REQUIRED_FIELDS`.

        Errors raised by ``fv.validate`` or
        ``line.OptField.from_string`` are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        fields = string.split('\t')
        if fields[0] == 'S':
            # Drop the record type so indexes below address the payload.
            fields = fields[1:]

        if len(fields) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError("The minimum number of field for "
                                        "SegmentV1 line is not reached.")

        # Validate and parse everything before touching the segment, so
        # a failure never leaves a half-populated object behind.
        name_f = fv.validate(fields[0], cls.REQUIRED_FIELDS['name'])
        seq_f = fv.validate(fields[1], cls.REQUIRED_FIELDS['sequence'])
        sfields = [
            line.Field('name', name_f),
            line.Field('sequence', seq_f),
        ]
        sfields.extend(line.OptField.from_string(opt) for opt in fields[2:])

        # Instantiate through cls so subclasses get their own type back.
        segment = cls()
        for field in sfields:
            segment.add_field(field)
        return segment


class SegmentV2(line.Line):
    """A GFA2 Segment line.

    Instances are created with the line type ``'S'`` and are normally
    built from text through :meth:`from_string`.
    """

    # Required positional fields, in the order they appear on the line,
    # mapped to the validator key handed to ``fv.validate``.
    REQUIRED_FIELDS = {
        'sid': fv.GFA2_ID,
        'slen': fv.GFA2_INT,
        'sequence': fv.GFA2_SEQUENCE,
    }

    def __init__(self):
        super().__init__('S')

    @classmethod
    def from_string(cls, string):
        """Extract the segment fields from the string.

        The string may start with the ``S`` record type or may contain
        only the tab-separated fields of the segment.

        :param string: The tab-separated text of a GFA2 segment line.
        :returns: A new instance of `cls` holding the ``sid``, ``slen``
            and ``sequence`` fields followed by every optional field
            found after them.
        :raises line.InvalidLineError: If `string` is empty or only
            whitespace, or holds fewer fields than `REQUIRED_FIELDS`.

        Errors raised by ``fv.validate`` or
        ``line.OptField.from_string`` are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        fields = string.split('\t')
        if fields[0] == 'S':
            # Drop the record type so indexes below address the payload.
            fields = fields[1:]

        if len(fields) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError("The minimum number of field for "
                                        "SegmentV2 line is not reached.")

        # Validate and parse everything before touching the segment, so
        # a failure never leaves a half-populated object behind.
        sid_f = fv.validate(fields[0], cls.REQUIRED_FIELDS['sid'])
        slen_f = fv.validate(fields[1], cls.REQUIRED_FIELDS['slen'])
        sequence_f = fv.validate(fields[2], cls.REQUIRED_FIELDS['sequence'])
        sfields = [
            line.Field('sid', sid_f),
            line.Field('slen', slen_f),
            line.Field('sequence', sequence_f),
        ]
        sfields.extend(line.OptField.from_string(opt) for opt in fields[3:])

        # Instantiate through cls so subclasses get their own type back.
        segment = cls()
        for field in sfields:
            segment.add_field(field)
        return segment


# Script entry-point guard: executing this module directly does nothing.
if __name__ == '__main__': # pragma: no cover
    pass