Source code for iCount.files.fastq
""".. Line to protect from pydocstyle D205, D400.
FASTQ
-----
Reading and writting `FASTQ`_ files.
"""
import os
import iCount
ENCODING_TO_OFFSET = {
'S': 33,
'X': 64,
'I': 64,
'J': 64,
'L': 33,
}
[docs]def get_qual_encoding(fname):
"""
Read first few records and determine quality encoding in FASTQ file.
See format description:
`http://en.wikipedia.org/wiki/FASTQ_format`
S - Sanger Phred+33, raw reads typically (0, 40) [33..73]
X - Solexa Solexa+64, raw reads typically (-5, 40) [59..104]
I - Illumina 1.3+ Phred+64, raw reads typically (0, 40) [64..104]
J - Illumina 1.5+ Phred+64, raw reads typically (3, 40) [66..104]
L - Illumina 1.8+ Phred+33, raw reads typically (0, 41) [33..74]
"""
def get_encoding(quals, count=0, check_count=False):
"""Determine encoding from quality scores range."""
minq, maxq = ord(min(quals)), ord(max(quals))
if minq < 59:
if maxq > 73:
return 'L'
else:
return 'S'
elif 59 <= minq < 64 and maxq > 74:
return 'X'
elif minq >= 64 and maxq > 74:
if (check_count and count > 10000) or not check_count:
if minq < 66:
return 'I'
else:
return 'J'
quals = set()
for count, read in enumerate(FastqFile(fname).read()):
quals.update(set(read.qual))
if count % 10000 == 0:
encoding = get_encoding(quals, count, check_count=True)
if encoding:
return encoding
if quals:
return get_encoding(quals, check_count=False)
[docs]class FastqEntry:
"""Single FASTQ entry."""
def __init__(self, id, seq, plus, qual): # pylint: disable=redefined-builtin
"""Initialize attributes."""
self.id = str(id).split(' ')[0] # pylint: disable=invalid-name
self.seq = str(seq)
self.plus = str(plus)
self.qual = str(qual)
def __repr__(self):
"""Represent object."""
return self.id
[docs]class FastqFile:
"""Write FASTQ files."""
def __init__(self, fname, mode='rt'):
"""Open file handle in desired mode."""
self.fname = fname
if 'r' in mode and not os.path.isfile(fname):
self.file = None
raise FileNotFoundError('File not found.')
self.file = iCount.files.gz_open(fname, mode)
def __del__(self):
"""Close file."""
self.close()
[docs] def read(self):
"""Read FASTQ file."""
for read_id in self.file:
read_seq = next(self.file).rstrip('\n')
read_plus = next(self.file).rstrip('\n')
read_qual = next(self.file).rstrip('\n')
yield FastqEntry(read_id.rstrip('\n'), read_seq, read_plus, read_qual)
[docs] def write(self, fq_entry):
"""Write single FASTQ entry."""
content = [fq_entry.id, fq_entry.seq, fq_entry.plus, fq_entry.qual]
self.file.write('\n'.join(map(str, content)) + '\n')
[docs] def close(self):
"""Close file if it is stil open."""
if self.file and not self.file.closed:
self.file.close()