Source code for iCount.externals.cutadapt
""".. Line to protect from pydocstyle D205, D400.
Cutadapt
--------
Remove adapter sequences from reads in FASTQ file.
"""
import os
import shutil
import subprocess
import tempfile
import iCount
from iCount.files.fastq import get_qual_encoding, ENCODING_TO_OFFSET
def get_version():
    """
    Get cutadapt version.

    Returns
    -------
    str or None
        Output of ``cutadapt --version`` with trailing newline characters
        removed, or None if the program could not be started or exited
        with a non-zero return code.

    """
    args = ['cutadapt', '--version']
    try:
        # universal_newlines=True makes check_output return text, not bytes.
        ver = subprocess.check_output(args, shell=False, universal_newlines=True)
    except (OSError, subprocess.CalledProcessError):
        # OSError (rather than only FileNotFoundError) also covers a
        # ``cutadapt`` entry on PATH that exists but cannot be executed
        # (PermissionError), so "not usable" is consistently reported as None.
        return None
    return ver.rstrip('\n\r')
def run(reads, adapter, reads_trimmed=None, overwrite=False, qual_trim=None, minimum_length=None, overlap=None,
        untrimmed_output=None, error_rate=None):
    """
    Remove adapter sequences from high-throughput sequencing reads.

    Parameters
    ----------
    reads : str
        Input FASTQ file.
    adapter : str
        Sequence of an adapter ligated to the 3' end.
    reads_trimmed : str
        Output FASTQ file containing trimmed reads. If not provided, a
        randomly named ``.fq`` file (``.fq.gz`` when ``reads`` ends with
        ``.gz``) inside ``iCount.TMP_ROOT`` is used.
    overwrite : bool
        If true, overwrite input file (reads) with trimmed file. The
        input is replaced only when `cutadapt` exits with return code 0,
        so a failed run never clobbers the original reads.
    qual_trim : int
        Trim low-quality bases before adapter removal.
    minimum_length : int
        Discard trimmed reads that are shorter than `minimum_length`.
    overlap : int
        Require ``overlap`` overlap between read and adapter for an
        adapter to be found.
    untrimmed_output : str
        Write reads that do not contain any adapter to this file.
    error_rate : float
        Maximum allowed error rate (no. of errors divided by the length
        of the matching region).

    Returns
    -------
    int
        Return code of the `cutadapt` program.

    """
    args = [
        'cutadapt',
        '--quiet',
        '-a', adapter,
    ]

    # Use offset 33 when the detected encoding is not in ENCODING_TO_OFFSET.
    qual_base = ENCODING_TO_OFFSET.get(get_qual_encoding(reads), 33)
    args.append('--quality-base={}'.format(qual_base))

    if reads_trimmed is None:
        # Auto-generate output name, preserving gzip extension of the input.
        extension = '.gz' if reads.endswith('.gz') else ''
        name = next(tempfile._get_candidate_names()) + '.fq' + extension  # pylint: disable=protected-access
        reads_trimmed = os.path.join(iCount.TMP_ROOT, name)

    # (command-line flag, value, format); options left at None are omitted.
    # '{:d}' keeps the strict integer formatting for the integer options.
    optional = (
        ('-q', qual_trim, '{:d}'),
        ('-m', minimum_length, '{:d}'),
        ('--overlap', overlap, '{:d}'),
        ('--untrimmed-output', untrimmed_output, '{}'),
        ('--error-rate', error_rate, '{}'),
    )
    for flag, value, fmt in optional:
        if value is not None:
            args.extend([flag, fmt.format(value)])

    args.extend(['-o', reads_trimmed, reads])

    rcode = subprocess.call(args, shell=False)

    # Replace the input only after a successful run: on failure the output
    # may be missing (shutil.move would raise) or incomplete (the original
    # reads would be lost).
    if overwrite and rcode == 0:
        shutil.move(reads_trimmed, reads)

    return rcode