Source code for iCount.files

""".. Line to protect from pydocstyle D205, D400.

Files
=====

iCount works with various file formats: `FASTA`_ and `FASTQ`_ for sequencing data, `GTF`_ for
genome annotation, `BAM`_ for data on mapped reads and `BED`_ for quantified cross-linked sites.
Parsing of `GTF`_ files is done with `pybedtools`_.

.. autofunction:: iCount.files.gz_open
.. autofunction:: iCount.files.decompress_to_tempfile

.. automodule:: iCount.files.bed
   :members:

.. automodule:: iCount.files.bedgraph
   :members:

.. automodule:: iCount.files.fastq
   :members:

.. automodule:: iCount.files.fasta
   :members:


.. _FASTA:
    https://en.wikipedia.org/wiki/FASTA_format

.. _FASTQ:
    https://en.wikipedia.org/wiki/FASTQ_format

.. _GTF:
    http://www.gencodegenes.org/gencodeformat.html

.. _BAM:
    https://samtools.github.io/hts-specs/SAMv1.pdf

.. _BED:
    http://bedtools.readthedocs.io/en/latest/content/general-usage.html#bed-format

.. _pybedtools:
    https://daler.github.io/pybedtools/index.html

"""

import os
import gzip
import tempfile
import shutil

import iCount

from . import bed
from . import bedgraph
from . import fasta
from . import fastq


def gz_open(fname, mode):
    """
    Open a plain or gzip-compressed file.

    Files whose name ends with ``.gz`` are opened with the :py:mod:`gzip`
    library, every other file with the built-in :py:func:`open`. The ``mode``
    string is handed to the chosen opener unchanged.

    Parameters
    ----------
    fname : str
        Path to file to open.
    mode : str
        String indicating how the file is to be opened.

    Returns
    -------
    file
        File Object.

    Raises
    ------
    FileNotFoundError
        If ``mode`` contains ``'r'`` and ``fname`` is not an existing file.

    """
    wants_read = 'r' in mode
    if wants_read and not os.path.isfile(fname):
        raise FileNotFoundError('File not found.')

    # Pick the opener from the file name; both accept (fname, mode).
    opener = gzip.open if fname.endswith('.gz') else open
    return opener(fname, mode)
def decompress_to_tempfile(fname, context='misc'):
    """
    Decompress files ending with .gz to a temporary file and return filename.

    If file does not end with .gz, just return fname.

    The temporary file is created with ``delete=False``, so it is not removed
    automatically when it is closed.

    Parameters
    ----------
    fname : str
        Path to file to open.
    context : str
        Name of temporary subfolder where temporary file is created.

    Returns
    -------
    str
        Path to decompressed file.

    """
    if not fname.endswith('.gz'):
        return fname

    tmp_dir = os.path.join(iCount.TMP_ROOT, context)
    # exist_ok avoids the race between checking for the folder and creating it.
    os.makedirs(tmp_dir, exist_ok=True)

    suffix = '_{:s}'.format(os.path.basename(fname))
    # Open the input first: if it cannot be opened, no orphan temporary file
    # is left behind. Context managers close both handles even when copying
    # fails part-way through.
    with gzip.open(fname, 'rb') as fin:
        with tempfile.NamedTemporaryFile(suffix=suffix, dir=tmp_dir, delete=False) as fout:
            shutil.copyfileobj(fin, fout)
    return fout.name
def get_temp_file_name(tmp_dir=None, extension=''): """Return an availiable name for temporary file.""" if tmp_dir is None: tmp_dir = iCount.TMP_ROOT # pylint: disable=protected-access tmp_name = next(tempfile._get_candidate_names()) if not tmp_dir: # pylint: disable=protected-access tmp_dir = tempfile._get_default_tempdir() if extension is not None: tmp_name = tmp_name + '.' + extension return os.path.join(tmp_dir, tmp_name) def _f2s(number, dec=4): """ Return string representation of ``number``. Returned string is: * without trailing decimal zeros, * with at most ``dec`` decimal places. """ if not isinstance(number, (int, float)): return number return '{{:.{:d}f}}'.format(dec).format(number).rstrip('0').rstrip('.') def remove_extension(fname, extensions): """Remove filename extension.""" name = os.path.basename(fname) for extension in sorted(extensions, key=lambda x: len(x), reverse=True): # pylint: disable=unnecessary-lambda if extension and name.endswith(extension): name = name[:-len(extension)] break return name