# Source code for iCount.files.bed

""".. Line to protect from pydocstyle D205, D400.

Reading and writing `BED`_ files.
"""

import os
import shutil
import logging
import tempfile

import pybedtools

import iCount

LOGGER = logging.getLogger(__name__)

def _convert_legacy_bed_format(feature):
    """
    Convert one legacy four-column record into a BED6 interval.

    Old iCount legacy format:
    chrome, start, end, [+-]score
    where +/- indicate strand of cross-link site and score indicates the
    intensity of interaction.

    The returned interval has six fields: chrom, start, end, name, score,
    strand (BED6). The name is always set to ``'.'``.

    Parameters
    ----------
    feature : pybedtools.Interval
        Record read from a legacy four-column file.

    Returns
    -------
    pybedtools.Interval
        Interval built from ``[chrom, start, end, '.', score, strand]``.

    """
    chrom = feature.chrom
    start = feature.start
    end = feature.stop
    name = '.'

    # The fourth column carries the score, optionally prefixed by the strand
    # sign. NOTE(review): read positionally as ``feature[3]``; confirm this
    # matches how callers' legacy files are parsed by pybedtools.
    signed_score = feature[3]
    if signed_score.startswith(('+', '-')):
        strand = signed_score[0]
        score = signed_score[1:]
    else:
        # No explicit sign: the record is treated as being on the '+' strand.
        score = signed_score
        strand = '+'

    return pybedtools.create_interval_from_list(
        [chrom, start, end, name, score, strand])

def convert_legacy(bedgraph_legacy, bed_converted):
    """
    Convert legacy iCount's four-column format into proper BED6 format.

    Old iCount legacy format:
    chrome, start, end, [+-]value
    Strand can be either '+' or '-', and value indicates the intensity of
    interaction.

    The returned BED file follows the BED6 format, as explained in the
    "General usage" chapter of the bedtools manual
    (https://bedtools.readthedocs.io/en/latest/content/general-usage.html).

    Parameters
    ----------
    bedgraph_legacy : str
        Path to the input file in legacy four-column format.
    bed_converted : str
        Path where the converted BED6 file is saved.

    Returns
    -------
    pybedtools.BedTool
        Object returned by ``saveas(bed_converted)`` for the converted file.

    """
    # Sort first and materialise the result, then convert record by record.
    sites = pybedtools.BedTool(bedgraph_legacy).sort().saveas()
    sites1 = sites.each(_convert_legacy_bed_format).saveas(bed_converted)
    return sites1


def merge_bed(sites_grouped, sites):
    """
    Merge multiple files with crosslinks into one.

    Concatenate files into one file. Also, merge crosslinks from different
    files that are on same position and sum their scores.

    Parameters
    ----------
    sites_grouped : str
        Path to output BED6 file containing merged data from input sites files.
    sites : list_str
        List of BED6 files(paths) to be merged.

    Returns
    -------
    str
        Absolute path to outfile.

    Raises
    ------
    ValueError
        If ``sites`` is empty or if one of the given paths is not a file.

    """
    iCount.log_inputs(LOGGER, level=logging.INFO)

    if not sites:
        raise ValueError(
            "At least one element expected in files list, but none found.")

    LOGGER.info('Reading input files...')
    # delete=False because the file is re-opened by name after it is closed;
    # it is removed explicitly in the ``finally`` clause below.
    joined = tempfile.NamedTemporaryFile(mode='at', delete=False)
    try:
        with joined:
            for file_path in sites:
                if not os.path.isfile(file_path):
                    raise ValueError("File {} not found.".format(file_path))
                with iCount.files.gz_open(file_path, 'rt') as infile:
                    shutil.copyfileobj(infile, joined)

        # Merge intervals in "joined" file (needs to be sorted before!):
        # s=True - only merge features that are on the same strand
        # d=-1 - join only intervals with at least one base-pair overlap -
        #        default (0) merges also touching intervals
        # c=5, o='sum' - when merging intervals, make operation 'sum' on
        #        column 5 (score)
        LOGGER.info('Merging files...')
        merged = pybedtools.BedTool(joined.name).sort().merge(
            s=True, d=-1, c=5, o='sum').sort().saveas()
    finally:
        # ``merged`` has been saved to its own file by now, so the
        # concatenated input is no longer needed (also cleans up on errors).
        os.remove(joined.name)

    # Columns are now shuffled to: chrom-start-stop-strand-score
    # Reorder to: chrom-start-stop-empty_name-score-strand
    # which corresponds to BED6
    # NOTE(review): this assumes merge reports the strand as column 4 and the
    # summed score as column 5; which columns ``-s`` emits may depend on the
    # installed bedtools version - confirm.
    LOGGER.info('Saving results...')
    reordered = (
        pybedtools.create_interval_from_list(i[:3] + ['.', i[4], i[3]])
        for i in merged
    )
    pybedtools.BedTool(reordered).saveas(sites_grouped)

    # Report the requested output file, not an intermediate temporary file.
    outfile = os.path.abspath(sites_grouped)
    LOGGER.info('Done. Results saved to: %s', outfile)
    return outfile