Source code for cap2.pipeline.preprocessing.map_to_mouse


import luigi
import subprocess
from os.path import join, dirname, basename

from .remove_adapters import AdapterRemoval
from ..utils.cap_task import CapTask
from ..config import PipelineConfig
from ..utils.conda import CondaPackage
from ..databases.mouse_removal_db import MouseRemovalDB


[docs]class RemoveMouseReads(CapTask): module_description = """ This module removes reads likely to be mouse. Motivation: as a common lab organism mouse DNA is usually a contaminant even if a sample is not from a mouse DNA that maps to the mouse genome is probably mammalian contaminant of some kind. Negatives: in some samples the sequences that resemble mouse DNA may actually be microbial though in most cases part of the microbial genome will not resemble mouse. Removing mouse DNA may obscure human DNA in the sample. """ MODULE_VERSION = 'v0.1.0' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.pkg = CondaPackage( package="bowtie2", executable="bowtie2", channel="bioconda", config_filename=self.config_filename, ) self.samtools = CondaPackage( package="samtools=1.09", executable="samtools", channel="bioconda", config_filename=self.config_filename, ) self.config = PipelineConfig(self.config_filename) self.out_dir = self.config.out_dir self.db = MouseRemovalDB(config_filename=self.config_filename) self.adapter_removed_reads = AdapterRemoval.from_cap_task(self) def requires(self): return self.samtools, self.pkg, self.db, self.adapter_removed_reads def tool_version(self): version = '[BOWTIE2]\n' version += self.run_cmd(f'{self.pkg.bin} --version').stderr.decode('utf-8') version += '\n[SAMTOOLS]\n' version += self.run_cmd(f'{self.samtools.bin} --version').stderr.decode('utf-8') return version @classmethod def dependencies(cls): return ["samtools", "bowtie2", MouseRemovalDB, AdapterRemoval] @classmethod def _module_name(cls): return 'remove_mouse' def output(self): out = { 'bam': self.get_target('human_alignment', 'bam'), 'nonmouse_reads_1': self.get_target('nonmouse_reads', 'R1.fastq.gz'), } if self.paired: out['nonmouse_reads_2'] = self.get_target('nonmouse_reads', 'R2.fastq.gz') return out def _run(self): if self.paired: return self._run_paired() return self._run_single() def _run_single(self): fastq_out = self.output()['nonmmouse_reads_1'].path.replace('R1', 'R%') cmd = ''.join(( self.pkg.bin, ' -x ', self.db.bowtie2_index, ' -U ', self.adapter_removed_reads.output()['adapter_removed_reads_1'].path, f' --un-gz ', self.output()['nonmmouse_reads_1'].path, ' --threads ', str(self.cores), ' --very-sensitive ', f' | {self.samtools.bin} view -F 4 -b > ', self.output()['bam'].path, )) self.run_cmd(cmd) def _run_paired(self): fastq_out = self.output()['nonmouse_reads_1'].path.replace('R1', 'R%') cmd = ''.join(( self.pkg.bin, ' -x ', self.db.bowtie2_index, ' -1 ', self.adapter_removed_reads.output()['adapter_removed_reads_1'].path, ' -2 ', self.adapter_removed_reads.output()['adapter_removed_reads_2'].path, f' --un-conc-gz {fastq_out} ', ' --threads ', str(self.cores), ' --very-sensitive ', f' | {self.samtools.bin} view -F 4 -b > ', self.output()['bam'].path, )) self.run_cmd(cmd)