Source code for cap2.pipeline.databases.mouse_removal_db


import luigi
from os.path import join, dirname
from glob import glob
import subprocess
from os import makedirs

from ..config import PipelineConfig
from ..utils.conda import CondaPackage
from ..utils.cap_task import CapDbTask

MOUSE_GENOME_URL = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/635/GCA_000001635.9_GRCm39/GCA_000001635.9_GRCm39_genomic.fna.gz'


[docs]class MouseRemovalDB(CapDbTask):
    """This class is responsible for building and/or retriveing
    validating the database which will be used to remove mouse
    reads from the sample.
    """
    config_filename = luigi.Parameter()
    cores = luigi.IntParameter(default=1)
    MODULE_VERSION = 'v1.0.0'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.pkg = CondaPackage(
            package="bowtie2==2.4.1",
            executable="bowtie2-build",
            channel="bioconda",
            config_filename=self.config_filename,
        )
        self.config = PipelineConfig(self.config_filename)
        self.db_dir = self.config.db_dir
        self._fastas = []

    def download_mouse_genome(self):
        local_dir = join(self.config.db_dir, 'GRCm39')
        makedirs(local_dir, exist_ok=True)
        cmd = (
            'wget '
            f'--directory-prefix={local_dir} '
            f'{MOUSE_GENOME_URL} '
        )
        self.run_cmd(cmd)
        local_path = join(local_dir, 'GCA_000001635.9_GRCm39_genomic.fna.gz')
        return local_path

    @property
    def fastas(self):
        if self._fastas:
            return self._fastas
        local_path = self.download_mouse_genome()
        self._fastas = [local_path]
        return self._fastas

    def tool_version(self):
        return self.run_cmd(f'{self.pkg.bin} --version').stderr.decode('utf-8')

    def requires(self):
        return self.pkg

    @classmethod
    def _module_name(cls):
        return 'bowtie_mouse_removal_db'

    @classmethod
    def dependencies(cls):
        return ['bowtie2==2.4.1', 'GRCm39', '2020-10-21']

    @property
    def bowtie2_index(self):
        return join(self.db_dir, 'GRCm39', 'mouse_removal.bt2')

    def output(self):
        index = luigi.LocalTarget(self.bowtie2_index + '.1.bt2')
        index.makedirs()
        return {
            'bt2_index_1': index,
        }

    def build_bowtie2_index_from_fasta(self):
        cmd = ''.join((
            self.pkg.bin,
            f' --threads {self.cores} ',
            ','.join(self.fastas),
            ' ',
            self.bowtie2_index
        ))
        subprocess.check_call(cmd, shell=True)

    def download_bowtie2_index_from_s3(self):
        paths = [
            'cap2/databases/2020-06-08/hg38/hg38.fa.gz',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.1.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.2.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.3.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.4.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.rev.1.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.rev.2.bt2',
        ]
        for path in paths:
            cmd = (
                'wget '
                f'--directory-prefix={dirname(self.output()["bt2_index_1"].path)} '
                f'https://s3.wasabisys.com/metasub-microbiome/{path} '

            )
            self.run_cmd(cmd)

    def run(self):
        self.build_bowtie2_index_from_fasta()
Source code for cap2.pipeline.databases.mouse_removal_db

MetaSUB CAP

Navigation

Related Topics