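"""cap2.pipeline.short_read.humann2

Pipeline tasks that align cleaned short reads to UniRef90 with DIAMOND and
then run HUMAnN on the alignment to produce functional profiles.
"""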


import luigi
import subprocess
from os.path import join, dirname, basename

from ..utils.cap_task import CapTask
from ..config import PipelineConfig
from ..utils.conda import CondaPackage, PyPiPackage
from ..databases.uniref import Uniref90, HumannIdTable
from ..preprocessing.clean_reads import CleanReads


class MicaUniref90(CapTask):
    """Align a sample's cleaned reads against UniRef90 using DIAMOND.

    The class keeps its historical "Mica" name, but the command it builds
    invokes ``diamond blastx``. Its single output is a gzip-compressed
    alignment registered under the ``'m8'`` key.
    """
    module_description = """
    This module aligns reads to UniRef90 in
    preparation for functional profiling.

    Motivation: UniRef90 is a large database of functional genes. 

    Negatives: Fairly slow. Many of the genes in UniRef90 are
    merely predicted functions.

    Note: this module currently uses Diamond not MiCA
    """
    MODULE_VERSION = 'v0.2.0'

    def __init__(self, *args, **kwargs):
        """Set up the DIAMOND package, the UniRef90 database and the reads task.

        All positional and keyword arguments are forwarded unchanged to
        ``CapTask.__init__``.
        """
        super().__init__(*args, **kwargs)
        # DIAMOND is installed from the bioconda channel; `self.pkg.bin` is
        # the executable used by `tool_version` and `_run`.
        self.pkg = CondaPackage(
            package="diamond",
            executable="diamond",
            channel="bioconda",
            config_filename=self.config_filename,
        )
        self.config = PipelineConfig(self.config_filename)
        self.out_dir = self.config.out_dir
        self.db = Uniref90(config_filename=self.config_filename)
        self.reads = CleanReads.from_cap_task(self)

    def tool_version(self):
        """Run ``diamond --version`` and return its stderr decoded as UTF-8."""
        return self.run_cmd(f'{self.pkg.bin} --version').stderr.decode('utf-8')

    def requires(self):
        """Return the upstream tasks: DIAMOND package, database, cleaned reads."""
        return self.pkg, self.db, self.reads

    @classmethod
    def _module_name(cls):
        """Return the module name used for this task (``'diamond'``)."""
        return 'diamond'

    @classmethod
    def dependencies(cls):
        """Return the pinned tool versions and task classes this module depends on.

        NOTE(review): 'humann==3.0.0a4' is listed although this class only
        invokes diamond -- presumably deliberate; confirm before removing.
        """
        return ['humann==3.0.0a4', 'diamond==0.9.32', Uniref90, CleanReads]

    def output(self):
        """Return the output targets: ``{'m8': <gzipped alignment target>}``."""
        return {
            'm8': self.get_target('uniref90', 'm8.gz'),
        }

    def _run(self):
        """Run ``diamond blastx`` on the reads and gzip its stdout to the m8 target.

        Only the ``clean_reads_1`` output of the reads task is passed as the
        query. The whole pipeline is handed to ``self.run_cmd`` as one shell
        command string.
        """
        cmd = (
            f'{self.pkg.bin} blastx '
            f'--threads {self.cores} '
            f'-d {self.db.diamond_index} '
            f'-q {self.reads.output()["clean_reads_1"].path} '
            '--block-size 6 '
            # No output file is given to diamond: the alignment goes to
            # stdout and is compressed on the fly.
            f'| gzip > {self.output()["m8"].path} '
        )
        self.run_cmd(cmd)


class Humann2(CapTask):
    """Run HUMAnN on a sample's UniRef90 alignment to build functional profiles.

    Consumes the ``'m8'`` output of ``MicaUniref90`` and produces three TSV
    targets: ``'genes'``, ``'path_abunds'`` and ``'path_covs'``.
    """
    module_description = """
    This module provides functional profiles of microbiomes.

    Motivation: Functional profiles are sometimes more stable than
    taxonomic profiles and can provided metabolic and adaptive
    insights. 

    Negatives: Functional profiling is somewhat less well benchmarked
    than taxonomic profiling and metabolic pathways are often based on
    model organisms.

    Notes: This class is named Humann2 for historical reasons. It uses
    humann3
    """
    MODULE_VERSION = 'v0.3.0'

    def __init__(self, *args, **kwargs):
        """Set up the HUMAnN package, the id-mapping table and the alignment task.

        All positional and keyword arguments are forwarded unchanged to
        ``CapTask.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.config = PipelineConfig(self.config_filename)
        # Unlike the alignment task, the tool handle lives on `self.humann`
        # (there is no `self.pkg` set by this class).
        self.humann = PyPiPackage(
            package="humann==3.0.0a4",
            executable="humann",
            config_filename=self.config_filename,
        )
        self.db = HumannIdTable.from_cap_task(self)
        self.out_dir = self.config.out_dir
        self.alignment = MicaUniref90.from_cap_task(self)

    def tool_version(self):
        """Run ``humann --version`` and return its stderr decoded as UTF-8."""
        # Fixed: this previously read `self.pkg.bin`, but `__init__` only
        # defines `self.humann`.
        return self.run_cmd(f'{self.humann.bin} --version').stderr.decode('utf-8')

    def requires(self):
        """Return the upstream tasks: HUMAnN package, alignment, id table."""
        return self.humann, self.alignment, self.db

    @classmethod
    def _module_name(cls):
        """Return the module name used for this task (``'humann'``)."""
        return 'humann'

    @classmethod
    def dependencies(cls):
        """Return the pinned tool version and task classes this module depends on."""
        return ['humann==3.0.0a4', MicaUniref90, HumannIdTable]

    def output(self):
        """Return the TSV targets for gene families and pathway abundance/coverage."""
        return {
            'genes': self.get_target('genes', 'tsv'),
            'path_abunds': self.get_target('path_abunds', 'tsv'),
            'path_covs': self.get_target('path_covs', 'tsv'),
        }

    def _run(self):
        """Run HUMAnN into a scratch directory, then move its tables into place.

        HUMAnN writes into ``<sample_name>_humann2`` (relative to the current
        working directory). The three result tables are moved to this task's
        output targets and the scratch directory is then removed.

        The steps are chained with ``&&`` so the shell stops at the first
        failing step and reports that step's exit status; the scratch
        directory is therefore left in place when a step fails.
        """
        scratch_dir = f'{self.sample_name}_humann2'
        targets = self.output()
        # (file-name suffix written by HUMAnN, key of the matching target)
        tables = (
            ('genefamilies', 'genes'),
            ('pathabundance', 'path_abunds'),
            ('pathcoverage', 'path_covs'),
        )
        steps = [
            f'{self.humann.bin} '
            f'--id-mapping {self.db.humann_id_table} '
            f'--input {self.alignment.output()["m8"].path} '
            f'--output {scratch_dir}'
        ]
        # The glob is expanded by the shell; the prefix HUMAnN puts in front
        # of each suffix is not spelled out here.
        steps.extend(
            f'mv {scratch_dir}/*{suffix}.tsv {targets[key].path}'
            for suffix, key in tables
        )
        steps.append(f'rm -r {scratch_dir}')
        self.run_cmd(' && '.join(steps))
# End of module cap2.pipeline.short_read.humann2 (part of the MetaSUB CAP).