Source code for cap2.pipeline.assembly.metaspades


import luigi
import shutil
from os.path import join, dirname, basename, isdir
from shutil import rmtree

from ..utils.cap_task import CapTask
from ..config import PipelineConfig
from ..utils.conda import CondaPackage
from ..preprocessing import CleanReads


[docs]class MetaspadesAssembly(CapTask): module_description = """ This module assembles reads into contigs. Motivation: Assembly can help to find large order genetic structures. Negatives: assembly is consistently being refined to be more efficient and effective. Misassemblies are possible. """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.pkg = CondaPackage( package="spades", executable="metaspades.py", channel="bioconda", config_filename=self.config_filename ) self.reads = CleanReads( sample_name=self.sample_name, pe1=self.pe1, pe2=self.pe2, config_filename=self.config_filename ) self.config = PipelineConfig(self.config_filename) self.exc = self.pkg.bin if self.config.exc_metaspades is not None: self.exc = self.config.exc_metaspades def tool_version(self): return self.run_cmd(f'{self.pkg.bin} --version').stderr.decode('utf-8') @classmethod def _module_name(cls): return 'metaspades' def requires(self): return self.pkg, self.reads @classmethod def version(cls): return 'v0.3.0' @classmethod def dependencies(cls): return ['spades', CleanReads] def output(self): return { 'contigs': self.get_target('contigs', 'fasta'), 'contig_paths': self.get_target('contigs', 'paths'), 'scaffolds_fasta': self.get_target('scaffolds', 'fasta'), 'scaffolds_paths': self.get_target('scaffolds', 'paths'), 'fastg': self.get_target('graph', 'fastg'), } def _run(self): out_dir = f'{self.out_dir}/tmp_metaspades_out.{self.sample_name}' if isdir(out_dir): shutil.rmtree(out_dir) cmd = ''.join(( self.exc, ' --only-assembler ', # we start from error corrected reads ' -1 ', self.reads.output()["clean_reads_1"].path, ' -2 ', self.reads.output()["clean_reads_2"].path, f' -t {self.cores} ', ' -m 200 ', f' -o {out_dir}' )) self.run_cmd(cmd) pairs_to_move = [ ('contigs.fasta', 'contigs'), ('scaffolds.fasta', 'scaffolds_fasta'), ('scaffolds.paths', 'scaffolds_paths'), ('assembly_graph.fastg', 'fastg'), ('contigs.paths', 'contig_paths'), ] out = self.output() for cur, new in pairs_to_move: cur = f'{out_dir}/{cur}' new = out[new].path self.run_cmd(f'mv {cur} {new}') rmtree(out_dir)