Source code for cap2.pipeline.preprocessing.fastqc


import luigi
import subprocess
from os.path import join, dirname, basename

from ..utils.cap_task import CapTask
from ..config import PipelineConfig
from ..utils.conda import CondaPackage
from .base_reads import BaseReads


class FastQC(CapTask):
    """Pipeline task that runs FastQC on a sample's first base-reads file.

    The task requires the FastQC conda package and the `BaseReads` task,
    and exposes two targets: the HTML report and the zip archive.
    """

    module_description = """
    FastQC computes a number of quality control metrics.

    Motivation: Quality control is important to identify
    systematic errors and sequencing issues. FastQC includes
    a broad suite of checks.

    Negatives: FastQC only runs on a subset of reads though
    this is usually sufficient.
    """
    MODULE_VERSION = 'v0.2.1'

    def __init__(self, *args, **kwargs):
        """Set up the conda package, the upstream reads task and the config."""
        super().__init__(*args, **kwargs)
        self.pkg = CondaPackage(
            package="fastqc=0.11.9",
            executable="fastqc",
            channel="bioconda",
            config_filename=self.config_filename,
        )
        self.reads = BaseReads.from_cap_task(self)
        self.config = PipelineConfig(self.config_filename)
        self.out_dir = self.config.out_dir

    @classmethod
    def _module_name(cls):
        """Return the short name of this module."""
        return 'fastqc'

    def tool_version(self):
        """Return the decoded stderr of running the executable with `--version`."""
        # NOTE(review): this reads stderr only; confirm that is where the
        # version string ends up for this tool / `run_cmd` combination.
        return self.run_cmd(f'{self.pkg.bin} --version').stderr.decode('utf-8')

    @classmethod
    def dependencies(cls):
        """Return the package spec and task class this module depends on."""
        return ["fastqc==0.11.9", BaseReads]

    @property
    def _report(self):
        """File name under which `_run` looks for the HTML report."""
        # NOTE(review): the name is derived from `self.pe1`, while `_run`
        # hands the `base_reads_1` path to fastqc -- confirm both share the
        # same basename. Everything from the first '.f' onwards is dropped.
        return basename(self.pe1).split('.f')[0] + '_fastqc.html'

    @property
    def _zip_output(self):
        """File name under which `_run` looks for the zip archive."""
        # NOTE(review): same `self.pe1` vs `base_reads_1` caveat as `_report`.
        return basename(self.pe1).split('.f')[0] + '_fastqc.zip'

    def requires(self):
        """Return the tasks that must complete before this one runs."""
        return self.pkg, self.reads

    def output(self):
        """Return the report and zip targets keyed by name."""
        return {
            'report': self.get_target('report', 'html'),
            'zip_output': self.get_target('zip_out', 'zip'),
        }

    def _run(self):
        """Run FastQC, then move its report and zip onto the task's targets.

        All steps are chained with `&&` so that a failure of any of them
        (FastQC itself or either move) stops the chain and is reflected in
        the exit status of the command handed to `run_cmd`.
        """
        # fixme: redirect output to loggers
        targets = self.output()
        report_path = targets['report'].path
        zip_path = targets['zip_output'].path
        # A target without a directory component means "current directory";
        # an empty string here would yield a bare `-o` and `/name` sources.
        outdir = dirname(report_path) or '.'

        fastqc_cmd = ' '.join([
            # fastqc uses system perl which we do not assume access to
            self.pkg._env.bin + '/perl',
            self.pkg.bin,
            '-t', str(self.cores),
            self.reads.output()["base_reads_1"].path,
            f'-o {outdir}',
        ])
        move_report = f'mv {join(outdir, self._report)} {report_path}'
        move_zip = f'mv {join(outdir, self._zip_output)} {zip_path}'

        # `a && b; c` would run `c` unconditionally and report only its
        # status, hiding a failed report move; `&&` throughout avoids that.
        cmd = ' && '.join([fastqc_cmd, move_report, move_zip])
        self.run_cmd(cmd)