# Source code for cap2.pipeline.databases.human_removal_db
import luigi
from os.path import join, dirname
from glob import glob
import shlex
import subprocess
from ..config import PipelineConfig
from ..utils.conda import CondaPackage
from ..utils.cap_task import CapDbTask
class HumanRemovalDB(CapDbTask):
    """Build or retrieve the bowtie2 index used to remove human reads.

    Depending on the ``db_mode`` of the pipeline configuration, the index is
    either built locally from the ``*.fa.gz`` files found in
    ``<db_dir>/hg38`` or downloaded pre-built over HTTPS.
    """

    config_filename = luigi.Parameter()
    cores = luigi.IntParameter(default=1)
    MODULE_VERSION = 'v1.0.0'

    def __init__(self, *args, **kwargs):
        """Set up the bowtie2 conda package, the config and the input FASTAs."""
        super().__init__(*args, **kwargs)
        self.pkg = CondaPackage(
            package="bowtie2==2.4.1",
            executable="bowtie2-build",
            channel="bioconda",
            config_filename=self.config_filename,
        )
        self.config = PipelineConfig(self.config_filename)
        self.db_dir = self.config.db_dir
        # glob() yields files in arbitrary order; sort so the reference list
        # handed to bowtie2-build is the same on every run.
        self.fastas = sorted(glob(join(self.db_dir, 'hg38') + '/*.fa.gz'))

    def tool_version(self):
        """Return the stderr text of ``<bowtie2-build> --version``, decoded as UTF-8."""
        return self.run_cmd(f'{self.pkg.bin} --version').stderr.decode('utf-8')

    def requires(self):
        """Return the conda package task this task depends on."""
        return self.pkg

    @classmethod
    def _module_name(cls):
        """Return the name identifying this module."""
        return 'bowtie_human_removal_db'

    @classmethod
    def version(cls):
        """Return the version string of this module."""
        # Single source of truth: reuse the class constant instead of
        # repeating the literal.
        return cls.MODULE_VERSION

    @classmethod
    def dependencies(cls):
        """Return the list of dependency identifiers for this module."""
        return ['bowtie2==2.4.1', 'hg38_alt_contigs', '2020-06-01']

    @property
    def bowtie2_index(self):
        """Path prefix of the bowtie2 index files inside ``<db_dir>/hg38``."""
        return join(self.db_dir, 'hg38', 'human_removal.bt2')

    def output(self):
        """Return the task's targets, keyed by name.

        Only the first index file (``<prefix>.1.bt2``) is tracked. The parent
        directory of that file is created as a side effect of this call.
        """
        index = luigi.LocalTarget(self.bowtie2_index + '.1.bt2')
        index.makedirs()
        return {
            'bt2_index_1': index,
        }

    def build_bowtie2_index_from_fasta(self):
        """Build the bowtie2 index from the FASTA files found at init time.

        Raises:
            FileNotFoundError: if no ``*.fa.gz`` file was found in
                ``<db_dir>/hg38``.
            subprocess.CalledProcessError: if the build command exits with a
                non-zero status.
        """
        if not self.fastas:
            # Without this guard the command is issued with a missing
            # positional argument and fails with an unhelpful usage error.
            raise FileNotFoundError(
                'No *.fa.gz files found in '
                f'{join(self.db_dir, "hg38")}; cannot build the bowtie2 index'
            )
        # Path arguments are quoted because the command runs through the
        # shell. NOTE(review): self.pkg.bin is left unquoted, as in
        # tool_version(), in case it holds more than one token - confirm.
        cmd = ' '.join((
            self.pkg.bin,
            f'--threads {self.cores}',
            shlex.quote(','.join(self.fastas)),
            shlex.quote(self.bowtie2_index),
        ))
        subprocess.check_call(cmd, shell=True)

    def download_bowtie2_index_from_s3(self):
        """Download the FASTA and the pre-built index files with ``wget``.

        Files are placed in the directory that holds the ``bt2_index_1``
        output target.
        """
        paths = [
            'cap2/databases/2020-06-08/hg38/hg38.fa.gz',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.1.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.2.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.3.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.4.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.rev.1.bt2',
            'cap2/databases/2020-06-08/hg38/human_removal.bt2.rev.2.bt2',
        ]
        # The destination is the same for every file, so resolve it once
        # rather than calling output() (and its makedirs) per download.
        target_dir = shlex.quote(dirname(self.output()['bt2_index_1'].path))
        for path in paths:
            cmd = (
                'wget '
                f'--directory-prefix={target_dir} '
                f'https://s3.wasabisys.com/metasub-microbiome/{path} '
            )
            self.run_cmd(cmd)

    def run(self):
        """Build the index in build mode; otherwise download it."""
        if self.config.db_mode == PipelineConfig.DB_MODE_BUILD:
            self.build_bowtie2_index_from_fasta()
        else:
            self.download_bowtie2_index_from_s3()