diff --git a/main.nf b/main.nf index 83bc9b30dd93b2cfddfd9f8ce81e2bc370187692..353959c9796cc12cf655f4007adbe4f4d7444622 100644 --- a/main.nf +++ b/main.nf @@ -3,7 +3,11 @@ include { prepare_genome_files } from './subworkflows/prepare_genome_files.nf' include { prepare_annotation_files } from './subworkflows/prepare_annotation_files.nf' -include { copyFilesForTracking } from './modules/copyFilesForTracking.nf' +include { copyFilesForTracking as copySpeciesFiles } from './modules/copyFilesForTracking.nf' +include { copyFilesForTracking as copyAnnotationFiles } from './modules/copyFilesForTracking.nf' +include { copyFilesForTracking as copyTranscriptomicFiles } from './modules/copyFilesForTracking.nf' +include { copyFilesForTracking as copyQtlFiles } from './modules/copyFilesForTracking.nf' +include { copyFilesForTracking as copySpeciesInfoFiles } from './modules/copyFilesForTracking.nf' include { getGeneFamilies } from './modules/getGeneFamilies.nf' include { getGenomeInfo } from './modules/getGenomeInfo.nf' include { getGeneToRnaEdges } from './modules/getGeneToRnaEdges.nf' @@ -97,38 +101,84 @@ workflow { } .set { annotation_ch } - if(!params.transcriptomic_files) { - error("please specify --transcriptomic_files option") + copySpeciesFiles(Channel.fromPath(params.species_files, checkIfExists: false).map { tuple('species', it) }) + copyAnnotationFiles(Channel.fromPath(params.annotation_files, checkIfExists: false).map { tuple('annotation', it) }) + + + if(params.transcriptomic_files ) { + transcriptomic_f_ch = Channel + .fromPath(params.transcriptomic_files, checkIfExists: false) + .splitCsv(header:true, sep:"\t", strip:true) + .map { row -> tuple(row.ID, row.status, file(row.bioinfo_protocol), file(row.dataset), file(row.counts), file(row.tpm), file(row.metadata), file(row.samples_annotation)) } + .set { transcriptomics_ch } + + getTranscriptomicCounts(transcriptomics_ch) + + merge_results(getTranscriptomicCounts.out.bioinfo_protocol, subdir = '') + merge_results(getTranscriptomicCounts.out.dataset, subdir = '') + merge_results(getTranscriptomicCounts.out.condition, subdir = '') + merge_results(getTranscriptomicCounts.out.free_term, subdir = '') + merge_results(getTranscriptomicCounts.out.sample, subdir = '') + merge_results(getTranscriptomicCounts.out.dataset_bioinfo_protocol, subdir = '') + merge_results(getTranscriptomicCounts.out.condition_dataset, subdir = '') + merge_results(getTranscriptomicCounts.out.sample_condition, subdir = '') + merge_results(getTranscriptomicCounts.out.gene_sample, subdir = '') + merge_results(getTranscriptomicCounts.out.gene_condition, subdir = '') + merge_results(getTranscriptomicCounts.out.condition_onto, subdir = params.merged_subdir) + merge_results(getTranscriptomicCounts.out.condition_free_term, subdir = '') + + copyTranscriptomicFiles(Channel.fromPath(params.transcriptomic_files, checkIfExists: false).map { tuple('transcriptomic', it) }) + + + } - transcriptomic_f_ch = Channel - .fromPath(params.transcriptomic_files, checkIfExists: false) - if(!params.qtl_files) { - error("please specify --qtl_files option") + if(params.qtl_files) { + qtl_f_ch = Channel + .fromPath(params.qtl_files, checkIfExists: false) + .splitCsv(header:true, sep:"\t", strip:true) + .map { row -> tuple(row.ID, row.status, row.species, file(row.dataset), file(row.qtl), file(row.qtl_annotation)) } + .set { qtl_ch } + + qtl_ch + .map { tuple( it[2], *it ) } + .combine( samples_ch.gff3, by: 0 ) + .combine( samples_ch.chr_conv, by: 0 ) + .map { it[1..-1] } + .set { qtl_genome_ch } + + + getQTLInfo(qtl_genome_ch) + + merge_results(getQTLInfo.out.qtl, subdir = '') + merge_results(getQTLInfo.out.dataset, subdir = '') + merge_results(getQTLInfo.out.population, subdir = '') + merge_results(getQTLInfo.out.site, subdir = '') + merge_results(getQTLInfo.out.year, subdir = '') + merge_results(getQTLInfo.out.free_term, subdir = '') + merge_results(getQTLInfo.out.gene_qtl, subdir = '') + merge_results(getQTLInfo.out.qtl_chromosome, subdir = '') + merge_results(getQTLInfo.out.qtl_dataset, subdir = '') + merge_results(getQTLInfo.out.qtl_population, subdir = '') + merge_results(getQTLInfo.out.qtl_site, subdir = '') + merge_results(getQTLInfo.out.qtl_year, subdir = '') + merge_results(getQTLInfo.out.qtl_onto, subdir = params.merged_subdir) + merge_results(getQTLInfo.out.qtl_free_term, subdir = '') + + copyQtlFiles(Channel.fromPath(params.qtl_files, checkIfExists: false).map { tuple('qtl', it) }) + } - qtl_f_ch = Channel - .fromPath(params.qtl_files, checkIfExists: false) - // copy input files to results directory - copyFilesForTracking(Channel.fromPath(params.species_files, checkIfExists: false), - Channel.fromPath(params.annotation_files, checkIfExists: false), - transcriptomic_f_ch, qtl_f_ch, sp_info_ch) + - //-------------------------------------------------// - // Prepare inputs // - //-------------------------------------------------// + // copy input files to results directory + copySpeciesInfoFiles(Channel.fromPath(params.species_info, checkIfExists: false).map { tuple('species_info', it) }) - // transcriptomic files - transcriptomic_f_ch - .splitCsv(header:true, sep:"\t", strip:true) - .map { row -> tuple(row.ID, row.status, file(row.bioinfo_protocol), file(row.dataset), file(row.counts), file(row.tpm), file(row.metadata), file(row.samples_annotation)) } - .set { transcriptomics_ch } + //copyFilesForTracking(Channel.fromPath(params.species_files, checkIfExists: false), + //Channel.fromPath(params.annotation_files, checkIfExists: false), + //transcriptomic_f_ch, qtl_f_ch, sp_info_ch) - // qtl files - qtl_f_ch - .splitCsv(header:true, sep:"\t", strip:true) - .map { row -> tuple(row.ID, row.status, row.species, file(row.dataset), file(row.qtl), file(row.qtl_annotation)) } - .set { qtl_ch } + //-------------------------------------------------// // Get basic genetic nodes and edges // @@ -192,19 +242,12 @@ workflow { // need to add the gff3 and chr_conv table to all datasets lines of the qtl // table, since multiple datasets for each species, join is not possible - qtl_ch - .map { tuple( it[2], *it ) } - .combine( samples_ch.gff3, by: 0 ) - .combine( samples_ch.chr_conv, by: 0 ) - .map { it[1..-1] } - .set { qtl_genome_ch } - getQTLInfo(qtl_genome_ch) + //-------------------------------------------------// // Get transcriptomics info // //-------------------------------------------------// - getTranscriptomicCounts(transcriptomics_ch) //-------------------------------------------------// // Create missing ontology terms // @@ -251,32 +294,4 @@ workflow { // interpro merge_results(getInterProAnnotation.out.interpro, subdir = '') merge_results(getInterProAnnotation.out.protein_interpro, subdir = '') - // qtl - merge_results(getQTLInfo.out.qtl, subdir = '') - merge_results(getQTLInfo.out.dataset, subdir = '') - merge_results(getQTLInfo.out.population, subdir = '') - merge_results(getQTLInfo.out.site, subdir = '') - merge_results(getQTLInfo.out.year, subdir = '') - merge_results(getQTLInfo.out.free_term, subdir = '') - merge_results(getQTLInfo.out.gene_qtl, subdir = '') - merge_results(getQTLInfo.out.qtl_chromosome, subdir = '') - merge_results(getQTLInfo.out.qtl_dataset, subdir = '') - merge_results(getQTLInfo.out.qtl_population, subdir = '') - merge_results(getQTLInfo.out.qtl_site, subdir = '') - merge_results(getQTLInfo.out.qtl_year, subdir = '') - merge_results(getQTLInfo.out.qtl_onto, subdir = params.merged_subdir) - merge_results(getQTLInfo.out.qtl_free_term, subdir = '') - // transcriptomics - merge_results(getTranscriptomicCounts.out.bioinfo_protocol, subdir = '') - merge_results(getTranscriptomicCounts.out.dataset, subdir = '') - merge_results(getTranscriptomicCounts.out.condition, subdir = '') - merge_results(getTranscriptomicCounts.out.free_term, subdir = '') - merge_results(getTranscriptomicCounts.out.sample, subdir = '') - merge_results(getTranscriptomicCounts.out.dataset_bioinfo_protocol, subdir = '') - merge_results(getTranscriptomicCounts.out.condition_dataset, subdir = '') - merge_results(getTranscriptomicCounts.out.sample_condition, subdir = '') - merge_results(getTranscriptomicCounts.out.gene_sample, subdir = '') - merge_results(getTranscriptomicCounts.out.gene_condition, subdir = '') - merge_results(getTranscriptomicCounts.out.condition_onto, subdir = params.merged_subdir) - merge_results(getTranscriptomicCounts.out.condition_free_term, subdir = '') } diff --git a/modules/copyFilesForTracking.nf b/modules/copyFilesForTracking.nf index c4998ef2e9e14886d974cf0c7eab0ae961f51fd8..613f9ce56f648f778fd5974ce3b2c2eac6564c5b 100755 --- a/modules/copyFilesForTracking.nf +++ b/modules/copyFilesForTracking.nf @@ -3,20 +3,12 @@ //*************************************************// process copyFilesForTracking { - // publishDir "${params.outdir}/conf_files/", mode: params.publish_dir_mode, enabled: params.publish_copyFilesForTracking input: - path species - path annotation - path transcriptomic - path qtl - path sp_info + tuple val(name), path(file) + output: - path species - path annotation - path transcriptomic - path qtl - path sp_info + path file script: """