Skip to content

Commit 043c568

Browse files
LouisLeNezetLouisLeNezetnvnieuwk
authored
Add BEAGLE5 imputation subworkflow (#9550)
* Update glimpse2 sbwf * Update test * Add region to beagle5 * Add subworkflow * Fix linting * Fix linting * Fix linting * Update subworkflows/nf-core/vcf_impute_beagle5/main.nf Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Add comment * Update grouping and test * Remove tag * Revert change glimpse2 reference * Revert change glimpse2 sbwf * Revert change glimpse2 sbwf * Revert change glimpse2 sbwf --------- Co-authored-by: LouisLeNezet <louislenezet@gmaio.com> Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>
1 parent 73072bd commit 043c568

File tree

9 files changed

+699
-26
lines changed

9 files changed

+699
-26
lines changed

modules/nf-core/beagle5/beagle/main.nf

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ process BEAGLE5_BEAGLE {
88
'biocontainers/beagle:5.5_27Feb25.75f--hdfd78af_0' }"
99

1010
input:
11-
tuple val(meta), path(vcf), path(vcf_index), path(refpanel), path(refpanel_index), path(genmap), path(exclsamples), path(exclmarkers)
11+
// Including `val(region)` to prevent errors with multi-chromosome VCFs and single-chromosome reference panels.
12+
// This enhances clarity and simplifies implementation in the subworkflow.
13+
tuple val(meta), path(vcf), path(vcf_index), path(refpanel), path(refpanel_index), path(genmap), path(exclsamples), path(exclmarkers), val(region)
1214

1315
output:
1416
tuple val(meta), path("*.vcf.gz"), emit: vcf
@@ -22,7 +24,8 @@ process BEAGLE5_BEAGLE {
2224
def args = task.ext.args ?: ''
2325
def prefix = task.ext.prefix ?: "${meta.id}.bglout"
2426
def ref_command = refpanel ? "ref=$refpanel" : ""
25-
def map_command = genmap ? "map=$genmap" : ""
27+
def map_command = genmap ? "map=$genmap" : ""
28+
def region_cmd = region ? "chrom=$region" : ""
2629
def excludesamples_command = exclsamples ? "excludesamples=$exclsamples" : ""
2730
def excludemarkers_command = exclmarkers ? "excludemarkers=$exclmarkers" : ""
2831

@@ -40,8 +43,9 @@ process BEAGLE5_BEAGLE {
4043
$args \\
4144
${ref_command} \\
4245
${map_command} \\
46+
${region_cmd} \\
4347
${excludesamples_command} \\
44-
${excludemarkers_command} \\
48+
${excludemarkers_command}
4549
4650
cat <<-END_VERSIONS > versions.yml
4751
"${task.process}":

modules/nf-core/beagle5/beagle/meta.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ input:
5757
the analysis
5858
pattern: "*.*"
5959
ontologies: []
60+
- region:
61+
type: string
62+
description: Region in which to perform imputation (passed to Beagle as `chrom=`)
63+
pattern: "(chr)?\\d*:\\d*-\\d*"
6064
output:
6165
vcf:
6266
- - meta:

modules/nf-core/beagle5/beagle/tests/main.nf.test

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ nextflow_process {
55
script "../main.nf"
66
process "BEAGLE5_BEAGLE"
77

8-
config "./nextflow.config"
9-
108
tag "modules"
119
tag "modules_nfcore"
1210
tag "beagle5"
@@ -15,16 +13,13 @@ nextflow_process {
1513
test("test-beagle5-beagle") {
1614

1715
when {
18-
params {
19-
module_args = ""
20-
}
2116
process {
2217
"""
2318
input[0] = [
2419
[ id:'test', single_end:false ], // meta map
2520
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true),
2621
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true),
27-
[], [], [], [], []
22+
[], [], [], [], [], []
2823
]
2924
"""
3025
}
@@ -46,9 +41,6 @@ nextflow_process {
4641
test("test-beagle5-beagle-ref") {
4742

4843
when {
49-
params {
50-
module_args = "chrom=chr22"
51-
}
5244
process {
5345
"""
5446
input[0] = [
@@ -57,7 +49,7 @@ nextflow_process {
5749
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true),
5850
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
5951
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
60-
[], [], []
52+
[], [], [], "chr22"
6153
]
6254
"""
6355
}
@@ -78,9 +70,6 @@ nextflow_process {
7870

7971
test("test-beagle5-beagle-ref-map") {
8072
when {
81-
params {
82-
module_args = "chrom=chr22"
83-
}
8473
process {
8574
"""
8675
input[0] = [
@@ -90,7 +79,7 @@ nextflow_process {
9079
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
9180
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
9281
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.plink.map"),
93-
[], []
82+
[], [], "chr22"
9483
]
9584
"""
9685
}
@@ -112,16 +101,13 @@ nextflow_process {
112101
test("test-beagle5-beagle-ref-map - stub") {
113102
options '-stub'
114103
when {
115-
params {
116-
module_args = ""
117-
}
118104
process {
119105
"""
120106
input[0] = [
121107
[ id:'test', single_end:false ], // meta map
122108
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true),
123109
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true),
124-
[], [], [], [], []
110+
[], [], [], [], [], []
125111
]
126112
"""
127113
}

modules/nf-core/beagle5/beagle/tests/nextflow.config

Lines changed: 0 additions & 5 deletions
This file was deleted.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
include { BEAGLE5_BEAGLE } from '../../../modules/nf-core/beagle5/beagle'
2+
include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view'
3+
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate'
4+
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_BEAGLE } from '../../../modules/nf-core/bcftools/index'
5+
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index'
6+
7+
workflow VCF_IMPUTE_BEAGLE5 {
8+
9+
take:
10+
ch_input // channel (mandatory): [ [id], vcf, tbi ]
11+
ch_panel // channel (mandatory): [ [panel, chr], vcf, tbi ]
12+
ch_chunks // channel (optional) : [ [panel, chr], regionout ]
13+
ch_map // channel (optional) : [ [chr], map]
14+
15+
main:
16+
ch_versions = channel.empty()
17+
18+
// Branch input files based on format
19+
ch_input
20+
.branch { _meta, vcf, _tbi ->
21+
bcf: vcf.name.contains('.bcf')
22+
vcf: vcf.name.contains('.vcf')
23+
other: true
24+
}
25+
.set { ch_input_branched }
26+
27+
ch_input_branched.other.map{ _meta, vcf, _tbi ->
28+
error "ERROR: ${vcf.name} in ch_input channel must be in VCF or BCF format."
29+
}
30+
31+
// Convert BCF to VCF if necessary
32+
BCFTOOLS_VIEW(
33+
ch_input_branched.bcf,
34+
[], [], []
35+
)
36+
ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first())
37+
38+
// Combine VCF files
39+
ch_ready_vcf = ch_input_branched.vcf
40+
.mix(BCFTOOLS_VIEW.out.vcf
41+
.join(
42+
BCFTOOLS_VIEW.out.csi
43+
.mix(BCFTOOLS_VIEW.out.tbi)
44+
)
45+
)
46+
47+
// Prepare input channels for BEAGLE5 by combining VCF, panel, and map files
48+
ch_chunks_counts = ch_chunks
49+
.groupTuple()
50+
.map { metaPC, regionouts ->
51+
[metaPC, regionouts.size()]
52+
}
53+
54+
ch_panel_map = ch_panel
55+
.combine(ch_map, by: 0)
56+
.combine(ch_chunks, by: 0)
57+
.combine(ch_chunks_counts, by: 0)
58+
59+
ch_panel_map.ifEmpty{
60+
error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_panel and ch_map channel as input."
61+
}
62+
63+
ch_beagle_input = ch_ready_vcf
64+
.combine(ch_panel_map)
65+
.map { metaI, input_vcf, input_index, metaPC, panel_vcf, panel_index, map, regionout, regionsize -> [
66+
metaI + metaPC + ["regionout": regionout, "regionsize": regionsize],
67+
input_vcf, input_index,
68+
panel_vcf, panel_index,
69+
map, [], [], regionout
70+
]}
71+
72+
// Run BEAGLE5 imputation
73+
BEAGLE5_BEAGLE(ch_beagle_input)
74+
ch_versions = ch_versions.mix(BEAGLE5_BEAGLE.out.versions.first())
75+
76+
// Index the imputed VCF files
77+
BCFTOOLS_INDEX_BEAGLE(BEAGLE5_BEAGLE.out.vcf)
78+
ch_versions = ch_versions.mix(BCFTOOLS_INDEX_BEAGLE.out.versions.first())
79+
80+
// Ligate the imputed chunks that share the same meta map into a single file and index it
81+
ligate_input = BEAGLE5_BEAGLE.out.vcf
82+
.join(
83+
BCFTOOLS_INDEX_BEAGLE.out.tbi
84+
.mix(BCFTOOLS_INDEX_BEAGLE.out.csi)
85+
)
86+
.map{ meta, vcf, index ->
87+
def keysToKeep = meta.keySet() - ['regionout', 'regionsize']
88+
[
89+
groupKey(meta.subMap(keysToKeep), meta.regionsize),
90+
vcf, index
91+
]
92+
}
93+
.groupTuple()
94+
.map{ groupKeyObj, vcf, index ->
95+
// Extract the actual meta from the groupKey
96+
def meta = groupKeyObj.getGroupTarget()
97+
[meta, vcf, index]
98+
}
99+
100+
GLIMPSE2_LIGATE( ligate_input )
101+
ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )
102+
103+
BCFTOOLS_INDEX_LIGATE( GLIMPSE2_LIGATE.out.merged_variants )
104+
ch_versions = ch_versions.mix( BCFTOOLS_INDEX_LIGATE.out.versions.first() )
105+
106+
// Join imputed and index files
107+
ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants
108+
.join(
109+
BCFTOOLS_INDEX_LIGATE.out.tbi
110+
.mix(BCFTOOLS_INDEX_LIGATE.out.csi)
111+
)
112+
113+
emit:
114+
vcf_index = ch_vcf_index // channel: [ [id, panel, chr], vcf, index ]
115+
versions = ch_versions // channel: [ versions.yml ]
116+
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
2+
name: VCF_IMPUTE_BEAGLE5
3+
description: |
4+
Subworkflow to impute VCF files using BEAGLE5 software. The subworkflow
5+
takes VCF files, a phased reference panel, genetic maps and chunk regions to perform imputation
6+
and outputs phased and imputed VCF files.
7+
The meta maps of all channels, except ch_input, are used as keys to join the channels together.
8+
"regionout" and "regionsize" keys will be added to the meta map to distinguish the different
9+
files before ligation and therefore should not be used in the input meta maps.
10+
keywords:
11+
- VCF
12+
- imputation
13+
- beagle5
14+
- phasing
15+
components:
16+
- beagle5/beagle
17+
- bcftools/index
18+
- bcftools/view
19+
- glimpse2/ligate
20+
input:
21+
- ch_input:
22+
description: Channel with input data
23+
structure:
24+
- meta:
25+
type: map
26+
description: |
27+
Metadata map containing sample information
28+
- vcf:
29+
type: file
30+
description: Input VCF files
31+
pattern: "*.{vcf,bcf}{.gz}?"
32+
- index:
33+
type: file
34+
description: Input index file
35+
pattern: "*.{tbi,csi}"
36+
- ch_panel:
37+
description: Channel with phased reference panel data
38+
structure:
39+
- meta:
40+
type: map
41+
description: |
42+
Metadata map that will be combined with the input data map
43+
- vcf:
44+
type: file
45+
description: Reference panel VCF files by chromosomes
46+
pattern: "*.{vcf,bcf,vcf.gz}"
47+
- index:
48+
type: file
49+
description: Reference panel VCF index files
50+
pattern: "*.{tbi,csi}"
51+
- ch_chunks:
52+
description: Channel containing the region to impute
53+
structure:
54+
- meta:
55+
type: map
56+
description: |
57+
Metadata map containing chromosome information
58+
- regionout:
59+
type: string
60+
description: Region to perform the phasing on
61+
pattern: "[chr]+[0-9]+:[0-9]+-[0-9]+"
62+
- ch_map:
63+
description: Channel with genetic map data
64+
structure:
65+
- meta:
66+
type: map
67+
description: |
68+
Metadata map containing chromosome information
69+
- map:
70+
type: file
71+
description: Plink format genetic map files
72+
pattern: "*.map"
73+
output:
74+
- vcf_index:
75+
description: Channel with imputed and phased VCF files
76+
structure:
77+
- meta:
78+
type: map
79+
description: |
80+
Metadata map of the target input file combined with the reference panel map.
81+
- vcf:
82+
type: file
83+
description: VCF imputed and phased file by sample
84+
pattern: "*.{vcf,bcf,vcf.gz}"
85+
- index:
86+
type: file
87+
description: VCF index file
88+
pattern: "*.{tbi,csi}"
89+
- versions:
90+
description: Channel containing software versions file
91+
structure:
92+
- versions.yml:
93+
type: file
94+
description: File containing versions of the software used
95+
authors:
96+
- "@LouisLeNezet"
97+
- "@gichas"
98+
maintainers:
99+
- "@LouisLeNezet"
100+
- "@gichas"

0 commit comments

Comments
 (0)