Commit 1d5a3820 authored by timdiels
Browse files

Use '_' as gene set name separator

':' caused Nextflow errors: e.g. with 'arath:wg*' as the output file pattern
given to publishDir, Nextflow would try to look for a file named 'arath'.
parent c49144a7
......@@ -40,7 +40,7 @@ import attr
is_flag=True,
help=join_multiline('''\
Output a gene set per species containing all genes of the species. The
sets are named `{species}:wg`.
sets are named `{species}_wg`.
'''
)
)
......@@ -59,7 +59,7 @@ import attr
type=click.Path(exists=True, dir_okay=False),
help=join_multiline('''\
Output a gene set per species per GO term containing all genes of the
species with that GO term. The sets are named `{species}:{go_term}`. The
species with that GO term. The sets are named `{species}_{go_term}`. The
argument should be a CSV file with header annotating all genes with GO
terms. It must have at least a 'gene', 'species' and
'blast2go_go_terms', 'interproscan_go_terms' column. Multiple GO terms
......@@ -162,7 +162,7 @@ def whole_genome_sets(genes, desired_species):
return item[1]
gene_sets = groupby(species, genes.items())
for species_, items in gene_sets.items():
name = '{}:wg'.format(species_)
name = '{}_wg'.format(species_)
if species_ in desired_species:
genes_ = tuple(item[0] for item in items)
yield GeneSet(name, genes_)
......@@ -186,7 +186,7 @@ def go_gene_sets(genes, desired_species, go_file):
# Yield GeneSet for each group
for species, groups in go_groups.items():
for go_term, genes_ in groups.items():
name = '{}:{}'.format(species, go_term)
name = '{}_{}'.format(species, go_term.replace(':', '_'))
yield GeneSet(name, genes_)
@attr.s(slots=True, frozen=True)
......
Symbol Gene set name
A1 A:wg
A2 A:wg
B1 B:wg
B2 B:wg
C1 C:wg
C2 C:wg
C3 C:wg
C4 C:wg
A1 A:GO:0000001
A2 A:GO:0000001
B1 B:GO:0000001
B2 B:GO:0000001
B1 B:GO:0000003
B2 B:GO:0000003
C1 C:GO:0000010
C2 C:GO:0000010
C3 C:GO:0000010
\ No newline at end of file
A1 A_wg
A2 A_wg
B1 B_wg
B2 B_wg
C1 C_wg
C2 C_wg
C3 C_wg
C4 C_wg
A1 A_GO_0000001
A2 A_GO_0000001
B1 B_GO_0000001
B2 B_GO_0000001
B1 B_GO_0000003
B2 B_GO_0000003
C1 C_GO_0000010
C2 C_GO_0000010
C3 C_GO_0000010
\ No newline at end of file
......@@ -73,9 +73,9 @@ def test_whole_genome():
'--wg',
),
expected=(
'A:wg',
'B:wg',
'C:wg',
'A_wg',
'B_wg',
'C_wg',
)
)
......@@ -90,10 +90,10 @@ def test_go():
'--go', resource_path('func_annot.csv'),
),
expected=(
'A:GO:0000001',
'B:GO:0000001',
'B:GO:0000003',
'C:GO:0000010',
'A_GO_0000001',
'B_GO_0000001',
'B_GO_0000003',
'C_GO_0000010',
)
)
......@@ -112,11 +112,11 @@ def test_subset():
'--wg',
),
expected=(
'A:GO:0000001',
'A:wg',
'B:GO:0000001',
'B:GO:0000003',
'B:wg',
'A_GO_0000001',
'A_wg',
'B_GO_0000001',
'B_GO_0000003',
'B_wg',
)
)
......@@ -134,7 +134,7 @@ def test_size():
'--wg',
),
expected=(
'C:GO:0000010',
'C:wg',
'C_GO_0000010',
'C_wg',
)
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment