test_gene_sets.py 3.84 KB
Newer Older
timdiels's avatar
timdiels committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Copyright (C) 2017 VIB/BEG/UGent - Tim Diels <timdiels.m@gmail.com>
#
# This file is part of Cedalion.
#
# Cedalion is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Cedalion is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cedalion.  If not, see <http://www.gnu.org/licenses/>.

'''
Tests for the gene sets command line script (``cedalion.scripts.gene_sets``)
'''

from cedalion.scripts.gene_sets import main
from pathlib import Path
import pytil.pkg_resources
import pytest
import csv

@pytest.fixture(autouse=True)
def auto_temp_dir(temp_dir_cwd):
    '''
    Request the ``temp_dir_cwd`` fixture for every test in this module

    The fixture itself does nothing; being autouse, it only forces
    ``temp_dir_cwd`` to be set up for each test. ``temp_dir_cwd`` is defined
    outside this file; presumably it makes a temporary directory the current
    working directory, so that relative output files written by the tests do
    not end up in the source tree -- TODO confirm.
    '''

def resource_path(path):
    '''
    Get the path to a test data file of the gene sets tests

    Parameters
    ----------
    path : str
        Path relative to the ``data/gene_sets/`` resource directory of this
        test package.

    Returns
    -------
    Whatever ``pytil.pkg_resources.resource_path`` returns for the resource;
    callers in this module treat it as a path-like object with ``open``.
    '''
    relative_path = 'data/gene_sets/' + path
    return pytil.pkg_resources.resource_path(__name__, relative_path)

def assert_run(args, expected):
    '''
    Run the gene sets script and assert its output

    The script is invoked with ``--min-size 2`` prepended and
    ``--output actual.tsv`` appended to ``args``. Its output is then compared,
    ignoring row order, to the rows of ``all_gene_sets.tsv`` whose gene set
    name is in ``expected``. Only the ``Symbol`` and ``Gene set name`` columns
    are compared.

    Parameters
    ----------
    args : Iterable[str or Path]
        Command line arguments to pass to the script. Each argument is
        converted with `str` before it is passed on.
    expected : Collection[str]
        Gene set names. Output file should consist of exactly these sets.
    '''
    # Run
    actual_file = Path('actual.tsv')
    # Unpack rather than concatenate, so that any iterable of arguments is
    # accepted (tuple concatenation raised TypeError for e.g. a list).
    args = ('--min-size', '2', *args, '--output', actual_file)
    args = tuple(map(str, args))
    # standalone_mode=False: have main raise/return instead of exiting the
    # interpreter, so failures surface as test errors.
    main(args, standalone_mode=False)

    # Assert
    expected_names = frozenset(expected)

    def read_pairs(path, wanted=None):
        # Yield (symbol, gene set name) per row of a TSV file, optionally
        # keeping only rows whose gene set name is in `wanted`.
        with path.open('r', newline='') as f:
            for row in csv.DictReader(f, delimiter='\t'):
                name = row['Gene set name']
                if wanted is None or name in wanted:
                    yield (row['Symbol'], name)

    actual_rows = sorted(read_pairs(actual_file))
    # Filter all_gene_sets.tsv down to the expected sets
    expected_rows = sorted(
        read_pairs(resource_path('all_gene_sets.tsv'), expected_names)
    )
    assert actual_rows == expected_rows

def test_whole_genome():
    '''
    Test species default and wg output

    No ``--species`` is given, so all species are expected to be included;
    only ``--wg`` is requested, so only whole genome sets are expected.
    '''
    # Run for default species (i.e. all), requesting whole genome sets.
    assert_run(
        args=(
            '--genes', resource_path('gene_info.tsv'),
            '--wg',
        ),
        # One whole genome set per species in the test data
        expected=(
            'A_wg',
            'B_wg',
            'C_wg',
        )
    )

def test_go():
    '''
    Test GO output

    Only ``--go`` is requested (no ``--wg``), so only GO gene sets are
    expected in the output, for all species.
    '''
    # Run requesting just go sets
    assert_run(
        args=(
            '--genes', resource_path('gene_info.tsv'),
            '--go', resource_path('func_annot.csv'),
        ),
        # Names are of the form <species>_GO_<term id>
        expected=(
            'A_GO_0000001',
            'B_GO_0000001',
            'B_GO_0000003',
            'C_GO_0000010',
        )
    )

def test_subset():
    '''
    Test subset of species + all output types

    Species are selected with both the long (``--species``) and the short
    (``-s``) form of the option.
    '''
    # Run for a subset of species, requesting everything.
    # Expect everything from species A and B
    assert_run(
        args=(
            '--species', 'A',
            '-s', 'B',
            '--genes', resource_path('gene_info.tsv'),
            '--go', resource_path('func_annot.csv'),
            '--wg',
        ),
        # No sets of species C, as it was not selected
        expected=(
            'A_GO_0000001',
            'A_wg',
            'B_GO_0000001',
            'B_GO_0000003',
            'B_wg',
        )
    )

def test_size():
    '''
    Test min size for all output types
    '''
    # Run for all species with a non-default min size
    # Expect gene sets smaller than min size to be omitted
    #
    # NOTE(review): assert_run already prepends `--min-size 2`; the
    # `--min-size 3` given here appears later on the command line and is
    # presumably the value that takes effect (last occurrence wins) -- confirm
    # against the option parser used by main.
    assert_run(
        args=(
            '--min-size', '3',
            '--genes', resource_path('gene_info.tsv'),
            '--go', resource_path('func_annot.csv'),
            '--wg',
        ),
        # Only the sets of species C remain with a min size of 3
        expected=(
            'C_GO_0000010',
            'C_wg',
        )
    )