From c24e2c7eefd949d4da03f17a64c765b88c9fe905 Mon Sep 17 00:00:00 2001 From: cesen Date: Thu, 3 Feb 2022 11:16:34 +0100 Subject: [PATCH 1/5] New ksrates command for multiple paralogs ks - New ksrates command paralogs-ks-multi in ksrates_cli.py allows the user to run the paralog Ks pipeline for multiple focal species - The user provides one or more config files as arguments, which will be used in series and only to perform the paralog Ks estimation; a folder containing config files can also be passed. - This command is useful to speed up the estimation of paralog Ks for a large number of focal species --- ksrates_cli.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/ksrates_cli.py b/ksrates_cli.py index 2cfcdfa..d3e0b3e 100644 --- a/ksrates_cli.py +++ b/ksrates_cli.py @@ -1,6 +1,7 @@ import click import logging from sys import argv +import os from ksrates._version import __version__ @click.group(context_settings={'help_option_names': ['-h', '--help']}) @@ -49,7 +50,7 @@ def init(config_file, expert, nextflow): @click.option("--n-threads", type=int, default=4, help="Number of threads (default: 4)") def paralogs_ks(config_file, expert, n_threads): """ - Performs paralog Ks estimation for the focal species through wgd. + Performs paralog Ks estimation through wgd for the focal species. Takes parameters from CONFIG_FILE. @@ -72,7 +73,7 @@ def paralogs_ks(config_file, expert, n_threads): @click.option("--n-threads", type=int, default=4, help="Number of threads (default: 4)") def orthologs_ks(config_file, expert, species1, species2, n_threads): """ - Performs ortholog Ks estimation for SPECIES1 and SPECIES2 through wgd. + Performs ortholog Ks estimation through wgd for SPECIES1 and SPECIES2. Takes parameters from CONFIG_FILE. 
@@ -270,6 +271,49 @@ def paralogs_analyses(config_file, expert, paranome_table, anchors_table, recipr adjustment_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs) +@cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Performs multiple paralog Ks estimations.") +@click.argument('config_sources', nargs=-1, required=True, type=click.Path(exists=True)) +@click.option('--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") +@click.option("--n-threads", type=int, default=4, help="Number of threads (default: 4)") +def paralogs_ks_multi(config_sources, expert, n_threads): + """ + Performs paralog Ks estimation through wgd for multiple focal species by looping through the configuration files provided as argument. + + Takes parameters from CONFIG_SOURCES. + + CONFIG_SOURCES: one or more arguments that are either ksrates configuration files or directories containing ksrates configuration files + + Example 1: ksrates paralogs-ks-multi config_file1.txt config_file2.txt + + Example 2: ksrates paralogs-ks-multi config_file*.txt + + Example 3: ksrates paralogs-ks-multi config_file*.txt config_dir + """ + from ksrates.wgd_paralogs import wgd_paralogs + if expert: + click.format_filename(expert) + else: + expert = "" + + # Process any provided source of configuration files (either files or directories containing files) + for source in config_sources: + source = os.path.abspath(source) + + # If configuration source is a file, use it directly to launch the command + if os.path.isfile(source): + click.format_filename(source) + wgd_paralogs(source, expert, n_threads) + logging.info("") + + # Else if configuration source is a directory, loop through the files thereby contained and use them + elif os.path.isdir(source): + for config_file in os.listdir(source): + config_file = os.path.join(source, config_file) + click.format_filename(config_file) + wgd_paralogs(config_file, expert, 
n_threads) + logging.info("") + + # For debugging # Syntax: python3 ksrates_cli.py [command] [args] if __name__ == "__main__": -- GitLab From a41f1763a2332e5cb1b057adff6b7108f6f1eaa2 Mon Sep 17 00:00:00 2001 From: cesen Date: Thu, 3 Feb 2022 11:24:31 +0100 Subject: [PATCH 2/5] Improve doc strings in CLI; rename cmd - Make doc string of ksrates commands more readable by avoiding rewrapping ("\b") - Add example ksrates commands in doc strings - Rename new command to paralogs-ks-multi --- ksrates_cli.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/ksrates_cli.py b/ksrates_cli.py index d3e0b3e..5778961 100644 --- a/ksrates_cli.py +++ b/ksrates_cli.py @@ -19,7 +19,10 @@ def generate_config(filename): Generates the configuration file for the rate-adjustment. The configuration file name is given by argument FILENAME. + \b FILENAME: configuration file name + \b + Example: ksrates generate-config config_file.txt """ from ksrates.generate_configfile import generate_configfile generate_configfile(filename) @@ -33,7 +36,11 @@ def init(config_file, expert, nextflow): """ Initializes rate-adjustment from CONFIG_FILE. + \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example 1: ksrates init config_file.txt + Example 2: ksrates init config_file.txt --expert path/to/config_expert.txt """ from ksrates.setup_correction import setup_correction click.format_filename(config_file) @@ -54,7 +61,10 @@ def paralogs_ks(config_file, expert, n_threads): Takes parameters from CONFIG_FILE. 
+ \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates paralogs-ks config_file.txt --n-threads 4 """ from ksrates.wgd_paralogs import wgd_paralogs click.format_filename(config_file) @@ -67,9 +77,9 @@ def paralogs_ks(config_file, expert, n_threads): @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Performs ortholog Ks estimation.") @click.argument('config_file', type=click.Path(exists=True)) -@click.option('--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.argument("species1") @click.argument("species2") +@click.option('--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("--n-threads", type=int, default=4, help="Number of threads (default: 4)") def orthologs_ks(config_file, expert, species1, species2, n_threads): """ @@ -81,6 +91,8 @@ def orthologs_ks(config_file, expert, species1, species2, n_threads): CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species SPECIES1: first of the two species involved in the ortholog Ks estimation SPECIES2: second of the two species involved in the ortholog Ks estimation + \b + Example: ksrates orthologs-ks config_file.txt species1 species2 --n-threads 4 """ from ksrates.wgd_orthologs import wgd_orthologs click.format_filename(config_file) @@ -102,7 +114,10 @@ def orthologs_analysis(config_file, expert, ortholog_pairs): Takes parameters from CONFIG_FILE. + \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates orthologs-analysis config_file.txt """ from ksrates.compute_peaks import compute_peaks click.format_filename(config_file) @@ -126,7 +141,10 @@ def orthologs_adjustment(config_file, expert, trios): Takes parameters from CONFIG_FILE. 
+ \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates orthologs-adjustment config_file.txt """ from ksrates.correct import correct click.format_filename(config_file) @@ -152,7 +170,10 @@ def plot_paralogs(config_file, expert, adjustment_table, paranome_table, anchors Takes parameters from CONFIG_FILE. + \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates plot-paralogs config_file.txt """ from ksrates.plot_paralogs import plot_paralogs_distr click.format_filename(config_file) @@ -183,7 +204,10 @@ def plot_tree(config_file, expert, adjustment_table, nextflow): Takes parameters from CONFIG_FILE. + \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates plot-tree config_file.txt """ from ksrates.plot_tree import plot_tree_rates click.format_filename(config_file) @@ -206,7 +230,10 @@ def plot_orthologs(config_file, expert, trios): Takes parameters from CONFIG_FILE. + \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates plot-orthologs config_file.txt """ from ksrates.plot_orthologs import plot_orthologs_distr click.format_filename(config_file) @@ -240,8 +267,11 @@ def paralogs_analyses(config_file, expert, paranome_table, anchors_table, recipr If extra methods are asked through the expert configuration file, performs all methods available for the analysis type(s) selected. Takes parameters from CONFIG_FILE. - + + \b CONFIG_FILE: configuration file to set up the rate-adjustment relative to the focal species + \b + Example: ksrates paralogs-analyses config_file.txt """ from ksrates.paralogs_analyses import paralogs_analyses_methods click.format_filename(config_file) @@ -281,12 +311,11 @@ def paralogs_ks_multi(config_sources, expert, n_threads): Takes parameters from CONFIG_SOURCES. 
- CONFIG_SOURCES: one or more arguments that are either ksrates configuration files or directories containing ksrates configuration files - + \b + CONFIG_SOURCES: one or more ksrates configuration files and/or directories containing such files + \b Example 1: ksrates paralogs-ks-multi config_file1.txt config_file2.txt - Example 2: ksrates paralogs-ks-multi config_file*.txt - Example 3: ksrates paralogs-ks-multi config_file*.txt config_dir """ from ksrates.wgd_paralogs import wgd_paralogs -- GitLab From cd5be0130879421170cbf3f539b5178e96b86b17 Mon Sep 17 00:00:00 2001 From: cesen Date: Thu, 3 Feb 2022 12:46:42 +0100 Subject: [PATCH 3/5] Add command to delete all ortholog BLAST tables - Since BLAST tables are big files and are no longer used once the Ks values are estimated, removing them frees up a considerable amount of disk space - New ksrates command orthologs-ks-cleanup deletes all ortholog BLAST tables found within the ortholog_distribution folder given as argument. - Dry run is available to just simulate the action - Code asks for confirmation before deleting and lists the files that are being deleted --- ksrates_cli.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/ksrates_cli.py b/ksrates_cli.py index 5778961..95db729 100644 --- a/ksrates_cli.py +++ b/ksrates_cli.py @@ -332,7 +332,7 @@ def paralogs_ks_multi(config_sources, expert, n_threads): if os.path.isfile(source): click.format_filename(source) wgd_paralogs(source, expert, n_threads) - logging.info("") + print("") # Else if configuration source is a directory, loop through the files thereby contained and use them elif os.path.isdir(source): @@ -340,7 +340,54 @@ def paralogs_ks_multi(config_sources, expert, n_threads): config_file = os.path.join(source, config_file) click.format_filename(config_file) wgd_paralogs(config_file, expert, n_threads) - logging.info("") + print("") + + +@cli.command(context_settings={'help_option_names': ['-h', 
'--help']}, short_help="Delete all ortholog BLAST tables.") +@click.argument('orthologs_dir_path', type=click.Path(exists=True)) +@click.option("--dry-run", is_flag=True, help=("Dry run that only simulates deletion")) +def orthologs_ks_cleanup(orthologs_dir_path, dry_run): + """ + Deletes all ortholog BLAST tables from an ortholog_distributions directory to free disk space. + + Ortholog BLAST tables ("species1_species2.blast.tsv") easily weight up to 500MB and + are of little use after that the related ortholog Ks estimates have been estimated ("species1_species2.ks.tsv"). + Therefore, when numerous ortholog pipelines have already been run, the BLAST tables can take up quite some GBs of disk space + and it might be convenient to delete them in bulk through this command. + + Takes parameters from ORTHOLOGS_DIR_PATH. + + \b + ORTHOLOGS_DIR_PATH: path to the ortholog distribution directory containing the BLAST tables to be deleted + \b + Example: ksrates orthologs-ks-cleanup test/ortholog_distribution + """ + import os + import glob + blast_list = glob.glob(f"{orthologs_dir_path}/wgd_*/*.blast.tsv") + if len(blast_list) == 0: + print("There are no BLAST tables to be removed.") + else: + if dry_run: + print(f'Listing ortholog BLAST tables within "{os.path.abspath(orthologs_dir_path)}":') + for blast in blast_list: + print(f"{os.path.basename(blast)}") + print("Done") + + else: + print(f'Removing ortholog BLAST tables within "{os.path.abspath(orthologs_dir_path)}":') + text = input("Confirm deleting (y/N)? ").lower() + if text == "y" or text == "yes": + for blast in blast_list: + os.remove(blast) + print(f"Deleted: {os.path.basename(blast)}") + print("Done") + + elif text == "n" or text == "no" or text == "": + print("Cancelled") + + else: + print('Please choose between "y" or "n". 
Cancelled.') # For debugging -- GitLab From f6d84c98dee4f29cc64409b934cd8fdb61d640c2 Mon Sep 17 00:00:00 2001 From: cesen Date: Thu, 3 Feb 2022 13:47:26 +0100 Subject: [PATCH 4/5] Import os only where needed --- ksrates_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ksrates_cli.py b/ksrates_cli.py index 95db729..3e9aad2 100644 --- a/ksrates_cli.py +++ b/ksrates_cli.py @@ -1,7 +1,6 @@ import click import logging from sys import argv -import os from ksrates._version import __version__ @click.group(context_settings={'help_option_names': ['-h', '--help']}) @@ -318,6 +317,7 @@ def paralogs_ks_multi(config_sources, expert, n_threads): Example 2: ksrates paralogs-ks-multi config_file*.txt Example 3: ksrates paralogs-ks-multi config_file*.txt config_dir """ + import os from ksrates.wgd_paralogs import wgd_paralogs if expert: click.format_filename(expert) -- GitLab From a2b88e187c71dc63f022584212ff324fa8435933 Mon Sep 17 00:00:00 2001 From: cesen Date: Mon, 7 Feb 2022 10:48:20 +0100 Subject: [PATCH 5/5] Preserve paralog BLAST table; print number tables --- ksrates_cli.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ksrates_cli.py b/ksrates_cli.py index 3e9aad2..df9a102 100644 --- a/ksrates_cli.py +++ b/ksrates_cli.py @@ -355,6 +355,9 @@ def orthologs_ks_cleanup(orthologs_dir_path, dry_run): Therefore, when numerous ortholog pipelines have already been run, the BLAST tables can take up quite some GBs of disk space and it might be convenient to delete them in bulk through this command. + Note the paralog BLAST tables within the paralog_distributions directory are instead meant to be preserved and + will not be affected by this command. + Takes parameters from ORTHOLOGS_DIR_PATH. 
\b @@ -364,18 +367,24 @@ def orthologs_ks_cleanup(orthologs_dir_path, dry_run): """ import os import glob - blast_list = glob.glob(f"{orthologs_dir_path}/wgd_*/*.blast.tsv") + abspath = os.path.abspath(orthologs_dir_path) + if os.path.basename(abspath) == "paralog_distributions": + print("This command is meant to be used for ortholog BLAST tables, not for paralog ones.") + print("Nothing will be deleted. Exiting.") + return + + blast_list = glob.glob(f"{abspath}/wgd_*/*.blast.tsv") if len(blast_list) == 0: - print("There are no BLAST tables to be removed.") + print("There are no ortholog BLAST tables to be removed") else: if dry_run: - print(f'Listing ortholog BLAST tables within "{os.path.abspath(orthologs_dir_path)}":') + print(f'Listing {len(blast_list)} ortholog BLAST tables within "{abspath}":') for blast in blast_list: print(f"{os.path.basename(blast)}") - print("Done") + print("Done") else: - print(f'Removing ortholog BLAST tables within "{os.path.abspath(orthologs_dir_path)}":') + print(f'Removing {len(blast_list)} ortholog BLAST tables within "{abspath}":') text = input("Confirm deleting (y/N)? ").lower() if text == "y" or text == "yes": for blast in blast_list: @@ -389,7 +398,6 @@ def orthologs_ks_cleanup(orthologs_dir_path, dry_run): else: print('Please choose between "y" or "n". Cancelled.') - # For debugging # Syntax: python3 ksrates_cli.py [command] [args] if __name__ == "__main__": -- GitLab