Commit 9bc89054 authored by Thomas Van Parys
Browse files

plaza api ftp returns latest version and adds version option (fixes #56)

parent 765b2492
...@@ -16,24 +16,33 @@ def get_plaza_data(): ...@@ -16,24 +16,33 @@ def get_plaza_data():
return j return j
def get_ftp_url(species, data_type): def get_ftp_url(species, data_type, version=None):
''' '''
Retrieves an FTP link from the Plaza API. Retrieves an FTP link from the Plaza API.
:param species: Plaza id :param species: Plaza id
:param data_type: Which download link to return (annot/fasta/cds) :param data_type: Which download link to return (annot/fasta/cds)
:param version: the version identifier of the genome (by default the latest one is selected)
:returns: A string with the FTP link or None if species not found. :returns: A string with the FTP link or None if species not found.
''' '''
j = get_plaza_data() j = get_plaza_data()
hit = None
for entry in j: for entry in j:
if entry['species'] == species: if entry['species'] == species:
if data_type == 'annot': if ((version is None
return entry['csv'][0]['location'] and (hit is None or int(entry['year']) > int(hit['year'])))
if data_type == 'prot': or (version == entry['version'])):
return entry['fasta']['proteome'][0]['location'] hit = entry
if data_type == 'cds':
return entry['fasta']['cds'][0]['location'] if hit is None:
return None return None
if data_type == 'annot':
return hit['csv'][0]['location']
if data_type == 'prot':
return hit['fasta']['proteome'][0]['location']
if data_type == 'cds':
return hit['fasta']['cds'][0]['location']
def find_species(needle, all=True): def find_species(needle, all=True):
...@@ -52,7 +61,7 @@ def find_species(needle, all=True): ...@@ -52,7 +61,7 @@ def find_species(needle, all=True):
for entry in j: for entry in j:
if entry['species'].lower() == needle or needle in entry['common_name'].lower(): if entry['species'].lower() == needle or needle in entry['common_name'].lower():
new_entry = (entry['common_name'], entry['version'], entry['year']) new_entry = (entry['common_name'], entry['version'], entry['year'])
if entry['species'] not in hits: if entry['species'] not in hits:
hits[entry['species']] = [new_entry] hits[entry['species']] = [new_entry]
else: else:
...@@ -125,9 +134,14 @@ def list(): ...@@ -125,9 +134,14 @@ def list():
@click.option( @click.option(
'--type', '-t', 'data_type', '--type', '-t', 'data_type',
default="annot", default="annot",
help="Type of data we need the ftp link for (annot/prot/cds)" help="Type of data we need the ftp link for (annot/prot/cds) (default: annot)"
)
@click.option(
'--version', '-v', 'version',
default=None,
help="Genome version identifier"
) )
def ftp(species, data_type): def ftp(species, data_type, version):
''' '''
Get Plaza FTP URLs for given species. Get Plaza FTP URLs for given species.
...@@ -137,7 +151,7 @@ def ftp(species, data_type): ...@@ -137,7 +151,7 @@ def ftp(species, data_type):
https://bioinformatics.psb.ugent.be/plaza/documentation/data_warehouse https://bioinformatics.psb.ugent.be/plaza/documentation/data_warehouse
''' '''
ftp_url = get_ftp_url(species, data_type) ftp_url = get_ftp_url(species, data_type, version)
if not ftp_url: if not ftp_url:
print("Error: Species abbrevation unknown: {}".format(species), file=sys.stderr) print("Error: Species abbrevation unknown: {}".format(species), file=sys.stderr)
sys.exit(1) sys.exit(1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment