Newer
Older
#!/usr/bin/python
# -*- coding: utf-8 -*-
import yaml
import logging
import sys

Arthur Le Bars
committed
import os
import subprocess

Loraine Gueguen
committed
import constants

Arthur Le Bars
committed

Loraine Gueguen
committed
def load_yaml(yaml_file):
    """
    Load a YAML file and return its parsed content

    The process is terminated (SystemExit) with a non-zero exit status when the file does not exist,
    cannot be read, or does not contain valid YAML

    :param yaml_file: path of the YAML file to read
    :return: the data returned by yaml.safe_load for this file
    """
    try:
        with open(yaml_file, 'r') as stream:
            try:
                data = yaml.safe_load(stream)
            except yaml.YAMLError as err:
                logging.critical("Input file %s is not in YAML format" % yaml_file)
                # Passing the exception object prints it on stderr and exits with status 1
                sys.exit(err)
    except FileNotFoundError:
        logging.critical("Input file doesn't exist (%s)" % yaml_file)
        # Explicit non-zero status: a bare sys.exit() would report success to the calling shell
        sys.exit(1)
    except OSError:
        logging.critical("Input file cannot be read (%s)" % yaml_file)
        sys.exit(1)
    return data

Arthur Le Bars
committed
def parse_config(config_file):
    """
    Parse a config file containing users and password used by the different services (tripal, galaxy, chado, ...)

    The process is terminated (SystemExit, non-zero status) when the content of the file is not a dictionary

    :param config_file: path of the YAML config file
    :return: the content of the config file (a dictionary)
    """
    config_dict = load_yaml(config_file)
    if isinstance(config_dict, dict):
        return config_dict

    # The message must contain a placeholder: "%" applied to a string without one raises TypeError
    logging.critical("Config yaml file is not a dictionary (%s)" % config_file)
    sys.exit(1)
def parse_input(input_file):
    """
    Parse the yml input file to extract data to create the SpeciesData objects
    Return a list of dictionaries. Each dictionary contains data tied to a species

    The process is terminated (SystemExit, non-zero status) when the content of the file is not a list

    :param input_file: path of the YAML input file
    :return: the content of the input file (a list)
    """
    sp_dict_list = load_yaml(input_file)
    if isinstance(sp_dict_list, list):
        return sp_dict_list

    # The message must contain a placeholder: "%" applied to a string without one raises TypeError
    logging.critical("Input organisms yaml file is not a list (%s)" % input_file)
    sys.exit(1)

Arthur Le Bars
committed
"""
Separate a list between empty items and non empty items.
Return a dict with 2 keys: empty values (items) and non empty values (items)
:param li:
:return:
"""
filtered_dict = {"empty": [], "not_empty": []}

Arthur Le Bars
committed
filtered_dict["empty"].append(i)

Arthur Le Bars
committed
filtered_dict["not_empty"].append(i)
return filtered_dict

Arthur Le Bars
committed
def check_galaxy_state(genus_lowercase, species, script_dir):
    """
    Run "supervisorctl status galaxy:" through the serexec script for the galaxy service of the
    current species and tell whether the web process and both handlers are reported as running

    :param genus_lowercase: first part of the "<genus>_<species>_galaxy" name given to serexec
    :param species: second part of that name
    :param script_dir: directory containing the serexec script
    :return: 1 if galaxy_web, handler0 and handler1 are all reported RUNNING, 0 otherwise
    """
    serexec_path = "%s/serexec" % script_dir

    # Set the mode of serexec to 0755; not being allowed to do so is only reported as a warning
    try:
        os.chmod(serexec_path, 0o0755)
    except PermissionError:
        logging.warning("serexec permissions incorrect in %s" % script_dir)

    service_name = "{0}_{1}_galaxy".format(genus_lowercase, species)
    supervisor_status = subprocess.run(
        [serexec_path, service_name, "supervisorctl", "status", "galaxy:"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Only the captured standard output is inspected
    status_text = str(supervisor_status.stdout)
    expected_states = (
        "galaxy:galaxy_web RUNNING",
        "galaxy:handler0 RUNNING",
        "galaxy:handler1 RUNNING",
    )
    if all(state in status_text for state in expected_states):
        return 1
    return 0
def get_species_history_id(instance, full_name):
    """
    Look up the history named after the current species in its galaxy instance

    :param instance: object exposing histories.get_histories and histories.show_history
    :param full_name: name of the history to look for (converted to str before the lookup)
    :return: a two-item list: [id of the first history found, result of show_history for that id]
    """
    matching_histories = instance.histories.get_histories(name=str(full_name))
    # Only the first history returned for that name is considered
    first_history_id = matching_histories[0]["id"]
    history_details = instance.histories.show_history(history_id=first_history_id)
    return [first_history_id, history_details]

Loraine Gueguen
committed
def get_gspecies_string_from_sp_dict(sp_dict):
    """
    Build the lowercase "genus_species" string of a species dictionary

    :param sp_dict: species dictionary; genus and species are read from its description entry
    :return: genus and species, both lowercased, joined by an underscore
    """
    description = sp_dict[constants.ORG_PARAM_DESC]
    genus_name = description[constants.ORG_PARAM_DESC_GENUS]
    species_name = description[constants.ORG_PARAM_DESC_SPECIES]
    return "{0}_{1}".format(genus_name.lower(), species_name.lower())

Arthur Le Bars
committed
def get_unique_species_str_list(sp_dict_list):
    """
    List the distinct species (i.e "genus_species" strings) present in the input dictionaries used by gga scripts
    Each species appears once in the result, so that operations such as deploying a stack or loading
    the library are done a single time per species

    :param sp_dict_list: list of species dictionaries
    :return: list of "genus_species" strings without duplicates, in order of first appearance
    """
    seen_gspecies = []

    for species_dict in sp_dict_list:
        gspecies = get_gspecies_string_from_sp_dict(species_dict)
        # Skip empty strings and species that were already collected
        if gspecies == "" or gspecies in seen_gspecies:
            continue
        seen_gspecies.append(gspecies)

    return seen_gspecies
def get_unique_species_dict_list(sp_dict_list):
    """
    Filter the species dictionary list to return only unique genus_species combinations
    For a given combination, the first dictionary encountered is kept, unless a later dictionary of the
    same combination is flagged as main species, in which case that one replaces it
    Used in gga_init.py to write the docker-compose files for the input organisms

    :param sp_dict_list: list of species dictionaries
    :return: list containing one species dictionary per genus_species combination
    """
    unique_species_dict = {}

    for sp in sp_dict_list:
        gspecies = get_gspecies_string_from_sp_dict(sp)
        # The main species flag is only read for combinations that were already seen (short-circuit "or")
        # "== True" is kept on purpose: only a value equal to True counts as the main species flag
        if gspecies not in unique_species_dict \
                or sp[constants.ORG_PARAM_DESC][constants.ORG_PARAM_DESC_MAIN_SPECIES] == True:  # noqa: E712
            unique_species_dict[gspecies] = sp

    # The result list has to be built and returned explicitly
    return list(unique_species_dict.values())
def run_tool(instance, tool_id, history_id, tool_inputs):
    """
    Run a tool through instance.tools.run_tool and return what that call returns

    :param instance: object exposing tools.run_tool
    :param tool_id: id of the tool to run
    :param history_id: id of the history given to the tool run
    :param tool_inputs: inputs given to the tool run
    :return: the value returned by instance.tools.run_tool, or None when a bioblend.ConnectionError was logged
    """
    # NOTE(review): bioblend is not imported in the visible part of this file - confirm the import exists
    try:
        logging.debug("Running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
        return instance.tools.run_tool(
            tool_id=tool_id,
            history_id=history_id,
            tool_inputs=tool_inputs)
    except bioblend.ConnectionError:
        logging.error("Unexpected HTTP response (bioblend.ConnectionError) when running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
    # Reached only after a logged connection error
    return None
def run_tool_and_get_single_output_dataset_id(instance, tool_id, history_id, tool_inputs):
    """
    Run a tool with run_tool and return the id of the first dataset listed in its outputs

    :param instance: passed as is to run_tool
    :param tool_id: passed as is to run_tool
    :param history_id: passed as is to run_tool
    :param tool_inputs: passed as is to run_tool
    :return: value of the "id" key of the first item of the "outputs" entry returned by run_tool
    """
    tool_outputs = run_tool(instance, tool_id, history_id, tool_inputs)["outputs"]
    return tool_outputs[0]["id"]
def create_org_param_dict_from_constants():
    """
    Create a dictionary of variables containing the keys needed to render the organisms.yml.j2 (NOT the values)
    Created from the constants

    :return: dictionary in which each organism parameter name from the constants is mapped to itself
    """
    param_names = (
        constants.ORG_PARAM_NAME,
        constants.ORG_PARAM_DESC,
        constants.ORG_PARAM_DESC_GENUS,
        constants.ORG_PARAM_DESC_SPECIES,
        constants.ORG_PARAM_DESC_SEX,
        constants.ORG_PARAM_DESC_STRAIN,
        constants.ORG_PARAM_DESC_COMMON_NAME,
        constants.ORG_PARAM_DESC_ORIGIN,
        constants.ORG_PARAM_DESC_MAIN_SPECIES,
        constants.ORG_PARAM_DATA,
        constants.ORG_PARAM_DATA_GENOME_PATH,
        constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH,
        constants.ORG_PARAM_DATA_PROTEINS_PATH,
        constants.ORG_PARAM_DATA_GFF_PATH,
        constants.ORG_PARAM_DATA_INTERPRO_PATH,
        constants.ORG_PARAM_DATA_ORTHOFINDER_PATH,
        constants.ORG_PARAM_DATA_BLASTP_PATH,
        constants.ORG_PARAM_DATA_BLASTX_PATH,
        constants.ORG_PARAM_DATA_GENOME_VERSION,
        constants.ORG_PARAM_DATA_OGS_VERSION,
        constants.ORG_PARAM_DATA_PERFORMED_BY,
        constants.ORG_PARAM_SERVICES,
        constants.ORG_PARAM_SERVICES_BLAST,
    )
    # Every name is mapped to itself, including the OGS version, which must not be mapped to the
    # genome version name
    return {param_name: param_name for param_name in param_names}