Newer
Older
#!/usr/bin/python
# -*- coding: utf-8 -*-
import yaml
import logging
import sys

Arthur Le Bars
committed
import os
import subprocess

Loraine Gueguen
committed
import constants

Loraine Gueguen
committed
import time

Arthur Le Bars
committed

Loraine Gueguen
committed
def load_yaml(yaml_file):
    """
    Load a YAML file and return its parsed content

    Every failure (missing file, unreadable file, invalid YAML) is logged at CRITICAL level
    and terminates the program with a non-zero exit status

    :param yaml_file: path of the YAML file to read
    :return: the object built by yaml.safe_load from the file content
    """
    try:
        with open(yaml_file, 'r') as stream:
            try:
                data = yaml.safe_load(stream)
            except yaml.YAMLError as err:
                logging.critical("Input file %s is not in YAML format" % yaml_file)
                # Passing the exception prints it on stderr and exits with status 1
                sys.exit(err)
    except FileNotFoundError:
        logging.critical("Input file doesn't exist (%s)" % yaml_file)
        # A bare sys.exit() would report success (status 0) to the calling shell
        sys.exit(1)
    except OSError:
        logging.critical("Input file cannot be read (%s)" % yaml_file)
        sys.exit(1)
    return data

Arthur Le Bars
committed
def parse_config(config_file):
    """
    Parse a config file containing users and password used by the different services (tripal, galaxy, chado, ...)

    The program is terminated with a non-zero exit status when the file content is not a dictionary

    :param config_file: path of the YAML config file
    :return: the content of the config file as a dictionary
    """
    config_dict = load_yaml(config_file)
    if not isinstance(config_dict, dict):
        # The message needs a placeholder: "%" applied to a string without one raises TypeError
        logging.critical("Config yaml file is not a dictionary (%s)" % config_file)
        sys.exit(1)
    return config_dict
def parse_input(input_file):
    """
    Parse the yml input file to extract data to create the SpeciesData objects
    Return a list of dictionaries. Each dictionary contains data tied to a species

    The program is terminated with a non-zero exit status when the file content is not a list

    :param input_file: path of the YAML file describing the organisms
    :return: the content of the input file as a list
    """
    sp_dict_list = load_yaml(input_file)
    if not isinstance(sp_dict_list, list):
        # The message needs a placeholder: "%" applied to a string without one raises TypeError
        logging.critical("Input organisms yaml file is not a list (%s)" % input_file)
        sys.exit(1)
    return sp_dict_list

Arthur Le Bars
committed
"""
Separate a list between empty items and non empty items.
Return a dict with 2 keys: empty values (items) and non empty values (items)
:param li:
:return:
"""
filtered_dict = {"empty": [], "not_empty": []}

Arthur Le Bars
committed
filtered_dict["empty"].append(i)

Arthur Le Bars
committed
filtered_dict["not_empty"].append(i)
return filtered_dict
def no_empty_items(li):
    """
    Tell whether a collection is free of empty items
    An item is considered empty when it is None or the empty string

    :param li: iterable of items to inspect
    :return: True if no item is empty (also for an empty iterable), False otherwise
    """
    found_empty = any(item is None or item == "" for item in li)
    return not found_empty

Arthur Le Bars
committed
def check_wf_param(full_name, params):
    """
    Stop the program when at least one workflow parameter is empty (None or "")

    :param full_name: name of the organism, only used in the log message
    :param params: workflow parameters to check
    :return:
    """
    if not no_empty_items(params):
        logging.critical(
            "One empty workflow parameter found for organism {0}: {1}".format(full_name, params))
        # Non-zero status so that the calling shell sees the failure
        sys.exit(1)

Loraine Gueguen
committed
def check_galaxy_state(network_name, script_dir):
    """
    Run "supervisorctl status galaxy:" in the galaxy container (through serexec) and check
    that the galaxy processes are reported as running

    :param network_name: prefix of the galaxy service name ("<network_name>_galaxy")
    :param script_dir: directory containing the serexec executable
    :return: 1 if galaxy_web, handler0 and handler1 are all reported RUNNING, 0 otherwise
    """
    serexec_path = "%s/serexec" % script_dir

    # serexec has to be executable; a permission failure is only reported, not fatal
    try:
        os.chmod(serexec_path, 0o0755)
    except PermissionError:
        logging.warning("serexec permissions incorrect in %s" % script_dir)

    command = [serexec_path, "{0}_galaxy".format(network_name), "supervisorctl", "status", "galaxy:"]
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # The captured bytes are searched through their str() representation
    status_text = str(completed.stdout)
    expected_states = (
        "galaxy:galaxy_web RUNNING",
        "galaxy:handler0 RUNNING",
        "galaxy:handler1 RUNNING",
    )
    if all(state in status_text for state in expected_states):
        return 1
    return 0
def get_species_history_id(instance, full_name):
    """
    Look up the galaxy history named after the species and return its id with its details
    Only the first history returned for that name is used

    :param instance: galaxy instance exposing histories.get_histories and histories.show_history
    :param full_name: name of the history to look for (converted to str)
    :return: a two-item list [history id, result of show_history for that id]
    """
    matching_histories = instance.histories.get_histories(name=str(full_name))
    first_history_id = matching_histories[0]["id"]
    history_details = instance.histories.show_history(history_id=first_history_id)
    return [first_history_id, history_details]

Loraine Gueguen
committed
def get_gspecies_string_from_sp_dict(sp_dict):
    """
    Build the "genus_species" string (lower case) of an organism

    :param sp_dict: organism dictionary holding a description with genus and species entries
    :return: lower-cased genus and species joined by an underscore
    """
    description = sp_dict[constants.ORG_PARAM_DESC]
    genus_name = description[constants.ORG_PARAM_DESC_GENUS]
    species_name = description[constants.ORG_PARAM_DESC_SPECIES]
    return "_".join((genus_name.lower(), species_name.lower()))

Arthur Le Bars
committed
def get_unique_species_str_list(sp_dict_list):
    """
    Return the distinct species strings (genus_species) found in the input dictionaries used by gga scripts
    Each species is listed once, in order of first appearance, so that operations done per species
    (i.e deploying a stack or loading the library) are not repeated

    :param sp_dict_list: list of organism dictionaries
    :return: list of unique genus_species strings
    """
    seen_species = []
    for organism in sp_dict_list:
        species_label = get_gspecies_string_from_sp_dict(organism)
        # Skip empty labels and species already recorded
        if species_label == "" or species_label in seen_species:
            continue
        seen_species.append(species_label)
    return seen_species
def get_unique_species_dict_list(sp_dict_list):
    """
    Filter the species dictionary list to return only unique genus_species combinations
    The organism kept for a species is the first encountered in the input list, unless another
    organism of the same species is flagged as main species (it then replaces the one kept so far)
    Used in gga_init.py to write the docker-compose files for the input organisms

    :param sp_dict_list: list of organism dictionaries
    :return: list of organism dictionaries, one per genus_species
    """
    unique_species_dict = {}
    for sp in sp_dict_list:
        gspecies = get_gspecies_string_from_sp_dict(sp)
        sp_desc = sp[constants.ORG_PARAM_DESC]
        # Explicit comparison with True (and not truthiness): only a real flag promotes the organism
        is_main_species = sp_desc.get(constants.ORG_PARAM_DESC_MAIN_SPECIES) == True  # noqa: E712
        if gspecies not in unique_species_dict or is_main_species:
            unique_species_dict[gspecies] = sp

    # The result list has to be built and returned, otherwise callers get nothing
    unique_species_list_of_dict = list(unique_species_dict.values())
    return unique_species_list_of_dict

Loraine Gueguen
committed
def get_sp_picture(sp_dict_list):
    """
    Select one picture path per species (genus_species)
    A species without picture yet takes the picture of the current organism; an organism flagged
    as main species overrides the picture already selected. Organisms without a picture path
    (missing or empty string) never change the selection

    :param sp_dict_list: list of organism dictionaries
    :return: dictionary mapping genus_species to a picture path
    """
    pictures_by_species = {}
    for organism in sp_dict_list:
        species_key = get_gspecies_string_from_sp_dict(organism)
        description = organism[constants.ORG_PARAM_DESC]

        # A picture already selected is only replaced by an organism flagged as main species
        if species_key in pictures_by_species \
                and not description.get(constants.ORG_PARAM_DESC_MAIN_SPECIES) == True:  # noqa: E712
            continue

        picture_path = description.get(constants.ORG_PARAM_DESC_PICTURE_PATH, "")
        if picture_path != "":
            pictures_by_species[species_key] = picture_path
    return pictures_by_species
def run_tool(instance, tool_id, history_id, tool_inputs):
    """
    Run a galaxy tool in a history

    :param instance: galaxy instance exposing tools.run_tool
    :param tool_id: id of the tool to run
    :param history_id: id of the history in which the tool is run
    :param tool_inputs: inputs given to the tool
    :return: the value returned by instance.tools.run_tool, or None when a bioblend.ConnectionError
        was raised (the error is logged)
    """
    # NOTE(review): "bioblend" is not among the imports visible at the top of this file - confirm
    # it is imported at module level, otherwise the except clause below fails with NameError
    try:
        logging.debug("Running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
        return instance.tools.run_tool(
            tool_id=tool_id,
            history_id=history_id,
            tool_inputs=tool_inputs)
    except bioblend.ConnectionError:
        logging.error("Unexpected HTTP response (bioblend.ConnectionError) when running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
        return None
def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep = 0):
    """
    Run a galaxy tool, optionally wait, then download the first output dataset of the run

    :param instance: galaxy instance exposing tools.run_tool and datasets.download_dataset
    :param tool_id: id of the tool to run
    :param history_id: id of the history in which the tool is run
    :param tool_inputs: inputs given to the tool
    :param time_sleep: seconds to wait between the run and the download (None: no wait at all)
    :return: the value returned by instance.datasets.download_dataset for the first output
    """
    tool_result = run_tool(instance, tool_id, history_id, tool_inputs)

    if time_sleep is not None:
        time.sleep(time_sleep)

    first_output = tool_result["outputs"][0]
    return instance.datasets.download_dataset(dataset_id=first_output["id"])
def install_repository_revision(current_version, toolshed_dict, version_to_install, changeset_revision, instance):
    """
    Install a changeset revision of a toolshed repository when the current version differs
    from the version to install. Nothing is done when both versions are equal

    :param current_version: version currently available
    :param toolshed_dict: dictionary with the "name", "owner" and "tool_shed" of the repository
    :param version_to_install: version wanted
    :param changeset_revision: changeset revision to install
    :param instance: galaxy instance exposing toolshed.install_repository_revision
    :return:
    """
    if current_version == version_to_install:
        return

    repo_name = toolshed_dict["name"]
    repo_owner = toolshed_dict["owner"]
    toolshed_url = "https://" + toolshed_dict["tool_shed"]

    logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, repo_name))
    instance.toolshed.install_repository_revision(
        tool_shed_url=toolshed_url,
        name=repo_name,
        owner=repo_owner,
        changeset_revision=changeset_revision,
        install_tool_dependencies=True,
        install_repository_dependencies=False,
        install_resolver_dependencies=True,
    )
def create_org_param_dict_from_constants():
    """
    Build the dictionary of variables holding the key names needed to render the organisms.yml.j2
    (the names of the keys, NOT the values of an organism)
    Each entry maps a lower case variable name to the constant of the same name

    :return: dictionary of key names taken from the constants module
    """
    return {
        # Organism name and description
        "org_param_name": constants.ORG_PARAM_NAME,
        "org_param_desc": constants.ORG_PARAM_DESC,
        "org_param_desc_genus": constants.ORG_PARAM_DESC_GENUS,
        "org_param_desc_species": constants.ORG_PARAM_DESC_SPECIES,
        "org_param_desc_sex": constants.ORG_PARAM_DESC_SEX,
        "org_param_desc_strain": constants.ORG_PARAM_DESC_STRAIN,
        "org_param_desc_common_name": constants.ORG_PARAM_DESC_COMMON_NAME,
        "org_param_desc_origin": constants.ORG_PARAM_DESC_ORIGIN,
        "org_param_desc_picture_path": constants.ORG_PARAM_DESC_PICTURE_PATH,
        "org_param_desc_main_species": constants.ORG_PARAM_DESC_MAIN_SPECIES,
        # Data files and versions
        "org_param_data": constants.ORG_PARAM_DATA,
        "org_param_data_genome_path": constants.ORG_PARAM_DATA_GENOME_PATH,
        "org_param_data_transcripts_path": constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH,
        "org_param_data_proteins_path": constants.ORG_PARAM_DATA_PROTEINS_PATH,
        "org_param_data_gff_path": constants.ORG_PARAM_DATA_GFF_PATH,
        "org_param_data_interpro_path": constants.ORG_PARAM_DATA_INTERPRO_PATH,
        "org_param_data_orthofinder_path": constants.ORG_PARAM_DATA_ORTHOFINDER_PATH,
        "org_param_data_blastp_path": constants.ORG_PARAM_DATA_BLASTP_PATH,
        "org_param_data_blastx_path": constants.ORG_PARAM_DATA_BLASTX_PATH,
        "org_param_data_genome_version": constants.ORG_PARAM_DATA_GENOME_VERSION,
        "org_param_data_ogs_version": constants.ORG_PARAM_DATA_OGS_VERSION,
        # Services
        "org_param_services": constants.ORG_PARAM_SERVICES,
        "org_param_services_blast": constants.ORG_PARAM_SERVICES_BLAST,
        "org_param_services_go": constants.ORG_PARAM_SERVICES_GO,
    }