Skip to content
Snippets Groups Projects
Commit 7cdad582 authored by Loraine Gueguen's avatar Loraine Gueguen
Browse files

Create utilities_bioblend.py. Refactor run_wf (WIP).

parent 5bbc05a7
No related branches found
No related tags found
2 merge requests!24Run wf,!18Release v2.1.0
This commit is part of merge request !18. Comments created here will be created in the context of that merge request.
File moved
......@@ -10,10 +10,10 @@ import sys
import time
import json
import yaml
from bioblend import galaxy
from bioblend.galaxy.objects import GalaxyInstance
import utilities
import utilities_bioblend
import speciesData
import constants
......@@ -68,7 +68,7 @@ class LoadData(speciesData.SpeciesData):
"""
logging.debug("Getting 'Homo sapiens' ID in chado database")
get_sapiens_id_json_output = utilities.run_tool_and_download_single_output_dataset(
get_sapiens_id_json_output = utilities_bioblend.run_tool_and_download_single_output_dataset(
self.instance,
tool_id=constants.GET_ORGANISMS_TOOL, # If this version if not found, Galaxy will use the one that is found
history_id=self.history_id,
......@@ -78,7 +78,7 @@ class LoadData(speciesData.SpeciesData):
try:
get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
sapiens_id = str(get_sapiens_id_final_output["organism_id"]) # needs to be str to be recognized by the chado tool
utilities.run_tool(
utilities_bioblend.run_tool(
self.instance,
tool_id=constants.DELETE_ORGANISMS_TOOL,
history_id=self.history_id,
......@@ -278,29 +278,6 @@ class LoadData(speciesData.SpeciesData):
logging.info("Did not find metadata in %s " % meta_file)
return self.get_bam_label(dirname, bam_file)
def set_galaxy_instance(self):
    """
    Connect to the galaxy instance of the current organism and test the connection

    The client is built from self.instance_url and the admin credentials found
    in self.config, and is stored in self.instance. The connection is tested by
    requesting the histories; if that request raises bioblend.ConnectionError,
    a critical message is logged and the process exits with a non-zero status.

    :return: the connected galaxy instance (also available as self.instance)
    """
    logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
    admin_email = self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL]
    admin_password = self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
    self.instance = galaxy.GalaxyInstance(url=self.instance_url,
                                          email=admin_email,
                                          password=admin_password)
    try:
        self.instance.histories.get_histories()
    except bioblend.ConnectionError:
        logging.critical("Cannot connect to galaxy instance (%s) " % self.instance_url)
        # A bare sys.exit() reports success (status 0) to the calling process;
        # use a non-zero status so that the failure can be detected
        sys.exit(1)
    logging.info("Successfully connected to galaxy instance (%s) " % self.instance_url)
    return self.instance
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Load data into Galaxy library")
......@@ -367,11 +344,15 @@ if __name__ == "__main__":
load_data_for_current_species.genus_species)
# Check the galaxy container state and proceed if the galaxy services are up and running
if utilities.check_galaxy_state(network_name=load_data_for_current_species.genus_species,
if utilities_bioblend.check_galaxy_state(network_name=load_data_for_current_species.genus_species,
script_dir=load_data_for_current_species.script_dir):
# Create the Galaxy instance
load_data_for_current_species.instance = load_data_for_current_species.set_galaxy_instance()
load_data_for_current_species.instance = utilities_bioblend.get_galaxy_instance(
instance_url=load_data_for_current_species.instance_url,
email=load_data_for_current_species.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
password=load_data_for_current_species.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
)
# Load the datasets into a galaxy library
logging.info("Setting up library for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))
......
This diff is collapsed.
......@@ -4,11 +4,7 @@
import yaml
import logging
import sys
import os
import subprocess
import bioblend
import constants
import time
def load_yaml(yaml_file):
......@@ -84,49 +80,6 @@ def no_empty_items(li):
empty = False
return empty
def check_wf_param(full_name, params):
    """
    Stop the program when the workflow parameters of an organism fail the no_empty_items check

    :param full_name: organism name, only used in the log message
    :param params: workflow parameters, passed to no_empty_items
    :return: None when the check passes; otherwise the process exits
    """
    if no_empty_items(params):
        return
    logging.critical(
        "One empty workflow parameter found for organism {0}: {1})".format(full_name, params))
    # A bare sys.exit() reports success (status 0) to the calling process;
    # use a non-zero status so that the failure can be detected
    sys.exit(1)
def check_galaxy_state(network_name, script_dir):
    """
    Tell whether the galaxy services of the current species are up

    Runs "supervisorctl status galaxy:" for the "{network_name}_galaxy" container
    through the serexec script found in script_dir, then looks for the web
    process and the two handlers in the captured standard output.

    :param network_name: prefix of the galaxy container name
    :param script_dir: directory that contains the serexec script
    :return: 1 if galaxy_web, handler0 and handler1 are all reported RUNNING, 0 otherwise
    """
    serexec_path = "%s/serexec" % script_dir
    # Make sure the serexec script of the repo is executable
    try:
        os.chmod(serexec_path, 0o0755)
    except PermissionError:
        logging.warning("serexec permissions incorrect in %s" % script_dir)
    command = [serexec_path, "{0}_galaxy".format(network_name), "supervisorctl", "status", "galaxy:"]
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    status_report = str(completed.stdout)
    # NOTE: plain substring match, so the spacing inside each marker has to be
    # exactly the spacing found in the command output
    expected_markers = (
        "galaxy:galaxy_web RUNNING",
        "galaxy:handler0 RUNNING",
        "galaxy:handler1 RUNNING",
    )
    if all(marker in status_report for marker in expected_markers):
        return 1
    return 0
def get_species_history_id(instance, full_name):
    """
    Look up the history of a species in its galaxy instance

    :param instance: galaxy instance client to query
    :param full_name: species name; its string form is used as the history name
    :return: a two-item list: the ID of the first history returned for that
        name, and the result of show_history for that ID
    """
    matching_histories = instance.histories.get_histories(name=str(full_name))
    # An empty lookup result raises IndexError here
    first_id = matching_histories[0]["id"]
    return [first_id, instance.histories.show_history(history_id=first_id)]
def get_gspecies_string_from_sp_dict(sp_dict):
genus = sp_dict[constants.ORG_PARAM_DESC][constants.ORG_PARAM_DESC_GENUS]
......@@ -153,7 +106,6 @@ def get_unique_species_str_list(sp_dict_list):
return unique_species_li
def get_unique_species_dict_list(sp_dict_list):
"""
Filter the species dictionary list to return only unique genus_species combinations
......@@ -195,46 +147,6 @@ def get_sp_picture(sp_dict_list):
sp_picture_dict[gspecies] = sp[constants.ORG_PARAM_DESC][constants.ORG_PARAM_DESC_PICTURE_PATH]
return sp_picture_dict
def run_tool(instance, tool_id, history_id, tool_inputs):
    """
    Run a tool in a history of the galaxy instance

    :param instance: galaxy instance client
    :param tool_id: ID of the tool to run
    :param history_id: ID of the history the tool is run in
    :param tool_inputs: inputs handed to the tool
    :return: whatever instance.tools.run_tool returns, or None when the call
        raised bioblend.ConnectionError (the error is logged, not re-raised)
    """
    logging.debug("Running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
    try:
        return instance.tools.run_tool(tool_id=tool_id,
                                       history_id=history_id,
                                       tool_inputs=tool_inputs)
    except bioblend.ConnectionError:
        logging.error("Unexpected HTTP response (bioblend.ConnectionError) when running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
    return None
def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep=0):
    """
    Run a tool, then download the first dataset listed in its outputs

    :param instance: galaxy instance client
    :param tool_id: ID of the tool to run (forwarded to run_tool)
    :param history_id: ID of the history the tool is run in (forwarded to run_tool)
    :param tool_inputs: inputs handed to the tool (forwarded to run_tool)
    :param time_sleep: seconds to sleep between the tool run and the download;
        None skips the sleep
    :return: result of instance.datasets.download_dataset for the first output
    """
    run_result = run_tool(instance, tool_id, history_id, tool_inputs)
    if time_sleep is not None:
        time.sleep(time_sleep)
    # NOTE: run_tool returns None after a bioblend.ConnectionError; in that
    # case the lookup below raises TypeError
    first_output = run_result["outputs"][0]
    return instance.datasets.download_dataset(dataset_id=first_output["id"])
def install_repository_revision(current_version, toolshed_dict, version_to_install, changeset_revision, instance):
    """
    Install a changeset revision of a toolshed repository unless the wanted version is already there

    :param current_version: version currently installed
    :param version_to_install: wanted version; nothing is done when it equals current_version
    :param toolshed_dict: repository description, read for its "name", "owner"
        and "tool_shed" keys
    :param changeset_revision: changeset revision to install
    :param instance: galaxy instance client
    :return: None
    """
    if current_version == version_to_install:
        # Wanted version already installed
        return
    repo_name = toolshed_dict["name"]
    repo_owner = toolshed_dict["owner"]
    shed_url = "https://" + toolshed_dict["tool_shed"]
    logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, repo_name))
    install_options = {
        "install_tool_dependencies": True,
        "install_repository_dependencies": False,
        "install_resolver_dependencies": True,
    }
    instance.toolshed.install_repository_revision(tool_shed_url=shed_url,
                                                  name=repo_name,
                                                  owner=repo_owner,
                                                  changeset_revision=changeset_revision,
                                                  **install_options)
def create_org_param_dict_from_constants():
"""
Create a dictionary of variables containing the keys needed to render the organisms.yml.j2 (NOT the values)
......
#!/usr/bin/python
# -*- coding: utf-8 -*-
import logging
import sys
import os
import subprocess
import time
import bioblend
from bioblend import galaxy
import utilities
def get_galaxy_instance(instance_url, email, password):
    """
    Build a client for a galaxy instance and test the connection

    The connection is tested by requesting the histories. If that request
    raises bioblend.ConnectionError, a critical message is logged and the
    process exits with a non-zero status.

    :param instance_url: URL of the galaxy instance
    :param email: email of the account used to connect
    :param password: password of the account used to connect
    :return: the connected galaxy instance
    """
    logging.info("Connecting to the galaxy instance (%s)" % instance_url)
    instance = galaxy.GalaxyInstance(url=instance_url,
                                     email=email,
                                     password=password)
    try:
        instance.histories.get_histories()
    except bioblend.ConnectionError:
        logging.critical("Cannot connect to galaxy instance (%s) " % instance_url)
        # A bare sys.exit() reports success (status 0) to the calling process;
        # use a non-zero status so that the failure can be detected
        sys.exit(1)
    logging.info("Successfully connected to galaxy instance (%s) " % instance_url)
    return instance
def get_history(instance, history_name):
    """
    Return the ID of the history called history_name, creating the history if needed

    The first history returned by the instance for that name is used. When the
    lookup returns nothing, a new history is created with that name.

    :param instance: galaxy instance client
    :param history_name: name of the history to look up or create
    :return: the history ID
    """
    try:
        existing = instance.histories.get_histories(name=history_name)
        history_id = existing[0]["id"]
    except IndexError:
        # Empty lookup result: no history carries this name yet
        logging.info("Creating history for %s" % history_name)
        new_history = instance.histories.create_history(name=history_name)
        history_id = new_history["id"]
    logging.debug("History ID set for {0}: {1}".format(history_name, history_id))
    return history_id
def check_wf_param(full_name, params):
    """
    Stop the program when the workflow parameters of an organism fail the utilities.no_empty_items check

    :param full_name: organism name, only used in the log message
    :param params: workflow parameters, passed to utilities.no_empty_items
    :return: None when the check passes; otherwise the process exits
    """
    if utilities.no_empty_items(params):
        return
    logging.critical(
        "One empty workflow parameter found for organism {0}: {1})".format(full_name, params))
    # A bare sys.exit() reports success (status 0) to the calling process;
    # use a non-zero status so that the failure can be detected
    sys.exit(1)
def check_galaxy_state(network_name, script_dir):
    """
    Tell whether the galaxy services of the current species are up

    Runs "supervisorctl status galaxy:" for the "{network_name}_galaxy" container
    through the serexec script found in script_dir, then looks for the web
    process and the two handlers in the captured standard output.

    :param network_name: prefix of the galaxy container name
    :param script_dir: directory that contains the serexec script
    :return: 1 if galaxy_web, handler0 and handler1 are all reported RUNNING, 0 otherwise
    """
    serexec_path = "%s/serexec" % script_dir
    # Make sure the serexec script of the repo is executable
    try:
        os.chmod(serexec_path, 0o0755)
    except PermissionError:
        logging.warning("serexec permissions incorrect in %s" % script_dir)
    command = [serexec_path, "{0}_galaxy".format(network_name), "supervisorctl", "status", "galaxy:"]
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    status_report = str(completed.stdout)
    # NOTE: plain substring match, so the spacing inside each marker has to be
    # exactly the spacing found in the command output
    expected_markers = (
        "galaxy:galaxy_web RUNNING",
        "galaxy:handler0 RUNNING",
        "galaxy:handler1 RUNNING",
    )
    if all(marker in status_report for marker in expected_markers):
        return 1
    return 0
def get_species_history_id(instance, full_name):
    """
    Look up the history of a species in its galaxy instance

    :param instance: galaxy instance client to query
    :param full_name: species name; its string form is used as the history name
    :return: a two-item list: the ID of the first history returned for that
        name, and the result of show_history for that ID
    """
    matching_histories = instance.histories.get_histories(name=str(full_name))
    # An empty lookup result raises IndexError here
    first_id = matching_histories[0]["id"]
    return [first_id, instance.histories.show_history(history_id=first_id)]
def run_tool(instance, tool_id, history_id, tool_inputs):
    """
    Run a tool in a history of the galaxy instance

    :param instance: galaxy instance client
    :param tool_id: ID of the tool to run
    :param history_id: ID of the history the tool is run in
    :param tool_inputs: inputs handed to the tool
    :return: whatever instance.tools.run_tool returns, or None when the call
        raised bioblend.ConnectionError (the error is logged, not re-raised)
    """
    logging.debug("Running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
    try:
        return instance.tools.run_tool(tool_id=tool_id,
                                       history_id=history_id,
                                       tool_inputs=tool_inputs)
    except bioblend.ConnectionError:
        logging.error("Unexpected HTTP response (bioblend.ConnectionError) when running tool {0} with tool inputs: {1}".format(tool_id, tool_inputs))
    return None
def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep=0):
    """
    Run a tool, then download the first dataset listed in its outputs

    :param instance: galaxy instance client
    :param tool_id: ID of the tool to run (forwarded to run_tool)
    :param history_id: ID of the history the tool is run in (forwarded to run_tool)
    :param tool_inputs: inputs handed to the tool (forwarded to run_tool)
    :param time_sleep: seconds to sleep between the tool run and the download;
        None skips the sleep
    :return: result of instance.datasets.download_dataset for the first output
    """
    run_result = run_tool(instance, tool_id, history_id, tool_inputs)
    if time_sleep is not None:
        time.sleep(time_sleep)
    # NOTE: run_tool returns None after a bioblend.ConnectionError; in that
    # case the lookup below raises TypeError
    first_output = run_result["outputs"][0]
    return instance.datasets.download_dataset(dataset_id=first_output["id"])
def install_repository_revision(instance, tool_id, version, changeset_revision):
    """
    Install a changeset revision of a tool's repository unless the tool already has the wanted version

    The installed version and the toolshed repository description are read
    from instance.tools.show_tool(tool_id).

    :param instance: galaxy instance client
    :param tool_id: ID of the tool to check
    :param version: wanted version; nothing is installed when the tool already reports it
    :param changeset_revision: changeset revision to install
    :return: None
    """
    tool_info = instance.tools.show_tool(tool_id)
    installed_version = tool_info["version"]
    # Read before the version check: a tool without repository info raises KeyError
    repository = tool_info["tool_shed_repository"]
    if installed_version == version:
        # Wanted version already installed
        return
    repo_name = repository["name"]
    repo_owner = repository["owner"]
    shed_url = "https://" + repository["tool_shed"]
    logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, repo_name))
    install_options = {
        "install_tool_dependencies": True,
        "install_repository_dependencies": False,
        "install_resolver_dependencies": True,
    }
    instance.toolshed.install_repository_revision(tool_shed_url=shed_url,
                                                  name=repo_name,
                                                  owner=repo_owner,
                                                  changeset_revision=changeset_revision,
                                                  **install_options)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment