Skip to content
Snippets Groups Projects
Commit d44121bf authored by Loraine Gueguen's avatar Loraine Gueguen
Browse files

add phaeoexplorer_constants.py. Add constants. Add SpeciesData attributes....

add phaeoexplorer_constants.py. Add constants. Add SpeciesData attributes. Update run_tool_and_download_single_output_dataset(). Move install_repository_revision() to utilities.py. Refactor run_wf (WIP)
parent 2dce0f69
No related branches found
No related tags found
2 merge requests!24Run wf,!18Release v2.1.0
This commit is part of merge request !18. Comments created here will be created in the context of that merge request.
......@@ -53,3 +53,5 @@ DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_
HOST_DATA_DIR='src_data'
CONTAINER_DATA_DIR_ROOT='/project_data'
GALAXY_LIBRARY_NAME = 'Project Data'
GALAXY_LIBRARY_DESC = 'Data for current genome annotation project'
......@@ -69,12 +69,11 @@ class LoadData(speciesData.SpeciesData):
"""
logging.debug("Getting 'Homo sapiens' ID in chado database")
get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(
get_sapiens_id_json_output = utilities.run_tool_and_download_single_output_dataset(
self.instance,
tool_id=constants.GET_ORGANISMS_TOOL, # If this version if not found, Galaxy will use the one that is found
history_id=self.history_id,
tool_inputs={"genus": "Homo", "species": "sapiens"})
get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output_dataset_id)
logging.info("Deleting Homo 'sapiens' in the instance's chado database")
try:
......@@ -114,7 +113,7 @@ class LoadData(speciesData.SpeciesData):
data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR)
instance = GalaxyInstance(url=self.instance_url,
gio = GalaxyInstance(url=self.instance_url,
email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
)
......@@ -129,14 +128,14 @@ class LoadData(speciesData.SpeciesData):
if folders:
# Delete pre-existing lib (probably created by a previous call)
existing = instance.libraries.get_previews(name='Project Data')
existing = gio.libraries.get_previews(name=constants.GALAXY_LIBRARY_NAME)
for lib in existing:
if not lib.deleted:
logging.info('Pre-existing "Project Data" library %s found, removing it' % lib.id)
instance.libraries.delete(lib.id)
# Log which pre-existing library is about to be removed (name and id).
logging.info('Pre-existing {0} library {1} found, removing it'.format(constants.GALAXY_LIBRARY_NAME, lib.id))
gio.libraries.delete(lib.id)
logging.info("Creating new 'Project Data' library")
prj_lib = instance.libraries.create('Project Data', 'Data for current genome annotation project')
logging.info("Creating new %s library" % constants.GALAXY_LIBRARY_NAME)
prj_lib = gio.libraries.create(constants.GALAXY_LIBRARY_NAME, constants.GALAXY_LIBRARY_DESC)
self.library_id = prj_lib.id # project data folder/library
logging.info("Library for {0}: {1}".format(self.full_name, self.library_id))
......@@ -280,7 +279,7 @@ class LoadData(speciesData.SpeciesData):
logging.info("Did not find metadata in %s " % meta_file)
return self.get_bam_label(dirname, bam_file)
def create_galaxy_instance(self):
def set_galaxy_instance(self):
"""
Test the connection to the galaxy instance for the current organism
Exit if we cannot connect to the instance
......@@ -364,18 +363,16 @@ if __name__ == "__main__":
# Parse the config yaml file
load_data_for_current_species.config = config
# Set the instance url attribute -- Does not work with localhost on scratch (ALB)
load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(
load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}/galaxy/".format(
load_data_for_current_species.config[constants.CONF_ALL_HTTP_PORT],
load_data_for_current_species.genus_lowercase,
load_data_for_current_species.species)
load_data_for_current_species.genus_species)
# Check the galaxy container state and proceed if the galaxy services are up and running
if utilities.check_galaxy_state(genus_lowercase=load_data_for_current_species.genus_lowercase,
species=load_data_for_current_species.species,
if utilities.check_galaxy_state(network_name=load_data_for_current_species.genus_species,
script_dir=load_data_for_current_species.script_dir):
# Create the Galaxy instance
load_data_for_current_species.instance = load_data_for_current_species.create_galaxy_instance()
load_data_for_current_species.instance = load_data_for_current_species.set_galaxy_instance()
# Load the datasets into a galaxy library
logging.info("Setting up library for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))
......
### Workflows
# Identifiers of the supported workflow types; WORKFLOW_VALID_TYPES lists all of them.
WORKFLOW_LOAD_FASTA_GFF_JBROWSE = "load_fasta_gff_jbrowse"
WORKFLOW_BLAST = "blast"
WORKFLOW_INTERPRO = "interpro"
WORKFLOW_VALID_TYPES = [WORKFLOW_LOAD_FASTA_GFF_JBROWSE, WORKFLOW_BLAST, WORKFLOW_INTERPRO]

### Galaxy tools
# Each tool is described by four constants:
#   <TOOL>_NAME               toolshed path of the tool, ending with "/"
#   <TOOL>_VERSION            version string of the tool
#   <TOOL>_ID                 <TOOL>_NAME + <TOOL>_VERSION
#   <TOOL>_CHANGESET_REVISION toolshed changeset revision for that tool
ADD_ORGANISM_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/"
ADD_ORGANISM_TOOL_VERSION = "2.3.4+galaxy0"
ADD_ORGANISM_TOOL_ID = ADD_ORGANISM_TOOL_NAME + ADD_ORGANISM_TOOL_VERSION
ADD_ORGANISM_TOOL_CHANGESET_REVISION = "1f12b9650028"

ADD_ANALYSIS_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/"
ADD_ANALYSIS_TOOL_VERSION = "2.3.4+galaxy0"
ADD_ANALYSIS_TOOL_ID = ADD_ANALYSIS_TOOL_NAME + ADD_ANALYSIS_TOOL_VERSION
ADD_ANALYSIS_TOOL_CHANGESET_REVISION = "10b2b1c70e69"
ADD_ANALYSIS_TOOL_PARAM_PROGRAM = "Performed by Genoscope"
ADD_ANALYSIS_TOOL_PARAM_DATE = "2021-02-24"

GET_ORGANISMS_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/"
GET_ORGANISMS_TOOL_VERSION = "2.3.4+galaxy0"
GET_ORGANISMS_TOOL_ID = GET_ORGANISMS_TOOL_NAME + GET_ORGANISMS_TOOL_VERSION
GET_ORGANISMS_TOOL_CHANGESET_REVISION = "831229e6cda2"

GET_ANALYSES_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/"
GET_ANALYSES_TOOL_VERSION = "2.3.4+galaxy0"
GET_ANALYSES_TOOL_ID = GET_ANALYSES_TOOL_NAME + GET_ANALYSES_TOOL_VERSION
GET_ANALYSES_TOOL_CHANGESET_REVISION = "a867923f555e"

ANALYSIS_SYNC_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/"
ANALYSIS_SYNC_TOOL_VERSION = "3.2.1.0"
ANALYSIS_SYNC_TOOL_ID = ANALYSIS_SYNC_TOOL_NAME + ANALYSIS_SYNC_TOOL_VERSION
ANALYSIS_SYNC_TOOL_CHANGESET_REVISION = "f487ff676088"

ORGANISM_SYNC_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/"
ORGANISM_SYNC_TOOL_VERSION = "3.2.1.0"
ORGANISM_SYNC_TOOL_ID = ORGANISM_SYNC_TOOL_NAME + ORGANISM_SYNC_TOOL_VERSION
ORGANISM_SYNC_TOOL_CHANGESET_REVISION = "afd5d92745fb"
This diff is collapsed.
......@@ -96,8 +96,12 @@ class SpeciesData:
self.genus_lowercase = self.genus.lower()
self.species_lowercase = self.species.lower()
self.strain_lowercase = self.strain.lower()
self.sex_lowercase = self.sex.lower()
self.genus_uppercase = self.genus[0].upper() + self.genus_lowercase[1:]
self.genus_species = "{0}_{1}".format(self.genus_lowercase, self.species_lowercase)
self.strain_sex = "{0}_{1}".format(self.strain_lowercase, self.sex_lowercase)
self.full_name = ' '.join(utilities.filter_empty_not_empty_items([self.genus_uppercase, self.species, self.strain, self.sex])["not_empty"])
self.full_name_lowercase = self.full_name.lower()
......
......@@ -8,6 +8,7 @@ import os
import subprocess
import bioblend
import constants
import time
def load_yaml(yaml_file):
......@@ -76,7 +77,7 @@ def filter_empty_not_empty_items(li):
return filtered_dict
def check_galaxy_state(genus_lowercase, species, script_dir):
def check_galaxy_state(network_name, script_dir):
"""
Read the logs of the galaxy container for the current species to check if the service is "ready"
......@@ -92,7 +93,7 @@ def check_galaxy_state(genus_lowercase, species, script_dir):
os.chmod("%s/serexec" % script_dir, 0o0755)
except PermissionError:
logging.warning("serexec permissions incorrect in %s" % script_dir)
galaxy_logs = subprocess.run(["%s/serexec" % script_dir, "{0}_{1}_galaxy".format(genus_lowercase, species),
galaxy_logs = subprocess.run(["%s/serexec" % script_dir, "{0}_galaxy".format(network_name),
"supervisorctl", "status", "galaxy:"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if "galaxy:galaxy_web RUNNING" in str(galaxy_logs.stdout) \
and "galaxy:handler0 RUNNING" in str(galaxy_logs.stdout) \
......@@ -199,12 +200,31 @@ def run_tool(instance, tool_id, history_id, tool_inputs):
return output_dict
def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep=None):
    """
    Run a Galaxy tool and download its first output dataset

    :param instance: Galaxy instance object; its ``datasets.download_dataset`` is used for the download
    :param tool_id: id of the tool to run (passed through to run_tool)
    :param history_id: id of the history to run the tool in (passed through to run_tool)
    :param tool_inputs: tool inputs (passed through to run_tool)
    :param time_sleep: optional number of seconds to wait between running the tool and
        downloading its output. Defaults to None (no wait) so that callers which do not
        pass it keep working
    :return: whatever ``instance.datasets.download_dataset`` returns for the first output
    """

    output_dict = run_tool(instance, tool_id, history_id, tool_inputs)

    # Optional pause before fetching the output dataset
    if time_sleep is not None:
        time.sleep(time_sleep)

    # Only the first entry of the tool's "outputs" list is considered
    single_output_dataset_id = output_dict["outputs"][0]["id"]
    dataset = instance.datasets.download_dataset(dataset_id=single_output_dataset_id)

    return dataset
def install_repository_revision(current_version, toolshed_dict, version_to_install, changeset_revision, instance):
    """
    Install a toolshed repository revision when the current version is not the wanted one

    Nothing is done when current_version equals version_to_install.

    :param current_version: version currently available
    :param toolshed_dict: dict providing the "name", "owner" and "tool_shed" keys of the repository
    :param version_to_install: wanted version
    :param changeset_revision: changeset revision to install
    :param instance: object whose ``toolshed.install_repository_revision`` performs the installation
    :return: None
    """

    # Guard clause: the wanted version is already there
    if current_version == version_to_install:
        return

    repo_name = toolshed_dict["name"]
    repo_owner = toolshed_dict["owner"]
    # The dict stores the toolshed host without scheme
    shed_url = "https://" + toolshed_dict["tool_shed"]

    logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, repo_name))

    instance.toolshed.install_repository_revision(
        tool_shed_url=shed_url,
        name=repo_name,
        owner=repo_owner,
        changeset_revision=changeset_revision,
        install_tool_dependencies=True,
        install_repository_dependencies=False,
        install_resolver_dependencies=True,
    )
def create_org_param_dict_from_constants():
"""
......@@ -234,7 +254,6 @@ def create_org_param_dict_from_constants():
org_param_dict["org_param_data_blastx_path"] = constants.ORG_PARAM_DATA_BLASTX_PATH
org_param_dict["org_param_data_genome_version"] = constants.ORG_PARAM_DATA_GENOME_VERSION
org_param_dict["org_param_data_ogs_version"] = constants.ORG_PARAM_DATA_OGS_VERSION
org_param_dict["org_param_data_performed_by"] = constants.ORG_PARAM_DATA_PERFORMED_BY
org_param_dict["org_param_services"] = constants.ORG_PARAM_SERVICES
org_param_dict["org_param_services_blast"] = constants.ORG_PARAM_SERVICES_BLAST
org_param_dict["org_param_services_go"] = constants.ORG_PARAM_SERVICES_GO
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment