From 369d4004fe70f1eaed86150ff9bc939c03e00c22 Mon Sep 17 00:00:00 2001 From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr> Date: Wed, 19 May 2021 17:38:15 +0200 Subject: [PATCH] Fix for tool tripal_analysis_sync (cached ids issue in chado db), organism tripal picture changes --- constants.py | 2 +- gga_init.py | 43 ++++++++++------ run_workflow_phaeoexplorer.py | 83 ++++++++++++++++++------------- speciesData.py | 4 +- templates/gspecies_compose.yml.j2 | 6 +-- 5 files changed, 83 insertions(+), 55 deletions(-) diff --git a/constants.py b/constants.py index 8e2fcfc..70724c3 100644 --- a/constants.py +++ b/constants.py @@ -7,7 +7,7 @@ ORG_PARAM_DESC_SEX = "sex" ORG_PARAM_DESC_STRAIN = "strain" ORG_PARAM_DESC_COMMON_NAME = "common_name" ORG_PARAM_DESC_ORIGIN = "origin" -ORG_PARAM_DESC_PICTURE_PATH = "tripal_species_picture" +ORG_PARAM_DESC_PICTURE_PATH = "picture_path" ORG_PARAM_DESC_MAIN_SPECIES = "main_species" ORG_PARAM_DATA = "data" ORG_PARAM_DATA_GENOME_PATH = "genome_path" diff --git a/gga_init.py b/gga_init.py index 497fb1f..46d8450 100755 --- a/gga_init.py +++ b/gga_init.py @@ -77,19 +77,32 @@ class DeploySpeciesStack(speciesData.SpeciesData): self.config.pop(constants.CONF_TRIPAL_BANNER_PATH, None) # Copy the organism picture for tripal if one was specified in the input species - if self.tripal_species_picture is not None: - if self.tripal_species_picture.endswith("png"): - species_picture_dest_path = os.path.join(self.species_dir, os.path.abspath("species.png")) - try: - os.symlink(os.path.abspath(self.tripal_species_picture), species_picture_dest_path) - 
except FileNotFoundError: - logging.info("Specified organism picture ({0}) not found for {1}".format(self.tripal_species_picture, self.genus_upper + " " + self.species)) + if self.picture_path is not None: + if os.path.isfile(self.picture_path): + picture_path_basename = os.path.basename(self.picture_path) + picture_path_filename, picture_path_extension = os.path.splitext(picture_path_basename) + if picture_path_extension == ".png" or picture_path_extension == ".jpg": + picture_dest_name = "species%s" % picture_path_extension + picture_dest_path = os.path.join(self.species_dir, picture_dest_name) + shutil.copy(self.picture_path, picture_dest_path) + else: + logging.error("Specified organism picture has wrong extension (must be '.png' or '.jpg'): {0}".format(self.picture_path)) + else: + logging.error("Specified organism picture not found {0} for {1}".format(self.picture_path, self.genus_uppercase + " " + self.species)) + + # if self.picture_path is not None: + # if self.picture_path.endswith("png"): + # species_picture_dest_path = os.path.join(self.species_dir, os.path.abspath("species.png")) + # try: + # os.symlink(os.path.abspath(self.picture_path), species_picture_dest_path) + # except FileNotFoundError: + # logging.info("Specified organism picture ({0}) not found for {1}".format(self.picture_path, self.genus_uppercase + " " + self.species)) + # elif self.picture_path.endswith("jpg"): + # species_picture_dest_path = os.path.join(self.species_dir, os.path.abspath("species.jpg")) + # try: + # os.symlink(os.path.abspath(self.picture_path), species_picture_dest_path) + # except FileNotFoundError: + # logging.info("Specified organism picture ({0}) not found for {1}".format(self.picture_path, self.genus_uppercase + " " + self.species)) # Create nginx dirs and write/re-write nginx conf make_dirs(dir_paths_li=["./nginx", "./nginx/conf"]) @@ -128,12 +141,12 @@ class DeploySpeciesStack(speciesData.SpeciesData): # Create input file vars dict input_vars = {} - if 
self.tripal_species_picture is not None: + if self.picture_path is not None: input_vars = {"genus": self.genus_lowercase, "Genus": self.genus_uppercase, "species": self.species, "genus_species": self.genus_species, "genus_species_strain_sex": self.species_folder_name, "genus_species_sex": "{0}_{1}_{2}".format(self.genus_lowercase, self.species.lower(), self.sex), "strain": self.strain, "sex": self.sex, "Genus_species": self.genus_species[0].upper() + self.genus_species[1:], - "blast": self.blast, "tripal_species_picture": self.tripal_species_picture} + "blast": self.blast, "picture_path": self.picture_path} else: input_vars = {"genus": self.genus_lowercase, "Genus": self.genus_uppercase, "species": self.species, "genus_species": self.genus_species, "genus_species_strain_sex": self.species_folder_name, diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py index 07511eb..c526656 100755 --- a/run_workflow_phaeoexplorer.py +++ b/run_workflow_phaeoexplorer.py @@ -223,10 +223,10 @@ class RunWorkflow(speciesData.SpeciesData): install_resolver_dependencies=True) - sync_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1") - sync_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1") + sync_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0") + sync_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0") - if sync_analysis_tool["version"] != "3.2.1": + if sync_analysis_tool["version"] != "3.2.1.0": toolshed_dict = sync_analysis_tool["tool_shed_repository"] logging.warning("Changeset for %s is not installed" % toolshed_dict["name"]) changeset_revision = "f487ff676088" @@ -241,10 +241,10 @@ class RunWorkflow(speciesData.SpeciesData): install_repository_dependencies=False, 
install_resolver_dependencies=True) - if sync_organism_tool["version"] != "3.2.1": + if sync_organism_tool["version"] != "3.2.1.0": toolshed_dict = sync_organism_tool["tool_shed_repository"] logging.warning("Changeset for %s is not installed" % toolshed_dict["name"]) - changeset_revision = "f487ff676088" + changeset_revision = "afd5d92745fb" name = toolshed_dict["name"] owner = toolshed_dict["owner"] toolshed = "https://" + toolshed_dict["tool_shed"] @@ -257,16 +257,16 @@ class RunWorkflow(speciesData.SpeciesData): install_resolver_dependencies=True) - logging.info("Individual tools versions and changesets validated") + logging.info("Success: individual tools versions and changesets validated") def tripal_synchronize_organism_analyses(self): """ """ - show_tool_tripal_sync = self.instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1", io_details=True) - org_sync = "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1" - org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1", + show_tool_tripal_sync = self.instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0", io_details=True) + org_sync = "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0" + org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0", history_id=self.history_id, tool_inputs={"organism_id": "2"}) org_sync_job_out = org_sync["outputs"] @@ -336,7 +336,9 @@ class RunWorkflow(speciesData.SpeciesData): org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools # Synchronize newly added organism in Tripal - org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1", + logging.info("Synchronizing organism %s in Tripal" 
% self.full_name) + time.sleep(60) + org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0", history_id=self.history_id, tool_inputs={"organism_id": org_id}) @@ -377,12 +379,14 @@ class RunWorkflow(speciesData.SpeciesData): analysis_job_out_id = analysis_outputs[0]["id"] analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) analysis_output = json.loads(analysis_json_output) - ogs_analysis_id = str(analysis_output["analysis_id"]) - - # Synchronize OGS analysis in Tripal - ogs_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1", - history_id=self.history_id, - tool_inputs={"analysis_id": ogs_analysis_id}) + ogs_analysis_id = str(analysis_output["analysis_id"]) + + # Synchronize OGS analysis in Tripal + logging.info("Synchronizing OGS%s analysis in Tripal" % self.ogs_version) + time.sleep(60) + ogs_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + history_id=self.history_id, + tool_inputs={"analysis_id": ogs_analysis_id}) if genome_analysis_id is None: add_genome_analysis_job = self.instance.tools.run_tool( @@ -398,11 +402,13 @@ class RunWorkflow(speciesData.SpeciesData): analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) analysis_output = json.loads(analysis_json_output) genome_analysis_id = str(analysis_output["analysis_id"]) - - # Synchronize genome analysis in Tripal - genome_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1", - history_id=self.history_id, - tool_inputs={"analysis_id": genome_analysis_id}) + + # Synchronize genome analysis in Tripal + logging.info("Synchronizing genome v%s analysis in Tripal" % self.genome_version) + time.sleep(60) + genome_analysis_sync = 
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + history_id=self.history_id, + tool_inputs={"analysis_id": genome_analysis_id}) # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) return({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) @@ -471,6 +477,13 @@ class RunWorkflow(speciesData.SpeciesData): org_output = json.loads(org_json_output) org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools + # Synchronize newly added organism in Tripal + logging.info("Synchronizing organism %s in Tripal" % self.full_name) + time.sleep(60) + org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0", + history_id=self.history_id, + tool_inputs={"organism_id": org_id}) + get_analyses = self.instance.tools.run_tool( tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version, @@ -506,9 +519,12 @@ class RunWorkflow(speciesData.SpeciesData): analysis_output = json.loads(analysis_json_output) blastp_analysis_id = str(analysis_output["analysis_id"]) - blastp_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1", - history_id=self.history_id, - tool_inputs={"analysis_id": blastp_analysis_id}) + # Synchronize blastp analysis + logging.info("Synchronizing Diamong blastp OGS%s analysis in Tripal" % self.ogs_version) + time.sleep(60) + blastp_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + history_id=self.history_id, + tool_inputs={"analysis_id": blastp_analysis_id}) # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) return({"org_id": org_id, 
"blastp_analysis_id": blastp_analysis_id}) @@ -824,9 +840,6 @@ def create_sp_workflow_dict(sp_dict, main_dir, config, workflow_type): species=run_workflow_for_current_organism.species, script_dir=run_workflow_for_current_organism.script_dir): - # Starting - logging.info("run_workflow.py called for %s" % run_workflow_for_current_organism.full_name) - # Setting some of the instance attributes run_workflow_for_current_organism.main_dir = main_dir run_workflow_for_current_organism.species_dir = os.path.join(run_workflow_for_current_organism.main_dir, @@ -1065,7 +1078,6 @@ if __name__ == "__main__": else: all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value - for k, v in all_sp_workflow_dict.items(): if len(list(v.keys())) == 1: logging.info("Input organism %s: 1 species detected in input dictionary" % k) @@ -1088,7 +1100,10 @@ if __name__ == "__main__": install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) organism_key_name = list(v.keys()) - org_dict = v[organisms_key_names[0]] + org_dict = v[organisms_key_name[0]] + + # print("\n") + # print(org_dict) history_id = org_dict["history_id"] @@ -1209,7 +1224,7 @@ if __name__ == "__main__": logging.warning("Error finding workflow %s" % workflow_name) # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + # instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) @@ -1445,7 +1460,7 @@ if __name__ == "__main__": logging.warning("Error finding workflow %s" % workflow_name) # Finally, 
invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + # instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) @@ -1490,7 +1505,7 @@ if __name__ == "__main__": install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) organism_key_name = list(v.keys()) - org_dict = v[organisms_key_names[0]] + org_dict = v[organisms_key_name[0]] history_id = org_dict["history_id"] @@ -1560,7 +1575,7 @@ if __name__ == "__main__": logging.warning("Error finding workflow %s" % workflow_name) # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + # instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) @@ -1714,7 +1729,7 @@ if __name__ == "__main__": logging.warning("Error finding workflow %s" % workflow_name) # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + # instance.workflows.invoke_workflow(workflow_id=workflow_id, 
history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) diff --git a/speciesData.py b/speciesData.py index dcc26a0..49ffed5 100755 --- a/speciesData.py +++ b/speciesData.py @@ -89,9 +89,9 @@ class SpeciesData: self.blast = "0" if constants.ORG_PARAM_DESC_PICTURE_PATH in parameters_dictionary_description.keys(): - self.tripal_species_picture = parameters_dictionary_description[constants.ORG_PARAM_DESC_PICTURE_PATH] + self.picture_path = parameters_dictionary_description[constants.ORG_PARAM_DESC_PICTURE_PATH] else: - self.tripal_species_picture = None + self.picture_path = None self.genus_lowercase = self.genus[0].lower() + self.genus[1:] self.genus_uppercase = self.genus[0].upper() + self.genus[1:] diff --git a/templates/gspecies_compose.yml.j2 b/templates/gspecies_compose.yml.j2 index 9b727ad..9b8c3f6 100644 --- a/templates/gspecies_compose.yml.j2 +++ b/templates/gspecies_compose.yml.j2 @@ -42,11 +42,11 @@ services: - ./banner.png:/var/www/html/banner.png:ro {% endif %} #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... - {% if 'tripal_species_picture' is defined %} - {% if 'png' in tripal_species_picture %} + {% if 'picture_path' is defined %} + {% if 'png' in picture_path %} - ./species.png:/var/www/html/species.png:ro {% endif %} - {% if 'jpg' in tripal_species_picture %} + {% if 'jpg' in picture_path %} - ./species.jpg:/var/www/html/species.jpg:ro {% endif %} {% endif %} -- GitLab