From 49f5add2f9fb393db5d7436230cc0cf2536f0b57 Mon Sep 17 00:00:00 2001
From: Loraine Gueguen <loraine.gueguen@sb-roscoff.fr>
Date: Fri, 28 May 2021 23:19:47 +0200
Subject: [PATCH] Refactor run_wf (WIP)

---
 ...constants.py => phaeoexplorer_constants.py |  18 +-
 run_workflow_phaeoexplorer.py                 | 242 +++++++-----------
 2 files changed, 112 insertions(+), 148 deletions(-)
 rename phaoexplorer_constants.py => phaeoexplorer_constants.py (75%)

diff --git a/phaoexplorer_constants.py b/phaeoexplorer_constants.py
similarity index 75%
rename from phaoexplorer_constants.py
rename to phaeoexplorer_constants.py
index 1cecdb1..2773e2b 100644
--- a/phaoexplorer_constants.py
+++ b/phaeoexplorer_constants.py
@@ -6,11 +6,23 @@ import constants
 ### Workflows
 
 WORKFLOWS_PATH = "workflows_phaeoexplorer/"
-WORKFLOW_LOAD_FASTA_GFF_JBROWSE = "load_fasta_gff_jbrowse"
-WORKFLOW_LOAD_FASTA_GFF_JBROWSE_FILE_1_ORG = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga"
+WF_LOAD_GFF_JB = "load_fasta_gff_jbrowse"
+
+WF_LOAD_GFF_JB_1_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga"
+WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME = "0"
+WF_LOAD_GFF_JB_1_ORG_INPUT_GFF = "1"
+WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS = "2"
+WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA = "3"
+WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE = "4"
+WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF = "5"
+WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER = "6"
+WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC = "7"
+WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS = "8"
+WF_LOAD_GFF_JB_1_ORG_STEP_INDEX = "9"
+
 WORKFLOW_BLAST = "blast"
 WORKFLOW_INTERPRO = "interpro"
-WORKFLOW_VALID_TYPES = [WORKFLOW_LOAD_FASTA_GFF_JBROWSE, WORKFLOW_BLAST, WORKFLOW_INTERPRO]
+WORKFLOW_VALID_TYPES = [WF_LOAD_GFF_JB, WORKFLOW_BLAST, WORKFLOW_INTERPRO]
 
 ### Galaxy tools
 
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index c03b917..60c0ed3 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -16,7 +16,7 @@ from bioblend import galaxy
 import utilities
 import speciesData
 import constants
-import phaoexplorer_constants
+import phaeoexplorer_constants
 
 """ 
 gga_init.py
@@ -27,7 +27,9 @@ Usage: $ python3 gga_init.py -i input_example.yml --config [config file] [OPTION
 class StrainWorkflowParam:
 
     def __init__(self, genus_species, strain_sex, genus, species, sex, strain, full_name, species_folder_name, org_id,
-                 genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, hda_ids, history_id,
-                 instance, instance_url, email, password):
+                 genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, history_id,
+                 instance, instance_url, email, password,
+                 genome_hda_id=None, gff_hda_id=None, transcripts_hda_id=None, proteins_hda_id=None,
+                 blastp_hda_id=None, blastx_hda_id=None, interproscan_hda_id=None):
         self.genus_species = genus_species
         self.strain_sex = strain_sex
@@ -42,7 +42,13 @@ class StrainWorkflowParam:
         self.ogs_analysis_id = ogs_analysis_id
         self.blastp_analysis_id = blastp_analysis_id
         self.interpro_analysis_id = interpro_analysis_id
-        self.hda_ids = hda_ids
+        self.genome_hda_id = genome_hda_id
+        self.gff_hda_id = gff_hda_id
+        self.transcripts_hda_id = transcripts_hda_id
+        self.proteins_hda_id = proteins_hda_id
+        self.blastp_hda_id = blastp_hda_id
+        self.blastx_hda_id = blastx_hda_id
+        self.interproscan_hda_id = interproscan_hda_id
         self.history_id = history_id
         self.instance = instance
         self.instance_url = instance_url
@@ -58,21 +57,21 @@ class StrainWorkflowParam:
     def check_param_for_workflow_load_fasta_gff_jbrowse(self, params):
         params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
                   self.species_folder_name, self.org_id,
-                  self.genome_analysis_id, self.ogs_analysis_id, self.hda_ids, self.history_id,
+                  self.genome_analysis_id, self.ogs_analysis_id, self.history_id,
                   self.instance, self.instance_url, self.email, self.password]
         self.check_param(params)
 
     def check_param_for_workflow_blast(self, params):
         params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
                   self.species_folder_name, self.org_id,
-                  self.blastp_analysis_id, self.hda_ids, self.history_id,
+                  self.blastp_analysis_id, self.history_id,
                   self.instance, self.instance_url, self.email, self.password]
         self.check_param(params)
 
     def check_param_for_workflow_interpro(self, params):
         params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
                   self.species_folder_name, self.org_id,
-                  self.interpro_analysis_id, self.hda_ids, self.history_id,
+                  self.interpro_analysis_id, self.history_id,
                   self.instance, self.instance_url, self.email, self.password]
         self.check_param(params)
 
@@ -111,6 +110,14 @@ class RunWorkflow(speciesData.SpeciesData):
         self.ogs_analysis_programversion = "OGS{0}".format(self.ogs_version)
         self.ogs_analysis_sourcename = self.full_name
 
+        self.genome_hda_id = None
+        self.gff_hda_id = None
+        self.transcripts_hda_id = None
+        self.proteins_hda_id = None
+        self.blastp_hda_id = None
+        self.blastx_hda_id = None
+        self.interproscan_hda_id = None
+
     def set_history(self):
         """
         Create or set the working history to the current species one
@@ -165,50 +172,50 @@ class RunWorkflow(speciesData.SpeciesData):
 
         # Verify that the add_organism and add_analysis versions are correct in the instance
 
-        add_organism_tool = self.instance.tools.show_tool(phaoexplorer_constants.ADD_ORGANISM_TOOL_ID)
-        add_analysis_tool = self.instance.tools.show_tool(phaoexplorer_constants.ADD_ANALYSIS_TOOL_ID)
-        get_organisms_tool = self.instance.tools.show_tool(phaoexplorer_constants.GET_ORGANISMS_TOOL_ID)
-        get_analyses_tool = self.instance.tools.show_tool(phaoexplorer_constants.GET_ANALYSES_TOOL_ID)
-        analysis_sync_tool = self.instance.tools.show_tool(phaoexplorer_constants.ANALYSIS_SYNC_TOOL_ID)
-        organism_sync_tool = self.instance.tools.show_tool(phaoexplorer_constants.ORGANISM_SYNC_TOOL_ID)
+        add_organism_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID)
+        add_analysis_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID)
+        get_organisms_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID)
+        get_analyses_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ANALYSES_TOOL_ID)
+        analysis_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID)
+        organism_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID)
 
         # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
         # except for workflows (.ga) that already contain the changeset revisions inside the steps ids
 
         utilities.install_repository_revision(current_version=get_organisms_tool["version"],
                                               toolshed_dict=get_organisms_tool["tool_shed_repository"],
-                                              version_to_install=phaoexplorer_constants.GET_ORGANISMS_TOOL_VERSION,
-                                              changeset_revision=phaoexplorer_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeoexplorer_constants.GET_ORGANISMS_TOOL_VERSION,
+                                              changeset_revision=phaeoexplorer_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=get_analyses_tool["version"],
                                               toolshed_dict=get_analyses_tool["tool_shed_repository"],
-                                              version_to_install=phaoexplorer_constants.GET_ANALYSES_TOOL_VERSION,
-                                              changeset_revision=phaoexplorer_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeoexplorer_constants.GET_ANALYSES_TOOL_VERSION,
+                                              changeset_revision=phaeoexplorer_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=add_organism_tool["version"],
                                               toolshed_dict=add_organism_tool["tool_shed_repository"],
-                                              version_to_install=phaoexplorer_constants.ADD_ORGANISM_TOOL_VERSION,
-                                              changeset_revision=phaoexplorer_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeoexplorer_constants.ADD_ORGANISM_TOOL_VERSION,
+                                              changeset_revision=phaeoexplorer_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=add_analysis_tool["version"],
                                               toolshed_dict=add_analysis_tool["tool_shed_repository"],
-                                              version_to_install=phaoexplorer_constants.ADD_ANALYSIS_TOOL_VERSION,
-                                              changeset_revision=phaoexplorer_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_VERSION,
+                                              changeset_revision=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=analysis_sync_tool["version"],
                                               toolshed_dict=analysis_sync_tool["tool_shed_repository"],
-                                              version_to_install=phaoexplorer_constants.ANALYSIS_SYNC_TOOL_VERSION,
-                                              changeset_revision=phaoexplorer_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_VERSION,
+                                              changeset_revision=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=organism_sync_tool["version"],
                                               toolshed_dict=organism_sync_tool["tool_shed_repository"],
-                                              version_to_install=phaoexplorer_constants.ORGANISM_SYNC_TOOL_VERSION,
-                                              changeset_revision=phaoexplorer_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_VERSION,
+                                              changeset_revision=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         logging.info("Success: individual tools versions and changesets validated")
@@ -217,13 +224,13 @@ class RunWorkflow(speciesData.SpeciesData):
 
         add_analysis_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
             instance=self.instance,
-            tool_id=phaoexplorer_constants.ADD_ANALYSIS_TOOL_ID,
+            tool_id=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={"name": name,
-                         "program": phaoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM,
+                         "program": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM,
                          "programversion": programversion,
                          "sourcename": sourcename,
-                         "date_executed": phaoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_DATE})
+                         "date_executed": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_DATE})
         analysis_dict = json.loads(add_analysis_tool_dataset)
         analysis_id = str(analysis_dict["analysis_id"])
 
@@ -234,7 +241,7 @@ class RunWorkflow(speciesData.SpeciesData):
         time.sleep(60)
         utilities.run_tool(
             instance=self.instance,
-            tool_id=phaoexplorer_constants.ANALYSIS_SYNC_TOOL_ID,
+            tool_id=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={"analysis_id": analysis_id})
 
@@ -242,7 +249,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         get_organisms_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
             instance=self.instance,
-            tool_id=phaoexplorer_constants.GET_ORGANISMS_TOOL_ID,
+            tool_id=phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={},
             time_sleep=10
@@ -259,7 +266,7 @@ class RunWorkflow(speciesData.SpeciesData):
         if org_id is None:
             add_organism_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
                 instance=self.instance,
-                tool_id=phaoexplorer_constants.ADD_ORGANISM_TOOL_ID,
+                tool_id=phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID,
                 history_id=self.history_id,
                 tool_inputs={"abbr": self.abbreviation,
                              "genus": self.genus_uppercase,
@@ -273,7 +280,7 @@ class RunWorkflow(speciesData.SpeciesData):
         time.sleep(60)
         utilities.run_tool(
             instance=self.instance,
-            tool_id=phaoexplorer_constants.ORGANISM_SYNC_TOOL_ID,
+            tool_id=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={"organism_id": org_id})
 
@@ -283,7 +290,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         get_analyses_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
             instance=self.instance,
-            tool_id=phaoexplorer_constants.GET_ANALYSES_TOOL_ID,
+            tool_id=phaeoexplorer_constants.GET_ANALYSES_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={},
             time_sleep=10
@@ -672,77 +679,13 @@ class RunWorkflow(speciesData.SpeciesData):
             except Exception as exc:
                 logging.debug("blastp file not found in library (history: {0})".format(self.history_id))
 
-        hda_ids = {"genome_hda_id": genome_hda_id,
-                   "gff_hda_id": gff_hda_id,
-                   "transcripts_hda_id": transcripts_hda_id,
-                   "proteins_hda_id": proteins_hda_id,
-                   "blastp_hda_id": blastp_hda_id,
-                   "blastx_hda_id": blastx_hda_id,
-                   "interproscan_hda_id": interproscan_hda_id}
-
-        # logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name)
-        # logging.debug(hda_ids)
-
-        # Return a dict made of the hda ids
-        return hda_ids
-
-def run_workflow(workflow_path, workflow_parameters, datamap, config, input_species_number):
-    """
-    Run a workflow in galaxy
-    Requires the .ga file to be loaded as a dictionary (optionally could be uploaded as a raw file)
-
-    :param workflow_name:
-    :param workflow_parameters:
-    :param datamap:
-    :return:
-    """
-
-    logging.info("Importing workflow %s" % str(workflow_path))
-
-    # Load the workflow file (.ga) in a buffer
-    with open(workflow_path, 'r') as ga_in_file:
-
-        # Then store the decoded json dictionary
-        workflow_dict = json.load(ga_in_file)
-
-        # In case of the Jbrowse workflow, we unfortunately have to manually edit the parameters instead of setting them
-        # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
-        # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-        if "jbrowse_menu_url" not in config.keys():
-            jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}")
-        else:
-            jbrowse_menu_url = config["jbrowse_menu_url"]
-        if workflow_name == "Jbrowse":
-            workflow_dict["steps"]["2"]["tool_state"] = workflow_dict["steps"]["2"]["tool_state"].replace("__MENU_URL__", jbrowse_menu_url)
-            # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
-            # in galaxy --> define a naming method for these workflows
-            workflow_dict["steps"]["3"]["tool_state"] = workflow_dict["steps"]["3"]["tool_state"].replace("__FULL_NAME__", self.full_name).replace("__UNIQUE_ID__", self.species_folder_name)
-
-        # Import the workflow in galaxy as a dict
-        self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-
-        # Get its attributes
-        workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name)
-        # Then get its ID (required to invoke the workflow)
-        workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-        show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
-        # Check if the workflow is found
-        try:
-            logging.debug("Workflow ID: %s" % workflow_id)
-        except bioblend.ConnectionError:
-            logging.warning("Error retrieving workflow attributes for workflow %s" % workflow_name)
-
-        # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-        self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
-                                                history_id=self.history_id,
-                                                params=workflow_parameters,
-                                                inputs=datamap,
-                                                allow_tool_state_corrections=True)
-
-        logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, self.instance_url))
-
-
-
+        self.genome_hda_id = genome_hda_id
+        self.gff_hda_id = gff_hda_id
+        self.transcripts_hda_id = transcripts_hda_id
+        self.proteins_hda_id = proteins_hda_id
+        self.blastp_hda_id = blastp_hda_id
+        self.blastx_hda_id = blastx_hda_id
+        self.interproscan_hda_id = interproscan_hda_id
 
 def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
     """
@@ -775,7 +718,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
         history_id = run_workflow_for_current_organism.set_history()
         run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
 
-        if workflow_type == phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE:
+        if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB:
 
             analyses_dict_list = run_workflow_for_current_organism.get_analyses()
 
@@ -793,7 +736,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
                 analysis_sourcename=run_workflow_for_current_organism.ogs_analysis_sourcename
             )
 
-            hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
+            run_workflow_for_current_organism.import_datasets_into_history()
 
             # Create the StrainWorkflowParam object holding all attributes needed for the workflow
             sp_wf_param = StrainWorkflowParam(
@@ -808,7 +751,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
                 org_id=org_id,
                 genome_analysis_id=genome_analysis_id,
                 ogs_analysis_id=ogs_analysis_id,
-                hda_ids=hda_ids,
+                genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
+                gff_hda_id=run_workflow_for_current_organism.gff_hda_id,
+                transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id,
+                proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id,
+                blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id,
+                blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id,
+                interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id,
                 history_id=history_id,
                 instance=run_workflow_for_current_organism.instance,
                 instance_url=run_workflow_for_current_organism.instance_url,
@@ -823,7 +772,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
 
             org_id = ids["org_id"]
             blastp_analysis_id = ids["blastp_analysis_id"]
-            hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
+            run_workflow_for_current_organism.import_datasets_into_history()
 
             # Create the StrainWorkflowParam object holding all attributes needed for the workflow
             sp_wf_param = StrainWorkflowParam(
@@ -837,7 +786,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
                 strain=run_workflow_for_current_organism.strain,
                 org_id=org_id,
-                blastp_analysis_id=genome_analysis_id,
+                blastp_analysis_id=blastp_analysis_id,
-                hda_ids=hda_ids,
+                genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
+                gff_hda_id=run_workflow_for_current_organism.gff_hda_id,
+                transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id,
+                proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id,
+                blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id,
+                blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id,
+                interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id,
                 history_id=history_id,
                 instance=run_workflow_for_current_organism.instance,
                 instance_url=run_workflow_for_current_organism.instance_url,
@@ -852,7 +807,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
 
             org_id = ids["org_id"]
             interpro_analysis_id = ids["interpro_analysis_id"]
-            hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
+            run_workflow_for_current_organism.import_datasets_into_history()
 
             # Create the StrainWorkflowParam object holding all attributes needed for the workflow
             sp_wf_param = StrainWorkflowParam(
@@ -866,7 +821,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
                 strain=run_workflow_for_current_organism.strain,
                 org_id=org_id,
                 interpro_analysis_id=interpro_analysis_id,
-                hda_ids=hda_ids,
+                genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
+                gff_hda_id=run_workflow_for_current_organism.gff_hda_id,
+                transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id,
+                proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id,
+                blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id,
+                blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id,
+                interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id,
                 history_id=history_id,
                 instance=run_workflow_for_current_organism.instance,
                 instance_url=run_workflow_for_current_organism.instance_url,
@@ -981,14 +942,17 @@ if __name__ == "__main__":
     if not args.workflow:
         logging.critical("No workflow type specified, exiting")
         sys.exit()
-    elif args.workflow in phaoexplorer_constants.WORKFLOW_VALID_TYPES:
+    elif args.workflow in phaeoexplorer_constants.WORKFLOW_VALID_TYPES:
         workflow_type = args.workflow
+    else:
+        logging.critical("Invalid workflow type specified (%s), exiting" % args.workflow)
+        sys.exit()
     logging.info("Workflow type set to '%s'" % workflow_type)
 
     script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
     all_sp_workflow_dict = {}
 
-    if workflow_type == phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE:
+    if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB:
         for sp_dict in sp_dict_list:
 
             # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
@@ -996,7 +957,7 @@ if __name__ == "__main__":
                 sp_dict,
                 main_dir=main_dir,
                 config=config,
-                workflow_type=phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE)
+                workflow_type=phaeoexplorer_constants.WF_LOAD_GFF_JB)
 
             current_sp_genus_species = sp_workflow_param.genus_species
             current_sp_strain_sex = sp_workflow_param.strain_sex
@@ -1021,7 +982,7 @@ if __name__ == "__main__":
                 sp_workflow_param = strains[strain_sex]
 
                 # Set workflow path (1 organism)
-                workflow_path = os.path.join(os.path.abspath(script_dir), phaoexplorer_constants.WORKFLOWS_PATH, phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE_FILE_1_ORG)
+                workflow_path = os.path.join(os.path.abspath(script_dir), phaeoexplorer_constants.WORKFLOWS_PATH, phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_FILE)
 
                 # Check if the versions of tools specified in the workflow are installed in galaxy
                 install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_workflow_param.instance)
@@ -1039,46 +1000,37 @@ if __name__ == "__main__":
                 org_org_id = sp_workflow_param.org_id
                 org_genome_analysis_id = sp_workflow_param.genome_analysis_id
                 org_ogs_analysis_id = sp_workflow_param.ogs_analysis_id
-                org_genome_hda_id = sp_workflow_param.hda_ids["genome_hda_id"]
-                org_transcripts_hda_id = sp_workflow_param.hda_ids["transcripts_hda_id"]
-                org_proteins_hda_id = sp_workflow_param.hda_ids["proteins_hda_id"]
-                org_gff_hda_id = sp_workflow_param.hda_ids["gff_hda_id"]
+                org_genome_hda_id = sp_workflow_param.genome_hda_id
+                org_transcripts_hda_id = sp_workflow_param.transcripts_hda_id
+                org_proteins_hda_id = sp_workflow_param.proteins_hda_id
+                org_gff_hda_id = sp_workflow_param.gff_hda_id
 
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
 
-                GENOME_FASTA_FILE_ORG = "0"
-                GFF_FILE_ORG = "1"
-                PROTEINS_FASTA_FILE_ORG = "2"
-                LOAD_FASTA_ORG = "3"
-                JBROWSE_ORG = "4"
-                LOAD_GFF_ORG = "5"
-                JBROWSE_CONTAINER = "6"
-                SYNC_FEATURES_ORG = "7"
-                POPULATE_MAT_VIEWS = "8"
-                INDEX_TRIPAL_DATA = "9"
-
                 # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-                workflow_parameters[GENOME_FASTA_FILE_ORG] = {}
-                workflow_parameters[GFF_FILE_ORG] = {}
-                workflow_parameters[PROTEINS_FASTA_FILE_ORG] = {}
-                workflow_parameters[LOAD_FASTA_ORG] = {"organism": org_org_id,
-                                                        "analysis_id": org_genome_analysis_id,
-                                                        "do_update": "true"}
-                workflow_parameters[JBROWSE_ORG] = {}
-                workflow_parameters[LOAD_GFF_ORG] = {"organism": org_org_id, "analysis_id": org_ogs_analysis_id}
-                workflow_parameters[SYNC_FEATURES_ORG] = {"organism_id":  org_org_id}
-                # POPULATE + INDEX DATA
-                workflow_parameters[POPULATE_MAT_VIEWS] = {}
-                workflow_parameters[INDEX_TRIPAL_DATA] = {}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
+                    "organism": org_org_id,
+                    "analysis_id": org_genome_analysis_id,
+                    "do_update": "true"}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
+                    "organism": org_org_id,
+                    "analysis_id": org_ogs_analysis_id}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC] = {
+                    "organism_id": org_org_id}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
+                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
 
                 # Set datamap (mapping of input files in the workflow)
                 datamap = {}
 
-                datamap[GENOME_FASTA_FILE_ORG] = {"src": "hda", "id": org_genome_hda_id}
-                datamap[GFF_FILE_ORG] = {"src": "hda", "id": org_gff_hda_id}
-                datamap[PROTEINS_FASTA_FILE_ORG] = {"src": "hda", "id": org_proteins_hda_id}
-
+                datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": org_genome_hda_id}
+                datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": org_gff_hda_id}
+                datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": org_proteins_hda_id}
 
                 with open(workflow_path, 'r') as ga_in_file:
 
-- 
GitLab