@@ -495,20 +495,120 @@ class RunWorkflow(speciesData.SpeciesData):
# print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
return({"org_id": org_id, "blastp_analysis_id": blastp_analysis_id})
def add_interproscan_analysis(self):
def add_organism_interproscan_analysis(self):
"""
Add the organism and its InterProScan analysis to the Chado database, then synchronize both in Tripal
Called outside the workflow for practical reasons (the Chado add tools don't have an input link for analysis or organism)
:return:
"""
# Add Interpro analysis to chado
logging.info("Adding Interproscan analysis to the instance's chado database")
self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.4+galaxy0",
self.connect_to_instance()
self.set_get_history()
tool_version = "2.3.4+galaxy0"
get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
get_organisms = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"name": "InterproScan on OGS%s" % self.ogs_version,
"program": "InterproScan",
"programversion": "OGS%s" % self.ogs_version,
"sourcename": "Genoscope",
"date_executed": self.date})
tool_inputs={})
time.sleep(10) # Ensure the tool has had time to complete
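# NB: a fixed sleep is a heuristic; assuming a recent bioblend, a sketch of an
# explicit wait on the output dataset (once its id is fetched below) would be:
# self.instance.datasets.wait_for_dataset(dataset_id=org_job_out_id, maxwait=300)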
org_outputs = get_organisms["outputs"] # Outputs from the get_organism tool
org_job_out_id = org_outputs[0]["id"] # ID of the get_organism output dataset (list of dicts)
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) # Download the dataset
org_output = json.loads(org_json_output) # Turn the dataset into a list for parsing
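# For reference, the decoded dataset is a list of organism dicts, e.g. (hypothetical values):
# [{"organism_id": 21, "genus": "Ectocarpus", "species": "siliculosus male",
#   "abbreviation": "E_siliculosus_m", "common_name": "Ectocarpus"}]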
org_id = None
# Look up list of outputs (dictionaries)
for organism_output_dict in org_output:
if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex):
org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
if org_id is None:
if self.common == "" or self.common is None:
add_org_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"abbr": self.abbreviation,
"genus": self.genus_uppercase,
"species": self.chado_species_name,
"common": self.abbreviation})
org_job_out_id = add_org_job["outputs"][0]["id"]
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
org_output = json.loads(org_json_output)
org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools
else:
add_org_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"abbr": self.abbreviation,
"genus": self.genus_uppercase,
"species": self.chado_species_name,
"common": self.common})
org_job_out_id = add_org_job["outputs"][0]["id"]
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
org_output = json.loads(org_json_output)
org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools
# Synchronize newly added organism in Tripal
logging.info("Synchronizing organism %s in Tripal" % self.full_name)
time.sleep(60)
org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
history_id=self.history_id,
tool_inputs={"organism_id": org_id})
get_analyses = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version,
history_id=self.history_id,
tool_inputs={})
time.sleep(10)
analysis_outputs = get_analyses["outputs"]
analysis_job_out_id = analysis_outputs[0]["id"]
analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
analysis_output = json.loads(analysis_json_output)
interpro_analysis_id = None
# Look up list of outputs (dictionaries)
for analysis_output_dict in analysis_output:
if analysis_output_dict["name"] == "Interproscan on " + self.full_name_lowercase + " OGS" + self.ogs_version:
interpro_analysis_id = str(analysis_output_dict["analysis_id"])
if interpro_analysis_id is None:
add_interproscan_analysis_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"name": "Interproscan on " + self.full_name_lowercase + " OGS" + self.ogs_version,
"program": "Performed by Genoscope",
"programversion": str(self.sex + " OGS" + self.ogs_version),
"sourcename": "Genoscope",
"date_executed": self.date})
analysis_outputs = add_interproscan_analysis_job["outputs"]
analysis_job_out_id = analysis_outputs[0]["id"]
analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
analysis_output = json.loads(analysis_json_output)
interpro_analysis_id = str(analysis_output["analysis_id"])
# Synchronize InterProScan analysis
logging.info("Synchronizing InterProScan OGS%s analysis in Tripal" % self.ogs_version)
time.sleep(60)
interproscan_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
history_id=self.history_id,
tool_inputs={"analysis_id": interpro_analysis_id})
# print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
return({"org_id": org_id, "interpro_analysis_id": interpro_analysis_id})
def get_interpro_analysis_id(self):
@@ -628,6 +728,8 @@ class RunWorkflow(speciesData.SpeciesData):
proteins_hda_id = dataset_id
if dataset_name == "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version):
blastp_hda_id = dataset_id
if dataset_name == "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version):
interproscan_hda_id = dataset_id
# Import each dataset into history if it is not imported
@@ -906,6 +1008,45 @@ def create_sp_workflow_dict(sp_dict, main_dir, config, workflow_type):
sp_workflow_dict[genus_species] = {strain_sex: attributes}
if workflow_type == "interpro":
run_workflow_for_current_organism.connect_to_instance()
history_id = run_workflow_for_current_organism.set_get_history()
run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
ids = run_workflow_for_current_organism.add_organism_interproscan_analysis()
org_id = ids["org_id"]
interpro_analysis_id = ids["interpro_analysis_id"]
instance_attributes = run_workflow_for_current_organism.get_instance_attributes()
hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex)
genus_species = run_workflow_for_current_organism.genus_species
# Create the dictionary holding all attributes needed to connect to the galaxy instance
attributes = {"genus": run_workflow_for_current_organism.genus,
"species": run_workflow_for_current_organism.species,
"genus_species": run_workflow_for_current_organism.genus_species,
"full_name": run_workflow_for_current_organism.full_name,
"species_folder_name": run_workflow_for_current_organism.species_folder_name,
"sex": run_workflow_for_current_organism.sex,
"strain": run_workflow_for_current_organism.strain,
"org_id": org_id,
"interpro_analysis_id": interpro_analysis_id,
"instance_attributes": instance_attributes,
"hda_ids": hda_ids,
"history_id": history_id,
"instance": run_workflow_for_current_organism.instance,
"instance_url": run_workflow_for_current_organism.instance_url,
"email": config["galaxy_default_admin_email"],
"password": config["galaxy_default_admin_password"]}
sp_workflow_dict[genus_species] = {strain_sex: attributes}
else:
logging.critical("The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.full_name)
sys.exit()
@@ -987,12 +1128,13 @@ if __name__ == "__main__":
args = parser.parse_args()
bioblend_logger = logging.getLogger("bioblend")
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
bioblend_logger.setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger("urllib3").setLevel(logging.INFO)
logging.getLogger("bioblend").setLevel(logging.INFO)
bioblend_logger.setLevel(logging.INFO)
# Parsing the config file if provided, using the default config otherwise
if not args.config:
@@ -1000,6 +1142,12 @@ if __name__ == "__main__":
else:
args.config = os.path.abspath(args.config)
if args.config:
config_file = os.path.abspath(args.config)
else:
config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
config = utilities.parse_config(config_file)
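# config is a plain dict of deployment settings; the keys used further down include e.g.
# config["galaxy_default_admin_email"], config["galaxy_default_admin_password"] and config["jbrowse_menu_url"]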
if not args.main_directory:
args.main_directory = os.getcwd()
else:
@@ -1164,13 +1312,6 @@ if __name__ == "__main__":
else:
jbrowse_menu_url_org = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
# show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True)
# print(show_tool_add_organism)
# show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True)
# print(show_jbrowse_tool)
# show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True)
# print(show_jbrowse_container_tool)
# Replace values in the workflow dictionary
workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org)
workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name)
@@ -1190,7 +1331,7 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
@@ -1426,7 +1567,7 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
@@ -1541,7 +1682,7 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
@@ -1695,7 +1836,276 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
if workflow_type == "interpro":
for sp_dict in sp_dict_list:
# Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config, workflow_type="interpro")
current_sp_key = list(current_sp_workflow_dict.keys())[0]
current_sp_value = list(current_sp_workflow_dict.values())[0]
current_sp_strain_sex_key = list(current_sp_value.keys())[0]
current_sp_strain_sex_value = list(current_sp_value.values())[0]
# Add the species dictionary to the complete dictionary
# This dictionary contains every organism present in the input file
# Its structure is the following:
# {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
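# e.g. (hypothetical values):
# {"genus_species": {"strain1_male": {"org_id": "21", "history_id": "f2db41e1fa331b3e", ...},
#                    "strain1_female": {...}}}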
if current_sp_key not in all_sp_workflow_dict:
all_sp_workflow_dict[current_sp_key] = current_sp_value
else:
all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value
for k, v in all_sp_workflow_dict.items():
if len(list(v.keys())) == 1:
logging.info("Input organism %s: 1 species detected in input dictionary" % k)
# Set workflow path (1 organism)
workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_interproscan_1org_v1.ga")
# Instance object required variables
instance_url, email, password = None, None, None
# Set the galaxy instance variables
for k2, v2 in v.items():
instance_url = v2["instance_url"]
email = v2["email"]
password = v2["password"]
instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
# Check if the versions of tools specified in the workflow are installed in galaxy
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
organisms_key_names = list(v.keys())
org_dict = v[organisms_key_names[0]]
history_id = org_dict["history_id"]
# Organism attributes
org_genus = org_dict["genus"]
org_species = org_dict["species"]
org_genus_species = org_dict["genus_species"]
org_species_folder_name = org_dict["species_folder_name"]
org_full_name = org_dict["full_name"]
org_strain = org_dict["strain"]
org_sex = org_dict["sex"]
org_org_id = org_dict["org_id"]
org_interproscan_analysis_id = org_dict["interpro_analysis_id"]
org_interproscan_hda_id = org_dict["hda_ids"]["interproscan_hda_id"]
# Store these values into a dict for parameters logging/validation
org_parameters_dict = {
"org_genus": org_genus,
"org_species": org_species,
"org_genus_species": org_genus_species,
"org_species_folder_name": org_species_folder_name,
"org_full_name": org_full_name,
"org_strain": org_strain,
"org_sex": org_sex,
"org_org_id": org_org_id,
"org_inteproscan_analysis_id": org_inteproscan_analysis_id,
"org_interproscan_hda_id": org_interproscan_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value))
sys.exit()
INTEPRO_FILE = "0"
LOAD_INTERPRO_FILE = "1"
POPULATE_MAT_VIEWS = "2"
INDEX_TRIPAL_DATA = "3"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
workflow_parameters[INTERPRO_FILE] = {}
workflow_parameters[LOAD_INTERPRO_FILE] = {"analysis_id": org_interproscan_analysis_id, "organism_id": org_org_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
datamap = {}
datamap[INTERPRO_FILE] = {"src": "hda", "id": org_interproscan_hda_id}
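# At this point the two mappings look like, e.g. (hypothetical ids):
# workflow_parameters == {"0": {}, "1": {"analysis_id": "5", "organism_id": "21"}, "2": {}, "3": {}}
# datamap == {"0": {"src": "hda", "id": "f2db41e1fa331b3e"}}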
with open(workflow_path, 'r') as ga_in_file:
# Store the decoded json dictionary
workflow_dict = json.load(ga_in_file)
workflow_name = workflow_dict["name"]
# Import the workflow in galaxy as a dict
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
# Get its attributes
workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
# Then get its ID (required to invoke the workflow)
workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
# Check if the workflow is found
try:
logging.debug("Workflow ID: %s" % workflow_id)
except bioblend.ConnectionError:
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
if len(list(v.keys())) == 2:
logging.info("Input organism %s: 2 species detected in input dictionary" % k)
# Set workflow path (2 organisms)
workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_interproscan_2org_v1.ga")
# Instance object required variables
instance_url, email, password = None, None, None
# Set the galaxy instance variables
for k2, v2 in v.items():
instance_url = v2["instance_url"]
email = v2["email"]
password = v2["password"]
instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
# Check if the versions of tools specified in the workflow are installed in galaxy
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
organisms_key_names = list(v.keys())
org1_dict = v[organisms_key_names[0]]
org2_dict = v[organisms_key_names[1]]
history_id = org1_dict["history_id"]
# Organism 1 attributes
org1_genus = org1_dict["genus"]
org1_species = org1_dict["species"]
org1_genus_species = org1_dict["genus_species"]
org1_species_folder_name = org1_dict["species_folder_name"]
org1_full_name = org1_dict["full_name"]
org1_strain = org1_dict["strain"]
org1_sex = org1_dict["sex"]
org1_org_id = org1_dict["org_id"]
org1_interproscan_analysis_id = org1_dict["interpro_analysis_id"]
org1_interproscan_hda_id = org1_dict["hda_ids"]["interproscan_hda_id"]
# Store these values into a dict for parameters logging/validation
org1_parameters_dict = {
"org1_genus": org1_genus,
"org1_species": org1_species,
"org1_genus_species": org1_genus_species,
"org1_species_folder_name": org1_species_folder_name,
"org1_full_name": org1_full_name,
"org1_strain": org1_strain,
"org1_sex": org1_sex,
"org1_org_id": org1_org_id,
"org1_interproscan_analysis_id": org1_interproscan_analysis_id,
"org1_interproscan_hda_id": org1_interproscan_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org1_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value))
sys.exit()
# Organism 2 attributes
org2_genus = org2_dict["genus"]
org2_species = org2_dict["species"]
org2_genus_species = org2_dict["genus_species"]
org2_species_folder_name = org2_dict["species_folder_name"]
org2_full_name = org2_dict["full_name"]
org2_strain = org2_dict["strain"]
org2_sex = org2_dict["sex"]
org2_org_id = org2_dict["org_id"]
org2_interproscan_analysis_id = org2_dict["interpro_analysis_id"]
org2_interproscan_hda_id = org2_dict["hda_ids"]["interproscan_hda_id"]
# Store these values into a dict for parameters logging/validation
org2_parameters_dict = {
"org2_genus": org2_genus,
"org2_species": org2_species,
"org2_genus_species": org2_genus_species,
"org2_species_folder_name": orgé_species_folder_name,
"org2_full_name": org2_full_name,
"org2_strain": org2_strain,
"org2_sex": org2_sex,
"org2_org_id": org2_org_id,
"org2_interproscan_analysis_id": org2_interproscan_analysis_id,
"org2_interproscan_hda_id": org2_interproscan_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org2_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
sys.exit()
# Source files association (ordered by their IDs in the workflow)
# WARNING: Be very careful about how the workflow is "organized" (i.e. the order of the steps/datasets; check the .ga file if there is any error)
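# A quick sanity check of the step order (sketch, reads the same .ga file as below):
# with open(workflow_path) as ga_file:
#     for step_id, step in sorted(json.load(ga_file)["steps"].items(), key=lambda s: int(s[0])):
#         print(step_id, step["name"], step.get("label"))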
INTERPRO_FILE_ORG1 = "0"
INTERPRO_FILE_ORG2 = "1"
LOAD_INTERPRO_FILE_ORG1 = "2"
LOAD_INTERPRO_FILE_ORG2 = "3"
POPULATE_MAT_VIEWS = "4"
INDEX_TRIPAL_DATA = "5"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
# Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
workflow_parameters[INTERPRO_FILE_ORG1] = {}
workflow_parameters[INTERPRO_FILE_ORG2] = {}
# Organism 1
workflow_parameters[LOAD_INTERPRO_FILE_ORG1] = {"organism_id": org1_org_id,
"analysis_id": org1_interproscan_analysis_id}
# Organism 2
workflow_parameters[LOAD_INTERPRO_FILE_ORG2] = {"organism_id": org2_org_id,
"analysis_id": org2_interproscan_analysis_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
# Set datamap (mapping of input files in the workflow)
datamap = {}
# Organism 1
datamap[INTERPRO_FILE_ORG1] = {"src": "hda", "id": org1_interproscan_hda_id}
# Organism 2
datamap[INTERPRO_FILE_ORG2] = {"src": "hda", "id": org2_interproscan_hda_id}
with open(workflow_path, 'r') as ga_in_file:
# Store the decoded json dictionary
workflow_dict = json.load(ga_in_file)
workflow_name = workflow_dict["name"]
# Import the workflow in galaxy as a dict
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
# Get its attributes
workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
# Then get its ID (required to invoke the workflow)
workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
# Check if the workflow is found
try:
logging.debug("Workflow ID: %s" % workflow_id)
except bioblend.ConnectionError:
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "load_inteproscan_1org1_v1",
"steps": {
"0": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "inteproscan file org1"
}
],
"label": "inteproscan file org1",
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 319.1999969482422,
"height": 82.19999694824219,
"left": 287,
"right": 487,
"top": 237,
"width": 200,
"x": 287,
"y": 237
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "3b13466d-1b81-475a-b652-183ed9d24bfa",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "aff82f29-1971-4283-bfe8-cdab4857a215"
}
]
},
"1": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"errors": null,
"id": 1,
"input_connections": {
"input": {
"id": 0,
"output_name": "output"
}
},
"inputs": [
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "input"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "organism_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "wait_for"
}
],
"label": "interproscan load org 1",
"name": "Chado load InterProScan results",
"outputs": [
{
"name": "results",
"type": "json"
}
],
"position": {
"bottom": 371.3999938964844,
"height": 164.39999389648438,
"left": 595,
"right": 795,
"top": 207,
"width": 200,
"x": 595,
"y": 207
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"tool_shed_repository": {
"changeset_revision": "1e54f2717e74",
"name": "chado_load_interpro",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.6+galaxy0",
"type": "tool",
"uuid": "a084abdc-a0f4-4670-a486-4aed4e0f61fa",
"workflow_outputs": [
{
"label": null,
"output_name": "results",
"uuid": "dbaab44d-1494-4fbe-bd15-455f7cbf7307"
}
]
},
"2": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"errors": null,
"id": 2,
"input_connections": {
"wait_for": {
"id": 1,
"output_name": "results"
}
},
"inputs": [],
"label": null,
"name": "Populate materialized views",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 360.3999938964844,
"height": 154.39999389648438,
"left": 876,
"right": 1076,
"top": 206,
"width": 200,
"x": 876,
"y": 206
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"tool_shed_repository": {
"changeset_revision": "3c08f32a3dc1",
"name": "tripal_db_populate_mviews",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.0",
"type": "tool",
"uuid": "2acbe412-1318-46e8-a178-72f50df36a07",
"workflow_outputs": [
{
"label": "Populate Tripal materialized view(s)",
"output_name": "results",
"uuid": "08a4eec2-b95a-4c75-8116-b0fc9477812f"
}
]
},
"3": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"errors": null,
"id": 3,
"input_connections": {
"wait_for": {
"id": 2,
"output_name": "results"
}
},
"inputs": [],
"label": null,
"name": "Index Tripal data",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 340.6000061035156,
"height": 113.60000610351562,
"left": 1150,
"right": 1350,
"top": 227,
"width": 200,
"x": 1150,
"y": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"tool_shed_repository": {
"changeset_revision": "d55a39f12dda",
"name": "tripal_db_index",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.1",
"type": "tool",
"uuid": "42d782ff-9e6b-49d8-bd17-37dc67f31a18",
"workflow_outputs": [
{
"label": "Index Tripal data",
"output_name": "results",
"uuid": "8b38b48f-5d4e-49e6-ad70-748ad64dc901"
}
]
}
},
"tags": [],
"uuid": "9912daac-c00e-4d5e-a93e-a747bd1b3499",
"version": 3
}
\ No newline at end of file
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "load_interproscan_2org_v1",
"steps": {
"0": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "interproscan file org 1"
}
],
"label": "interproscan file org 1",
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 350.1999969482422,
"height": 82.19999694824219,
"left": 414,
"right": 614,
"top": 268,
"width": 200,
"x": 414,
"y": 268
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "bfce9a38-df8a-46ef-af2e-390a4982ebfc",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "d5d96afa-8dcc-4322-89fa-df56fd503d89"
}
]
},
"1": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 1,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "interproscan file org 2"
}
],
"label": "interproscan file org 2",
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 451.1999969482422,
"height": 82.19999694824219,
"left": 437,
"right": 637,
"top": 369,
"width": 200,
"x": 437,
"y": 369
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "45f57c7f-f08c-4b3b-879d-1acd246868fb",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "f5e4ae10-fdbf-47e9-99d8-0644bf8bb46f"
}
]
},
"2": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"errors": null,
"id": 2,
"input_connections": {
"input": {
"id": 0,
"output_name": "output"
}
},
"inputs": [
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "input"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "organism_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "wait_for"
}
],
"label": "inteproscan load org 1",
"name": "Chado load InterProScan results",
"outputs": [
{
"name": "results",
"type": "json"
}
],
"position": {
"bottom": 365.3999938964844,
"height": 164.39999389648438,
"left": 790,
"right": 990,
"top": 201,
"width": 200,
"x": 790,
"y": 201
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"tool_shed_repository": {
"changeset_revision": "1e54f2717e74",
"name": "chado_load_interpro",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.6+galaxy0",
"type": "tool",
"uuid": "e6c509b4-ea16-4fda-b171-3dc781a54759",
"workflow_outputs": [
{
"label": null,
"output_name": "results",
"uuid": "bfff8893-5c92-45a0-88ca-594b33a8b4e2"
}
]
},
"3": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"errors": null,
"id": 3,
"input_connections": {
"input": {
"id": 1,
"output_name": "output"
},
"wait_for": {
"id": 2,
"output_name": "results"
}
},
"inputs": [
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "input"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "organism_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "wait_for"
}
],
"label": "interproscan load org 2",
"name": "Chado load InterProScan results",
"outputs": [
{
"name": "results",
"type": "json"
}
],
"position": {
"bottom": 555.3999938964844,
"height": 164.39999389648438,
"left": 819,
"right": 1019,
"top": 391,
"width": 200,
"x": 819,
"y": 391
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"tool_shed_repository": {
"changeset_revision": "1e54f2717e74",
"name": "chado_load_interpro",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.6+galaxy0",
"type": "tool",
"uuid": "1832aceb-851c-4020-9c07-8193e8b2299a",
"workflow_outputs": [
{
"label": null,
"output_name": "results",
"uuid": "be9f442f-7f19-4b84-94cf-9c2c28d9a6a5"
}
]
},
"4": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"errors": null,
"id": 4,
"input_connections": {
"wait_for": {
"id": 3,
"output_name": "results"
}
},
"inputs": [
{
"description": "runtime parameter for tool Populate materialized views",
"name": "wait_for"
}
],
"label": null,
"name": "Populate materialized views",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 459.3999938964844,
"height": 154.39999389648438,
"left": 1118,
"right": 1318,
"top": 305,
"width": 200,
"x": 1118,
"y": 305
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"tool_shed_repository": {
"changeset_revision": "3c08f32a3dc1",
"name": "tripal_db_populate_mviews",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.0",
"type": "tool",
"uuid": "a819f33b-e566-43e5-a467-e3410ce431ec",
"workflow_outputs": [
{
"label": "Populate Tripal materialized view(s)",
"output_name": "results",
"uuid": "96483655-995e-4d4e-b8c6-7f602c974ee3"
}
]
},
"5": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"errors": null,
"id": 5,
"input_connections": {
"wait_for": {
"id": 4,
"output_name": "results"
}
},
"inputs": [
{
"description": "runtime parameter for tool Index Tripal data",
"name": "wait_for"
}
],
"label": null,
"name": "Index Tripal data",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 442.6000061035156,
"height": 113.60000610351562,
"left": 1382,
"right": 1582,
"top": 329,
"width": 200,
"x": 1382,
"y": 329
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"tool_shed_repository": {
"changeset_revision": "d55a39f12dda",
"name": "tripal_db_index",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.1",
"type": "tool",
"uuid": "489483da-c7c7-49a5-b5d4-353db41b1240",
"workflow_outputs": [
{
"label": "Index Tripal data",
"output_name": "results",
"uuid": "3df4fd46-5dd9-49d6-b0d2-24425d9ba91e"
}
]
}
},
"tags": [],
"uuid": "ac96d70e-60b6-4455-8d14-9219d0674950",
"version": 1
}
\ No newline at end of file