
Workflow v2

Merged Loraine Gueguen requested to merge workflow_v2 into dev
2 unresolved threads
@@ -287,7 +287,7 @@ class RunWorkflow(speciesData.SpeciesData):
             org_job_out_id = add_org_job["outputs"][0]["id"]
             org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
             org_output = json.loads(org_json_output)
-            org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
+            org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
         else:
             add_org_job = self.instance.tools.run_tool(
                 tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
@@ -299,7 +299,7 @@ class RunWorkflow(speciesData.SpeciesData):
             org_job_out_id = add_org_job["outputs"][0]["id"]
             org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
             org_output = json.loads(org_json_output)
-            org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
+            org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
         get_analyses = self.instance.tools.run_tool(
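For context, the fix in these two hunks is the same: the tool's JSON output is parsed into org_output, so reading organism_id from the previously undefined organism_output_dict raised a NameError at runtime. A minimal sketch of the pattern, assuming a bioblend GalaxyInstance; the get_org_id helper is hypothetical, not part of the repository:

import json

def get_org_id(instance, add_org_job):
    """Read the organism id from a finished chado_organism_add_organism job."""
    # run_tool() returns a dict whose "outputs" list holds the result datasets
    org_job_out_id = add_org_job["outputs"][0]["id"]
    # Download the JSON the chado tool wrote, then parse it
    org_json_output = instance.datasets.download_dataset(dataset_id=org_job_out_id)
    org_output = json.loads(org_json_output)
    # Chado tools expect the id as a string, hence the cast
    return str(org_output["organism_id"])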
@@ -338,7 +338,7 @@ class RunWorkflow(speciesData.SpeciesData):
             analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
             analysis_output = json.loads(analysis_json_output)
-            ogs_analysis_id = str(analysis_output_dict["analysis_id"])
+            ogs_analysis_id = str(analysis_output["analysis_id"])
         if genome_analysis_id is None:
             add_genome_analysis_job = self.instance.tools.run_tool(
@@ -353,7 +353,7 @@ class RunWorkflow(speciesData.SpeciesData):
             analysis_job_out_id = analysis_outputs[0]["id"]
             analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
             analysis_output = json.loads(analysis_json_output)
-            genome_analysis_id = str(analysis_output_dict["analysis_id"])
+            genome_analysis_id = str(analysis_output["analysis_id"])
         print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
         return({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
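The two analysis hunks fix the same class of bug as the organism hunks: a stale analysis_output_dict name left over from an earlier revision. A hedged sketch of a shared helper that would make this pattern harder to get wrong; parse_json_job_output is hypothetical and not code from the repository:

import json

def parse_json_job_output(instance, job, key):
    """Download the first output dataset of `job` and return str(parsed[key])."""
    out_id = job["outputs"][0]["id"]
    raw = instance.datasets.download_dataset(dataset_id=out_id)
    return str(json.loads(raw)[key])

# ogs_analysis_id = parse_json_job_output(self.instance, add_analysis_job, "analysis_id")
# genome_analysis_id = parse_json_job_output(self.instance, add_genome_analysis_job, "analysis_id")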
@@ -575,11 +575,13 @@ class RunWorkflow(speciesData.SpeciesData):
         genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blast_diamond_hda_id, interproscan_hda_id = None, None, None, None, None, None
         # Finding datasets in history (matches datasets name)
+        # TODO: match version as well
+        # TODO: match whole dataset name (not just prefix and suffix)
         for dataset in history_datasets_li:
             dataset_name = dataset["name"]
             if dataset_shortname in dataset_name:
                 dataset_id = dataset["id"]
-                if dataset_name.endswith(".fasta"):
+                if dataset_name.endswith("%s.fasta" % self.genome_version):
                     genome_hda_id = dataset_id
                 if dataset_name.endswith(".gff"):
                     gff_hda_id = dataset_id
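The endswith() fix narrows the genome match to the expected version, and the two TODOs ask for stricter matching still. A sketch of what whole-name matching could look like, assuming a "<shortname>…<version>.fasta" naming scheme; both the helper and the scheme are assumptions, not code from the repository:

import re

def is_genome_dataset(dataset_name, dataset_shortname, genome_version):
    # Anchored whole-name match instead of separate prefix/suffix tests
    pattern = re.escape(dataset_shortname) + r".*" + re.escape(genome_version) + r"\.fasta"
    return re.fullmatch(pattern, dataset_name) is not None

# is_genome_dataset("Undaria_pinnatifida_v1.0.fasta", "Undaria_pinnatifida", "1.0")  -> True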
@@ -961,7 +963,7 @@ if __name__ == "__main__":
             logging.info("Input organism %s: 1 species detected in input dictionary" % k)
             # Set workflow path (1 organism)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
+            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
             # Set the galaxy instance variables
             for k2, v2 in v.items():
@@ -986,7 +988,7 @@ if __name__ == "__main__":
             logging.info("Input organism %s: 2 species detected in input dictionary" % k)
             # Set workflow path (2 organisms)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga")
+            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga")
             # Instance object required variables
             instance_url, email, password = None, None, None
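Both path hunks repoint the script from workflows/ to the new workflows_phaeoexplorer/ directory. A small sketch of the selection the two branches perform, with the file names taken from the diff (the helper itself is hypothetical):

import os

WORKFLOWS_DIR = "workflows_phaeoexplorer"

WORKFLOW_FILES = {
    1: "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga",
    2: "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga",
}

def workflow_path_for(script_dir, n_organisms):
    # n_organisms is the number of species detected in the input dictionary
    return os.path.join(os.path.abspath(script_dir), WORKFLOWS_DIR, WORKFLOW_FILES[n_organisms])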
@@ -1160,48 +1162,51 @@ if __name__ == "__main__":
             datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id}
             datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id}
-            # with open(workflow_path, 'r') as ga_in_file:
-            #     # Store the decoded json dictionary
-            #     workflow_dict = json.load(ga_in_file)
-            #     workflow_name = workflow_dict["name"]
-            #     # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
-            #     # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
-            #     # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-            #     if "menu_url" not in config.keys():
-            #         jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
-            #         jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-            #     else:
-            #         jbrowse_menu_url_org1 = config["menu_url"]
-            #         jbrowse_menu_url_org2 = jbrowse_menu_url_org1
-            #     # Replace values in the workflow dictionary
-            #     workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
-            #     workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
-            #     # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
-            #     # in galaxy --> define a naming method for these workflows
-            #     workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
-            #     workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
-            #     # Import the workflow in galaxy as a dict
-            #     instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-            #     # Get its attributes
-            #     workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
-            #     # Then get its ID (required to invoke the workflow)
-            #     workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-            #     show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
-            #     # Check if the workflow is found
-            #     try:
-            #         logging.debug("Workflow ID: %s" % workflow_id)
-            #     except bioblend.ConnectionError:
-            #         logging.warning("Error finding workflow %s" % workflow_name)
-            #     # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-            #     instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-            #     logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
+            with open(workflow_path, 'r') as ga_in_file:
+                # Store the decoded json dictionary
+                workflow_dict = json.load(ga_in_file)
+                workflow_name = workflow_dict["name"]
+                # For the JBrowse tool, we unfortunately have to edit the parameters manually instead of setting them
+                # as runtime values: using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
+                # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
+                jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
+                jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                if "menu_url" not in config.keys():
+                    jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
+                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                else:
+                    jbrowse_menu_url_org1 = config["menu_url"]
+                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                # Replace values in the workflow dictionary
+                workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
+                workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
+                # The UNIQUE_ID is specific to a combination genus_species_strain_sex, so every combination should have its own workflow
+                # in galaxy --> define a naming method for these workflows
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
+                # Import the workflow in galaxy as a dict
+                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+                # Get its attributes
+                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                # Then get its ID (required to invoke the workflow)
+                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                # Check if the workflow is found
+                try:
+                    logging.debug("Workflow ID: %s" % workflow_id)
+                except bioblend.ConnectionError:
+                    logging.warning("Error finding workflow %s" % workflow_name)
+                # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
+                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))