diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 78856471f248463241e8d5e809527513709cdd37..30ad7e5dc3753da8ca26db2d1a822e0c56a565fb 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -575,11 +575,13 @@ class RunWorkflow(speciesData.SpeciesData):
         genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blast_diamond_hda_id, interproscan_hda_id = None, None, None, None, None, None
 
         # Finding datasets in history (matches datasets name)
+        # TODO: match version as well
+        # TODO: match whole dataset name (not just prefix and suffix)
         for dataset in history_datasets_li:
             dataset_name = dataset["name"]
             if dataset_shortname in dataset_name:
                 dataset_id = dataset["id"]
-                if dataset_name.endswith(".fasta"):
+                if dataset_name.endswith("%s.fasta" % self.genome_version):
                     genome_hda_id = dataset_id
                 if dataset_name.endswith(".gff"):
                     gff_hda_id = dataset_id
@@ -961,7 +963,7 @@ if __name__ == "__main__":
                 logging.info("Input organism %s: 1 species detected in input dictionary" % k)
 
                 # Set workflow path (1 organism)
-                workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
+                workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
 
                 # Set the galaxy instance variables
                 for k2, v2 in v.items():
@@ -986,7 +988,7 @@ if __name__ == "__main__":
                 logging.info("Input organism %s: 2 species detected in input dictionary" % k)
 
                 # Set workflow path (2 organisms)
-                workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga")
+                workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga")
 
                 # Instance object required variables
                 instance_url, email, password = None, None, None
@@ -1160,45 +1162,45 @@ if __name__ == "__main__":
             datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id}
             datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id}
 
-            # with open(workflow_path, 'r') as ga_in_file:
-
-            #     # Store the decoded json dictionary
-            #     workflow_dict = json.load(ga_in_file)
-            #     workflow_name = workflow_dict["name"]
-
-            #     # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
-            #     # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
-            #     # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-            #     if "menu_url" not in config.keys():
-            #         jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
-            #         jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-            #     else:
-            #         jbrowse_menu_url_org1 = config["menu_url"]
-            #         jbrowse_menu_url_org2 = jbrowse_menu_url_org1
-
-            #     # Replace values in the workflow dictionary
-            #     workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
-            #     workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
-            #     # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
-            #     # in galaxy --> define a naming method for these workflows
-            #     workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
-            #     workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
-
-            #     # Import the workflow in galaxy as a dict
-            #     instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-
-            #     # Get its attributes
-            #     workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
-            #     # Then get its ID (required to invoke the workflow)
-            #     workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
-            #     show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
-            #     # Check if the workflow is found
-            #     try:
-            #         logging.debug("Workflow ID: %s" % workflow_id)
-            #     except bioblend.ConnectionError:
-            #         logging.warning("Error finding workflow %s" % workflow_name)
-
-            #     # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-            #     instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-
-            #     logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
+            with open(workflow_path, 'r') as ga_in_file:
+
+                # Store the decoded json dictionary
+                workflow_dict = json.load(ga_in_file)
+                workflow_name = workflow_dict["name"]
+
+                # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
+                # as runtime values: using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
+                # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
+                if "menu_url" not in config.keys():
+                    jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
+                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                else:
+                    jbrowse_menu_url_org1 = config["menu_url"]
+                    jbrowse_menu_url_org2 = jbrowse_menu_url_org1
+
+                # Replace values in the workflow dictionary
+                workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
+                workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
+                # The UNIQUE_ID is specific to a combination genus_species_strain_sex, so every combination should have its own workflow
+                # in galaxy --> define a naming method for these workflows
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
+
+                # Import the workflow in galaxy as a dict
+                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+
+                # Get its attributes
+                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                # Then get its ID (required to invoke the workflow)
+                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+                # Check that the workflow can be found on the instance
+                try:
+                    show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                    logging.debug("Workflow ID: %s" % workflow_id)
+                except bioblend.ConnectionError:
+                    logging.warning("Error finding workflow %s" % workflow_name)
+
+                # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
+
+                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the job states".format(workflow_name, instance_url))
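For the `@@ -575` hunk: matching on `"%s.fasta" % self.genome_version` pins the genome dataset to the expected assembly version instead of taking the first `.fasta` hit in the history. A minimal, self-contained sketch of that behaviour; the history listing, shortname, and version below are made up for illustration:

```python
# Hypothetical history listing, mimicking the structure returned for
# history_datasets_li in the script
history_datasets_li = [
    {"name": "Undaria_pinnatifida_v1.0.fasta", "id": "hda1"},
    {"name": "Undaria_pinnatifida_v2.0.fasta", "id": "hda2"},
    {"name": "Undaria_pinnatifida_v1.0.gff", "id": "hda3"},
]
dataset_shortname = "Undaria_pinnatifida"
genome_version = "v1.0"

genome_hda_id, gff_hda_id = None, None
for dataset in history_datasets_li:
    dataset_name = dataset["name"]
    if dataset_shortname in dataset_name:
        # Matching on "<version>.fasta" rather than ".fasta" avoids picking up
        # a dataset from another assembly version of the same organism
        if dataset_name.endswith("%s.fasta" % genome_version):
            genome_hda_id = dataset["id"]
        if dataset_name.endswith(".gff"):
            gff_hda_id = dataset["id"]

# With a bare ".fasta" check, hda2 (v2.0) would have overwritten this
assert genome_hda_id == "hda1"
assert gff_hda_id == "hda3"
```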
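For the `@@ -1160` hunk: the re-enabled block is the usual bioblend sequence for running a stored `.ga` workflow (load the JSON, patch placeholders in `tool_state`, import the dict, look up the new workflow's ID by name, invoke it with a datamap). A condensed sketch, assuming `instance` is an authenticated `bioblend.galaxy.GalaxyInstance`; the URL, credentials, and placeholder values below are illustrative stand-ins for what the script builds from its config:

```python
import json
import logging

import bioblend
from bioblend.galaxy import GalaxyInstance

# Illustrative stand-ins; the real script derives these from its config and input dictionary
instance = GalaxyInstance(url="https://galaxy.example.org",
                          email="admin@example.org", password="password")
workflow_path = "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga"
jbrowse_menu_url_org1 = "https://hostname/sp/genus_species/feature/Genus/species/mRNA/{id}"
history_id = "<existing history id>"
datamap = {}              # dataset map built earlier in the script
workflow_parameters = {}  # runtime parameters built earlier in the script

with open(workflow_path, "r") as ga_in_file:
    workflow_dict = json.load(ga_in_file)
workflow_name = workflow_dict["name"]

# Placeholders such as __MENU_URL_ORG1__ are substituted directly in the step's
# tool_state string, since the JBrowse tool errors out on runtime parameters
# (step "7" is the JBrowse step in this particular .ga file)
workflow_dict["steps"]["7"]["tool_state"] = \
    workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)

# Import the edited dict, then recover the new workflow's ID by name
# (index 0 is the most recently imported match)
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
workflow_id = instance.workflows.get_workflows(name=workflow_name)[0]["id"]

try:
    instance.workflows.show_workflow(workflow_id=workflow_id)
except bioblend.ConnectionError:
    logging.warning("Error finding workflow %s" % workflow_name)

# allow_tool_state_corrections lets Galaxy repair minor tool_state mismatches
# instead of refusing the invocation
instance.workflows.invoke_workflow(workflow_id=workflow_id,
                                   history_id=history_id,
                                   params=workflow_parameters,
                                   inputs=datamap,
                                   allow_tool_state_corrections=True)
```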