From df6d5d55b2fc052a2088d16392832a8cef0f36a6 Mon Sep 17 00:00:00 2001 From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr> Date: Mon, 10 May 2021 20:29:57 +0200 Subject: [PATCH] Blast workflow added (v1) --- run_workflow_phaeoexplorer.py | 1295 +++++++++-------- ...axy-Workflow-load_blast_results_1org_v1.ga | 271 ++++ ...axy-Workflow-load_blast_results_2org_v1.ga | 418 ++++++ 3 files changed, 1371 insertions(+), 613 deletions(-) create mode 100644 workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga create mode 100644 workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py index 282dcd7..b5f70f0 100755 --- a/run_workflow_phaeoexplorer.py +++ b/run_workflow_phaeoexplorer.py @@ -71,41 +71,41 @@ class RunWorkflow(speciesData.SpeciesData): logging.debug("Library ID: %s" % self.library_id) instance_source_data_folders = self.instance.libraries.get_folders(library_id=library_id) - # Access folders via their absolute path - genome_folder = self.instance.libraries.get_folders(library_id=library_id, name="/genome/" + str(self.species_folder_name) + "/v" + str(self.genome_version)) - annotation_folder = self.instance.libraries.get_folders(library_id=library_id, name="/annotation/" + str(self.species_folder_name) + "/OGS" + str(self.ogs_version)) + # # Access folders via their absolute path + # genome_folder = self.instance.libraries.get_folders(library_id=library_id, name="/genome/" + str(self.species_folder_name) + "/v" + str(self.genome_version)) + # annotation_folder = self.instance.libraries.get_folders(library_id=library_id, name="/annotation/" + str(self.species_folder_name) + "/OGS" + str(self.ogs_version)) - # Get their IDs - genome_folder_id = genome_folder[0]["id"] - annotation_folder_id = annotation_folder[0]["id"] + # # Get their IDs + # genome_folder_id = genome_folder[0]["id"] + # annotation_folder_id = annotation_folder[0]["id"] - # Get the content of the 
folders - genome_folder_content = self.instance.folders.show_folder(folder_id=genome_folder_id, contents=True) - annotation_folder_content = self.instance.folders.show_folder(folder_id=annotation_folder_id, contents=True) + # # Get the content of the folders + # genome_folder_content = self.instance.folders.show_folder(folder_id=genome_folder_id, contents=True) + # annotation_folder_content = self.instance.folders.show_folder(folder_id=annotation_folder_id, contents=True) - # Find genome folder datasets - genome_fasta_ldda_id = genome_folder_content["folder_contents"][0]["ldda_id"] + # # Find genome folder datasets + # genome_fasta_ldda_id = genome_folder_content["folder_contents"][0]["ldda_id"] - annotation_gff_ldda_id, annotation_proteins_ldda_id, annotation_transcripts_ldda_id = None, None, None + # annotation_gff_ldda_id, annotation_proteins_ldda_id, annotation_transcripts_ldda_id = None, None, None - # Several dicts in the annotation folder content (one dict = one file) - for k, v in annotation_folder_content.items(): - if k == "folder_contents": - for d in v: - if "proteins" in d["name"]: - annotation_proteins_ldda_id = d["ldda_id"] - if "transcripts" in d["name"]: - annotation_transcripts_ldda_id = d["ldda_id"] - if ".gff" in d["name"]: - annotation_gff_ldda_id = d["ldda_id"] + # # Several dicts in the annotation folder content (one dict = one file) + # for k, v in annotation_folder_content.items(): + # if k == "folder_contents": + # for d in v: + # if "proteins" in d["name"]: + # annotation_proteins_ldda_id = d["ldda_id"] + # if "transcripts" in d["name"]: + # annotation_transcripts_ldda_id = d["ldda_id"] + # if ".gff" in d["name"]: + # annotation_gff_ldda_id = d["ldda_id"] - # Minimum datasets to populate tripal views --> will not work if these files are not assigned in the input file - self.datasets["genome_file"] = genome_fasta_ldda_id - self.datasets["gff_file"] = annotation_gff_ldda_id - self.datasets["proteins_file"] = annotation_proteins_ldda_id - 
self.datasets["transcripts_file"] = annotation_transcripts_ldda_id + # # Minimum datasets to populate tripal views --> will not work if these files are not assigned in the input file + # self.datasets["genome_file"] = genome_fasta_ldda_id + # self.datasets["gff_file"] = annotation_gff_ldda_id + # self.datasets["proteins_file"] = annotation_proteins_ldda_id + # self.datasets["transcripts_file"] = annotation_transcripts_ldda_id - return {"history_id": self.history_id, "library_id": library_id, "datasets": self.datasets} + return {"history_id": self.history_id, "library_id": library_id} def connect_to_instance(self): @@ -357,6 +357,107 @@ class RunWorkflow(speciesData.SpeciesData): return({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) + def add_organism_blastp_analysis(self): + """ + Add OGS and genome vX analyses to Chado database + Required for Chado Load Tripal Synchronize workflow (which should be ran as the first workflow) + Called outside workflow for practical reasons (Chado add doesn't have an input link for analysis or organism) + + :return: + + """ + + self.connect_to_instance() + self.set_get_history() + + tool_version = "2.3.4+galaxy0" + + get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0") + + get_organisms = self.instance.tools.run_tool( + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version, + history_id=self.history_id, + tool_inputs={}) + + time.sleep(10) # Ensure the tool has had time to complete + org_outputs = get_organisms["outputs"] # Outputs from the get_organism tool + org_job_out_id = org_outputs[0]["id"] # ID of the get_organism output dataset (list of dicts) + org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) # Download the dataset + org_output = json.loads(org_json_output) # Turn the dataset into a 
list for parsing + + org_id = None + + # Look up list of outputs (dictionaries) + for organism_output_dict in org_output: + if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex): + correct_organism_id = str(organism_output_dict["organism_id"]) # id needs to be a str to be recognized by chado tools + org_id = str(correct_organism_id) + + + if org_id is None: + if self.common == "" or self.common is None: + add_org_job = self.instance.tools.run_tool( + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version, + history_id=self.history_id, + tool_inputs={"abbr": self.abbreviation, + "genus": self.genus_uppercase, + "species": self.chado_species_name, + "common": self.abbreviation}) + org_job_out_id = add_org_job["outputs"][0]["id"] + org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) + org_output = json.loads(org_json_output) + org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools + else: + add_org_job = self.instance.tools.run_tool( + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version, + history_id=self.history_id, + tool_inputs={"abbr": self.abbreviation, + "genus": self.genus_uppercase, + "species": self.chado_species_name, + "common": self.common}) + org_job_out_id = add_org_job["outputs"][0]["id"] + org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) + org_output = json.loads(org_json_output) + org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools + + + get_analyses = self.instance.tools.run_tool( + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version, + history_id=self.history_id, + tool_inputs={}) + + time.sleep(10) + analysis_outputs = get_analyses["outputs"] + 
analysis_job_out_id = analysis_outputs[0]["id"] + analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) + analysis_output = json.loads(analysis_json_output) + + blastp_analysis_id = None + + # Look up list of outputs (dictionaries) + for analysis_output_dict in analysis_output: + if analysis_output_dict["name"] == "Diamond on " + self.full_name_lowercase + " OGS" + self.ogs_version: + blastp_analysis_id = str(analysis_output_dict["analysis_id"]) + + + if blastp_analysis_id is None: + add_blast_analysis_job = self.instance.tools.run_tool( + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version, + history_id=self.history_id, + tool_inputs={"name": "Diamond on " + self.full_name_lowercase + " OGS" + self.ogs_version, + "program": "Performed by Genoscope", + "programversion": str(self.sex + " OGS" + self.ogs_version), + "sourcename": "Genoscope", + "date_executed": self.date}) + analysis_outputs = add_blast_analysis_job["outputs"] + analysis_job_out_id = analysis_outputs[0]["id"] + analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) + analysis_output = json.loads(analysis_json_output) + blastp_analysis_id = str(analysis_output["analysis_id"]) + + # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) + return({"org_id": org_id, "blastp_analysis_id": blastp_analysis_id}) + def add_interproscan_analysis(self): """ """ @@ -393,43 +494,6 @@ class RunWorkflow(speciesData.SpeciesData): return self.interpro_analysis_id - def add_blastp_diamond_analysis(self): - """ - - """ - # Add Blastp (diamond) analysis to chado - logging.info("Adding Blastp Diamond analysis to the instance's chado database") - self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.3", - history_id=self.history_id, - tool_inputs={"name": "Diamond 
on OGS%s" % self.ogs_version, - "program": "Diamond", - "programversion": "OGS%s" % self.ogs_version, - "sourcename": "Genoscope", - "date_executed": self.date}) - - - def get_blastp_diamond_analysis_id(self): - """ - """ - - # Get blasp ID - blast_diamond_analysis = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3", - history_id=self.history_id, - tool_inputs={"name": "Diamond on OGS%s" % self.ogs_version}) - blast_diamond_analysis_job_out = blast_diamond_analysis["outputs"][0]["id"] - blast_diamond_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=blast_diamond_analysis_job_out) - try: - blast_diamond_analysis_output = json.loads(blast_diamond_analysis_json_output)[0] - self.blast_diamond_analysis_id = str(blast_diamond_analysis_output["analysis_id"]) - except IndexError as exc: - logging.critical("No matching InterproScan analysis exists in the instance's chado database") - sys.exit(exc) - - return self.blast_diamond_analysis_id - - def run_workflow(self, workflow_path, workflow_parameters, workflow_name, datamap): """ Run a workflow in galaxy @@ -560,12 +624,12 @@ class RunWorkflow(speciesData.SpeciesData): self.datasets["interproscan_file"] = e["ldda_id"] self.datasets_name["interproscan_file"] = e["name"] elif "blastp" in e["name"]: - self.datasets["blast_diamond_file"] = e["ldda_id"] - self.datasets_name["blast_diamond_file"] = e["name"] + self.datasets["blastp_file"] = e["ldda_id"] + self.datasets_name["blastp_file"] = e["name"] history_datasets_li = self.instance.datasets.get_datasets() - genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blast_diamond_hda_id, interproscan_hda_id = None, None, None, None, None, None + genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blastp_hda_id, interproscan_hda_id = None, None, None, None, None, None # Finding datasets in history (matching datasets names) for dataset in 
history_datasets_li: @@ -579,8 +643,8 @@ class RunWorkflow(speciesData.SpeciesData): transcripts_hda_id = dataset_id if dataset_name == "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version): proteins_hda_id = dataset_id - if dataset_name == "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version): - blast_diamond_hda_id = dataset_id + if dataset_name == "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version): + blastp_hda_id = dataset_id # Import each dataset into history if it is not imported @@ -604,27 +668,27 @@ class RunWorkflow(speciesData.SpeciesData): interproscan_hda_id = interproscan_dataset_upload["id"] except Exception as exc: logging.debug("Interproscan file not found in library (history: {0})".format(self.history_id)) - if blast_diamond_hda_id is None: + if blastp_hda_id is None: try: - blast_diamond_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"]) - blast_diamond_hda_id = blast_diamond_upload["id"] + blastp_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blastp_file"]) + blastp_hda_id = blastp_dataset_upload["id"] except Exception as exc: - logging.debug("Blastp file not found in library (history: {0})".format(self.history_id)) + logging.debug("blastp file not found in library (history: {0})".format(self.history_id)) - logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name) - logging.debug({"genome_hda_id": genome_hda_id, - "gff_hda_id": gff_hda_id, - "transcripts_hda_id": transcripts_hda_id, - "proteins_hda_id": proteins_hda_id, - "blast_diamond_hda_id": blast_diamond_hda_id, - "interproscan_hda_id": interproscan_hda_id}) + # logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name) + # logging.debug({"genome_hda_id": genome_hda_id, + # "gff_hda_id": gff_hda_id, + # "transcripts_hda_id": 
transcripts_hda_id, + # "proteins_hda_id": proteins_hda_id, + # "blastp_hda_id": blastp_hda_id, + # "interproscan_hda_id": interproscan_hda_id}) # Return a dict made of the hda ids return {"genome_hda_id": genome_hda_id, "gff_hda_id": gff_hda_id, "transcripts_hda_id": transcripts_hda_id, "proteins_hda_id": proteins_hda_id, - "blast_diamond_hda_id": blast_diamond_hda_id, + "blastp_hda_id": blastp_hda_id, "interproscan_hda_id": interproscan_hda_id} @@ -662,17 +726,16 @@ class RunWorkflow(speciesData.SpeciesData): if "interproscan_file" in self.datasets_name.keys(): if dataset_dict["name"] == self.datasets_name["interproscan_file"] and dataset_dict["id"] not in imported_datasets_ids: interproscan_dataset_hda_id = dataset_dict["id"] - if "blast_diamond_file" in self.datasets_name.keys(): - if dataset_dict["name"] == self.datasets_name["blast_diamond_file"] and dataset_dict["id"] not in imported_datasets_ids: - blast_diamond_dataset_hda_id = dataset_dict["id"] + if dataset_dict["name"] == self.datasets_name["blastp_file"] and dataset_dict["id"] not in imported_datasets_ids: + blastp_dataset_hda_id = dataset_dict["id"] logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id) logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id) logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id) logging.debug("GFF dataset hda ID: %s" % gff_dataset_hda_id) logging.debug("InterproScan dataset hda ID: %s" % gff_dataset_hda_id) - logging.debug("Blast Diamond dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("Blastp Diamond dataset hda ID: %s" % blastp_dataset_hda_id) # Add datasets IDs to already imported IDs (so we don't assign all the wrong IDs to the next organism if there is one) imported_datasets_ids.append(genome_dataset_hda_id) @@ -680,13 +743,13 @@ class RunWorkflow(speciesData.SpeciesData): imported_datasets_ids.append(proteins_datasets_hda_id) imported_datasets_ids.append(gff_dataset_hda_id) 
imported_datasets_ids.append(interproscan_dataset_hda_id) - imported_datasets_ids.append(blast_diamond_dataset_hda_id) + imported_datasets_ids.append(blastp_dataset_hda_id) # Return a dict made of the hda ids return {"genome_hda_id": genome_dataset_hda_id, "transcripts_hda_id": transcripts_dataset_hda_id, "proteins_hda_id": proteins_datasets_hda_id, "gff_hda_id": gff_dataset_hda_id, "interproscan_hda_id": interproscan_dataset_hda_id, - "blast_diamond_hda_id": blast_diamond_dataset_hda_id, + "blastp_hda_id": blastp_dataset_hda_id, "imported_datasets_ids": imported_datasets_ids} @@ -748,7 +811,7 @@ def run_workflow(workflow_path, workflow_parameters, datamap, config, input_spec -def create_sp_workflow_dict(sp_dict, main_dir, config): +def create_sp_workflow_dict(sp_dict, main_dir, config, workflow_type): """ """ @@ -778,19 +841,65 @@ def create_sp_workflow_dict(sp_dict, main_dir, config): run_workflow_for_current_organism.species) + if workflow_type == "load_fasta_gff_jbrowse": + run_workflow_for_current_organism.connect_to_instance() + + history_id = run_workflow_for_current_organism.set_get_history() + + run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() + ids = run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + + org_id = None + genome_analysis_id = None + ogs_analysis_id = None + org_id = ids["org_id"] + genome_analysis_id = ids["genome_analysis_id"] + ogs_analysis_id = ids["ogs_analysis_id"] + instance_attributes = run_workflow_for_current_organism.get_instance_attributes() + hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + + strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex) + genus_species = run_workflow_for_current_organism.genus_species + + # Create the dictionary holding all attributes needed to connect to the galaxy instance + attributes = {"genus": run_workflow_for_current_organism.genus, + "species": 
run_workflow_for_current_organism.species, + "genus_species": run_workflow_for_current_organism.genus_species, + "full_name": run_workflow_for_current_organism.full_name, + "species_folder_name": run_workflow_for_current_organism.species_folder_name, + "sex": run_workflow_for_current_organism.sex, + "strain": run_workflow_for_current_organism.strain, + "org_id": org_id, + "genome_analysis_id": genome_analysis_id, + "ogs_analysis_id": ogs_analysis_id, + "instance_attributes": instance_attributes, + "hda_ids": hda_ids, + "history_id": history_id, + "instance": run_workflow_for_current_organism.instance, + "instance_url": run_workflow_for_current_organism.instance_url, + "email": config["galaxy_default_admin_email"], + "password": config["galaxy_default_admin_password"]} + + sp_workflow_dict[genus_species] = {strain_sex: attributes} + + else: + logging.critical("The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.full_name) + sys.exit() + + return sp_workflow_dict + + if workflow_type == "blast": run_workflow_for_current_organism.connect_to_instance() history_id = run_workflow_for_current_organism.set_get_history() run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - ids = run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + ids = run_workflow_for_current_organism.add_organism_blastp_analysis() org_id = None - genome_analysis_id = None - ogs_analysis_id = None org_id = ids["org_id"] - genome_analysis_id = ids["genome_analysis_id"] - ogs_analysis_id = ids["ogs_analysis_id"] + blastp_analysis_id = None + blastp_analysis_id = ids["blastp_analysis_id"] instance_attributes = run_workflow_for_current_organism.get_instance_attributes() hda_ids = run_workflow_for_current_organism.import_datasets_into_history() @@ -806,8 +915,7 @@ def create_sp_workflow_dict(sp_dict, main_dir, config): "sex": run_workflow_for_current_organism.sex, "strain": run_workflow_for_current_organism.strain, "org_id": 
org_id, - "genome_analysis_id": genome_analysis_id, - "ogs_analysis_id": ogs_analysis_id, + "blastp_analysis_id": blastp_analysis_id, "instance_attributes": instance_attributes, "hda_ids": hda_ids, "history_id": history_id, @@ -823,8 +931,6 @@ def create_sp_workflow_dict(sp_dict, main_dir, config): sys.exit() - return sp_workflow_dict - def install_changesets_revisions_from_workflow(instance, workflow_path): """ @@ -900,7 +1006,7 @@ if __name__ == "__main__": parser.add_argument("--workflow", "-w", type=str, - help="Worfklow to run") + help="Worfklow to run. Available options: load_fasta_gff_jbrowse, blast, interpro") args = parser.parse_args() @@ -924,44 +1030,452 @@ if __name__ == "__main__": sp_dict_list = utilities.parse_input(args.input) - # # Checking if user specified a workflow to run - # if not args.workflow: - # logging.critical("No workflow specified, exiting") - # sys.exit() - # else: - # workflow = os.path.abspath(args.workflow) + workflow_valid_types = ["load_fasta_gff_jbrowse", "blast", "interpro"] + + workflow_type = None + # Checking if user specified a workflow to run + if not args.workflow: + logging.critical("No workflow type specified, exiting") + sys.exit() + elif args.workflow in workflow_valid_types: + workflow_type = args.workflow + logging.info("Workflow type set to %s" % workflow_type) script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) config = utilities.parse_config(args.config) all_sp_workflow_dict = {} - for sp_dict in sp_dict_list: - # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary - current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config) + if workflow_type == "load_fasta_gff_jbrowse": + for sp_dict in sp_dict_list: - current_sp_key = list(current_sp_workflow_dict.keys())[0] - current_sp_value = list(current_sp_workflow_dict.values())[0] - current_sp_strain_sex_key = list(current_sp_value.keys())[0] - 
current_sp_strain_sex_value = list(current_sp_value.values())[0] + # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary + current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config, workfow_type="load_fasta_gff_jbrowse") - # Add the species dictionary to the complete dictionary - # This dictionary contains every organism present in the input file - # Its structure is the following: - # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}} - if not current_sp_key in all_sp_workflow_dict.keys(): - all_sp_workflow_dict[current_sp_key] = current_sp_value - else: - all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value + current_sp_key = list(current_sp_workflow_dict.keys())[0] + current_sp_value = list(current_sp_workflow_dict.values())[0] + current_sp_strain_sex_key = list(current_sp_value.keys())[0] + current_sp_strain_sex_value = list(current_sp_value.values())[0] + + # Add the species dictionary to the complete dictionary + # This dictionary contains every organism present in the input file + # Its structure is the following: + # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}} + if not current_sp_key in all_sp_workflow_dict.keys(): + all_sp_workflow_dict[current_sp_key] = current_sp_value + else: + all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value + + + for k, v in all_sp_workflow_dict.items(): + if len(list(v.keys())) == 1: + logging.info("Input organism %s: 1 species detected in input dictionary" % k) + + # Set workflow path (1 organism) + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga") + + # Instance object required variables + instance_url, email, 
password = None, None, None + + # Set the galaxy instance variables + for k2, v2 in v.items(): + instance_url = v2["instance_url"] + email = v2["email"] + password = v2["password"] + + instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + + # Check if the versions of tools specified in the workflow are installed in galaxy + install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) + + organism_key_name = list(v.keys()) + org_dict = v[organisms_key_names[0]] + + history_id = org_dict["history_id"] + + # Organism 1 attributes + org_genus = org_dict["genus"] + org_species = org_dict["species"] + org_genus_species = org_dict["genus_species"] + org_species_folder_name = org_dict["species_folder_name"] + org_full_name = org_dict["full_name"] + org_strain = org_dict["sex"] + org_sex = org_dict["strain"] + org_org_id = org_dict["org_id"] + org_genome_analysis_id = org_dict["genome_analysis_id"] + org_ogs_analysis_id = org_dict["ogs_analysis_id"] + org_genome_hda_id = org_dict["hda_ids"]["genome_hda_id"] + org_transcripts_hda_id = org_dict["hda_ids"]["transcripts_hda_id"] + org_proteins_hda_id = org_dict["hda_ids"]["proteins_hda_id"] + org_gff_hda_id = org_dict["hda_ids"]["gff_hda_id"] + + # Store these values into a dict for parameters logging/validation + org_parameters_dict = { + "org_genus": org_genus, + "org_species": org_species, + "org_genus_species": org_genus_species, + "org_species_folder_name": org_species_folder_name, + "org_full_name": org_full_name, + "org_strain": org_strain, + "org_sex": org_sex, + "org_org_id": org_org_id, + "org_genome_analysis_id": org_genome_analysis_id, + "org_ogs_analysis_id": org_ogs_analysis_id, + "org_genome_hda_id": org_genome_hda_id, + "org_transcripts_hda_id": org_transcripts_hda_id, + "org_proteins_hda_id": org_proteins_hda_id, + "org_gff_hda_id": org_gff_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for 
param_name, param_value in org_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value)) + sys.exit() + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + GENOME_FASTA_FILE_ORG = "0" + GFF_FILE_ORG = "1" + PROTEINS_FASTA_FILE_ORG = "2" + LOAD_FASTA_ORG = "3" + JBROWSE_ORG = "4" + LOAD_GFF_ORG = "5" + JBROWSE_CONTAINER = "6" + SYNC_FEATURES_ORG = "7" + POPULATE_MAT_VIEWS = "8" + INDEX_TRIPAL_DATA = "9" + + # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) + workflow_parameters[GENOME_FASTA_FILE_ORG] = {} + workflow_parameters[GFF_FILE_ORG] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG] = {} + workflow_parameters[LOAD_FASTA_ORG] = {"organism": org_org_id, + "analysis_id": org_genome_analysis_id, + "do_update": "true"} + workflow_parameters[JBROWSE_ORG] = {} + workflow_parameters[LOAD_GFF_ORG] = {"organism": org_org_id, "analysis_id": org_ogs_analysis_id} + workflow_parameters[SYNC_FEATURES_ORG] = {"organism_id": org_org_id} + # POPULATE + INDEX DATA + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + # Set datamap (mapping of input files in the workflow) + datamap = {} + + datamap[GENOME_FASTA_FILE_ORG] = {"src": "hda", "id": org_genome_hda_id} + datamap[GFF_FILE_ORG] = {"src": "hda", "id": org_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG] = {"src": "hda", "id": org_proteins_hda_id} + + + with open(workflow_path, 'r') as ga_in_file: + # Store the decoded json dictionary + workflow_dict = json.load(ga_in_file) + workflow_name = workflow_dict["name"] + + # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them + # as runtime values, using runtime parameters makes the tool 
throw an internal critical error ("replace not found" error) + # Scratchgmod test: need "http" (or "https"), the hostname (+ port) + jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}") + if "menu_url" not in config.keys(): + jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}") + else: + jbrowse_menu_url_org = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}") + + # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True) + # print(show_tool_add_organism) + # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True) + # print(show_jbrowse_tool) + # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True) + # print(show_jbrowse_container_tool) + + # Replace values in the workflow dictionary + workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org) + workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name) + + # Import the workflow in galaxy as a dict + instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) + + # Get its attributes + workflow_attributes = 
instance.workflows.get_workflows(name=workflow_name) + # Then get its ID (required to invoke the workflow) + workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) + show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id) + # Check if the workflow is found + try: + logging.debug("Workflow ID: %s" % workflow_id) + except bioblend.ConnectionError: + logging.warning("Error finding workflow %s" % workflow_name) + + # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it + instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) + + + if len(list(v.keys())) == 2: + + logging.info("Input organism %s: 2 species detected in input dictionary" % k) + + # Set workflow path (2 organisms) + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga") + + # Instance object required variables + instance_url, email, password = None, None, None + + # Set the galaxy instance variables + for k2, v2 in v.items(): + instance_url = v2["instance_url"] + email = v2["email"] + password = v2["password"] + + instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + + # Check if the versions of tools specified in the workflow are installed in galaxy + install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) + + # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2) + organisms_key_names = list(v.keys()) + org1_dict = v[organisms_key_names[0]] + org2_dict = v[organisms_key_names[1]] + + history_id = org1_dict["history_id"] + + # Organism 1 
attributes + org1_genus = org1_dict["genus"] + org1_species = org1_dict["species"] + org1_genus_species = org1_dict["genus_species"] + org1_species_folder_name = org1_dict["species_folder_name"] + org1_full_name = org1_dict["full_name"] + org1_strain = org1_dict["sex"] + org1_sex = org1_dict["strain"] + org1_org_id = org1_dict["org_id"] + org1_genome_analysis_id = org1_dict["genome_analysis_id"] + org1_ogs_analysis_id = org1_dict["ogs_analysis_id"] + org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"] + org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"] + org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"] + org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"] + + # Store these values into a dict for parameters logging/validation + org1_parameters_dict = { + "org1_genus": org1_genus, + "org1_species": org1_species, + "org1_genus_species": org1_genus_species, + "org1_species_folder_name": org1_species_folder_name, + "org1_full_name": org1_full_name, + "org1_strain": org1_strain, + "org1_sex": org1_sex, + "org1_org_id": org1_org_id, + "org1_genome_analysis_id": org1_genome_analysis_id, + "org1_ogs_analysis_id": org1_ogs_analysis_id, + "org1_genome_hda_id": org1_genome_hda_id, + "org1_transcripts_hda_id": org1_transcripts_hda_id, + "org1_proteins_hda_id": org1_proteins_hda_id, + "org1_gff_hda_id": org1_gff_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for param_name, param_value in org1_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value)) + sys.exit() + + # Organism 2 attributes + org2_genus = org2_dict["genus"] + org2_species = org2_dict["species"] + org2_genus_species = org2_dict["genus_species"] + org2_species_folder_name = org2_dict["species_folder_name"] + org2_full_name = org2_dict["full_name"] 
+ org2_strain = org2_dict["sex"] + org2_sex = org2_dict["strain"] + org2_org_id = org2_dict["org_id"] + org2_genome_analysis_id = org2_dict["genome_analysis_id"] + org2_ogs_analysis_id = org2_dict["ogs_analysis_id"] + org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"] + org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"] + org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"] + org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"] + + # Store these values into a dict for parameters logging/validation + org2_parameters_dict = { + "org2_genus": org2_genus, + "org2_species": org2_species, + "org2_genus_species": org2_genus_species, + "org2_species_folder_name": org2_species_folder_name, + "org2_full_name": org2_full_name, + "org2_strain": org2_strain, + "org2_sex": org2_sex, + "org2_org_id": org2_org_id, + "org2_genome_analysis_id": org2_genome_analysis_id, + "org2_ogs_analysis_id": org2_ogs_analysis_id, + "org2_genome_hda_id": org2_genome_hda_id, + "org2_transcripts_hda_id": org2_transcripts_hda_id, + "org2_proteins_hda_id": org2_proteins_hda_id, + "org2_gff_hda_id": org2_gff_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for param_name, param_value in org2_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value)) + sys.exit() + + # Source files association (ordered by their IDs in the workflow) + # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error) + GFF_FILE_ORG1 = "0" + GENOME_FASTA_FILE_ORG1 = "1" + PROTEINS_FASTA_FILE_ORG1 = "2" + + GENOME_FASTA_FILE_ORG2 = "3" + GFF_FILE_ORG2 = "4" + PROTEINS_FASTA_FILE_ORG2 = "5" + + LOAD_FASTA_ORG1 = "6" + JBROWSE_ORG1 = "7" + JRBOWSE_ORG2 = "8" + + LOAD_GFF_ORG1 = "9" + 
JBROWSE_CONTAINER = "10" + SYNC_FEATURES_ORG1 = "11" + + LOAD_FASTA_ORG2 = "12" + LOAD_GFF_ORG2 = "13" + + SYNC_FEATURES_ORG2 = "14" + POPULATE_MAT_VIEWS = "15" + INDEX_TRIPAL_DATA = "16" + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) + workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} + workflow_parameters[GFF_FILE_ORG1] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} + workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} + workflow_parameters[GFF_FILE_ORG2] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} + + # Organism 1 + workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id, + "analysis_id": org1_genome_analysis_id, + "do_update": "true"} + # workflow_parameters[JBROWSE_ORG1] = {"menu_url": jbrowse_menu_url_org1} + workflow_parameters[JBROWSE_ORG1] = {} + workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id} + workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id} + # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]} + workflow_parameters[JBROWSE_CONTAINER] = {} + + # Organism 2 + workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id, + "analysis_id": org2_genome_analysis_id, + "do_update": "true"} + workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id} + # workflow_parameters[JRBOWSE_ORG2] = {"menu_url": jbrowse_menu_url_org2} + workflow_parameters[JRBOWSE_ORG2] = {} + workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id} + + + # POPULATE + INDEX DATA + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + + # Set datamap (mapping of 
input files in the workflow) + datamap = {} + + # Organism 1 + datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id} + datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id} + + # Organism 2 + datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id} + datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id} + + with open(workflow_path, 'r') as ga_in_file: + + # Store the decoded json dictionary + workflow_dict = json.load(ga_in_file) + workflow_name = workflow_dict["name"] + + # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them + # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) + # Scratchgmod test: need "http" (or "https"), the hostname (+ port) + jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") + jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") + if "menu_url" not in config.keys(): + jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") + jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") + 
else: + jbrowse_menu_url_org1 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") + jbrowse_menu_url_org2 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") + + # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True) + # print(show_tool_add_organism) + # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True) + # print(show_jbrowse_tool) + # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True) + # print(show_jbrowse_container_tool) + + # Replace values in the workflow dictionary + workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) + workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) + # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow + # in galaxy --> define a naming method for these workflows + workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name) + workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name) + + # Import the workflow in galaxy as a dict + 
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) + + # Get its attributes + workflow_attributes = instance.workflows.get_workflows(name=workflow_name) + # Then get its ID (required to invoke the workflow) + workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) + show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id) + # Check if the workflow is found + try: + logging.debug("Workflow ID: %s" % workflow_id) + except bioblend.ConnectionError: + logging.warning("Error finding workflow %s" % workflow_name) + + # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it + instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) + + if workflow_type == "blast": + for sp_dict in sp_dict_list: + + # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary + current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config, workfow_type="blast") + + current_sp_key = list(current_sp_workflow_dict.keys())[0] + current_sp_value = list(current_sp_workflow_dict.values())[0] + current_sp_strain_sex_key = list(current_sp_value.keys())[0] + current_sp_strain_sex_value = list(current_sp_value.values())[0] + + # Add the species dictionary to the complete dictionary + # This dictionary contains every organism present in the input file + # Its structure is the following: + # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}} + if not current_sp_key in all_sp_workflow_dict.keys(): + all_sp_workflow_dict[current_sp_key] = current_sp_value +
else: + all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value - for k, v in all_sp_workflow_dict.items(): if len(list(v.keys())) == 1: logging.info("Input organism %s: 1 species detected in input dictionary" % k) # Set workflow path (1 organism) - workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga") + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga") # Instance object required variables instance_url, email, password = None, None, None @@ -982,7 +1496,7 @@ if __name__ == "__main__": history_id = org_dict["history_id"] - # Organism 1 attributes + # Organism attributes org_genus = org_dict["genus"] org_species = org_dict["species"] org_genus_species = org_dict["genus_species"] @@ -991,12 +1505,8 @@ if __name__ == "__main__": org_strain = org_dict["sex"] org_sex = org_dict["strain"] org_org_id = org_dict["org_id"] - org_genome_analysis_id = org_dict["genome_analysis_id"] - org_ogs_analysis_id = org_dict["ogs_analysis_id"] - org_genome_hda_id = org_dict["hda_ids"]["genome_hda_id"] - org_transcripts_hda_id = org_dict["hda_ids"]["transcripts_hda_id"] - org_proteins_hda_id = org_dict["hda_ids"]["proteins_hda_id"] - org_gff_hda_id = org_dict["hda_ids"]["gff_hda_id"] + org_blastp_analysis_id = org_dict["blastp_analysis_id"] + org_blastp_hda_id = org_dict["hda_ids"]["blastp_hda_id"] # Store these values into a dict for parameters logging/validation org_parameters_dict = { @@ -1008,12 +1518,8 @@ if __name__ == "__main__": "org_strain": org_strain, "org_sex": org_sex, "org_org_id": org_org_id, - "org_genome_analysis_id": org_genome_analysis_id, - "org_ogs_analysis_id": org_ogs_analysis_id, - "org_genome_hda_id": org_genome_hda_id, - "org_transcripts_hda_id": org_transcripts_hda_id, - "org_proteins_hda_id": org_proteins_hda_id, - "org_gff_hda_id": org_gff_hda_id, + 
"org_blast_analysis_id": org_blastp_analysis_id, + "org_blastp_hda_id": org_blastp_hda_id, } # Look for empty parameters values, throw a critical error if a parameter value is invalid @@ -1022,71 +1528,30 @@ if __name__ == "__main__": logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value)) sys.exit() + BLASTP_FILE = "0" + LOAD_BLASTP_FILE = "1" + SYNC_BLASTP_ANALYSIS = "2" + POPULATE_MAT_VIEWS = "3" + INDEX_TRIPAL_DATA = "4" + # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} - - GENOME_FASTA_FILE_ORG = "0" - GFF_FILE_ORG = "1" - PROTEINS_FASTA_FILE_ORG = "2" - LOAD_FASTA_ORG = "3" - JBROWSE_ORG = "4" - LOAD_GFF_ORG = "5" - JBROWSE_CONTAINER = "6" - SYNC_FEATURES_ORG = "7" - POPULATE_MAT_VIEWS = "8" - INDEX_TRIPAL_DATA = "9" - - # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) - workflow_parameters[GENOME_FASTA_FILE_ORG] = {} - workflow_parameters[GFF_FILE_ORG] = {} - workflow_parameters[PROTEINS_FASTA_FILE_ORG] = {} - workflow_parameters[LOAD_FASTA_ORG] = {"organism": org_org_id, - "analysis_id": org_genome_analysis_id, - "do_update": "true"} - workflow_parameters[JBROWSE_ORG] = {} - workflow_parameters[LOAD_GFF_ORG] = {"organism": org_org_id, "analysis_id": org_ogs_analysis_id} - workflow_parameters[SYNC_FEATURES_ORG] = {"organism_id": org_org_id} - # POPULATE + INDEX DATA + workflow_parameters[BLASTP_FILE] = {} + workflow_parameters[LOAD_BLASTP_FILE] = {"analysis_id": org_blastp_analysis_id, "organism_id": org_org_id} + workflow_parameters[SYNC_BLASTP_ANALYSIS] = {"analysis_id": org_blastp_analysis_id} workflow_parameters[POPULATE_MAT_VIEWS] = {} workflow_parameters[INDEX_TRIPAL_DATA] = {} - # Set datamap (mapping of input files in the workflow) datamap = {} - - datamap[GENOME_FASTA_FILE_ORG] = {"src": "hda", "id": org_genome_hda_id} - 
datamap[GFF_FILE_ORG] = {"src": "hda", "id": org_gff_hda_id} - datamap[PROTEINS_FASTA_FILE_ORG] = {"src": "hda", "id": org_proteins_hda_id} - + datamap[BLASTP_FILE] = {"src": "hda", "id": org_blastp_hda_id} with open(workflow_path, 'r') as ga_in_file: - # Store the decoded json dictionary workflow_dict = json.load(ga_in_file) workflow_name = workflow_dict["name"] - # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them - # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) - # Scratchgmod test: need "http" (or "https"), the hostname (+ port) - jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}") - if "menu_url" not in config.keys(): - jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}") - else: - jbrowse_menu_url_org = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}") - - # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True) - # print(show_tool_add_organism) - # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True) - # print(show_jbrowse_tool) - # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True) - # print(show_jbrowse_container_tool) - - # Replace 
values in the workflow dictionary - workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org) - workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name) - # Import the workflow in galaxy as a dict instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) - # Get its attributes workflow_attributes = instance.workflows.get_workflows(name=workflow_name) # Then get its ID (required to invoke the workflow) @@ -1106,11 +1571,11 @@ if __name__ == "__main__": if len(list(v.keys())) == 2: - + logging.info("Input organism %s: 2 species detected in input dictionary" % k) # Set workflow path (2 organisms) - workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga") + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga") # Instance object required variables instance_url, email, password = None, None, None @@ -1126,7 +1591,6 @@ if __name__ == "__main__": # Check if the versions of tools specified in the workflow are installed in galaxy install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) - # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2) organisms_key_names = list(v.keys()) org1_dict = v[organisms_key_names[0]] org2_dict = v[organisms_key_names[1]] @@ -1142,12 +1606,8 @@ if __name__ == "__main__": org1_strain = org1_dict["sex"] org1_sex = org1_dict["strain"] org1_org_id = org1_dict["org_id"] - org1_genome_analysis_id = org1_dict["genome_analysis_id"] - org1_ogs_analysis_id = org1_dict["ogs_analysis_id"] - org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"] - org1_transcripts_hda_id = 
org1_dict["hda_ids"]["transcripts_hda_id"] - org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"] - org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"] + org1_blastp_analysis_id = org1_dict["blastp_analysis_id"] + org1_blastp_hda_id = org1_dict["hda_ids"]["blastp_hda_id"] # Store these values into a dict for parameters logging/validation org1_parameters_dict = { @@ -1159,14 +1619,11 @@ if __name__ == "__main__": "org1_strain": org1_strain, "org1_sex": org1_sex, "org1_org_id": org1_org_id, - "org1_genome_analysis_id": org1_genome_analysis_id, - "org1_ogs_analysis_id": org1_ogs_analysis_id, - "org1_genome_hda_id": org1_genome_hda_id, - "org1_transcripts_hda_id": org1_transcripts_hda_id, - "org1_proteins_hda_id": org1_proteins_hda_id, - "org1_gff_hda_id": org1_gff_hda_id, + "org1_blast_analysis_id": org1_blastp_analysis_id, + "org1_blastp_hda_id": org1_blastp_hda_id, } + # Look for empty parameters values, throw a critical error if a parameter value is invalid for param_name, param_value in org1_parameters_dict.items(): if param_value is None or param_value == "": @@ -1182,30 +1639,23 @@ if __name__ == "__main__": org2_strain = org2_dict["sex"] org2_sex = org2_dict["strain"] org2_org_id = org2_dict["org_id"] - org2_genome_analysis_id = org2_dict["genome_analysis_id"] - org2_ogs_analysis_id = org2_dict["ogs_analysis_id"] - org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"] - org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"] - org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"] - org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"] + org2_blastp_analysis_id = org2_dict["blastp_analysis_id"] + org2_blastp_hda_id = org2_dict["hda_ids"]["blastp_hda_id"] # Store these values into a dict for parameters logging/validation org2_parameters_dict = { "org2_genus": org2_genus, "org2_species": org2_species, "org2_genus_species": org2_genus_species, - "org2_species_folder_name": org2_species_folder_name, + "org2_species_folder_name": 
org2_species_folder_name, "org2_full_name": org2_full_name, "org2_strain": org2_strain, "org2_sex": org2_sex, "org2_org_id": org2_org_id, - "org2_genome_analysis_id": org2_genome_analysis_id, - "org2_ogs_analysis_id": org2_ogs_analysis_id, - "org2_genome_hda_id": org2_genome_hda_id, - "org2_transcripts_hda_id": org2_transcripts_hda_id, - "org2_proteins_hda_id": org2_proteins_hda_id, - "org2_gff_hda_id": org2_gff_hda_id, + "org2_blast_analysis_id": org2_blastp_analysis_id, + "org2_blastp_hda_id": org2_blastp_hda_id, } + # Look for empty parameters values, throw a critical error if a parameter value is invalid for param_name, param_value in org2_parameters_dict.items(): @@ -1215,115 +1665,51 @@ if __name__ == "__main__": # Source files association (ordered by their IDs in the workflow) # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error) - GFF_FILE_ORG1 = "0" - GENOME_FASTA_FILE_ORG1 = "1" - PROTEINS_FASTA_FILE_ORG1 = "2" - - GENOME_FASTA_FILE_ORG2 = "3" - GFF_FILE_ORG2 = "4" - PROTEINS_FASTA_FILE_ORG2 = "5" - - LOAD_FASTA_ORG1 = "6" - JBROWSE_ORG1 = "7" - JRBOWSE_ORG2 = "8" - - LOAD_GFF_ORG1 = "9" - JBROWSE_CONTAINER = "10" - SYNC_FEATURES_ORG1 = "11" - - LOAD_FASTA_ORG2 = "12" - LOAD_GFF_ORG2 = "13" - - SYNC_FEATURES_ORG2 = "14" - POPULATE_MAT_VIEWS = "15" - INDEX_TRIPAL_DATA = "16" + BLASTP_FILE_ORG1 = "0" + BLASTP_FILE_ORG2 = "1" + LOAD_BLASTP_FILE_ORG1 = "2" + LOAD_BLASTP_FILE_ORG2 = "3" + SYNC_BLASTP_ANALYSIS_ORG1 = "4" + SYNC_BLASTP_ANALYSIS_ORG2 = "5" + POPULATE_MAT_VIEWS = "6" + INDEX_TRIPAL_DATA = "7" # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) - workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} - workflow_parameters[GFF_FILE_ORG1] = {} - workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {}
- workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} - workflow_parameters[GFF_FILE_ORG2] = {} - workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} + workflow_parameters[BLASTP_FILE_ORG1] = {} + workflow_parameters[BLASTP_FILE_ORG2] = {} # Organism 1 - workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id, - "analysis_id": org1_genome_analysis_id, - "do_update": "true"} - # workflow_parameters[JBROWSE_ORG1] = {"menu_url": jbrowse_menu_url_org1} - workflow_parameters[JBROWSE_ORG1] = {} - workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id} - workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id} - # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]} - workflow_parameters[JBROWSE_CONTAINER] = {} + workflow_parameters[LOAD_BLASTP_FILE_ORG1] = {"organism_id": org1_org_id, + "analysis_id": org1_blastp_analysis_id} + workflow_parameters[SYNC_BLASTP_ANALYSIS_ORG1] = {"analysis_id": org1_blastp_analysis_id} # Organism 2 - workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id, - "analysis_id": org2_genome_analysis_id, - "do_update": "true"} - workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id} - # workflow_parameters[JRBOWSE_ORG2] = {"menu_url": jbrowse_menu_url_org2} - workflow_parameters[JRBOWSE_ORG2] = {} - workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id} - + workflow_parameters[LOAD_BLASTP_FILE_ORG2] = {"organism_id": org2_org_id, + "analysis_id": org2_blastp_analysis_id} + workflow_parameters[SYNC_BLASTP_ANALYSIS_ORG2] = {"analysis_id": org2_blastp_analysis_id} - # POPULATE + INDEX DATA workflow_parameters[POPULATE_MAT_VIEWS] = {} workflow_parameters[INDEX_TRIPAL_DATA] = {} - # Set datamap (mapping of input files in the workflow) datamap = {} # Organism 1 - 
datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id} - datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id} - datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id} + datamap[BLASTP_FILE_ORG1] = {"src": "hda", "id": org1_blastp_hda_id} # Organism 2 - datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id} - datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id} - datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id} + datamap[BLASTP_FILE_ORG2] = {"src": "hda", "id": org2_blastp_hda_id} with open(workflow_path, 'r') as ga_in_file: - # Store the decoded json dictionary workflow_dict = json.load(ga_in_file) workflow_name = workflow_dict["name"] - # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them - # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) - # Scratchgmod test: need "http" (or "https"), the hostname (+ port) - jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - if "menu_url" not in config.keys(): - jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, 
Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - else: - jbrowse_menu_url_org1 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - - # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True) - # print(show_tool_add_organism) - # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True) - # print(show_jbrowse_tool) - # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True) - # print(show_jbrowse_container_tool) - - # Replace values in the workflow dictionary - workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) - workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) - # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow - # in galaxy --> define a naming method for these workflows - workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name) - workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", 
org2_species_folder_name) - # Import the workflow in galaxy as a dict instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) - # Get its attributes workflow_attributes = instance.workflows.get_workflows(name=workflow_name) # Then get its ID (required to invoke the workflow) @@ -1340,320 +1726,3 @@ if __name__ == "__main__": logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) - - - - - - - - # Get the instance attribute from the object for future connections - # This is the GalaxyInstance object from bioblend (not the url!) - # instance = run_workflow_for_current_organism.instance - - # if "2org" in str(workflow): - # logging.info("Executing workflow %s" % workflow) - - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - - # # TODO: only do this once per instance (not at each iteration!) - # run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.add_organism_ogs_genome_analyses() - # org_id = run_workflow_for_current_organism.get_organism_id() - # genome_analysis_id = run_workflow_for_current_organism.get_genome_analysis_id() - # ogs_analysis_id = run_workflow_for_current_organism.get_ogs_analysis_id() - # instance_attributes = run_workflow_for_current_organism.get_instance_attributes() - - - # # Import datasets into history and retrieve their hda IDs - # # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) - - # hda_ids = run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids_list.append(hda_ids) - - - # # TODO: Exlcude the workflow invocation from the loop - # # Extract instance url from one, attributes from both in lists ? 
- - - # # Source files association (ordered by their IDs in the workflow) - # GENOME_FASTA_FILE_ORG1 = "0" - # GFF_FILE_ORG1 = "1" - # PROTEINS_FASTA_FILE_ORG1 = "2" - # GENOME_FASTA_FILE_ORG2 = "3" - # GFF_FILE_ORG2 = "4" - # PROTEINS_FASTA_FILE_ORG2 = "5" - - # LOAD_FASTA_ORG1 = "6" - # JBROWSE_ORG1 = "7" - # JRBOWSE_ORG2 = "8" - - # LOAD_GFF_ORG1 = "9" - # JBROWSE_CONTAINER = "10" - # SYNC_FEATURES_ORG1 = "11" - - # LOAD_FASTA_ORG2 = "12" - # LOAD_GFF_ORG2 = "13" - - # SYNC_FEATURES_ORG2 = "14" - # POPULATE_MAT_VIEWS = "15" - # INDEX_TRIPAL_DATA = "16" - - # workflow_parameters = {} - - # workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} - # workflow_parameters[GFF_FILE_ORG1] = {} - # workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} - # workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} - # workflow_parameters[GFF_FILE_ORG2] = {} - # workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} - - - # # ORGANISM 1 - # workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org_ids[0], - # "analysis_id": genome_analysis_ids[0], - # "do_update": "true"} - # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - # workflow_parameters[JBROWSE_ORG1] = {} - # workflow_parameters[LOAD_GFF_ORG1] = {"organism": org_ids[0], "analysis_id": ogs_analysis_ids[0]} - # workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org_ids[0]} - # workflow_parameters[JBROWSE_CONTAINER] = {} - - - # # ORGANISM 2 - # workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org_ids[1], - # "analysis_id": genome_analysis_ids[1], - # "do_update": "true"} - # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - # workflow_parameters[LOAD_GFF_ORG2] = {"organism": 
org_ids[1], "analysis_id": ogs_analysis_ids[1]} - # workflow_parameters[JRBOWSE_ORG2] = {} - # workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org_ids[1]} - - # workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": ogs_analysis_ids[0]} - # workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": genome_analysis_ids[0]} - # workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": org_ids[0]} - - # # POPULATE + INDEX DATA - # workflow_parameters[POPULATE_MAT_VIEWS] = {} - # workflow_parameters[INDEX_TRIPAL_DATA] = {} - - # # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) - # run_workflow_for_current_organism.datamap = {} - - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["gff_hda_id"]} - # run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["proteins_hda_id"]} - - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["gff_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["proteins_hda_id"]} - - - - # logging.info("OK: Workflow invoked") - - - - # # If input workflow is Chado_load_Tripal_synchronize.ga - # if "Chado_load_Tripal_synchronize" in str(workflow): - - # logging.info("Executing workflow 'Chado_load_Tripal_synchronize'") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # # run_workflow_for_current_organism.get_species_history_id() - - # run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - # 
run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.add_organism_ogs_genome_analyses() - # run_workflow_for_current_organism.get_organism_id() - # run_workflow_for_current_organism.get_genome_analysis_id() - # run_workflow_for_current_organism.get_ogs_analysis_id() - - # # run_workflow_for_current_organism.tripal_synchronize_organism_analyses() - - # # Get the attributes of the instance and project data files - # run_workflow_for_current_organism.get_instance_attributes() - - # # Import datasets into history and retrieve their hda IDs - # # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) - # hda_ids = run_workflow_for_current_organism.import_datasets_into_history() - - # # DEBUG - # # run_workflow_for_current_organism.get_invocation_report(workflow_name="Chado load Tripal synchronize") - - # # Explicit workflow parameter names - # GENOME_FASTA_FILE = "0" - # GFF_FILE = "1" - # PROTEINS_FASTA_FILE = "2" - # TRANSCRIPTS_FASTA_FILE = "3" - - # LOAD_FASTA_IN_CHADO = "4" - # LOAD_GFF_IN_CHADO = "5" - # SYNC_ORGANISM_INTO_TRIPAL = "6" - # SYNC_GENOME_ANALYSIS_INTO_TRIPAL = "7" - # SYNC_OGS_ANALYSIS_INTO_TRIPAL = "8" - # SYNC_FEATURES_INTO_TRIPAL = "9" - - # workflow_parameters = {} - - # workflow_parameters[GENOME_FASTA_FILE] = {} - # workflow_parameters[GFF_FILE] = {} - # workflow_parameters[PROTEINS_FASTA_FILE] = {} - # workflow_parameters[TRANSCRIPTS_FASTA_FILE] = {} - - # workflow_parameters[LOAD_FASTA_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.genome_analysis_id, - # "do_update": "true"} - # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - # 
workflow_parameters[LOAD_GFF_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.ogs_analysis_id} - # workflow_parameters[SYNC_ORGANISM_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id} - # workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.ogs_analysis_id} - # workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.genome_analysis_id} - # workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id} - - # # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} - # run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE] = {"src": "hda", "id": hda_ids["proteins_hda_id"]} - # run_workflow_for_current_organism.datamap[TRANSCRIPTS_FASTA_FILE] = {"src": "hda", "id": hda_ids["transcripts_hda_id"]} - - # # run_workflow_for_current_organism.datamap = {} - # # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": - # # run_workflow_for_current_organism.datasets["genome_file"]} - # # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", - # # "id": hda_ids["gff_hda_id"]} - - # # Ensures galaxy has had time to retrieve datasets - # time.sleep(60) - # # Run the Chado load Tripal sync workflow with the parameters set above - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Chado load Tripal synchronize") - - # # Jbrowse creation workflow - # elif 
"Jbrowse" in str(workflow): - - # logging.info("Executing workflow 'Jbrowse'") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.get_organism_id() - # # Import datasets into history and get their hda IDs - # run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() # Note: only call this function AFTER calling "import_datasets_into_history()" - - # # Debugging - # # run_workflow_for_current_organism.get_invocation_report(workflow_name="Jbrowse") - - # GENOME_FASTA_FILE = "0" - # GFF_FILE = "1" - # ADD_JBROWSE = "2" - # ADD_ORGANISM_TO_JBROWSE = "3" - - # workflow_parameters = {} - # workflow_parameters[GENOME_FASTA_FILE] = {} - # workflow_parameters[GFF_FILE] = {} - # workflow_parameters[ADD_JBROWSE] = {} - # workflow_parameters[ADD_ORGANISM_TO_JBROWSE] = {} - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} - - # # Run the jbrowse creation workflow - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Jbrowse") - - # elif "Interpro" in str(workflow): - - # logging.info("Executing workflow 'Interproscan") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # # run_workflow_for_current_organism.get_species_history_id() - - # # Get the attributes of the instance 
and project data files - # run_workflow_for_current_organism.get_instance_attributes() - # run_workflow.add_interproscan_analysis() - # run_workflow_for_current_organism.get_interpro_analysis_id() - - # # Import datasets into history and retrieve their hda IDs - # run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() - - # INTERPRO_FILE = "0" - # LOAD_INTERPRO_IN_CHADO = "1" - # SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL = "2" - # SYNC_FEATURES_INTO_TRIPAL = "3" - # POPULATE_MAT_VIEWS = "4" - # INDEX_TRIPAL_DATA = "5" - - # workflow_parameters = {} - # workflow_parameters[INTERPRO_FILE] = {} - # workflow_parameters[LOAD_INTERPRO_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.interpro_analysis_id} - # workflow_parameters[SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.interpro_analysis_id} - - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": run_workflow_for_current_organism.hda_ids["interproscan_hda_id"]} - - # # Run Interproscan workflow - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Interproscan") - - # elif "Blast" in str(workflow): - - # logging.info("Executing workflow 'Blast_Diamond") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # # run_workflow_for_current_organism.get_species_history_id() - - # # Get the attributes of the instance and project data files - # run_workflow_for_current_organism.get_instance_attributes() - # 
run_workflow_for_current_organism.add_blastp_diamond_analysis() - # run_workflow_for_current_organism.get_blastp_diamond_analysis_id() - - # # Import datasets into history and retrieve their hda IDs - # run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() - - # BLAST_FILE = "0" - # LOAD_BLAST_IN_CHADO = "1" - # SYNC_BLAST_ANALYSIS_INTO_TRIPAL = "2" - # SYNC_FEATURES_INTO_TRIPAL = "3" - # POPULATE_MAT_VIEWS = "4" - # INDEX_TRIPAL_DATA = "5" - - # workflow_parameters = {} - # workflow_parameters[INTERPRO_FILE] = {} - # workflow_parameters[LOAD_BLAST_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} - # workflow_parameters[SYNC_BLAST_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": hda_ids["interproscan_hda_id"]} - - # # Run Interproscan workflow - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Interproscan") diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga new file mode 100644 index 0000000..db4e953 --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga @@ -0,0 +1,271 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "load_blast_results_1org_v1", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blast file xml org1" + } + ], + "label": "blast file xml org1", + 
"name": "Input dataset", + "outputs": [], + "position": { + "bottom": 416.1999969482422, + "height": 82.19999694824219, + "left": 410, + "right": 610, + "top": 334, + "width": 200, + "x": 410, + "y": 334 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "b1c63d94-61a7-4bf1-8b5d-e08fb34c0357", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "f602d234-8cea-4db9-ab77-678cdc0d2101" + } + ] + }, + "1": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 1, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blast results org1", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 457.3999938964844, + "height": 164.39999389648438, + "left": 711, + "right": 911, + "top": 293, + "width": 200, + "x": 711, + "y": 293 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"21\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": 
{\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "10144cf8-f121-45f3-ba64-9f4d66bf1e56", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "95708895-8439-4257-bff6-96e4c51a0725" + } + ] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 2, + "input_connections": { + "wait_for": { + "id": 1, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blast analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 451.3999938964844, + "height": 154.39999389648438, + "left": 1010, + "right": 1210, + "top": 297, + "width": 200, + "x": 1010, + "y": 297 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "99e7496d-ac32-467d-8c09-2efd48d0231a", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "1fb6db92-90a2-4e33-beec-f2f974e369e9" + } + ] + }, + "3": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 3, + "input_connections": { + "wait_for": { + "id": 2, + "output_name": "results" + } + }, + "inputs": [], + "label": "populate mat views", + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 452.3999938964844, + "height": 154.39999389648438, + "left": 1295, + "right": 1495, + "top": 298, + "width": 200, + "x": 1295, + "y": 298 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "5c947dd5-89df-4146-9ab8-9e1d6de42360", + "workflow_outputs": [ + { + "label": "Populate Tripal materialized view(s)", + "output_name": "results", + "uuid": "0a0c9fa7-3a3c-459d-b5c7-b7a5a11459f3" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 4, + "input_connections": { + "wait_for": { + "id": 3, + "output_name": "results" + } + }, + "inputs": [], + "label": "index tripal data", + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 433.6000061035156, + "height": 113.60000610351562, + "left": 1570, + "right": 1770, + "top": 320, + "width": 200, + "x": 1570, + "y": 320 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": 
"gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "5ecc30b0-07ab-4f9b-81f0-31310358c221", + "workflow_outputs": [ + { + "label": "Index Tripal data", + "output_name": "results", + "uuid": "5c0f0431-acb0-4e40-a7e4-8a562933fd97" + } + ] + } + }, + "tags": [], + "uuid": "80e32784-e39e-48ce-a6e3-7627de734ca6", + "version": 4 +} \ No newline at end of file diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga new file mode 100644 index 0000000..67d62db --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga @@ -0,0 +1,418 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "load_blast_results_2org_v1", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blast file xml org1" + } + ], + "label": "blast file xml org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 356.1999969482422, + "height": 82.19999694824219, + "left": 168, + "right": 368, + "top": 274, + "width": 200, + "x": 168, + "y": 274 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "b1c63d94-61a7-4bf1-8b5d-e08fb34c0357", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "8c6e3cd8-d0a0-403f-a882-f5206c8627f9" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { 
+ "description": "", + "name": "blast file xml org2" + } + ], + "label": "blast file xml org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 467.1999969482422, + "height": 82.19999694824219, + "left": 200, + "right": 400, + "top": 385, + "width": 200, + "x": 200, + "y": 385 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "9de2716c-eecd-48fc-8a71-b3d1f5daef85", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "8d7cef33-5fdc-481b-a0fd-542c42c6d032" + } + ] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 2, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blast results org1", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 361.3999938964844, + "height": 164.39999389648438, + "left": 528, + "right": 728, + "top": 197, + "width": 200, + "x": 528, + "y": 197 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"21\", \"input\": {\"__class__\": \"RuntimeValue\"}, 
\"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "595f6e1f-955a-42be-b03b-1269d1f7d189", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "c0ad63b8-045e-4a7c-97ba-8bc45862550b" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blast results org2", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 545.3999938964844, + "height": 164.39999389648438, + "left": 591, + "right": 791, + "top": 381, + "width": 200, + "x": 591, + "y": 381 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": \"1\", \"blastdb_id\": \"1\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": \"2\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, 
\"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "5d08ed84-681b-431c-92c9-3b2fd5f32176", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "6ed0a7c4-5b78-4016-8c21-0aaa992acd11" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 4, + "input_connections": { + "wait_for": { + "id": 2, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blast analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 360.3999938964844, + "height": 154.39999389648438, + "left": 858, + "right": 1058, + "top": 206, + "width": 200, + "x": 858, + "y": 206 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "99e7496d-ac32-467d-8c09-2efd48d0231a", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "9792ea7a-0b4d-4355-9ce6-db2ba1ed36e9" + } + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 5, + "input_connections": { + "wait_for": { + "id": 3, + "output_name": "results" + } + }, + "inputs": [], + "label": "sync blast analysis org2", + "name": "Synchronize an 
analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 545.3999938964844, + "height": 154.39999389648438, + "left": 899, + "right": 1099, + "top": 391, + "width": 200, + "x": 899, + "y": 391 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": \"1\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "99f68436-37c7-409a-bcf0-70bde40a9a8e", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "ce2c98b7-17f2-4646-a3bf-784af0103f4e" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 6, + "input_connections": { + "wait_for": { + "id": 5, + "output_name": "results" + } + }, + "inputs": [], + "label": "populate mat views", + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 474.3999938964844, + "height": 154.39999389648438, + "left": 1174, + "right": 1374, + "top": 320, + "width": 200, + "x": 1174, + "y": 320 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": 
"tool", + "uuid": "5c947dd5-89df-4146-9ab8-9e1d6de42360", + "workflow_outputs": [ + { + "label": "Populate Tripal materialized view(s)", + "output_name": "results", + "uuid": "31970b1e-e567-4994-b05e-31d3406ebdf3" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 7, + "input_connections": { + "wait_for": { + "id": 6, + "output_name": "results" + } + }, + "inputs": [], + "label": "index tripal data", + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 454.6000061035156, + "height": 113.60000610351562, + "left": 1444, + "right": 1644, + "top": 341, + "width": 200, + "x": 1444, + "y": 341 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "5ecc30b0-07ab-4f9b-81f0-31310358c221", + "workflow_outputs": [ + { + "label": "Index Tripal data", + "output_name": "results", + "uuid": "c4398cf0-10ca-45c7-ab62-9854b7519ecb" + } + ] + } + }, + "tags": [], + "uuid": "1219fcfa-e57c-437e-ac5e-4d803e3dafd9", + "version": 5 +} \ No newline at end of file -- GitLab