diff --git a/README.md b/README.md
index 4601866ff7b46ceec99b08b0bf2aee0910a18ba2..c11b3c72699c61d2b9663ac39b11f6903e1144c6 100755
--- a/README.md
+++ b/README.md
@@ -73,8 +73,8 @@ $ python3 /path/to/repo/gga_load_data.py input_file.yml -c/--config config_file.
 
 - Run a workflow in galaxy:
 
 ```bash
-$ python3 /path/to/repo/run_workflow_phaeoexplorer.py input_file.yml -c/--config config_file.yml --workflow /path/to/workflow.ga [-v/--verbose] [OPTIONS]
-	--workflow $WORKFLOW (Path to the workflow to run in galaxy. A couple of preset workflows are available in the "workflows_phaeoexplorer" folder)
+$ python3 /path/to/repo/run_workflow_phaeoexplorer.py input_file.yml -c/--config config_file.yml --workflow workflow_type [-v/--verbose] [OPTIONS]
+	--workflow workflow_type (Valid options: "load_fasta_gff_jbrowse", "blast", "interpro". The corresponding preset workflows are stored in the "workflows_phaeoexplorer" directory)
 	--main-directory $PATH (Path where to access stacks; default=current directory)
 ```
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 71fc25f7231f2ec10847664147222300ee5abc2c..bff96313c37cd1f177594630f386f03fc02cce54 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -71,41 +71,41 @@ class RunWorkflow(speciesData.SpeciesData):
         logging.debug("Library ID: %s" % self.library_id)
         instance_source_data_folders = self.instance.libraries.get_folders(library_id=library_id)
 
-        # Access folders via their absolute path
-        genome_folder = self.instance.libraries.get_folders(library_id=library_id, name="/genome/" + str(self.species_folder_name) + "/v" + str(self.genome_version))
-        annotation_folder = self.instance.libraries.get_folders(library_id=library_id, name="/annotation/" + str(self.species_folder_name) + "/OGS" + str(self.ogs_version))
+        # # Access folders via their absolute path
+        # genome_folder = self.instance.libraries.get_folders(library_id=library_id, name="/genome/" + str(self.species_folder_name) + "/v" + str(self.genome_version))
+        # annotation_folder = self.instance.libraries.get_folders(library_id=library_id, name="/annotation/" + str(self.species_folder_name) + "/OGS" + str(self.ogs_version))
 
-        # Get their IDs
-        genome_folder_id = genome_folder[0]["id"]
-        annotation_folder_id = annotation_folder[0]["id"]
+        # # Get their IDs
+        # genome_folder_id = genome_folder[0]["id"]
+        # annotation_folder_id = annotation_folder[0]["id"]
 
-        # Get the content of the folders
-        genome_folder_content = self.instance.folders.show_folder(folder_id=genome_folder_id, contents=True)
-        annotation_folder_content = self.instance.folders.show_folder(folder_id=annotation_folder_id, contents=True)
+        # # Get the content of the folders
+        # genome_folder_content = self.instance.folders.show_folder(folder_id=genome_folder_id, contents=True)
+        # annotation_folder_content = self.instance.folders.show_folder(folder_id=annotation_folder_id, contents=True)
 
-        # Find genome folder datasets
-        genome_fasta_ldda_id = genome_folder_content["folder_contents"][0]["ldda_id"]
+        # # Find genome folder datasets
+        # genome_fasta_ldda_id = genome_folder_content["folder_contents"][0]["ldda_id"]
 
-        annotation_gff_ldda_id, annotation_proteins_ldda_id, annotation_transcripts_ldda_id = None, None, None
+        # annotation_gff_ldda_id, annotation_proteins_ldda_id, annotation_transcripts_ldda_id = None, None, None
 
-        # Several dicts in the annotation folder content (one dict = one file)
-        for k, v in annotation_folder_content.items():
-            if k == "folder_contents":
-                for d in v:
-                    if "proteins" in d["name"]:
-                        annotation_proteins_ldda_id = d["ldda_id"]
-                    if "transcripts" in d["name"]:
-                        annotation_transcripts_ldda_id = d["ldda_id"]
-                    if ".gff" in d["name"]:
-                        annotation_gff_ldda_id = d["ldda_id"]
+        # # Several dicts in the annotation folder content (one dict = one file)
+        # for k, v in annotation_folder_content.items():
+        #     if k == "folder_contents":
+        #         for d in v:
+        #             if "proteins" in d["name"]:
+        #                 annotation_proteins_ldda_id = d["ldda_id"]
+        #             if "transcripts" in d["name"]:
+        #                 annotation_transcripts_ldda_id = d["ldda_id"]
+        #             if ".gff" in d["name"]:
+        #                 annotation_gff_ldda_id = d["ldda_id"]
 
-        # Minimum datasets to populate tripal views --> will not work if these files are not assigned in the input file
-        self.datasets["genome_file"] = genome_fasta_ldda_id
-        self.datasets["gff_file"] = annotation_gff_ldda_id
-        self.datasets["proteins_file"] = annotation_proteins_ldda_id
-        self.datasets["transcripts_file"] = annotation_transcripts_ldda_id
+        # # Minimum datasets to populate tripal views --> will not work if these files are not assigned in the input file
+        # self.datasets["genome_file"] = genome_fasta_ldda_id
+        # self.datasets["gff_file"] = annotation_gff_ldda_id
+        # self.datasets["proteins_file"] = annotation_proteins_ldda_id
+        # self.datasets["transcripts_file"] = annotation_transcripts_ldda_id
 
-        return {"history_id": self.history_id, "library_id": library_id, "datasets": self.datasets}
+        return {"history_id": self.history_id, "library_id": library_id}
 
 
     def connect_to_instance(self):
@@ -115,7 +115,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         """
-        logging.debug("Connecting to the galaxy instance (%s)" % self.instance_url)
+        # logging.debug("Connecting to the galaxy instance (%s)" % self.instance_url)
         self.instance = galaxy.GalaxyInstance(url=self.instance_url,
                                               email=self.config["galaxy_default_admin_email"],
                                               password=self.config["galaxy_default_admin_password"]
                                               )
@@ -125,10 +125,10 @@ class RunWorkflow(speciesData.SpeciesData):
         try:
             self.instance.histories.get_histories()
         except bioblend.ConnectionError:
-            logging.critical("Cannot connect to galaxy instance (%s) " % self.instance_url)
+            logging.critical("Cannot connect to galaxy instance (%s)" % self.instance_url)
             sys.exit()
         else:
-            logging.debug("Successfully connected to galaxy instance (%s) " % self.instance_url)
+            pass  # logging.debug("Successfully connected to galaxy instance (%s)" % self.instance_url)
 
         return 1
 
@@ -357,6 +357,107 @@ class RunWorkflow(speciesData.SpeciesData):
 
         return({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
 
+    def add_organism_blastp_analysis(self):
+        """
+        Add the organism and a blastp (Diamond) analysis to the Chado database
+        Required for the blast workflow (should be run after the Chado Load Tripal Synchronize workflow)
+        Called outside the workflow for practical reasons (Chado add doesn't have an input link for analysis or organism)
+
+        :return:
+
+        """
+
+        self.connect_to_instance()
+        self.set_get_history()
+
+        tool_version = "2.3.4+galaxy0"
+
+        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
+
+        get_organisms = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
+            history_id=self.history_id,
+            tool_inputs={})
+
+        time.sleep(10)  # Ensure the tool has had time to complete
+        org_outputs = get_organisms["outputs"]  # Outputs from the get_organism tool
+        org_job_out_id = org_outputs[0]["id"]  # ID of the get_organism output dataset (list of dicts)
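+        # The dataset downloaded below is JSON; an illustrative (assumed) shape,
+        # based on the fields accessed in the loop further down:
+        #   [{"organism_id": 1, "genus": "Ectocarpus", "species": "sp1 male",
+        #     "abbreviation": "...", "common_name": "..."}, ...]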
+        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)  # Download the dataset
+        org_output = json.loads(org_json_output)  # Turn the dataset into a list for parsing
+
+        org_id = None
+
+        # Look up list of outputs (dictionaries)
+        for organism_output_dict in org_output:
+            if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex):
+                org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
+
+        if org_id is None:
+            # Fall back on the abbreviation when no common name is provided
+            common_name = self.common
+            if common_name == "" or common_name is None:
+                common_name = self.abbreviation
+            add_org_job = self.instance.tools.run_tool(
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
+                history_id=self.history_id,
+                tool_inputs={"abbr": self.abbreviation,
+                             "genus": self.genus_uppercase,
+                             "species": self.chado_species_name,
+                             "common": common_name})
+            org_job_out_id = add_org_job["outputs"][0]["id"]
+            org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
+            org_output = json.loads(org_json_output)
+            org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
+
+        get_analyses = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version,
+            history_id=self.history_id,
+            tool_inputs={})
+
+        time.sleep(10)
+        analysis_outputs = get_analyses["outputs"]
+        analysis_job_out_id = analysis_outputs[0]["id"]
+        analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
+        analysis_output = json.loads(analysis_json_output)
+
+        blastp_analysis_id = None
+
+        # Look up list of outputs (dictionaries)
+        for analysis_output_dict in analysis_output:
+            if analysis_output_dict["name"] == "Diamond on " + self.full_name_lowercase + " OGS" + self.ogs_version:
+                blastp_analysis_id = str(analysis_output_dict["analysis_id"])
+
+        if blastp_analysis_id is None:
+            add_blast_analysis_job = self.instance.tools.run_tool(
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
+                history_id=self.history_id,
+                tool_inputs={"name": "Diamond on " + self.full_name_lowercase + " OGS" + self.ogs_version,
+                             "program": "Performed by Genoscope",
+                             "programversion": str(self.sex + " OGS" + self.ogs_version),
+                             "sourcename": "Genoscope",
+                             "date_executed": self.date})
+            analysis_outputs = add_blast_analysis_job["outputs"]
+            analysis_job_out_id = analysis_outputs[0]["id"]
+            analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
+            analysis_output = json.loads(analysis_json_output)
+            blastp_analysis_id = str(analysis_output["analysis_id"])
+
+        # print({"org_id": org_id, 
"genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) + return({"org_id": org_id, "blastp_analysis_id": blastp_analysis_id}) + def add_interproscan_analysis(self): """ """ @@ -393,43 +494,6 @@ class RunWorkflow(speciesData.SpeciesData): return self.interpro_analysis_id - def add_blastp_diamond_analysis(self): - """ - - """ - # Add Blastp (diamond) analysis to chado - logging.info("Adding Blastp Diamond analysis to the instance's chado database") - self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.3", - history_id=self.history_id, - tool_inputs={"name": "Diamond on OGS%s" % self.ogs_version, - "program": "Diamond", - "programversion": "OGS%s" % self.ogs_version, - "sourcename": "Genoscope", - "date_executed": self.date}) - - - def get_blastp_diamond_analysis_id(self): - """ - """ - - # Get blasp ID - blast_diamond_analysis = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3", - history_id=self.history_id, - tool_inputs={"name": "Diamond on OGS%s" % self.ogs_version}) - blast_diamond_analysis_job_out = blast_diamond_analysis["outputs"][0]["id"] - blast_diamond_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=blast_diamond_analysis_job_out) - try: - blast_diamond_analysis_output = json.loads(blast_diamond_analysis_json_output)[0] - self.blast_diamond_analysis_id = str(blast_diamond_analysis_output["analysis_id"]) - except IndexError as exc: - logging.critical("No matching InterproScan analysis exists in the instance's chado database") - sys.exit(exc) - - return self.blast_diamond_analysis_id - - def run_workflow(self, workflow_path, workflow_parameters, workflow_name, datamap): """ Run a workflow in galaxy @@ -547,7 +611,6 @@ class RunWorkflow(speciesData.SpeciesData): for e in v2: if type(e) == dict: if "transcripts" in e["name"]: - # the attributes datasets is set in the function get_instance_attributes() self.datasets["transcripts_file"] = e["ldda_id"] self.datasets_name["transcripts_file"] = e["name"] elif "proteins" in e["name"]: @@ -560,12 +623,12 @@ class RunWorkflow(speciesData.SpeciesData): self.datasets["interproscan_file"] = e["ldda_id"] self.datasets_name["interproscan_file"] = e["name"] elif "blastp" in e["name"]: - self.datasets["blast_diamond_file"] = e["ldda_id"] - self.datasets_name["blast_diamond_file"] = e["name"] + self.datasets["blastp_file"] = e["ldda_id"] + self.datasets_name["blastp_file"] = e["name"] history_datasets_li = self.instance.datasets.get_datasets() - genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blast_diamond_hda_id, interproscan_hda_id = None, None, None, None, None, None + genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blastp_hda_id, interproscan_hda_id = None, None, None, None, None, None # Finding datasets in history (matching datasets names) for dataset in history_datasets_li: @@ -573,14 +636,14 @@ class RunWorkflow(speciesData.SpeciesData): dataset_id = dataset["id"] if dataset_name == "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version): genome_hda_id = dataset_id - if dataset_name == "{0}_OGS{1}.gff".format(self.dataset_prefix, self.ogs_version): + if dataset_name == "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, self.date): gff_hda_id = dataset_id if dataset_name == "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version): transcripts_hda_id = dataset_id if 
dataset_name == "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version): proteins_hda_id = dataset_id - if dataset_name == "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version): - blast_diamond_hda_id = dataset_id + if dataset_name == "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version): + blastp_hda_id = dataset_id # Import each dataset into history if it is not imported @@ -604,27 +667,27 @@ class RunWorkflow(speciesData.SpeciesData): interproscan_hda_id = interproscan_dataset_upload["id"] except Exception as exc: logging.debug("Interproscan file not found in library (history: {0})".format(self.history_id)) - if blast_diamond_hda_id is None: + if blastp_hda_id is None: try: - blast_diamond_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"]) - blast_diamond_hda_id = blast_diamond_upload["id"] + blastp_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blastp_file"]) + blastp_hda_id = blastp_dataset_upload["id"] except Exception as exc: - logging.debug("Blastp file not found in library (history: {0})".format(self.history_id)) + logging.debug("blastp file not found in library (history: {0})".format(self.history_id)) - logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name) - logging.debug({"genome_hda_id": genome_hda_id, - "gff_hda_id": gff_hda_id, - "transcripts_hda_id": transcripts_hda_id, - "proteins_hda_id": proteins_hda_id, - "blast_diamond_hda_id": blast_diamond_hda_id, - "interproscan_hda_id": interproscan_hda_id}) + # logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name) + # logging.debug({"genome_hda_id": genome_hda_id, + # "gff_hda_id": gff_hda_id, + # "transcripts_hda_id": transcripts_hda_id, + # "proteins_hda_id": proteins_hda_id, + # "blastp_hda_id": blastp_hda_id, + # "interproscan_hda_id": interproscan_hda_id}) # Return a dict made of the hda ids return {"genome_hda_id": genome_hda_id, "gff_hda_id": gff_hda_id, "transcripts_hda_id": transcripts_hda_id, "proteins_hda_id": proteins_hda_id, - "blast_diamond_hda_id": blast_diamond_hda_id, + "blastp_hda_id": blastp_hda_id, "interproscan_hda_id": interproscan_hda_id} @@ -662,17 +725,16 @@ class RunWorkflow(speciesData.SpeciesData): if "interproscan_file" in self.datasets_name.keys(): if dataset_dict["name"] == self.datasets_name["interproscan_file"] and dataset_dict["id"] not in imported_datasets_ids: interproscan_dataset_hda_id = dataset_dict["id"] - if "blast_diamond_file" in self.datasets_name.keys(): - if dataset_dict["name"] == self.datasets_name["blast_diamond_file"] and dataset_dict["id"] not in imported_datasets_ids: - blast_diamond_dataset_hda_id = dataset_dict["id"] + if dataset_dict["name"] == self.datasets_name["blastp_file"] and dataset_dict["id"] not in imported_datasets_ids: + blastp_dataset_hda_id = dataset_dict["id"] logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id) logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id) logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id) logging.debug("GFF dataset hda ID: %s" % gff_dataset_hda_id) logging.debug("InterproScan dataset hda ID: %s" % gff_dataset_hda_id) - logging.debug("Blast Diamond dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("Blastp Diamond dataset hda ID: %s" % blastp_dataset_hda_id) # Add datasets IDs to already imported IDs (so we don't 
assign all the wrong IDs to the next organism if there is one) imported_datasets_ids.append(genome_dataset_hda_id) @@ -680,13 +742,13 @@ class RunWorkflow(speciesData.SpeciesData): imported_datasets_ids.append(proteins_datasets_hda_id) imported_datasets_ids.append(gff_dataset_hda_id) imported_datasets_ids.append(interproscan_dataset_hda_id) - imported_datasets_ids.append(blast_diamond_dataset_hda_id) + imported_datasets_ids.append(blastp_dataset_hda_id) # Return a dict made of the hda ids return {"genome_hda_id": genome_dataset_hda_id, "transcripts_hda_id": transcripts_dataset_hda_id, "proteins_hda_id": proteins_datasets_hda_id, "gff_hda_id": gff_dataset_hda_id, "interproscan_hda_id": interproscan_dataset_hda_id, - "blast_diamond_hda_id": blast_diamond_dataset_hda_id, + "blastp_hda_id": blastp_dataset_hda_id, "imported_datasets_ids": imported_datasets_ids} @@ -715,7 +777,7 @@ def run_workflow(workflow_path, workflow_parameters, datamap, config, input_spec if "jbrowse_menu_url" not in config.keys(): jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}") else: - jbrowse_menu_url = config["menu_url"] + jbrowse_menu_url = config["jbrowse_menu_url"] if workflow_name == "Jbrowse": workflow_dict["steps"]["2"]["tool_state"] = workflow_dict["steps"]["2"]["tool_state"].replace("__MENU_URL__", jbrowse_menu_url) # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow @@ -748,7 +810,7 @@ def run_workflow(workflow_path, workflow_parameters, datamap, config, input_spec -def create_sp_workflow_dict(sp_dict, main_dir, config): +def create_sp_workflow_dict(sp_dict, main_dir, config, workflow_type): """ """ @@ -778,26 +840,72 @@ def create_sp_workflow_dict(sp_dict, main_dir, config): run_workflow_for_current_organism.species) + if workflow_type == "load_fasta_gff_jbrowse": + run_workflow_for_current_organism.connect_to_instance() + + history_id = run_workflow_for_current_organism.set_get_history() + + run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() + ids = run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + + org_id = None + genome_analysis_id = None + ogs_analysis_id = None + org_id = ids["org_id"] + genome_analysis_id = ids["genome_analysis_id"] + ogs_analysis_id = ids["ogs_analysis_id"] + instance_attributes = run_workflow_for_current_organism.get_instance_attributes() + hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + + strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex) + genus_species = run_workflow_for_current_organism.genus_species + + # Create the dictionary holding all attributes needed to connect to the galaxy instance + attributes = {"genus": run_workflow_for_current_organism.genus, + "species": run_workflow_for_current_organism.species, + "genus_species": run_workflow_for_current_organism.genus_species, + "full_name": run_workflow_for_current_organism.full_name, + "species_folder_name": run_workflow_for_current_organism.species_folder_name, + "sex": run_workflow_for_current_organism.sex, + "strain": run_workflow_for_current_organism.strain, + "org_id": org_id, + "genome_analysis_id": genome_analysis_id, + "ogs_analysis_id": ogs_analysis_id, + "instance_attributes": instance_attributes, + "hda_ids": hda_ids, + "history_id": 
history_id,
+                      "instance": run_workflow_for_current_organism.instance,
+                      "instance_url": run_workflow_for_current_organism.instance_url,
+                      "email": config["galaxy_default_admin_email"],
+                      "password": config["galaxy_default_admin_password"]}
+
+        sp_workflow_dict[genus_species] = {strain_sex: attributes}
+
+        return sp_workflow_dict
+
+    if workflow_type == "blast":
         run_workflow_for_current_organism.connect_to_instance()
 
         history_id = run_workflow_for_current_organism.set_get_history()
 
         run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
-        ids = run_workflow_for_current_organism.add_organism_ogs_genome_analyses()
+        ids = run_workflow_for_current_organism.add_organism_blastp_analysis()
 
         org_id = None
-        genome_analysis_id = None
-        ogs_analysis_id = None
         org_id = ids["org_id"]
-        genome_analysis_id = ids["genome_analysis_id"]
-        ogs_analysis_id = ids["ogs_analysis_id"]
+        blastp_analysis_id = None
+        blastp_analysis_id = ids["blastp_analysis_id"]
         instance_attributes = run_workflow_for_current_organism.get_instance_attributes()
         hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
 
         strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex)
         genus_species = run_workflow_for_current_organism.genus_species
 
         # Create the dictionary holding all attributes needed to connect to the galaxy instance
         attributes = {"genus": run_workflow_for_current_organism.genus,
                       "species": run_workflow_for_current_organism.species,
                       "genus_species": run_workflow_for_current_organism.genus_species,
@@ -806,8 +914,7 @@ def create_sp_workflow_dict(sp_dict, main_dir, config):
                       "sex": run_workflow_for_current_organism.sex,
                       "strain": run_workflow_for_current_organism.strain,
                       "org_id": org_id,
-                      "genome_analysis_id": genome_analysis_id,
-                      "ogs_analysis_id": ogs_analysis_id,
+                      "blastp_analysis_id": blastp_analysis_id,
                       "instance_attributes": instance_attributes,
                       "hda_ids": hda_ids,
                       "history_id": history_id,
@@ -823,8 +930,6 @@ def create_sp_workflow_dict(sp_dict, main_dir, config):
 
         sys.exit()
 
     return sp_workflow_dict
 
 
 def install_changesets_revisions_from_workflow(instance, workflow_path):
     """
@@ -897,7 +1002,7 @@ if __name__ == "__main__":
 
     parser.add_argument("--workflow", "-w",
                         type=str,
-                        help="Worfklow to run")
+                        help="Workflow to run. Available options: load_fasta_gff_jbrowse, blast, interpro")
 
     args = parser.parse_args()
 
@@ -921,44 +1026,455 @@ if __name__ == "__main__":
 
     sp_dict_list = utilities.parse_input(args.input)
 
-    # # Checking if user specified a workflow to run
-    # if not args.workflow:
-    #     logging.critical("No workflow specified, exiting")
-    #     sys.exit()
-    # else:
-    #     workflow = os.path.abspath(args.workflow)
+    workflow_valid_types = ["load_fasta_gff_jbrowse", "blast", "interpro"]
+
+    workflow_type = None
+    # Checking if user specified a valid workflow type to run
+    if not args.workflow:
+        logging.critical("No workflow type specified, exiting")
+        sys.exit()
+    elif args.workflow in workflow_valid_types:
+        workflow_type = args.workflow
+        logging.info("Workflow type set to %s" % workflow_type)
+    else:
+        logging.critical("Invalid workflow type specified (%s), exiting" % args.workflow)
+        sys.exit()
 
     script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
     config = utilities.parse_config(args.config)
     all_sp_workflow_dict = {}
 
-    for sp_dict in sp_dict_list:
-
-        # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
-        current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config)
-
-        current_sp_key = list(current_sp_workflow_dict.keys())[0]
-        current_sp_value = list(current_sp_workflow_dict.values())[0]
-        current_sp_strain_sex_key = list(current_sp_value.keys())[0]
-        current_sp_strain_sex_value = list(current_sp_value.values())[0]
-
-        # Add the species dictionary to the complete dictionary
-        # This dictionary contains every organism present in the input file
-        # Its structure is the following:
-        # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
-        if not current_sp_key in all_sp_workflow_dict.keys():
-            all_sp_workflow_dict[current_sp_key] = current_sp_value
-        else:
-            all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value
+    if workflow_type == "load_fasta_gff_jbrowse":
+        for sp_dict in sp_dict_list:
+
+            # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
+            current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config, workflow_type="load_fasta_gff_jbrowse")
+
+            current_sp_key = list(current_sp_workflow_dict.keys())[0]
+            current_sp_value = list(current_sp_workflow_dict.values())[0]
+            current_sp_strain_sex_key = list(current_sp_value.keys())[0]
+            current_sp_strain_sex_value = list(current_sp_value.values())[0]
+
+            # Add the species dictionary to the complete dictionary
+            # This dictionary contains every organism present in the input file
+            # Its structure is the following:
+            # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
+            if not current_sp_key in all_sp_workflow_dict.keys():
+                all_sp_workflow_dict[current_sp_key] = current_sp_value
+            else:
+                all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value
+
+
+        for k, v in all_sp_workflow_dict.items():
+            if len(list(v.keys())) == 1:
+                logging.info("Input organism %s: 1 species detected in input dictionary" % k)
+
+                # Set workflow path (1 organism)
+                workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga")
+
+                # Instance object required variables
+                instance_url, email, password = None, None, None
+
+                # Set the galaxy instance variables
+                for k2, v2 in v.items():
+                    instance_url = v2["instance_url"]
email = v2["email"] + password = v2["password"] + + instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + + # Check if the versions of tools specified in the workflow are installed in galaxy + install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) + + organism_key_name = list(v.keys()) + org_dict = v[organisms_key_names[0]] + + history_id = org_dict["history_id"] + + # Organism 1 attributes + org_genus = org_dict["genus"] + org_species = org_dict["species"] + org_genus_species = org_dict["genus_species"] + org_species_folder_name = org_dict["species_folder_name"] + org_full_name = org_dict["full_name"] + org_strain = org_dict["sex"] + org_sex = org_dict["strain"] + org_org_id = org_dict["org_id"] + org_genome_analysis_id = org_dict["genome_analysis_id"] + org_ogs_analysis_id = org_dict["ogs_analysis_id"] + org_genome_hda_id = org_dict["hda_ids"]["genome_hda_id"] + org_transcripts_hda_id = org_dict["hda_ids"]["transcripts_hda_id"] + org_proteins_hda_id = org_dict["hda_ids"]["proteins_hda_id"] + org_gff_hda_id = org_dict["hda_ids"]["gff_hda_id"] + + # Store these values into a dict for parameters logging/validation + org_parameters_dict = { + "org_genus": org_genus, + "org_species": org_species, + "org_genus_species": org_genus_species, + "org_species_folder_name": org_species_folder_name, + "org_full_name": org_full_name, + "org_strain": org_strain, + "org_sex": org_sex, + "org_org_id": org_org_id, + "org_genome_analysis_id": org_genome_analysis_id, + "org_ogs_analysis_id": org_ogs_analysis_id, + "org_genome_hda_id": org_genome_hda_id, + "org_transcripts_hda_id": org_transcripts_hda_id, + "org_proteins_hda_id": org_proteins_hda_id, + "org_gff_hda_id": org_gff_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for param_name, param_value in org_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value)) + sys.exit() + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + GENOME_FASTA_FILE_ORG = "0" + GFF_FILE_ORG = "1" + PROTEINS_FASTA_FILE_ORG = "2" + LOAD_FASTA_ORG = "3" + JBROWSE_ORG = "4" + LOAD_GFF_ORG = "5" + JBROWSE_CONTAINER = "6" + SYNC_FEATURES_ORG = "7" + POPULATE_MAT_VIEWS = "8" + INDEX_TRIPAL_DATA = "9" + + # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) + workflow_parameters[GENOME_FASTA_FILE_ORG] = {} + workflow_parameters[GFF_FILE_ORG] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG] = {} + workflow_parameters[LOAD_FASTA_ORG] = {"organism": org_org_id, + "analysis_id": org_genome_analysis_id, + "do_update": "true"} + workflow_parameters[JBROWSE_ORG] = {} + workflow_parameters[LOAD_GFF_ORG] = {"organism": org_org_id, "analysis_id": org_ogs_analysis_id} + workflow_parameters[SYNC_FEATURES_ORG] = {"organism_id": org_org_id} + # POPULATE + INDEX DATA + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + # Set datamap (mapping of input files in the workflow) + datamap = {} + + datamap[GENOME_FASTA_FILE_ORG] = {"src": "hda", "id": org_genome_hda_id} + datamap[GFF_FILE_ORG] = {"src": "hda", "id": org_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG] = {"src": "hda", "id": org_proteins_hda_id} + + + with 
+                with open(workflow_path, 'r') as ga_in_file:
+
+                    # Store the decoded json dictionary
+                    workflow_dict = json.load(ga_in_file)
+                    workflow_name = workflow_dict["name"]
+
+                    # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
+                    # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
+                    # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
+                    if "jbrowse_menu_url" not in config.keys():
+                        jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
+                    else:
+                        jbrowse_menu_url_org = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
+
+                    # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True)
+                    # print(show_tool_add_organism)
+                    # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True)
+                    # print(show_jbrowse_tool)
+                    # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True)
+                    # print(show_jbrowse_container_tool)
+
+                    # Replace values in the workflow dictionary
+                    workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org)
+                    workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name)
+
+                # Import the workflow in galaxy as a dict
+                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+
+                # Get its attributes
+                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                # Then get its ID (required to invoke the workflow)
+                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                # Check if the workflow is found
+                try:
+                    logging.debug("Workflow ID: %s" % workflow_id)
+                except bioblend.ConnectionError:
+                    logging.warning("Error finding workflow %s" % workflow_name)
+
+                # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
+
+                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
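+
+            # NOTE: organisms are read back from v in dict insertion order
+            # (guaranteed in Python 3.7+), i.e. the order in which the strain_sex
+            # entries were added from the input file; the org1/org2 assignments in
+            # the 2-organism branch below rely on that ordering.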
"workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga") + + # Instance object required variables + instance_url, email, password = None, None, None + + # Set the galaxy instance variables + for k2, v2 in v.items(): + instance_url = v2["instance_url"] + email = v2["email"] + password = v2["password"] + + instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + + # Check if the versions of tools specified in the workflow are installed in galaxy + install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) + + # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2) + organisms_key_names = list(v.keys()) + org1_dict = v[organisms_key_names[0]] + org2_dict = v[organisms_key_names[1]] + + history_id = org1_dict["history_id"] + + # Organism 1 attributes + org1_genus = org1_dict["genus"] + org1_species = org1_dict["species"] + org1_genus_species = org1_dict["genus_species"] + org1_species_folder_name = org1_dict["species_folder_name"] + org1_full_name = org1_dict["full_name"] + org1_strain = org1_dict["sex"] + org1_sex = org1_dict["strain"] + org1_org_id = org1_dict["org_id"] + org1_genome_analysis_id = org1_dict["genome_analysis_id"] + org1_ogs_analysis_id = org1_dict["ogs_analysis_id"] + org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"] + org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"] + org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"] + org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"] + + # Store these values into a dict for parameters logging/validation + org1_parameters_dict = { + "org1_genus": org1_genus, + "org1_species": org1_species, + "org1_genus_species": org1_genus_species, + "org1_species_folder_name": org1_species_folder_name, + "org1_full_name": org1_full_name, + "org1_strain": org1_strain, + "org1_sex": org1_sex, + "org1_org_id": org1_org_id, + "org1_genome_analysis_id": org1_genome_analysis_id, + "org1_ogs_analysis_id": org1_ogs_analysis_id, + "org1_genome_hda_id": org1_genome_hda_id, + "org1_transcripts_hda_id": org1_transcripts_hda_id, + "org1_proteins_hda_id": org1_proteins_hda_id, + "org1_gff_hda_id": org1_gff_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for param_name, param_value in org1_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value)) + sys.exit() + + # Organism 2 attributes + org2_genus = org2_dict["genus"] + org2_species = org2_dict["species"] + org2_genus_species = org2_dict["genus_species"] + org2_species_folder_name = org2_dict["species_folder_name"] + org2_full_name = org2_dict["full_name"] + org2_strain = org2_dict["sex"] + org2_sex = org2_dict["strain"] + org2_org_id = org2_dict["org_id"] + org2_genome_analysis_id = org2_dict["genome_analysis_id"] + org2_ogs_analysis_id = org2_dict["ogs_analysis_id"] + org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"] + org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"] + org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"] + org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"] + + # Store these values into a dict for parameters logging/validation + org2_parameters_dict = { + "org2_genus": org2_genus, + "org2_species": org2_species, + "org2_genus_species": org2_genus_species, + 
"org2_species_folder_name": org2_species_folder_name, + "org2_full_name": org2_full_name, + "org2_strain": org2_strain, + "org2_sex": org2_sex, + "org2_org_id": org2_org_id, + "org2_genome_analysis_id": org2_genome_analysis_id, + "org2_ogs_analysis_id": org2_ogs_analysis_id, + "org2_genome_hda_id": org2_genome_hda_id, + "org2_transcripts_hda_id": org2_transcripts_hda_id, + "org2_proteins_hda_id": org2_proteins_hda_id, + "org2_gff_hda_id": org2_gff_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for param_name, param_value in org2_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value)) + sys.exit() + + # Source files association (ordered by their IDs in the workflow) + # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error) + GFF_FILE_ORG1 = "0" + GENOME_FASTA_FILE_ORG1 = "1" + PROTEINS_FASTA_FILE_ORG1 = "2" + + GENOME_FASTA_FILE_ORG2 = "3" + GFF_FILE_ORG2 = "4" + PROTEINS_FASTA_FILE_ORG2 = "5" + + LOAD_FASTA_ORG1 = "6" + JBROWSE_ORG1 = "7" + JRBOWSE_ORG2 = "8" + + LOAD_GFF_ORG1 = "9" + JBROWSE_CONTAINER = "10" + SYNC_FEATURES_ORG1 = "11" + + LOAD_FASTA_ORG2 = "12" + LOAD_GFF_ORG2 = "13" + + SYNC_FEATURES_ORG2 = "14" + POPULATE_MAT_VIEWS = "15" + INDEX_TRIPAL_DATA = "16" + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) + workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} + workflow_parameters[GFF_FILE_ORG1] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} + workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} + workflow_parameters[GFF_FILE_ORG2] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} + + # Organism 1 + workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id, + "analysis_id": org1_genome_analysis_id, + "do_update": "true"} + # workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1} + workflow_parameters[JBROWSE_ORG1] = {} + workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id} + workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id} + # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]} + workflow_parameters[JBROWSE_CONTAINER] = {} + + # Organism 2 + workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id, + "analysis_id": org2_genome_analysis_id, + "do_update": "true"} + workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id} + # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2} + workflow_parameters[JRBOWSE_ORG2] = {} + workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id} + + + # POPULATE + INDEX DATA + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + + # Set datamap (mapping of input files in the workflow) + datamap = {} + + # Organism 1 + datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id} + datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id} + 
+                datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id}
+
+                # Organism 2
+                datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id}
+                datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id}
+                datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id}
+
+                with open(workflow_path, 'r') as ga_in_file:
+
+                    # Store the decoded json dictionary
+                    workflow_dict = json.load(ga_in_file)
+                    workflow_name = workflow_dict["name"]
+
+                    # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
+                    # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
+                    # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
+                    if "jbrowse_menu_url" not in config.keys():
+                        jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
+                        jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
+                    else:
+                        jbrowse_menu_url_org1 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
+                        jbrowse_menu_url_org2 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
+
+                    # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True)
+                    # print(show_tool_add_organism)
+                    # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True)
+                    # print(show_jbrowse_tool)
+                    # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True)
+                    # print(show_jbrowse_container_tool)
+
+                    # Replace values in the workflow dictionary
+                    workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
+                    workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
+                    # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
+                    # in galaxy --> define a naming method for these workflows
+                    workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
+ workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name) + + # Import the workflow in galaxy as a dict + instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) + + # Get its attributes + workflow_attributes = instance.workflows.get_workflows(name=workflow_name) + # Then get its ID (required to invoke the workflow) + workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) + show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id) + # Check if the workflow is found + try: + logging.debug("Workflow ID: %s" % workflow_id) + except bioblend.ConnectionError: + logging.warning("Error finding workflow %s" % workflow_name) + + # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it + instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) + + if workflow_type == "blast": + for sp_dict in sp_dict_list: + + # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary + current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config, workfow_type="blast") + + current_sp_key = list(current_sp_workflow_dict.keys())[0] + current_sp_value = list(current_sp_workflow_dict.values())[0] + current_sp_strain_sex_key = list(current_sp_value.keys())[0] + current_sp_strain_sex_value = list(current_sp_value.values())[0] + + # Add the species dictionary to the complete dictionary + # This dictionary contains every organism present in the input file + # Its structure is the following: + # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}} + if not current_sp_key in all_sp_workflow_dict.keys(): + all_sp_workflow_dict[current_sp_key] = current_sp_value + else: + all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value - for k, v in all_sp_workflow_dict.items(): if len(list(v.keys())) == 1: logging.info("Input organism %s: 1 species detected in input dictionary" % k) # Set workflow path (1 organism) - workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga") + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga") + + # Instance object required variables + instance_url, email, password = None, None, None # Set the galaxy instance variables for k2, v2 in v.items(): @@ -971,19 +1487,91 @@ if __name__ == "__main__": # Check if the versions of tools specified in the workflow are installed in galaxy install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) - # Set datamap (mapping of input files in the workflow) - datamap = {} + organism_key_name = list(v.keys()) + org_dict = v[organisms_key_names[0]] + + history_id = org_dict["history_id"] + + # Organism attributes + org_genus = org_dict["genus"] + org_species = org_dict["species"] + org_genus_species = org_dict["genus_species"] + org_species_folder_name = 
org_dict["species_folder_name"] + org_full_name = org_dict["full_name"] + org_strain = org_dict["sex"] + org_sex = org_dict["strain"] + org_org_id = org_dict["org_id"] + org_blastp_analysis_id = org_dict["blastp_analysis_id"] + org_blastp_hda_id = org_dict["hda_ids"]["blastp_hda_id"] + + # Store these values into a dict for parameters logging/validation + org_parameters_dict = { + "org_genus": org_genus, + "org_species": org_species, + "org_genus_species": org_genus_species, + "org_species_folder_name": org_species_folder_name, + "org_full_name": org_full_name, + "org_strain": org_strain, + "org_sex": org_sex, + "org_org_id": org_org_id, + "org_blast_analysis_id": org_blastp_analysis_id, + "org_blastp_hda_id": org_blastp_hda_id, + } + + # Look for empty parameters values, throw a critical error if a parameter value is invalid + for param_name, param_value in org_parameters_dict.items(): + if param_value is None or param_value == "": + logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value)) + sys.exit() + BLASTP_FILE = "0" + LOAD_BLASTP_FILE = "1" + SYNC_BLASTP_ANALYSIS = "2" + POPULATE_MAT_VIEWS = "3" + INDEX_TRIPAL_DATA = "4" # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} + workflow_parameters[BLASTP_FILE] = {} + workflow_parameters[LOAD_BLASTP_FILE] = {"analysis_id": org_blastp_analysis_id, "organism_id": org_org_id} + workflow_parameters[SYNC_BLASTP_ANALYSIS] = {"analysis_id": org_blastp_analysis_id} + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + datamap = {} + datamap[BLASTP_FILE] = {"src": "hda", "id": org_blastp_hda_id} + + with open(workflow_path, 'r') as ga_in_file: + # Store the decoded json dictionary + workflow_dict = json.load(ga_in_file) + workflow_name = workflow_dict["name"] + + # Import the workflow in galaxy as a dict + instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) + # Get its attributes + workflow_attributes = instance.workflows.get_workflows(name=workflow_name) + # Then get its ID (required to invoke the workflow) + workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) + show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id) + # Check if the workflow is found + try: + logging.debug("Workflow ID: %s" % workflow_id) + except bioblend.ConnectionError: + logging.warning("Error finding workflow %s" % workflow_name) + + # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it + instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) + + if len(list(v.keys())) == 2: - + logging.info("Input organism %s: 2 species detected in input dictionary" % k) # Set workflow path (2 organisms) - workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga") + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga") # Instance object required variables instance_url, email, password = None, None, None @@ -999,7 +1587,6 @@ if __name__ 
== "__main__": # Check if the versions of tools specified in the workflow are installed in galaxy install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) - # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2) organisms_key_names = list(v.keys()) org1_dict = v[organisms_key_names[0]] org2_dict = v[organisms_key_names[1]] @@ -1015,12 +1602,8 @@ if __name__ == "__main__": org1_strain = org1_dict["sex"] org1_sex = org1_dict["strain"] org1_org_id = org1_dict["org_id"] - org1_genome_analysis_id = org1_dict["genome_analysis_id"] - org1_ogs_analysis_id = org1_dict["ogs_analysis_id"] - org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"] - org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"] - org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"] - org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"] + org1_blastp_analysis_id = org1_dict["blastp_analysis_id"] + org1_blastp_hda_id = org1_dict["hda_ids"]["blastp_hda_id"] # Store these values into a dict for parameters logging/validation org1_parameters_dict = { @@ -1032,20 +1615,17 @@ if __name__ == "__main__": "org1_strain": org1_strain, "org1_sex": org1_sex, "org1_org_id": org1_org_id, - "org1_genome_analysis_id": org1_genome_analysis_id, - "org1_ogs_analysis_id": org1_ogs_analysis_id, - "org1_genome_hda_id": org1_genome_hda_id, - "org1_transcripts_hda_id": org1_transcripts_hda_id, - "org1_proteins_hda_id": org1_proteins_hda_id, - "org1_gff_hda_id": org1_gff_hda_id, + "org1_blast_analysis_id": org1_blastp_analysis_id, + "org1_blastp_hda_id": org1_blastp_hda_id, } + # Look for empty parameters values, throw a critical error if a parameter value is invalid for param_name, param_value in org1_parameters_dict.items(): if param_value is None or param_value == "": logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value)) sys.exit() - + # Organism 2 attributes org2_genus = org2_dict["genus"] org2_species = org2_dict["species"] @@ -1055,160 +1635,77 @@ if __name__ == "__main__": org2_strain = org2_dict["sex"] org2_sex = org2_dict["strain"] org2_org_id = org2_dict["org_id"] - org2_genome_analysis_id = org2_dict["genome_analysis_id"] - org2_ogs_analysis_id = org2_dict["ogs_analysis_id"] - org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"] - org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"] - org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"] - org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"] + org2_blastp_analysis_id = org2_dict["blastp_analysis_id"] + org2_blastp_hda_id = org2_dict["hda_ids"]["blastp_hda_id"] # Store these values into a dict for parameters logging/validation org2_parameters_dict = { "org2_genus": org2_genus, "org2_species": org2_species, "org2_genus_species": org2_genus_species, - "org2_species_folder_name": org2_species_folder_name, + "org2_species_folder_name": orgé_species_folder_name, "org2_full_name": org2_full_name, "org2_strain": org2_strain, "org2_sex": org2_sex, "org2_org_id": org2_org_id, - "org2_genome_analysis_id": org2_genome_analysis_id, - "org2_ogs_analysis_id": org2_ogs_analysis_id, - "org2_genome_hda_id": org2_genome_hda_id, - "org2_transcripts_hda_id": org2_transcripts_hda_id, - "org2_proteins_hda_id": org2_proteins_hda_id, - "org2_gff_hda_id": org2_gff_hda_id, + "org2_blast_analysis_id": org2_blastp_analysis_id, + "org2_blastp_hda_id": org2_blastp_hda_id, } - + + # Look for empty parameters 
+                for param_name, param_value in org2_parameters_dict.items():
+                    if param_value is None or param_value == "":
+                        logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
+                        sys.exit()
-
-                jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
-                jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-                if "jbrowse_menu_url" not in config.keys():
-                    jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
-                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-                else:
-                    jbrowse_menu_url_org1 = config["jbrowse_menu_url"]
-                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-
-
-                # Source files association (ordered by their IDs in the workflow)
                 # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error)
-                GFF_FILE_ORG1 = "0"
-                GENOME_FASTA_FILE_ORG1 = "1"
-                PROTEINS_FASTA_FILE_ORG1 = "2"
-
-                GENOME_FASTA_FILE_ORG2 = "3"
-                GFF_FILE_ORG2 = "4"
-                PROTEINS_FASTA_FILE_ORG2 = "5"
-
-                LOAD_FASTA_ORG1 = "6"
-                JBROWSE_ORG1 = "7"
-                JRBOWSE_ORG2 = "8"
-
-                LOAD_GFF_ORG1 = "9"
-                JBROWSE_CONTAINER = "10"
-                SYNC_FEATURES_ORG1 = "11"
-
-                LOAD_FASTA_ORG2 = "12"
-                LOAD_GFF_ORG2 = "13"
-
-                SYNC_FEATURES_ORG2 = "14"
-                POPULATE_MAT_VIEWS = "15"
-                INDEX_TRIPAL_DATA = "16"
+                BLASTP_FILE_ORG1 = "0"
+                BLASTP_FILE_ORG2 = "1"
+                LOAD_BLASTP_FILE_ORG1 = "2"
+                LOAD_BLASTP_FILE_ORG2 = "3"
+                SYNC_BLASTP_ANALYSIS_ORG1 = "4"
+                SYNC_BLASTP_ANALYSIS_ORG2 = "5"
+                POPULATE_MAT_VIEWS = "6"
+                INDEX_TRIPAL_DATA = "7"
 
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
 
                 # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-                workflow_parameters[GENOME_FASTA_FILE_ORG1] = {}
-                workflow_parameters[GFF_FILE_ORG1] = {}
-                workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {}
-                workflow_parameters[GENOME_FASTA_FILE_ORG2] = {}
-                workflow_parameters[GFF_FILE_ORG2] = {}
-                workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {}
+                workflow_parameters[BLASTP_FILE_ORG1] = {}
+                workflow_parameters[BLASTP_FILE_ORG2] = {}
 
                 # Organism 1
-                workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id,
-                                                        "analysis_id": org1_genome_analysis_id,
-                                                        "do_update": "true"}
-                # workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1}
-                workflow_parameters[JBROWSE_ORG1] = {}
-                workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id}
-
workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id} - # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]} - workflow_parameters[JBROWSE_CONTAINER] = {} + workflow_parameters[LOAD_BLASTP_FILE_ORG1] = {"organism_id": org1_org_id, + "analysis_id": org1_blastp_analysis_id} + workflow_parameters[SYNC_BLASTP_ANALYSIS_ORG1] = {"analysis_id": org1_blastp_analysis_id} # Organism 2 - workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id, - "analysis_id": org2_genome_analysis_id, - "do_update": "true"} - workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id} - # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2} - workflow_parameters[JRBOWSE_ORG2] = {} - workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id} + workflow_parameters[LOAD_BLASTP_FILE_ORG2] = {"organism_id": org2_org_id, + "analysis_id": org2_blastp_analysis_id} + workflow_parameters[SYNC_BLASTP_ANALYSIS_ORG2] = {"analysis_id": org2_blastp_analysis_id} - - # POPULATE + INDEX DATA workflow_parameters[POPULATE_MAT_VIEWS] = {} workflow_parameters[INDEX_TRIPAL_DATA] = {} - # Set datamap (mapping of input files in the workflow) datamap = {} # Organism 1 - datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id} - datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id} - datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id} + datamap[BLASTP_FILE_ORG1] = {"src": "hda", "id": org1_blastp_hda_id} # Organism 2 - datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id} - datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id} - datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id} + datamap[BLASTP_FILE_ORG2] = {"src": "hda", "id": org2_blastp_hda_id} with open(workflow_path, 'r') as ga_in_file: - # Store the decoded json dictionary workflow_dict = json.load(ga_in_file) workflow_name = workflow_dict["name"] - - # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them - # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) - # Scratchgmod test: need "http" (or "https"), the hostname (+ port) - jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - if "jbrowse_menu_url" not in config.keys(): - jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - else: - jbrowse_menu_url_org1 = config["jbrowse_menu_url"] + 
"/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - - # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True) - # print(show_tool_add_organism) - # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True) - # print(show_jbrowse_tool) - # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True) - # print(show_jbrowse_container_tool) - - # Replace values in the workflow dictionary - workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) - workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) - # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow - # in galaxy --> define a naming method for these workflows - workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name) - workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name) - # Import the workflow in galaxy as a dict instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) - # Get its attributes workflow_attributes = instance.workflows.get_workflows(name=workflow_name) # Then get its ID (required to invoke the workflow) @@ -1225,320 +1722,3 @@ if __name__ == "__main__": logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) - - - - - - - - # Get the instance attribute from the object for future connections - # This is the GalaxyInstance object from bioblend (not the url!) - # instance = run_workflow_for_current_organism.instance - - # if "2org" in str(workflow): - # logging.info("Executing workflow %s" % workflow) - - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - - # # TODO: only do this once per instance (not at each iteration!) 
- # run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.add_organism_ogs_genome_analyses() - # org_id = run_workflow_for_current_organism.get_organism_id() - # genome_analysis_id = run_workflow_for_current_organism.get_genome_analysis_id() - # ogs_analysis_id = run_workflow_for_current_organism.get_ogs_analysis_id() - # instance_attributes = run_workflow_for_current_organism.get_instance_attributes() - - - # # Import datasets into history and retrieve their hda IDs - # # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) - - # hda_ids = run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids_list.append(hda_ids) - - - # # TODO: Exlcude the workflow invocation from the loop - # # Extract instance url from one, attributes from both in lists ? - - - # # Source files association (ordered by their IDs in the workflow) - # GENOME_FASTA_FILE_ORG1 = "0" - # GFF_FILE_ORG1 = "1" - # PROTEINS_FASTA_FILE_ORG1 = "2" - # GENOME_FASTA_FILE_ORG2 = "3" - # GFF_FILE_ORG2 = "4" - # PROTEINS_FASTA_FILE_ORG2 = "5" - - # LOAD_FASTA_ORG1 = "6" - # JBROWSE_ORG1 = "7" - # JRBOWSE_ORG2 = "8" - - # LOAD_GFF_ORG1 = "9" - # JBROWSE_CONTAINER = "10" - # SYNC_FEATURES_ORG1 = "11" - - # LOAD_FASTA_ORG2 = "12" - # LOAD_GFF_ORG2 = "13" - - # SYNC_FEATURES_ORG2 = "14" - # POPULATE_MAT_VIEWS = "15" - # INDEX_TRIPAL_DATA = "16" - - # workflow_parameters = {} - - # workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} - # workflow_parameters[GFF_FILE_ORG1] = {} - # workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} - # workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} - # workflow_parameters[GFF_FILE_ORG2] = {} - # workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} - - - # # ORGANISM 1 - # workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org_ids[0], - # "analysis_id": genome_analysis_ids[0], - # "do_update": "true"} - # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - # workflow_parameters[JBROWSE_ORG1] = {} - # workflow_parameters[LOAD_GFF_ORG1] = {"organism": org_ids[0], "analysis_id": ogs_analysis_ids[0]} - # workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org_ids[0]} - # workflow_parameters[JBROWSE_CONTAINER] = {} - - - # # ORGANISM 2 - # workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org_ids[1], - # "analysis_id": genome_analysis_ids[1], - # "do_update": "true"} - # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - # workflow_parameters[LOAD_GFF_ORG2] = {"organism": org_ids[1], "analysis_id": ogs_analysis_ids[1]} - # workflow_parameters[JRBOWSE_ORG2] = {} - # workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org_ids[1]} - - # workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": ogs_analysis_ids[0]} - # workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": genome_analysis_ids[0]} - # workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": org_ids[0]} - - # # POPULATE + INDEX DATA - # workflow_parameters[POPULATE_MAT_VIEWS] = {} - # 
workflow_parameters[INDEX_TRIPAL_DATA] = {} - - # # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) - # run_workflow_for_current_organism.datamap = {} - - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["gff_hda_id"]} - # run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["proteins_hda_id"]} - - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["gff_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["proteins_hda_id"]} - - - - # logging.info("OK: Workflow invoked") - - - - # # If input workflow is Chado_load_Tripal_synchronize.ga - # if "Chado_load_Tripal_synchronize" in str(workflow): - - # logging.info("Executing workflow 'Chado_load_Tripal_synchronize'") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # # run_workflow_for_current_organism.get_species_history_id() - - # run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.add_organism_ogs_genome_analyses() - # run_workflow_for_current_organism.get_organism_id() - # run_workflow_for_current_organism.get_genome_analysis_id() - # run_workflow_for_current_organism.get_ogs_analysis_id() - - # # run_workflow_for_current_organism.tripal_synchronize_organism_analyses() - - # # Get the attributes of the instance and project data files - # run_workflow_for_current_organism.get_instance_attributes() - - # # Import datasets into history and retrieve their hda IDs - # # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) - # hda_ids = run_workflow_for_current_organism.import_datasets_into_history() - - # # DEBUG - # # run_workflow_for_current_organism.get_invocation_report(workflow_name="Chado load Tripal synchronize") - - # # Explicit workflow parameter names - # GENOME_FASTA_FILE = "0" - # GFF_FILE = "1" - # PROTEINS_FASTA_FILE = "2" - # TRANSCRIPTS_FASTA_FILE = "3" - - # LOAD_FASTA_IN_CHADO = "4" - # LOAD_GFF_IN_CHADO = "5" - # SYNC_ORGANISM_INTO_TRIPAL = "6" - # SYNC_GENOME_ANALYSIS_INTO_TRIPAL = "7" - # SYNC_OGS_ANALYSIS_INTO_TRIPAL = "8" - # SYNC_FEATURES_INTO_TRIPAL = "9" - - # workflow_parameters = {} - - # workflow_parameters[GENOME_FASTA_FILE] = {} - # workflow_parameters[GFF_FILE] = {} - # workflow_parameters[PROTEINS_FASTA_FILE] = {} - # workflow_parameters[TRANSCRIPTS_FASTA_FILE] = {} - - # workflow_parameters[LOAD_FASTA_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.genome_analysis_id, - # "do_update": "true"} - # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - # workflow_parameters[LOAD_GFF_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": 
run_workflow_for_current_organism.ogs_analysis_id} - # workflow_parameters[SYNC_ORGANISM_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id} - # workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.ogs_analysis_id} - # workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.genome_analysis_id} - # workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id} - - # # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} - # run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE] = {"src": "hda", "id": hda_ids["proteins_hda_id"]} - # run_workflow_for_current_organism.datamap[TRANSCRIPTS_FASTA_FILE] = {"src": "hda", "id": hda_ids["transcripts_hda_id"]} - - # # run_workflow_for_current_organism.datamap = {} - # # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": - # # run_workflow_for_current_organism.datasets["genome_file"]} - # # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", - # # "id": hda_ids["gff_hda_id"]} - - # # Ensures galaxy has had time to retrieve datasets - # time.sleep(60) - # # Run the Chado load Tripal sync workflow with the parameters set above - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Chado load Tripal synchronize") - - # # Jbrowse creation workflow - # elif "Jbrowse" in str(workflow): - - # logging.info("Executing workflow 'Jbrowse'") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.get_organism_id() - # # Import datasets into history and get their hda IDs - # run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() # Note: only call this function AFTER calling "import_datasets_into_history()" - - # # Debugging - # # run_workflow_for_current_organism.get_invocation_report(workflow_name="Jbrowse") - - # GENOME_FASTA_FILE = "0" - # GFF_FILE = "1" - # ADD_JBROWSE = "2" - # ADD_ORGANISM_TO_JBROWSE = "3" - - # workflow_parameters = {} - # workflow_parameters[GENOME_FASTA_FILE] = {} - # workflow_parameters[GFF_FILE] = {} - # workflow_parameters[ADD_JBROWSE] = {} - # workflow_parameters[ADD_ORGANISM_TO_JBROWSE] = {} - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} - # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} - - # # Run the jbrowse creation workflow - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Jbrowse") - - # elif "Interpro" in str(workflow): - - # logging.info("Executing workflow 'Interproscan") - - # 
run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # # run_workflow_for_current_organism.get_species_history_id() - - # # Get the attributes of the instance and project data files - # run_workflow_for_current_organism.get_instance_attributes() - # run_workflow.add_interproscan_analysis() - # run_workflow_for_current_organism.get_interpro_analysis_id() - - # # Import datasets into history and retrieve their hda IDs - # run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() - - # INTERPRO_FILE = "0" - # LOAD_INTERPRO_IN_CHADO = "1" - # SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL = "2" - # SYNC_FEATURES_INTO_TRIPAL = "3" - # POPULATE_MAT_VIEWS = "4" - # INDEX_TRIPAL_DATA = "5" - - # workflow_parameters = {} - # workflow_parameters[INTERPRO_FILE] = {} - # workflow_parameters[LOAD_INTERPRO_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.interpro_analysis_id} - # workflow_parameters[SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.interpro_analysis_id} - - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": run_workflow_for_current_organism.hda_ids["interproscan_hda_id"]} - - # # Run Interproscan workflow - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Interproscan") - - # elif "Blast" in str(workflow): - - # logging.info("Executing workflow 'Blast_Diamond") - - # run_workflow_for_current_organism.connect_to_instance() - # run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # # run_workflow_for_current_organism.get_species_history_id() - - # # Get the attributes of the instance and project data files - # run_workflow_for_current_organism.get_instance_attributes() - # run_workflow_for_current_organism.add_blastp_diamond_analysis() - # run_workflow_for_current_organism.get_blastp_diamond_analysis_id() - - # # Import datasets into history and retrieve their hda IDs - # run_workflow_for_current_organism.import_datasets_into_history() - # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() - - # BLAST_FILE = "0" - # LOAD_BLAST_IN_CHADO = "1" - # SYNC_BLAST_ANALYSIS_INTO_TRIPAL = "2" - # SYNC_FEATURES_INTO_TRIPAL = "3" - # POPULATE_MAT_VIEWS = "4" - # INDEX_TRIPAL_DATA = "5" - - # workflow_parameters = {} - # workflow_parameters[INTERPRO_FILE] = {} - # workflow_parameters[LOAD_BLAST_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - # "analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} - # workflow_parameters[SYNC_BLAST_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": hda_ids["interproscan_hda_id"]} - - # # Run Interproscan workflow - # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - # workflow_parameters=workflow_parameters, - # 
datamap=run_workflow_for_current_organism.datamap, - # workflow_name="Interproscan") diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga b/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga new file mode 100644 index 0000000000000000000000000000000000000000..0349a497fc1c8baa9ab98ba75c092dad191da08b --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga @@ -0,0 +1,535 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "chado_load_tripal_synchronize_jbrowse_1org_v4", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "genome" + } + ], + "label": "genome", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 277.1999969482422, + "height": 61.19999694824219, + "left": 436.5, + "right": 636.5, + "top": 216, + "width": 200, + "x": 436.5, + "y": 216 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "fa9981ea-4012-40aa-ad84-6e6f61049104", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "65bba69d-b8f0-4f7e-a66a-71afa9a8975f" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "annotations" + } + ], + "label": "annotations", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 367.1999969482422, + "height": 61.19999694824219, + "left": 467.5, + "right": 667.5, + "top": 306, + "width": 200, + "x": 467.5, + "y": 306 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "61d23082-b459-4014-8584-6ff5b98ce689", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "acb766e7-fcd7-42e2-8fcf-638024338fc4" + } + ] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "proteins" + } + ], + "label": "proteins", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 456.1999969482422, + "height": 61.19999694824219, + "left": 489.5, + "right": 689.5, + "top": 395, + "width": 200, + "x": 489.5, + "y": 395 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "ea25f583-f55d-4fdd-a7a9-86ffb4b9c731", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "eba4266d-d468-448f-8ce3-fa87b497cbf8" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "fasta": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load fasta", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "fasta" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "wait_for" + } + ], + "label": null, + "name": "Chado load fasta", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + 
"position": { + "bottom": 356.1999969482422, + "height": 143.1999969482422, + "left": 766.5, + "right": 966.5, + "top": 213, + "width": 200, + "x": 766.5, + "y": 213 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "ba4d07fbaf47", + "name": "chado_feature_load_fasta", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"do_update\": \"false\", \"ext_db\": {\"db\": \"\", \"re_db_accession\": \"\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"re_name\": \"\", \"re_uniquename\": \"\", \"relationships\": {\"rel_type\": \"none\", \"__current_case__\": 0}, \"sequence_type\": \"contig\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "86b4962b-d001-44f3-b2f5-349e0daccc69", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "892534b5-0d67-44da-8892-f17da2be9e9c" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "reference_genome|genome": { + "id": 0, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool JBrowse", + "name": "reference_genome" + } + ], + "label": null, + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 572, + "height": 184, + "left": 753.5, + "right": 953.5, + "top": 388, + "width": 200, + "x": 753.5, + "y": 388 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", + "tool_shed_repository": { + "changeset_revision": "4542035c1075", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"RuntimeValue\"}}, \"standalone\": \"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"RuntimeValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"transcript\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": 
\"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": \"__MENU_URL_ORG__\", \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.11+galaxy0", + "type": "tool", + "uuid": "4e87e6b5-c37c-4429-a491-2d6a411d8a13", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "37d09194-4527-4428-963c-85cb351efcba" + } + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "errors": null, + "id": 5, + "input_connections": { + "fasta": { + "id": 2, + "output_name": "output" + }, + "gff": { + "id": 1, + "output_name": "output" + }, + "wait_for": { + "id": 3, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load gff", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "fasta" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "gff" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "wait_for" + } + ], + "label": null, + "name": "Chado load gff", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 383.6000061035156, + "height": 173.60000610351562, + "left": 1043.5, + "right": 1243.5, + "top": 210, + "width": 200, + "x": 1043.5, + "y": 210 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e9a6d7568817", + "name": "chado_feature_load_gff", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"gff\": {\"__class__\": \"RuntimeValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(_.+)$\", \"re_protein\": \"prot\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "34eb5e35-988e-4b49-8f2c-b11761a43588", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "2c0c2e3d-7ce1-4f7e-ad2f-456d9e97fdb8" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + "errors": null, + "id": 6, + "input_connections": { + "organisms_0|jbrowse": { + "id": 4, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Add organisms to JBrowse container", + "outputs": [ + { + 
"name": "output", + "type": "html" + } + ], + "position": { + "bottom": 551.1999969482422, + "height": 133.1999969482422, + "left": 1039.5, + "right": 1239.5, + "top": 418, + "width": 200, + "x": 1039.5, + "y": 418 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + "tool_shed_repository": { + "changeset_revision": "11033bdad2ca", + "name": "jbrowse_to_container", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organisms\": [{\"__index__\": 0, \"jbrowse\": {\"__class__\": \"RuntimeValue\"}, \"name\": \"__DISPLAY_NAME_ORG__\", \"advanced\": {\"unique_id\": \"__UNIQUE_ID_ORG__\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.5.1", + "type": "tool", + "uuid": "f06c3ec7-936f-41be-9718-248b1d760d11", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "948e9774-03e5-43be-ad57-4b785372f78a" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "errors": null, + "id": 7, + "input_connections": { + "wait_for": { + "id": 5, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize features", + "name": "organism_id" + } + ], + "label": null, + "name": "Synchronize features", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 370.6000061035156, + "height": 153.60000610351562, + "left": 1325.5, + "right": 1525.5, + "top": 217, + "width": 200, + "x": 1325.5, + "y": 217 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "64e36c3f0dd6", + "name": "tripal_feature_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"repeat_ids\": [], \"repeat_types\": [{\"__index__\": 0, \"types\": \"mRNA\"}, {\"__index__\": 1, \"types\": \"polypeptide\"}], \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "38dd9a7d-46e1-48c6-8a5a-9c62a6860431", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "704c9382-8a3e-4b2b-8190-399a43e6455f" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 8, + "input_connections": { + "wait_for": { + "id": 7, + "output_name": "results" + } + }, + "inputs": [], + "label": null, + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 369.6000061035156, + "height": 153.60000610351562, + "left": 1611.5, + "right": 1811.5, + "top": 216, + "width": 200, + "x": 1611.5, + "y": 216 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": 
"b34ddfee-c317-4c21-99a6-679bd640b1be", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "53919587-2586-4c0f-9ae6-082119727c97" + } + ] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 9, + "input_connections": { + "wait_for": { + "id": 8, + "output_name": "results" + } + }, + "inputs": [], + "label": null, + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 328.8000030517578, + "height": 112.80000305175781, + "left": 1897.5, + "right": 2097.5, + "top": 216, + "width": 200, + "x": 1897.5, + "y": 216 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "6f8cf6b5-82f2-40bf-80c0-aecf74bedd5a", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "65a75a90-c47f-4cb4-be74-87cd89d1988a" + } + ] + } + }, + "tags": [], + "uuid": "f8c6fa33-4ade-4251-a214-0ce77cdaac6e", + "version": 1 +} \ No newline at end of file diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga new file mode 100644 index 0000000000000000000000000000000000000000..db4e9537e0094dd39932dc823f5fe789911d3fb6 --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga @@ -0,0 +1,271 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "load_blast_results_1org_v1", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blast file xml org1" + } + ], + "label": "blast file xml org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 416.1999969482422, + "height": 82.19999694824219, + "left": 410, + "right": 610, + "top": 334, + "width": 200, + "x": 410, + "y": 334 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "b1c63d94-61a7-4bf1-8b5d-e08fb34c0357", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "f602d234-8cea-4db9-ab77-678cdc0d2101" + } + ] + }, + "1": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 1, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blast results 
org1", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 457.3999938964844, + "height": 164.39999389648438, + "left": 711, + "right": 911, + "top": 293, + "width": 200, + "x": 711, + "y": 293 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"21\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "10144cf8-f121-45f3-ba64-9f4d66bf1e56", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "95708895-8439-4257-bff6-96e4c51a0725" + } + ] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 2, + "input_connections": { + "wait_for": { + "id": 1, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blast analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 451.3999938964844, + "height": 154.39999389648438, + "left": 1010, + "right": 1210, + "top": 297, + "width": 200, + "x": 1010, + "y": 297 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "99e7496d-ac32-467d-8c09-2efd48d0231a", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "1fb6db92-90a2-4e33-beec-f2f974e369e9" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 3, + "input_connections": { + "wait_for": { + "id": 2, + "output_name": "results" + } + }, + "inputs": [], + "label": "populate mat views", + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 452.3999938964844, + "height": 154.39999389648438, + "left": 1295, + "right": 1495, + "top": 298, + "width": 200, + "x": 1295, + "y": 298 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + 
"tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "5c947dd5-89df-4146-9ab8-9e1d6de42360", + "workflow_outputs": [ + { + "label": "Populate Tripal materialized view(s)", + "output_name": "results", + "uuid": "0a0c9fa7-3a3c-459d-b5c7-b7a5a11459f3" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 4, + "input_connections": { + "wait_for": { + "id": 3, + "output_name": "results" + } + }, + "inputs": [], + "label": "index tripal data", + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 433.6000061035156, + "height": 113.60000610351562, + "left": 1570, + "right": 1770, + "top": 320, + "width": 200, + "x": 1570, + "y": 320 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "5ecc30b0-07ab-4f9b-81f0-31310358c221", + "workflow_outputs": [ + { + "label": "Index Tripal data", + "output_name": "results", + "uuid": "5c0f0431-acb0-4e40-a7e4-8a562933fd97" + } + ] + } + }, + "tags": [], + "uuid": "80e32784-e39e-48ce-a6e3-7627de734ca6", + "version": 4 +} \ No newline at end of file diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga new file mode 100644 index 0000000000000000000000000000000000000000..ba2591c8dbd09e02b0ac52dcaf979709f2587bdb --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga @@ -0,0 +1,439 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "load_blast_results_2org_v1", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blast file xml org1" + } + ], + "label": "blast file xml org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 230.39999389648438, + "height": 61.19999694824219, + "left": 97.5, + "right": 297.5, + "top": 169.1999969482422, + "width": 200, + "x": 97.5, + "y": 169.1999969482422 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "b1c63d94-61a7-4bf1-8b5d-e08fb34c0357", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "72006174-6297-4777-95bd-ca427b9ea729" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blast file xml org2" + } + ], + "label": "blast file xml org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 341.40000915527344, + "height": 61.19999694824219, + "left": 129.5, + "right": 329.5, + "top": 280.20001220703125, + 
"width": 200, + "x": 129.5, + "y": 280.20001220703125 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "9de2716c-eecd-48fc-8a71-b3d1f5daef85", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "45971e82-4e85-4993-a9cb-9a4608e9def7" + } + ] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 2, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blast results org1", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 255.8000030517578, + "height": 163.60000610351562, + "left": 457.5, + "right": 657.5, + "top": 92.19999694824219, + "width": 200, + "x": 457.5, + "y": 92.19999694824219 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"21\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "595f6e1f-955a-42be-b03b-1269d1f7d189", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "cb238779-29f4-4f22-b6f3-6a8cc84857d1" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 3, + "input_connections": { + "wait_for": { + "id": 2, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "wait_for" + } + ], + "label": "sync blast analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 254.8000030517578, + "height": 153.60000610351562, + "left": 787.5, + "right": 987.5, + "top": 101.19999694824219, + "width": 200, + "x": 787.5, + "y": 101.19999694824219 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + 
"type": "tool", + "uuid": "c98dedf6-8857-4d23-be94-fe6630f245d7", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "1ff4b1db-b6bf-4c48-a0ab-0a8513683999" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + }, + "wait_for": { + "id": 3, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blast results org2", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 439.8000183105469, + "height": 163.60000610351562, + "left": 520.5, + "right": 720.5, + "top": 276.20001220703125, + "width": 200, + "x": 520.5, + "y": 276.20001220703125 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"21\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "a7ec5c91-7cef-4b9f-95a0-ed5542b8e142", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "119f219e-3d80-4b42-bb38-d07d4583048c" + } + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 5, + "input_connections": { + "wait_for": { + "id": 4, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "wait_for" + } + ], + "label": "sync blast analysis org2", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 440.8000183105469, + "height": 153.60000610351562, + "left": 828.5, + "right": 1028.5, + "top": 287.20001220703125, + "width": 200, + "x": 828.5, + "y": 287.20001220703125 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": 
null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "2fff7637-7904-46ff-87e1-ce2721727e75", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "924991f3-6dd4-4752-9ce2-3832d72dff57" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 6, + "input_connections": { + "wait_for": { + "id": 5, + "output_name": "results" + } + }, + "inputs": [], + "label": "populate mat views", + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 368.8000030517578, + "height": 153.60000610351562, + "left": 1103.5, + "right": 1303.5, + "top": 215.1999969482422, + "width": 200, + "x": 1103.5, + "y": 215.1999969482422 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "5c947dd5-89df-4146-9ab8-9e1d6de42360", + "workflow_outputs": [ + { + "label": "Populate Tripal materialized view(s)", + "output_name": "results", + "uuid": "dc519305-8c27-4c53-9150-7dd37b5090cd" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 7, + "input_connections": { + "wait_for": { + "id": 6, + "output_name": "results" + } + }, + "inputs": [], + "label": "index tripal data", + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 349, + "height": 112.80000305175781, + "left": 1373.5, + "right": 1573.5, + "top": 236.1999969482422, + "width": 200, + "x": 1373.5, + "y": 236.1999969482422 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "5ecc30b0-07ab-4f9b-81f0-31310358c221", + "workflow_outputs": [ + { + "label": "Index Tripal data", + "output_name": "results", + "uuid": "e2911922-2412-4618-97fe-bcc783bb0865" + } + ] + } + }, + "tags": [], + "uuid": "ffae97b5-698a-41a5-8561-470300594544", + "version": 6 +} \ No newline at end of file