diff --git a/gga_get_data.py b/gga_get_data.py
index 6ff524b29333ee5470ce146b54976e6e3d448c5a..20ec80d37f1b1449fd216ae616d8f594976e952a 100755
--- a/gga_get_data.py
+++ b/gga_get_data.py
@@ -122,14 +122,14 @@ class GetData(speciesData.SpeciesData):
 
         # Where to store blast results?
         # search_excluded_datasets = ["interpro_path", "orthofinder_path", "blastp_path", "blastx_path"]
-        # # These datasets will not be searched if missing in the input file
+        # These datasets will not be searched if missing in the input file
 
         # Copy datasets in the organism src_data dir tree correct folder
         for k, v in datasets_to_get.items():
             if v:  # If dataset is not present in input file, skip copy
                 if k in genome_datasets:
                     logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir))
-                    genome_fname = "v%s.fasta" % self.genome_version
+                    genome_fname = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
                     try:
                         shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname))
                     except Exception as exc:
@@ -137,19 +137,19 @@ class GetData(speciesData.SpeciesData):
                 elif k in annotation_datasets:
                     dataset_fname = ""
                     if k == "gff_path":
-                        dataset_fname = "OGS%s.gff" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}.gff".format(self.dataset_prefix, self.ogs_version)
                     elif k == "transcripts_path":
-                        dataset_fname = "OGS%s_transcripts.fasta" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version)
                     elif k == "proteins_path":
-                        dataset_fname = "OGS%s_proteins.fasta" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version)
                     elif k == "orthofinder_path":
-                        dataset_fname = "OGS%s_orthofinder.tsv" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}_orthofinder.tsv".format(self.dataset_prefix, self.ogs_version)
                     elif k == "interpro_path":
-                        dataset_fname = "OGS%s_interproscan.xml" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version)
                     elif k == "blastp_path":
-                        dataset_fname = "OGS%s_blastp.xml" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version)
                     elif k == "blastx_path":
-                        dataset_fname = "OGS%s_blastx.xml" % self.ogs_version
+                        dataset_fname = "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version)
                     logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir))
                     try:
                         shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname))
diff --git a/gga_init.py b/gga_init.py
index fbefcebd1c2ce145db1270455cb8518ce3dd47a9..bd1beb4e7ebed69a7945fb0e8ed58a4545bf3483 100755
--- a/gga_init.py
+++ b/gga_init.py
@@ -64,8 +64,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
         if not config["banner_path"] == "" and os.path.isfile(os.path.abspath(config["banner_path"])):
             banner_dest_path = os.path.abspath("./banner.png")
             logging.info("Custom banner path: %s" % self.config["banner_path"])
-            if os.path.samefile(os.path.abspath(config["banner_path"]),banner_dest_path):
-                shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
+            # if os.path.samefile(os.path.abspath(config["banner_path"]), banner_dest_path):
+            #     shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
+            shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
         else:
             logging.debug("Using default banner for Tripal pages")
             self.config.pop("banner_path", None)
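The gga_init.py hunk above replaces a `samefile` guard with an unconditional copy. The removed guard was doubly wrong: it copied the banner only when source and destination were already the same file, and `os.path.samefile` raises `FileNotFoundError` when `./banner.png` does not exist yet. If a guard is ever wanted again, a minimal sketch (hypothetical helper, not part of this patch):

    import os
    import shutil

    def copy_banner(src_path, dest_path="./banner.png"):
        # Copy only when the destination is missing or is a different file;
        # checking existence first keeps samefile() from raising on a missing dest.
        src = os.path.abspath(src_path)
        dest = os.path.abspath(dest_path)
        if not os.path.exists(dest) or not os.path.samefile(src, dest):
            shutil.copy(src, dest)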
diff --git a/gga_load_data.py b/gga_load_data.py
index 856d0435c2319bd7575b81089da7586159702f73..c4ed5594a9d3d173bd11d4ee4918a1152cd6a5ca 100755
--- a/gga_load_data.py
+++ b/gga_load_data.py
@@ -59,20 +59,18 @@ class LoadData(speciesData.SpeciesData):
         """
         Create or set the working history to the current species one
 
-        TODO - move to utilities?
-        :return:
         """
         try:
-            histories = self.instance.histories.get_histories(name=str(self.full_name))
+            histories = self.instance.histories.get_histories(name=str(self.genus_species))
             self.history_id = histories[0]["id"]
-            logging.info("History for {0}: {1}".format(self.full_name, self.history_id))
+            logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
         except IndexError:
-            logging.info("Creating history for %s" % self.full_name)
-            self.instance.histories.create_history(name=str(self.full_name))
-            histories = self.instance.histories.get_histories(name=str(self.full_name))
+            logging.info("Creating history for {0} {1}".format(self.genus, self.species))
+            self.instance.histories.create_history(name=str(self.genus_species))
+            histories = self.instance.histories.get_histories(name=str(self.genus_species))
             self.history_id = histories[0]["id"]
-            logging.info("History for {0}: {1}".format(self.full_name, self.history_id))
+            logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
 
         return self.history_id
 
@@ -82,9 +80,16 @@ class LoadData(speciesData.SpeciesData):
 
         Will do nothing if H. sapiens isn't in the database
         """
+
+        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
+        delete_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0")
+
+        get_organism_tool_version = get_organism_tool["version"]
+        delete_organism_tool_version = delete_organism_tool["version"]
+
         logging.debug("Getting 'Homo sapiens' ID in instance's chado database")
         get_sapiens_id_job = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0",
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % get_organism_tool_version,
             history_id=self.history_id,
             tool_inputs={"genus": "Homo", "species": "sapiens"})
         get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
@@ -95,7 +100,7 @@ class LoadData(speciesData.SpeciesData):
             sapiens_id = str(
                 get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
             self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0",
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/%s" % delete_organism_tool_version,
                 history_id=self.history_id,
                 tool_inputs={"organism": str(sapiens_id)})
         except bioblend.ConnectionError:
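The remove_homo_sapiens hunk above resolves the installed version of each Chado tool at runtime and splices it back into the tool id, rather than hardcoding 2.3.4+galaxy0. Reduced to a sketch (hypothetical helper; `show_tool` is bioblend's, the id surgery is an assumption about the toolshed id layout, where ids end in /<version>):

    def versioned_tool_id(instance, tool_id):
        # Ask Galaxy which version of this tool is actually installed and
        # rebuild the fully qualified id with that version.
        installed_version = instance.tools.show_tool(tool_id)["version"]
        return "{0}/{1}".format(tool_id.rsplit("/", 1)[0], installed_version)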
logging.info("Successfully set up library in galaxy for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species)) # Set or get the history for the current organism load_data_for_current_species.set_get_history() @@ -402,8 +407,8 @@ if __name__ == "__main__": # load_data_for_current_species.purge_histories() # Testing purposes - logging.info("Data successfully loaded and imported for %s" % load_data_for_current_species.full_name) + logging.info("Data successfully loaded and imported for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species)) else: - logging.critical("The galaxy container for %s is not ready yet!" % load_data_for_current_species.full_name) + logging.critical("The galaxy container for {0} {1} is not ready yet".format(load_data_for_current_species.genus, load_data_for_current_species.species)) sys.exit() diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py index 49f3a2a9ba6ccab47bfd5e7118db987fee52d706..2c385d476ca128e949e9b9a1353c0679ed5d6958 100755 --- a/run_workflow_phaeoexplorer.py +++ b/run_workflow_phaeoexplorer.py @@ -38,17 +38,18 @@ class RunWorkflow(speciesData.SpeciesData): """ Create or set the working history to the current species one - :return: """ try: - histories = self.instance.histories.get_histories(name=str(self.full_name)) + histories = self.instance.histories.get_histories(name=str(self.genus_species)) self.history_id = histories[0]["id"] + logging.debug("History ID set for {0}: {1}".format(self.full_name, self.history_id)) except IndexError: logging.info("Creating history for %s" % self.full_name) self.instance.histories.create_history(name=str(self.full_name)) - histories = self.instance.histories.get_histories(name=str(self.full_name)) + histories = self.instance.histories.get_histories(name=str(self.genus_species)) self.history_id = histories[0]["id"] + logging.debug("History ID set for {0}: {1}".format(self.full_name, self.history_id)) return self.history_id @@ -114,7 +115,7 @@ class RunWorkflow(speciesData.SpeciesData): """ - logging.info("Connecting to the galaxy instance (%s)" % self.instance_url) + logging.debug("Connecting to the galaxy instance (%s)" % self.instance_url) self.instance = galaxy.GalaxyInstance(url=self.instance_url, email=self.config["galaxy_default_admin_email"], password=self.config["galaxy_default_admin_password"] @@ -127,53 +128,17 @@ class RunWorkflow(speciesData.SpeciesData): logging.critical("Cannot connect to galaxy instance (%s) " % self.instance_url) sys.exit() else: - logging.info("Successfully connected to galaxy instance (%s) " % self.instance_url) + logging.debug("Successfully connected to galaxy instance (%s) " % self.instance_url) return 1 - def install_changesets_revisions_from_workflow(self, workflow_path): - """ - Read a .ga file to extract the information about the different tools called. - Check if every tool is installed via a "show_tool". 
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 49f3a2a9ba6ccab47bfd5e7118db987fee52d706..2c385d476ca128e949e9b9a1353c0679ed5d6958 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -38,17 +38,18 @@ class RunWorkflow(speciesData.SpeciesData):
         """
         Create or set the working history to the current species one
 
-        :return:
         """
         try:
-            histories = self.instance.histories.get_histories(name=str(self.full_name))
+            histories = self.instance.histories.get_histories(name=str(self.genus_species))
             self.history_id = histories[0]["id"]
+            logging.debug("History ID set for {0}: {1}".format(self.full_name, self.history_id))
         except IndexError:
             logging.info("Creating history for %s" % self.full_name)
             self.instance.histories.create_history(name=str(self.full_name))
-            histories = self.instance.histories.get_histories(name=str(self.full_name))
+            histories = self.instance.histories.get_histories(name=str(self.genus_species))
             self.history_id = histories[0]["id"]
+            logging.debug("History ID set for {0}: {1}".format(self.full_name, self.history_id))
 
         return self.history_id
 
@@ -114,7 +115,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         """
 
-        logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
+        logging.debug("Connecting to the galaxy instance (%s)" % self.instance_url)
         self.instance = galaxy.GalaxyInstance(url=self.instance_url,
                                               email=self.config["galaxy_default_admin_email"],
                                               password=self.config["galaxy_default_admin_password"]
@@ -127,53 +128,17 @@ class RunWorkflow(speciesData.SpeciesData):
             logging.critical("Cannot connect to galaxy instance (%s) " % self.instance_url)
             sys.exit()
         else:
-            logging.info("Successfully connected to galaxy instance (%s) " % self.instance_url)
+            logging.debug("Successfully connected to galaxy instance (%s) " % self.instance_url)
 
         return 1
 
-    def install_changesets_revisions_from_workflow(self, workflow_path):
-        """
-        Read a .ga file to extract the information about the different tools called.
-        Check if every tool is installed via a "show_tool".
-        If a tool is not installed (versions don't match), send a warning to the logger and install the required changeset (matching the tool version)
-        Doesn't do anything if versions match
-
-        :return:
-        """
-
-        logging.info("Validating that installed tools versions and changesets match workflow versions")
-
-        # Load the workflow file (.ga) in a buffer
-        with open(workflow_path, 'r') as ga_in_file:
-
-            # Then store the decoded json dictionary
-            workflow_dict = json.load(ga_in_file)
-
-            # Look up every "step_id" looking for tools
-            for k, v in workflow_dict["steps"].items():
-                if v["tool_id"]:
-                    # Get the descriptive dictionary of the installed tool (using the tool id in the workflow)
-                    show_tool = self.instance.tools.show_tool(v["tool_id"])
+    def return_instance(self):
 
-                    # Check if an installed version matches the workflow tool version
-                    # (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0")
-                    if show_tool["version"] != v["tool_version"]:
-                        # If it doesn't match, proceed to install of the correct changeset revision
-                        print(show_tool)
-                        # logging.warning("Tool versions don't match for {0} (changeset installed: {1} | changeset required: {2}). Installing changeset revision {3}...".format(v["tool_shed_repository"]["name"], show_tool["changeset_revision"], v["tool_shed_repository"]["changeset_revision"], v["tool_shed_repository"]["changeset_revision"]))
-                        toolshed = "https://" + v["tool_shed_repository"]["tool_shed"]
-                        name = v["tool_shed_repository"]["name"]
-                        owner = v["tool_shed_repository"]["owner"]
-                        changeset_revision = v["tool_shed_repository"]["changeset_revision"]
-                        self.instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
-                                                                           changeset_revision=changeset_revision,
-                                                                           install_tool_dependencies=True,
-                                                                           install_repository_dependencies=False,
-                                                                           install_resolver_dependencies=True)
+        return self.instance
+
-        logging.info("Tools versions and changesets from workflow validated")
 
     def install_changesets_revisions_for_individual_tools(self):
         """
@@ -189,21 +154,22 @@ class RunWorkflow(speciesData.SpeciesData):
 
         logging.info("Validating installed individual tools versions and changesets")
 
         # Verify that the add_organism and add_analysis versions are correct in the toolshed
-        add_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.3")
-        add_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.3")
-        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3")
-        get_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3")
+        add_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0")
+        add_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.4+galaxy0")
+        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
+        get_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0")
 
-        # changeset for 2.3.3 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
+        # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
         # except for workflows (.ga) that already contain the changeset revisions inside the steps ids
 
-        if get_organism_tool["version"] != "2.3.3":
-            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
-            changeset_revision = "b07279b5f3bf"
+        if get_organism_tool["version"] != "2.3.4+galaxy0":
             toolshed_dict = get_organism_tool["tool_shed_repository"]
+            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
+            changeset_revision = "831229e6cda2"
             name = toolshed_dict["name"]
             owner = toolshed_dict["owner"]
             toolshed = "https://" + toolshed_dict["tool_shed"]
+            logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, name))
 
             self.instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
                                                                changeset_revision=changeset_revision,
                                                                install_tool_dependencies=True,
                                                                install_repository_dependencies=False,
                                                                install_resolver_dependencies=True)
@@ -211,13 +177,14 @@ class RunWorkflow(speciesData.SpeciesData):
 
-        if get_analysis_tool["version"] != "2.3.3":
-            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
-            changeset_revision = "c7be2feafd73"
+        if get_analysis_tool["version"] != "2.3.4+galaxy0":
-            toolshed_dict = changeset_revision["tool_shed_repository"]
+            toolshed_dict = get_analysis_tool["tool_shed_repository"]
+            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
+            changeset_revision = "a867923f555e"
             name = toolshed_dict["name"]
             owner = toolshed_dict["owner"]
             toolshed = "https://" + toolshed_dict["tool_shed"]
+            logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, name))
 
             self.instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
                                                                changeset_revision=changeset_revision,
                                                                install_tool_dependencies=True,
                                                                install_repository_dependencies=False,
                                                                install_resolver_dependencies=True)
@@ -225,13 +192,14 @@ class RunWorkflow(speciesData.SpeciesData):
 
-        if add_organism_tool["version"] != "2.3.3":
-            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
-            changeset_revision = "680a1fe3c266"
+        if add_organism_tool["version"] != "2.3.4+galaxy0":
             toolshed_dict = add_organism_tool["tool_shed_repository"]
+            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
+            changeset_revision = "1f12b9650028"
             name = toolshed_dict["name"]
             owner = toolshed_dict["owner"]
             toolshed = "https://" + toolshed_dict["tool_shed"]
+            logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, name))
 
             self.instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
                                                                changeset_revision=changeset_revision,
                                                                install_tool_dependencies=True,
                                                                install_repository_dependencies=False,
                                                                install_resolver_dependencies=True)
@@ -239,14 +207,15 @@ class RunWorkflow(speciesData.SpeciesData):
 
-        if add_analysis_tool["version"] != "2.3.3":
-            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
-            changeset_revision = "43c36801669f"
+        if add_analysis_tool["version"] != "2.3.4+galaxy0":
             toolshed_dict = add_analysis_tool["tool_shed_repository"]
+            logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
+            changeset_revision = "10b2b1c70e69"
             name = toolshed_dict["name"]
             owner = toolshed_dict["owner"]
             toolshed = "https://" + toolshed_dict["tool_shed"]
-            logging.warning("Installing changeset revision %s for add_analysis" % changeset_revision)
+            logging.warning("Installing changeset revision {0} for {1}".format(changeset_revision, name))
+
             self.instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
                                                                changeset_revision=changeset_revision,
                                                                install_tool_dependencies=True,
                                                                install_repository_dependencies=False,
                                                                install_resolver_dependencies=True)
@@ -281,120 +250,111 @@ class RunWorkflow(speciesData.SpeciesData):
 
         self.connect_to_instance()
         self.set_get_history()
 
-        # We want the tools version default to be 2.3.3 at the moment
-        tool_version = "2.3.3"
-
-        # Add organism (species) to chado
-        logging.info("Adding organism to the instance's chado database")
-        if self.common == "" or self.common is None:
-            self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
-                history_id=self.history_id,
-                tool_inputs={"abbr": self.abbreviation,
-                             "genus": self.genus_uppercase,
-                             "species": self.chado_species_name,
-                             "common": self.abbreviation})
-        else:
-            self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
-                history_id=self.history_id,
-                tool_inputs={"abbr": self.abbreviation,
-                             "genus": self.genus_uppercase,
-                             "species": self.chado_species_name,
-                             "common": self.common})
+        tool_version = "2.3.4+galaxy0"
 
-        # Add OGS analysis to chado
-        logging.info("Adding OGS analysis to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
-            history_id=self.history_id,
-            tool_inputs={"name": self.full_name_lowercase + " OGS" + self.ogs_version,
-                         "program": "Performed by Genoscope",
-                         "programversion": str(self.sex + " OGS" + self.ogs_version),
-                         "sourcename": "Genoscope",
-                         "date_executed": self.date})
+        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
 
-        # Add genome analysis to chado
-        logging.info("Adding genome analysis to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
+        get_organisms = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
             history_id=self.history_id,
-            tool_inputs={"name": self.full_name_lowercase + " genome v" + self.genome_version,
-                         "program": "Performed by Genoscope",
-                         "programversion": str(self.sex + "genome v" + self.genome_version),
-                         "sourcename": "Genoscope",
-                         "date_executed": self.date})
+            tool_inputs={})
 
-        # # TODO: check output of get_organism --> if empty or wrong --> rerun --> else: go next
-        # # Get organism and analyses IDs (runtime inputs for workflow)
-        # time.sleep(3)
-        # # Get the ID for the current organism in chado
-        # org = self.instance.tools.run_tool(
-        #     tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
-        #     history_id=self.history_id,
-        #     tool_inputs={"abbr": self.abbreviation,
-        #                  "genus": self.genus_uppercase,
-        #                  "species": self.chado_species_name,
-        #                  "common": self.common})
 
-        # time.sleep(3)
-        # # Run tool again (sometimes the tool doesn't return anything despite the organism already being in the db)
-        # org = self.instance.tools.run_tool(
-        #     tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
-        #     history_id=self.history_id,
-        #     tool_inputs={"abbr": self.abbreviation,
-        #                  "genus": self.genus_uppercase,
-        #                  "species": self.chado_species_name,
-        #                  "common": self.common})
-
-        # org_job_out = org["outputs"][0]["id"]
-        # org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
-        # try:
-        #     org_output = json.loads(org_json_output)[0]
-        #     self.org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
-        # except IndexError:
-        #     logging.critical("No organism matching " + self.full_name + " exists in the instance's chado database")
-        #     sys.exit()
-
-    def get_genome_analysis_id(self):
-        """
-        """
+        time.sleep(10)  # Ensure the tool has had time to complete
+        org_outputs = get_organisms["outputs"]  # Outputs from the get_organism tool
+        org_job_out_id = org_outputs[0]["id"]  # ID of the get_organism output dataset (list of dicts)
+        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)  # Download the dataset
+        org_output = json.loads(org_json_output)  # Turn the dataset into a list for parsing
 
-        # Get the ID for the genome analysis in chado
-        genome_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3",
-            history_id=self.history_id,
-            tool_inputs={"name": self.full_name_lowercase + " genome v" + self.genome_version})
-        genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
-        genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
-        try:
-            genome_analysis_output = json.loads(genome_analysis_json_output)[0]
-            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])
-        except IndexError as exc:
-            logging.critical("no matching genome analysis exists in the instance's chado database")
-            sys.exit(exc)
+        org_id = None
 
-        return self.genome_analysis_id
+        # Look up list of outputs (dictionaries)
+        for organism_output_dict in org_output:
+            if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex):
+                correct_organism_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
+                org_id = str(correct_organism_id)
 
-    def get_ogs_analysis_id(self):
-        """
-        """
-        # Get the ID for the OGS analysis in chado
-        ogs_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3",
+        if org_id is None:
+            if self.common == "" or self.common is None:
+                add_org_job = self.instance.tools.run_tool(
+                    tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
+                    history_id=self.history_id,
+                    tool_inputs={"abbr": self.abbreviation,
+                                 "genus": self.genus_uppercase,
+                                 "species": self.chado_species_name,
+                                 "common": self.abbreviation})
+                org_job_out_id = add_org_job["outputs"][0]["id"]
+                org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
+                org_output = json.loads(org_json_output)
+                org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
+            else:
+                add_org_job = self.instance.tools.run_tool(
+                    tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
+                    history_id=self.history_id,
+                    tool_inputs={"abbr": self.abbreviation,
+                                 "genus": self.genus_uppercase,
+                                 "species": self.chado_species_name,
+                                 "common": self.common})
+                org_job_out_id = add_org_job["outputs"][0]["id"]
+                org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
+                org_output = json.loads(org_json_output)
+                org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
+
+        get_analyses = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version,
             history_id=self.history_id,
-            tool_inputs={"name": self.full_name_lowercase + " OGS" + self.ogs_version})
-        ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
-        ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
-        try:
-            ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
-            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])
-        except IndexError as exc:
-            logging.critical("No matching OGS analysis exists in the instance's chado database")
-            sys.exit(exc)
+            tool_inputs={})
+
+        time.sleep(10)
+        analysis_outputs = get_analyses["outputs"]
+        analysis_job_out_id = analysis_outputs[0]["id"]
+        analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
+        analysis_output = json.loads(analysis_json_output)
+
+        ogs_analysis_id = None
+        genome_analysis_id = None
+
+        # Look up list of outputs (dictionaries)
+        for analysis_output_dict in analysis_output:
+            if analysis_output_dict["name"] == self.full_name_lowercase + " OGS" + self.ogs_version:
+                ogs_analysis_id = str(analysis_output_dict["analysis_id"])
+            if analysis_output_dict["name"] == self.full_name_lowercase + " genome v" + self.genome_version:
+                genome_analysis_id = str(analysis_output_dict["analysis_id"])
 
-        return self.ogs_analysis_id
+        if ogs_analysis_id is None:
+            add_ogs_analysis_job = self.instance.tools.run_tool(
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
+                history_id=self.history_id,
+                tool_inputs={"name": self.full_name_lowercase + " OGS" + self.ogs_version,
+                             "program": "Performed by Genoscope",
+                             "programversion": str(self.sex + " OGS" + self.ogs_version),
+                             "sourcename": "Genoscope",
+                             "date_executed": self.date})
+            analysis_outputs = add_ogs_analysis_job["outputs"]
+            analysis_job_out_id = analysis_outputs[0]["id"]
+            analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
+            analysis_output = json.loads(analysis_json_output)
+            ogs_analysis_id = str(analysis_output["analysis_id"])
+
+        if genome_analysis_id is None:
+            add_genome_analysis_job = self.instance.tools.run_tool(
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
+                history_id=self.history_id,
+                tool_inputs={"name": self.full_name_lowercase + " genome v" + self.genome_version,
+                             "program": "Performed by Genoscope",
+                             "programversion": str(self.sex + "genome v" + self.genome_version),
+                             "sourcename": "Genoscope",
+                             "date_executed": self.date})
+            analysis_outputs = add_genome_analysis_job["outputs"]
+            analysis_job_out_id = analysis_outputs[0]["id"]
+            analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
+            analysis_output = json.loads(analysis_json_output)
+            genome_analysis_id = str(analysis_output["analysis_id"])
+
+        # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
+        return({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
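The `time.sleep(10)` calls above only hope the get_organisms/get_analyses jobs have finished before their outputs are downloaded. A more robust variant polls the output dataset's state; a minimal sketch, assuming bioblend's `datasets.show_dataset` and Galaxy's "ok"/"error" dataset states:

    import time

    def wait_for_dataset(instance, dataset_id, timeout=300):
        # Poll the dataset state instead of sleeping a fixed amount of time.
        start = time.time()
        while time.time() - start < timeout:
            state = instance.datasets.show_dataset(dataset_id)["state"]
            if state == "ok":
                return True
            if state == "error":
                return False
            time.sleep(3)
        return False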
     def add_interproscan_analysis(self):
 
@@ -404,7 +364,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         # Add Interpro analysis to chado
         logging.info("Adding Interproscan analysis to the instance's chado database")
         self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.3",
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.4+galaxy0",
             history_id=self.history_id,
             tool_inputs={"name": "InterproScan on OGS%s" % self.ogs_version,
                          "program": "InterproScan",
@@ -419,7 +379,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         # Get interpro ID
         interpro_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3",
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0",
             history_id=self.history_id,
             tool_inputs={"name": "InterproScan on OGS%s" % self.ogs_version})
         interpro_analysis_job_out = interpro_analysis["outputs"][0]["id"]
@@ -546,16 +506,6 @@ class RunWorkflow(speciesData.SpeciesData):
 
         return invocation_report
 
-
-    def get_datasets_ldda_ids(self):
-        """
-        Get and return the ldda_ids (and names) for the datasets in the library
-        """
-
-        return 0
-
-
     def import_datasets_into_history(self):
         """
         Find datasets in a library, get their ID and import them into the current history if they are not already
@@ -580,19 +530,16 @@ class RunWorkflow(speciesData.SpeciesData):
 
         for i in instance_source_data_folders:
             folders_ids[i["name"]] = i["id"]
 
-        # Iterating over the folders to find datasets and map datasets to their IDs
-        logging.debug("Datasets IDs: ")
         for k, v in folders_ids.items():
             if k == "/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version):
                 sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
                 for k2, v2 in sub_folder_content.items():
                     for e in v2:
                         if type(e) == dict:
-                            if e["name"].endswith(".fa"):
+                            if e["name"].endswith(".fasta"):
                                 self.datasets["genome_file"] = e["ldda_id"]
                                 self.datasets_name["genome_file"] = e["name"]
-                                logging.debug("\tGenome file:\t" + e["name"] + ": " + e["ldda_id"])
 
             if k == "/annotation/{0}/OGS{1}".format(self.species_folder_name, self.ogs_version):
                 sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
@@ -600,56 +547,85 @@ class RunWorkflow(speciesData.SpeciesData):
                     for e in v2:
                         if type(e) == dict:
                             if "transcripts" in e["name"]:
+                                # the datasets attribute is set in the function get_instance_attributes()
                                 self.datasets["transcripts_file"] = e["ldda_id"]
                                 self.datasets_name["transcripts_file"] = e["name"]
-                                logging.debug("\tTranscripts file:\t" + e["name"] + ": " + e["ldda_id"])
                             elif "proteins" in e["name"]:
                                 self.datasets["proteins_file"] = e["ldda_id"]
                                 self.datasets_name["proteins_file"] = e["name"]
-                                logging.debug("\tProteins file:\t" + e["name"] + ": " + e["ldda_id"])
                             elif "gff" in e["name"]:
                                 self.datasets["gff_file"] = e["ldda_id"]
                                 self.datasets_name["gff_file"] = e["name"]
-                                logging.debug("\tGFF file:\t" + e["name"] + ": " + e["ldda_id"])
                             elif "interpro" in e["name"]:
                                 self.datasets["interproscan_file"] = e["ldda_id"]
                                 self.datasets_name["interproscan_file"] = e["name"]
-                                logging.debug("\tInterproscan file:\t" + e["name"] + ": " + e["ldda_id"])
                             elif "blastp" in e["name"]:
                                 self.datasets["blast_diamond_file"] = e["ldda_id"]
                                 self.datasets_name["blast_diamond_file"] = e["name"]
-                                logging.debug("\tBlastp diamond file:\t" + e["name"] + ": " + e["ldda_id"])
-
-        logging.debug("Uploading datasets into history %s" % self.history_id)
+
+        history_datasets_li = self.instance.datasets.get_datasets()
+        genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blast_diamond_hda_id, interproscan_hda_id = None, None, None, None, None, None
+
+        # Finding datasets in history (matching datasets names)
+        for dataset in history_datasets_li:
+            dataset_name = dataset["name"]
+            dataset_id = dataset["id"]
+            if dataset_name == "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version):
+                genome_hda_id = dataset_id
+            if dataset_name == "{0}_OGS{1}.gff".format(self.dataset_prefix, self.ogs_version):
+                gff_hda_id = dataset_id
+            if dataset_name == "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version):
+                transcripts_hda_id = dataset_id
+            if dataset_name == "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version):
+                proteins_hda_id = dataset_id
+            if dataset_name == "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version):
+                blast_diamond_hda_id = dataset_id
+
+        # Import each dataset into history if it is not imported
+        logging.debug("Uploading datasets into history %s" % self.history_id)
 
-        first_hda_ids = self.get_datasets_hda_ids()
-
-        if first_hda_ids["genome_hda_id"] is None:
-            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
-        if first_hda_ids["gff_hda_id"] is None:
-            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
-        if first_hda_ids["transcripts_hda_id"] is None:
-            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
-        if first_hda_ids["proteins_hda_id"] is None:
-            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
-        if first_hda_ids["interproscan_hda_id"] is None:
+        if genome_hda_id is None:
+            genome_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
+            genome_hda_id = genome_dataset_upload["id"]
+        if gff_hda_id is None:
+            gff_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
+            gff_hda_id = gff_dataset_upload["id"]
+        if transcripts_hda_id is None:
+            transcripts_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
+            transcripts_hda_id = transcripts_dataset_upload["id"]
+        if proteins_hda_id is None:
+            proteins_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
+            proteins_hda_id = proteins_dataset_upload["id"]
+        if interproscan_hda_id is None:
             try:
-                self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["interproscan_file"])
+                interproscan_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["interproscan_file"])
+                interproscan_hda_id = interproscan_dataset_upload["id"]
             except Exception as exc:
                 logging.debug("Interproscan file not found in library (history: {0})".format(self.history_id))
-        if first_hda_ids["blast_diamond_hda_id"] is None:
+        if blast_diamond_hda_id is None:
             try:
-                self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"])
+                blast_diamond_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"])
+                blast_diamond_hda_id = blast_diamond_dataset_upload["id"]
             except Exception as exc:
                 logging.debug("Blastp file not found in library (history: {0})".format(self.history_id))
 
-        # _datasets = self.instance.datasets.get_datasets()
-        # with open(os.path.join(self.main_dir, "datasets_ids.json"), "w") as datasets_ids_outfile:
-        #     datasets_ids_outfile.write(str(_datasets))
+        logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name)
+        logging.debug({"genome_hda_id": genome_hda_id,
+                       "gff_hda_id": gff_hda_id,
+                       "transcripts_hda_id": transcripts_hda_id,
+                       "proteins_hda_id": proteins_hda_id,
+                       "blast_diamond_hda_id": blast_diamond_hda_id,
+                       "interproscan_hda_id": interproscan_hda_id})
 
         # Return a dict made of the hda ids
-        return self.get_datasets_hda_ids()
+        return {"genome_hda_id": genome_hda_id,
+                "gff_hda_id": gff_hda_id,
+                "transcripts_hda_id": transcripts_hda_id,
+                "proteins_hda_id": proteins_hda_id,
+                "blast_diamond_hda_id": blast_diamond_hda_id,
+                "interproscan_hda_id": interproscan_hda_id}
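The import logic above matches history datasets against their expected file names, one comparison chain per file. The same matching can be expressed as a single name-to-id map built in one pass; a sketch, assuming a bioblend version whose `get_datasets` accepts a history_id filter:

    def map_history_datasets(instance, history_id, wanted_names):
        # Build a {name: dataset id} map for one history; first match wins.
        mapping = {}
        for ds in instance.datasets.get_datasets(history_id=history_id):
            if ds["name"] in wanted_names and ds["name"] not in mapping:
                mapping[ds["name"]] = ds["id"]
        return mapping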
logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id) + logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id) + logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id) + logging.debug("GFF dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("InterproScan dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("Blast Diamond dataset hda ID: %s" % gff_dataset_hda_id) + + # Add datasets IDs to already imported IDs (so we don't assign all the wrong IDs to the next organism if there is one) + imported_datasets_ids.append(genome_dataset_hda_id) + imported_datasets_ids.append(transcripts_dataset_hda_id) + imported_datasets_ids.append(proteins_datasets_hda_id) + imported_datasets_ids.append(gff_dataset_hda_id) + imported_datasets_ids.append(interproscan_dataset_hda_id) + imported_datasets_ids.append(blast_diamond_dataset_hda_id) # Return a dict made of the hda ids return {"genome_hda_id": genome_dataset_hda_id, "transcripts_hda_id": transcripts_dataset_hda_id, "proteins_hda_id": proteins_datasets_hda_id, "gff_hda_id": gff_dataset_hda_id, "interproscan_hda_id": interproscan_dataset_hda_id, - "blast_diamond_hda_id": blast_diamond_dataset_hda_id} + "blast_diamond_hda_id": blast_diamond_dataset_hda_id, + "imported_datasets_ids": imported_datasets_ids} - def get_organism_id(self): - """ - Retrieve current organism ID - Will try to add it to Chado if the organism ID can't be found - :return: - """ +def run_workflow(workflow_path, workflow_parameters, datamap, config, input_species_number): + """ + Run a workflow in galaxy + Requires the .ga file to be loaded as a dictionary (optionally could be uploaded as a raw file) - tool_version = "2.3.3" - time.sleep(3) + :param workflow_name: + :param workflow_parameters: + :param datamap: + :return: + """ - # # Get the ID for the current organism in chado - # org = self.instance.tools.run_tool( - # tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3", - # history_id=self.history_id, - # tool_inputs={"abbr": self.abbreviation, - # "genus": self.genus_uppercase, - # "species": self.chado_species_name, - # "common": self.common}) + logging.info("Importing workflow %s" % str(workflow_path)) - # time.sleep(3) + # Load the workflow file (.ga) in a buffer + with open(workflow_path, 'r') as ga_in_file: - # Run tool again (sometimes the tool doesn't return anything despite the organism already being in the db) - org = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3", - history_id=self.history_id, - tool_inputs={"abbr": self.abbreviation, - "genus": self.genus_uppercase, - "species": self.chado_species_name, - "common": self.common}) + # Then store the decoded json dictionary + workflow_dict = json.load(ga_in_file) + + # In case of the Jbrowse workflow, we unfortunately have to manually edit the parameters instead of setting them + # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) + # Scratchgmod test: need "http" (or "https"), the hostname (+ port) + if "menu_url" not in config.keys(): + jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}") + else: + jbrowse_menu_url = config["menu_url"] + if workflow_name == "Jbrowse": + 
workflow_dict["steps"]["2"]["tool_state"] = workflow_dict["steps"]["2"]["tool_state"].replace("__MENU_URL__", jbrowse_menu_url) + # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow + # in galaxy --> define a naming method for these workflows + workflow_dict["steps"]["3"]["tool_state"] = workflow_dict["steps"]["3"]["tool_state"].replace("__FULL_NAME__", self.full_name).replace("__UNIQUE_ID__", self.species_folder_name) + + # Import the workflow in galaxy as a dict + self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) - org_job_out = org["outputs"][0]["id"] - org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out) + # Get its attributes + workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name) + # Then get its ID (required to invoke the workflow) + workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) + show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id) + # Check if the workflow is found try: - org_output = json.loads(org_json_output)[0] - self.org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools - except IndexError: - logging.warning("No organism matching " + self.full_name + " exists in the instance's chado database, adding it") - if self.common == "" or self.common is None: - self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version, - history_id=self.history_id, - tool_inputs={"abbr": self.abbreviation, - "genus": self.genus_uppercase, - "species": self.chado_species_name, - "common": self.abbreviation}) - else: - self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version, - history_id=self.history_id, - tool_inputs={"abbr": self.abbreviation, - "genus": self.genus_uppercase, - "species": self.chado_species_name, - "common": self.common}) - # Run tool again (sometimes the tool doesn't return anything despite the organism already being in the db) - org = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3", - history_id=self.history_id, - tool_inputs={"abbr": self.abbreviation, - "genus": self.genus_uppercase, - "species": self.chado_species_name, - "common": self.common}) + logging.debug("Workflow ID: %s" % workflow_id) + except bioblend.ConnectionError: + logging.warning("Error retrieving workflow attributes for workflow %s" % workflow_name) - org_job_out = org["outputs"][0]["id"] - org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out) - try: - org_output = json.loads(org_json_output)[0] - self.org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools - except IndexError: - logging.critical("Cannot add {0} as an organism in Chado, please check the galaxy instance {1}".format(self.full_name, self.instance_url)) - sys.exit() + # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it + self.instance.workflows.invoke_workflow(workflow_id=workflow_id, + history_id=self.history_id, + params=workflow_parameters, + inputs=datamap, + allow_tool_state_corrections=True) + + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs 
state".format(workflow_name, self.instance_url)) + + + + +def create_sp_workflow_dict(sp_dict, main_dir, config): + """ + """ - return self.org_id + sp_workflow_dict = {} + run_workflow_for_current_organism = RunWorkflow(parameters_dictionary=sp_dict) + + # Verifying the galaxy container is running + if utilities.check_galaxy_state(genus_lowercase=run_workflow_for_current_organism.genus_lowercase, + species=run_workflow_for_current_organism.species, + script_dir=run_workflow_for_current_organism.script_dir): + + # Starting + logging.info("run_workflow.py called for %s" % run_workflow_for_current_organism.full_name) + + # Setting some of the instance attributes + run_workflow_for_current_organism.main_dir = main_dir + run_workflow_for_current_organism.species_dir = os.path.join(run_workflow_for_current_organism.main_dir, + run_workflow_for_current_organism.genus_species + + "/") + + # Parse the config yaml file + run_workflow_for_current_organism.config = config + # Set the instance url attribute --> TODO: the localhost rule in the docker-compose still doesn't work on scratchgmodv1 + run_workflow_for_current_organism.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format( + run_workflow_for_current_organism.config["http_port"], + run_workflow_for_current_organism.genus_lowercase, + run_workflow_for_current_organism.species) + + + run_workflow_for_current_organism.connect_to_instance() + + history_id = run_workflow_for_current_organism.set_get_history() + + run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() + ids = run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + + org_id = None + genome_analysis_id = None + ogs_analysis_id = None + org_id = ids["org_id"] + genome_analysis_id = ids["genome_analysis_id"] + ogs_analysis_id = ids["ogs_analysis_id"] + instance_attributes = run_workflow_for_current_organism.get_instance_attributes() + hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + + strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex) + genus_species = run_workflow_for_current_organism.genus_species + + # Create the dictionary holding all attributes needed to connect to the galaxy instance + attributes = {"genus": run_workflow_for_current_organism.genus, + "species": run_workflow_for_current_organism.species, + "genus_species": run_workflow_for_current_organism.genus_species, + "full_name": run_workflow_for_current_organism.full_name, + "species_folder_name": run_workflow_for_current_organism.species_folder_name, + "sex": run_workflow_for_current_organism.sex, + "strain": run_workflow_for_current_organism.strain, + "org_id": org_id, + "genome_analysis_id": genome_analysis_id, + "ogs_analysis_id": ogs_analysis_id, + "instance_attributes": instance_attributes, + "hda_ids": hda_ids, + "history_id": history_id, + "instance": run_workflow_for_current_organism.instance, + "instance_url": run_workflow_for_current_organism.instance_url, + "email": config["galaxy_default_admin_email"], + "password": config["galaxy_default_admin_password"]} + + sp_workflow_dict[genus_species] = {strain_sex: attributes} + + else: + logging.critical("The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.full_name) + sys.exit() + + + return sp_workflow_dict + + +def install_changesets_revisions_from_workflow(instance, workflow_path): + """ + Read a .ga file to extract the information about the different tools called. 
+def install_changesets_revisions_from_workflow(instance, workflow_path):
+    """
+    Read a .ga file to extract the information about the different tools called.
+    Check if every tool is installed via a "show_tool".
+    If a tool is not installed (versions don't match), send a warning to the logger and install the required changeset (matching the tool version)
+    Doesn't do anything if versions match
+
+    :return:
+    """
+
+    logging.info("Validating that installed tools versions and changesets match workflow versions")
+
+    # Load the workflow file (.ga) in a buffer
+    with open(workflow_path, 'r') as ga_in_file:
+
+        # Then store the decoded json dictionary
+        workflow_dict = json.load(ga_in_file)
+
+        # Look up every "step_id" looking for tools
+        for k, v in workflow_dict["steps"].items():
+            if v["tool_id"]:
+                # Get the descriptive dictionary of the installed tool (using the tool id in the workflow)
+                show_tool = instance.tools.show_tool(v["tool_id"])
+                # Check if an installed version matches the workflow tool version
+                # (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0")
+                if show_tool["version"] != v["tool_version"]:
+                    # If it doesn't match, proceed to install of the correct changeset revision
+                    toolshed = "https://" + v["tool_shed_repository"]["tool_shed"]
+                    name = v["tool_shed_repository"]["name"]
+                    owner = v["tool_shed_repository"]["owner"]
+                    changeset_revision = v["tool_shed_repository"]["changeset_revision"]
+
+                    logging.warning("Installed tool versions for tool {0} do not match the version required by the specified workflow, installing changeset {1}".format(name, changeset_revision))
+
+                    # Install changeset
+                    instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
+                                                                  changeset_revision=changeset_revision,
+                                                                  install_tool_dependencies=True,
+                                                                  install_repository_dependencies=False,
+                                                                  install_resolver_dependencies=True)
+                else:
+                    toolshed = "https://" + v["tool_shed_repository"]["tool_shed"]
+                    name = v["tool_shed_repository"]["name"]
+                    owner = v["tool_shed_repository"]["owner"]
+                    changeset_revision = v["tool_shed_repository"]["changeset_revision"]
+                    logging.debug("Installed tool versions for tool {0} match the version in the specified workflow (changeset {1})".format(name, changeset_revision))
+
+    logging.info("Tools versions and changesets from workflow validated")
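The main section below instantiates one bioblend GalaxyInstance per species stack and runs this validation before invoking anything. A hypothetical usage (URL and credentials are placeholders):

    from bioblend import galaxy

    instance = galaxy.GalaxyInstance(url="http://localhost:80/sp/genus_species/galaxy/",
                                     email="admin@example.org",
                                     password="password")
    install_changesets_revisions_from_workflow(instance=instance,
                                               workflow_path="workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")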
complete dictionary
+        # This dictionary contains every organism present in the input file
+        # Its structure is the following:
+        # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
+        if current_sp_key not in all_sp_workflow_dict.keys():
+            all_sp_workflow_dict[current_sp_key] = current_sp_value
         else:
-            workflow = os.path.abspath(args.workflow)
-
-            # Verifying the galaxy container is running
-            if utilities.check_galaxy_state(genus_lowercase=run_workflow_for_current_organism.genus_lowercase,
-                                            species=run_workflow_for_current_organism.species,
-                                            script_dir=run_workflow_for_current_organism.script_dir):
-
-                # Starting
-                logging.info("run_workflow.py called for %s" % run_workflow_for_current_organism.full_name)
-
-                # Setting some of the instance attributes
-                run_workflow_for_current_organism.main_dir = args.main_directory
-                run_workflow_for_current_organism.species_dir = os.path.join(run_workflow_for_current_organism.main_dir,
-                                                                             run_workflow_for_current_organism.genus_species +
-                                                                             "/")
-
-                # Parse the config yaml file
-                run_workflow_for_current_organism.config = utilities.parse_config(args.config)
-                # Set the instance url attribute --> TODO: the localhost rule in the docker-compose still doesn't work on scratchgmodv1
-                run_workflow_for_current_organism.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(
-                    run_workflow_for_current_organism.config["http_port"],
-                    run_workflow_for_current_organism.genus_lowercase,
-                    run_workflow_for_current_organism.species)
-
-                # If input workflow is Chado_load_Tripal_synchronize.ga
-                if "Chado_load_Tripal_synchronize" in str(workflow):
-
-                    logging.info("Executing workflow 'Chado_load_Tripal_synchronize'")
-
-                    run_workflow_for_current_organism.connect_to_instance()
-                    run_workflow_for_current_organism.set_get_history()
-                    # run_workflow_for_current_organism.get_species_history_id()
-
-                    run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
-                    run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow)
-                    run_workflow_for_current_organism.add_organism_ogs_genome_analyses()
-                    run_workflow_for_current_organism.get_organism_id()
-                    run_workflow_for_current_organism.get_genome_analysis_id()
-                    run_workflow_for_current_organism.get_ogs_analysis_id()
-
-                    # run_workflow_for_current_organism.tripal_synchronize_organism_analyses()
-
-                    # Get the attributes of the instance and project data files
-                    run_workflow_for_current_organism.get_instance_attributes()
-
-                    # Import datasets into history and retrieve their hda IDs
-                    # TODO: can be simplified with direct access to the folder contents via the full path (no loop required)
-                    hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
-
-                    # DEBUG
-                    # run_workflow_for_current_organism.get_invocation_report(workflow_name="Chado load Tripal synchronize")
-
-                    # Explicit workflow parameter names
-                    GENOME_FASTA_FILE = "0"
-                    GFF_FILE = "1"
-                    PROTEINS_FASTA_FILE = "2"
-                    TRANSCRIPTS_FASTA_FILE = "3"
-
-                    LOAD_FASTA_IN_CHADO = "4"
-                    LOAD_GFF_IN_CHADO = "5"
-                    SYNC_ORGANISM_INTO_TRIPAL = "6"
-                    SYNC_GENOME_ANALYSIS_INTO_TRIPAL = "7"
-                    SYNC_OGS_ANALYSIS_INTO_TRIPAL = "8"
-                    SYNC_FEATURES_INTO_TRIPAL = "9"
-
-                    workflow_parameters = {}
-
-                    workflow_parameters[GENOME_FASTA_FILE] = {}
-                    workflow_parameters[GFF_FILE] = {}
-                    workflow_parameters[PROTEINS_FASTA_FILE] = {}
-                    workflow_parameters[TRANSCRIPTS_FASTA_FILE] = {}
-
-                    workflow_parameters[LOAD_FASTA_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id,
-                                                                "analysis_id": run_workflow_for_current_organism.genome_analysis_id,
-                                                                "do_update": "true"}
-                    # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado
-                    # WARNING: It is safer to never update it and just change the genome/ogs versions in the config
-                    workflow_parameters[LOAD_GFF_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id,
-                                                              "analysis_id": run_workflow_for_current_organism.ogs_analysis_id}
-                    workflow_parameters[SYNC_ORGANISM_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id}
-                    workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.ogs_analysis_id}
-                    workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.genome_analysis_id}
-                    workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id}
-
-                    # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation)
-                    run_workflow_for_current_organism.datamap = {}
-                    run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]}
-                    run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]}
-                    run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE] = {"src": "hda", "id": hda_ids["proteins_hda_id"]}
-                    run_workflow_for_current_organism.datamap[TRANSCRIPTS_FASTA_FILE] = {"src": "hda", "id": hda_ids["transcripts_hda_id"]}
-
-                    # run_workflow_for_current_organism.datamap = {}
-                    # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id":
-                    # run_workflow_for_current_organism.datasets["genome_file"]}
-                    # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda",
-                    #                                                        "id": hda_ids["gff_hda_id"]}
-
-                    # Ensures galaxy has had time to retrieve datasets
-                    time.sleep(60)
-                    # Run the Chado load Tripal sync workflow with the parameters set above
-                    run_workflow_for_current_organism.run_workflow(workflow_path=workflow,
-                                                                   workflow_parameters=workflow_parameters,
-                                                                   datamap=run_workflow_for_current_organism.datamap,
-                                                                   workflow_name="Chado load Tripal synchronize")
-
-                # Jbrowse creation workflow
-                elif "Jbrowse" in str(workflow):
-
-                    logging.info("Executing workflow 'Jbrowse'")
-
-                    run_workflow_for_current_organism.connect_to_instance()
-                    run_workflow_for_current_organism.set_get_history()
-                    run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow)
-                    run_workflow_for_current_organism.get_organism_id()
-                    # Import datasets into history and get their hda IDs
-                    run_workflow_for_current_organism.import_datasets_into_history()
-                    hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids()  # Note: only call this function AFTER calling "import_datasets_into_history()"
-
-                    # Debugging
-                    # run_workflow_for_current_organism.get_invocation_report(workflow_name="Jbrowse")
-
-                    GENOME_FASTA_FILE = "0"
-                    GFF_FILE = "1"
-                    ADD_JBROWSE = "2"
-                    ADD_ORGANISM_TO_JBROWSE = "3"
-
-                    workflow_parameters = {}
-                    workflow_parameters[GENOME_FASTA_FILE] = {}
-                    workflow_parameters[GFF_FILE] = {}
-                    workflow_parameters[ADD_JBROWSE] = {}
-                    workflow_parameters[ADD_ORGANISM_TO_JBROWSE] = {}
-
-                    run_workflow_for_current_organism.datamap = {}
-                    run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]}
-                    run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]}
-
-                    # Run the jbrowse creation workflow
-                    run_workflow_for_current_organism.run_workflow(workflow_path=workflow,
-                                                                   workflow_parameters=workflow_parameters,
-                                                                   datamap=run_workflow_for_current_organism.datamap,
-                                                                   workflow_name="Jbrowse")
-
-                elif "Interpro" in str(workflow):
-
-                    logging.info("Executing workflow 'Interproscan")
-
-                    run_workflow_for_current_organism.connect_to_instance()
-                    run_workflow_for_current_organism.set_get_history()
-                    run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow)
-                    # run_workflow_for_current_organism.get_species_history_id()
-
-                    # Get the attributes of the instance and project data files
-                    run_workflow_for_current_organism.get_instance_attributes()
-                    run_workflow.add_interproscan_analysis()
-                    run_workflow_for_current_organism.get_interpro_analysis_id()
-
-                    # Import datasets into history and retrieve their hda IDs
-                    run_workflow_for_current_organism.import_datasets_into_history()
-                    hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids()
-
-                    INTERPRO_FILE = "0"
-                    LOAD_INTERPRO_IN_CHADO = "1"
-                    SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL = "2"
-                    SYNC_FEATURES_INTO_TRIPAL = "3"
-                    POPULATE_MAT_VIEWS = "4"
-                    INDEX_TRIPAL_DATA = "5"
-
-                    workflow_parameters = {}
-                    workflow_parameters[INTERPRO_FILE] = {}
-                    workflow_parameters[LOAD_INTERPRO_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id,
-                                                                   "analysis_id": run_workflow_for_current_organism.interpro_analysis_id}
-                    workflow_parameters[SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.interpro_analysis_id}
-
-                    run_workflow_for_current_organism.datamap = {}
-                    run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": run_workflow_for_current_organism.hda_ids["interproscan_hda_id"]}
-
-                    # Run Interproscan workflow
-                    run_workflow_for_current_organism.run_workflow(workflow_path=workflow,
-                                                                   workflow_parameters=workflow_parameters,
-                                                                   datamap=run_workflow_for_current_organism.datamap,
-                                                                   workflow_name="Interproscan")
-
-                elif "Blast" in str(workflow):
-
-                    logging.info("Executing workflow 'Blast_Diamond")
-
-                    run_workflow_for_current_organism.connect_to_instance()
-                    run_workflow_for_current_organism.set_get_history()
-                    run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow)
-                    # run_workflow_for_current_organism.get_species_history_id()
-
-                    # Get the attributes of the instance and project data files
-                    run_workflow_for_current_organism.get_instance_attributes()
-                    run_workflow_for_current_organism.add_blastp_diamond_analysis()
-                    run_workflow_for_current_organism.get_blastp_diamond_analysis_id()
-
-                    # Import datasets into history and retrieve their hda IDs
-                    run_workflow_for_current_organism.import_datasets_into_history()
-                    hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids()
-
-                    BLAST_FILE = "0"
-                    LOAD_BLAST_IN_CHADO = "1"
-                    SYNC_BLAST_ANALYSIS_INTO_TRIPAL = "2"
-                    SYNC_FEATURES_INTO_TRIPAL = "3"
-                    POPULATE_MAT_VIEWS = "4"
-                    INDEX_TRIPAL_DATA = "5"
-
-                    workflow_parameters = {}
-                    workflow_parameters[INTERPRO_FILE] = {}
-                    workflow_parameters[LOAD_BLAST_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id,
-                                                                "analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id}
-                    workflow_parameters[SYNC_BLAST_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id}
-
-                    run_workflow_for_current_organism.datamap = {}
-                    run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": hda_ids["interproscan_hda_id"]}
-
-                    # Run Interproscan workflow
-                    run_workflow_for_current_organism.run_workflow(workflow_path=workflow,
-                                                                   workflow_parameters=workflow_parameters,
-                                                                   datamap=run_workflow_for_current_organism.datamap,
-                                                                   workflow_name="Interproscan")
+            all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value
+
+
+    for k, v in all_sp_workflow_dict.items():
+        if len(list(v.keys())) == 1:
+            logging.info("Input organism %s: 1 strain/sex combination detected in input dictionary" % k)
+
+            # Set workflow path (1 organism)
+            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
+
+            # Set the galaxy instance variables
+            for k2, v2 in v.items():
+                instance_url = v2["instance_url"]
+                email = v2["email"]
+                password = v2["password"]
+
+            instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
+
+            # Check if the versions of tools specified in the workflow are installed in galaxy
+            install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
+
+            # Set datamap (mapping of input files in the workflow)
+            datamap = {}
+
+            # Set the workflow parameters (individual tools runtime parameters in the workflow)
+            workflow_parameters = {}
+
+        if len(list(v.keys())) == 2:
+
+            logging.info("Input organism %s: 2 strain/sex combinations detected in input dictionary" % k)
+
+            # Set workflow path (2 organisms)
+            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga")
+
+            # Instance object required variables
+            instance_url, email, password = None, None, None
+
+            # Set the galaxy instance variables
+            for k2, v2 in v.items():
+                instance_url = v2["instance_url"]
+                email = v2["email"]
+                password = v2["password"]
+
+            instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
+
+            # Check if the versions of tools specified in the workflow are installed in galaxy
+            install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
+
+            # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2)
+            organisms_key_names = list(v.keys())
+            org1_dict = v[organisms_key_names[0]]
+            org2_dict = v[organisms_key_names[1]]
+
+            history_id = org1_dict["history_id"]
+
+            # Organism 1 attributes
+            org1_genus = org1_dict["genus"]
+            org1_species = org1_dict["species"]
+            org1_genus_species = org1_dict["genus_species"]
+            org1_species_folder_name = org1_dict["species_folder_name"]
+            org1_full_name = org1_dict["full_name"]
+            org1_strain = org1_dict["strain"]
+            org1_sex = org1_dict["sex"]
+            org1_org_id = org1_dict["org_id"]
+            org1_genome_analysis_id = org1_dict["genome_analysis_id"]
+            org1_ogs_analysis_id = org1_dict["ogs_analysis_id"]
+            org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"]
+            org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"]
+            org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"]
+            org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"]
+
+            # Store these values into a dict for parameters logging/validation
+            org1_parameters_dict = {
+                "org1_genus": org1_genus,
+                "org1_species": org1_species,
+                "org1_genus_species": org1_genus_species,
+                "org1_species_folder_name": org1_species_folder_name,
+                "org1_full_name": org1_full_name,
+                "org1_strain": org1_strain,
+                "org1_sex": org1_sex,
+                "org1_org_id": org1_org_id,
+                "org1_genome_analysis_id": org1_genome_analysis_id,
+                "org1_ogs_analysis_id": org1_ogs_analysis_id,
+                "org1_genome_hda_id": org1_genome_hda_id,
+                "org1_transcripts_hda_id": org1_transcripts_hda_id,
+                "org1_proteins_hda_id": org1_proteins_hda_id,
+                "org1_gff_hda_id": org1_gff_hda_id,
+            }
+
+            # Look for empty parameter values, throw a critical error if a parameter value is invalid
+            for param_name, param_value in org1_parameters_dict.items():
+                if param_value is None or param_value == "":
+                    logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value))
+                    sys.exit()
+            # Organism 2 attributes
+            org2_genus = org2_dict["genus"]
+            org2_species = org2_dict["species"]
+            org2_genus_species = org2_dict["genus_species"]
+            org2_species_folder_name = org2_dict["species_folder_name"]
+            org2_full_name = org2_dict["full_name"]
+            org2_strain = org2_dict["strain"]
+            org2_sex = org2_dict["sex"]
+            org2_org_id = org2_dict["org_id"]
+            org2_genome_analysis_id = org2_dict["genome_analysis_id"]
+            org2_ogs_analysis_id = org2_dict["ogs_analysis_id"]
+            org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"]
+            org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"]
+            org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"]
+            org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"]
+
+            # Store these values into a dict for parameters logging/validation
+            org2_parameters_dict = {
+                "org2_genus": org2_genus,
+                "org2_species": org2_species,
+                "org2_genus_species": org2_genus_species,
+                "org2_species_folder_name": org2_species_folder_name,
+                "org2_full_name": org2_full_name,
+                "org2_strain": org2_strain,
+                "org2_sex": org2_sex,
+                "org2_org_id": org2_org_id,
+                "org2_genome_analysis_id": org2_genome_analysis_id,
+                "org2_ogs_analysis_id": org2_ogs_analysis_id,
+                "org2_genome_hda_id": org2_genome_hda_id,
+                "org2_transcripts_hda_id": org2_transcripts_hda_id,
+                "org2_proteins_hda_id": org2_proteins_hda_id,
+                "org2_gff_hda_id": org2_gff_hda_id,
+            }
+
+            # Look for empty parameter values, throw a critical error if a parameter value is invalid
+            for param_name, param_value in org2_parameters_dict.items():
+                if param_value is None or param_value == "":
+                    logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
+                    sys.exit()
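The two validation loops above are identical; they could be factored into a small helper. A sketch (the helper name check_wf_param is hypothetical, not part of this changeset):

import logging
import sys

def check_wf_param(full_name, params):
    # Exit on the first empty/None parameter value, mirroring the two loops above
    for param_name, param_value in params.items():
        if param_value is None or param_value == "":
            logging.critical("Empty parameter value found for organism {0} (parameter: {1})".format(full_name, param_name))
            sys.exit()

check_wf_param(org1_full_name, org1_parameters_dict) would then replace the first loop, and likewise for organism 2.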
"https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}") + + + # Source files association (ordered by their IDs in the workflow) + # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error) + GFF_FILE_ORG1 = "0" + GENOME_FASTA_FILE_ORG1 = "1" + PROTEINS_FASTA_FILE_ORG1 = "2" + + GENOME_FASTA_FILE_ORG2 = "3" + GFF_FILE_ORG2 = "4" + PROTEINS_FASTA_FILE_ORG2 = "5" + + LOAD_FASTA_ORG1 = "6" + JBROWSE_ORG1 = "7" + JRBOWSE_ORG2 = "8" + + LOAD_GFF_ORG1 = "9" + JBROWSE_CONTAINER = "10" + SYNC_FEATURES_ORG1 = "11" + + LOAD_FASTA_ORG2 = "12" + LOAD_GFF_ORG2 = "13" + + SYNC_FEATURES_ORG2 = "14" + POPULATE_MAT_VIEWS = "15" + INDEX_TRIPAL_DATA = "16" + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) + workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} + workflow_parameters[GFF_FILE_ORG1] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} + workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} + workflow_parameters[GFF_FILE_ORG2] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} + + # Organism 1 + workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id, + "analysis_id": org1_genome_analysis_id, + "do_update": "true"} + # workflow_parameters[JBROWSE_ORG1] = {"menu_url": jbrowse_menu_url_org1} + workflow_parameters[JBROWSE_ORG1] = {} + workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id} + workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id} + # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]} + workflow_parameters[JBROWSE_CONTAINER] = {} + + # Organism 2 + workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id, + "analysis_id": org2_genome_analysis_id, + "do_update": "true"} + workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id} + # workflow_parameters[JRBOWSE_ORG2] = {"menu_url": jbrowse_menu_url_org2} + workflow_parameters[JRBOWSE_ORG2] = {} + workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id} + + + # POPULATE + INDEX DATA + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + + # Set datamap (mapping of input files in the workflow) + datamap = {} + + # Organism 1 + datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id} + datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id} + + # Organism 2 + datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id} + datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id} + + with open(workflow_path, 'r') as ga_in_file: + + # Store the decoded json dictionary + workflow_dict = json.load(ga_in_file) + workflow_name = workflow_dict["name"] + + + # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them + # as runtime values, using runtime parameters makes the 
+            with open(workflow_path, 'r') as ga_in_file:
+
+                # Store the decoded json dictionary
+                workflow_dict = json.load(ga_in_file)
+                workflow_name = workflow_dict["name"]
+
+                # For the JBrowse tool we unfortunately have to edit the parameters manually instead of setting them
+                # as runtime values, as using runtime parameters makes the tool throw an internal critical error ("replace not found")
+                # Scratchgmod test: need "http" (or "https") and the hostname (+ port)
+                if "menu_url" not in config.keys():
+                    jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
+                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
+                else:
+                    jbrowse_menu_url_org1 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
+                    jbrowse_menu_url_org2 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
+
+                # Replace values in the workflow dictionary
+                workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
+                workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
+                # The UNIQUE_ID is specific to a combination genus_species_strain_sex, so every combination should have its own workflow
+                # in galaxy --> define a naming method for these workflows
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
+
+                # Import the workflow in galaxy as a dict
+                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-            else:
-                logging.critical("The galaxy container for %s is not ready yet!"
-                                 % run_workflow_for_current_organism.full_name)
-                sys.exit()
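Because tool_state is stored as a JSON-encoded string, the substitutions above are plain str.replace calls on that string; a generic helper sketch (the name replace_in_tool_state is hypothetical) that keeps the per-step edits in one place:

def replace_in_tool_state(workflow_dict, step_id, replacements):
    # replacements: e.g. {"__MENU_URL_ORG1__": jbrowse_menu_url_org1}
    tool_state = workflow_dict["steps"][step_id]["tool_state"]
    for placeholder, value in replacements.items():
        tool_state = tool_state.replace(placeholder, value)
    workflow_dict["steps"][step_id]["tool_state"] = tool_state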
+                # Get its attributes
+                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                # Then get its ID (required to invoke the workflow)
+                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+                # Check that the workflow is found
+                try:
+                    show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                    logging.debug("Workflow ID: %s" % workflow_id)
+                except bioblend.ConnectionError:
+                    logging.warning("Error finding workflow %s" % workflow_name)
+
+                # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
+
+                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
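The invoke_workflow call above returns the invocation as a dict, so the jobs state could also be polled from the script instead of checking the web UI. A sketch reusing the instance, workflow_id, history_id, workflow_parameters and datamap variables above; it assumes the installed bioblend version exposes show_invocation() on the workflows client:

import time

invocation = instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
while True:
    state = instance.workflows.show_invocation(workflow_id, invocation["id"])["state"]
    if state in ("scheduled", "failed", "cancelled"):
        break
    time.sleep(30)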
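The speciesData.py changes below derive a per-organism dataset prefix and sanitize the species folder name. A standalone sketch of the intended result (the function names and the example organism are illustrative, not part of the changeset, and the trailing-underscore trim reflects the corrected behaviour):

def build_dataset_prefix(genus, species, sex=""):
    # e.g. build_dataset_prefix("Ectocarpus", "siliculosus", "male") -> "e_siliculosus_m"
    prefix = "{0}_{1}".format(genus[0].lower(), species.lower())
    if sex:
        prefix = "{0}_{1}".format(prefix, sex[0].lower())
    return prefix

def sanitize_folder_name(name):
    # Replace separators first, then collapse doubled underscores and trim a trailing one
    name = name.replace("-", "_").replace("(", "_").replace(")", "_")
    while "__" in name:
        name = name.replace("__", "_")
    return name.rstrip("_")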
diff --git a/speciesData.py b/speciesData.py
index 5d9ffc2a51abf5d4211232c9e6db09226729e286..a8637b01484de65a257f12eec6e5afacc406d014 100755
--- a/speciesData.py
+++ b/speciesData.py
@@ -39,11 +39,11 @@ class SpeciesData:
         if parameters_dictionary["data"]["genome_version"] == "":
             self.genome_version = "1.0"
         else:
-            self.genome_version = parameters_dictionary["data"]["genome_version"]
+            self.genome_version = str(parameters_dictionary["data"]["genome_version"])
         if parameters_dictionary["data"]["ogs_version"] == "":
             self.ogs_version = "1.0"
         else:
-            self.ogs_version = parameters_dictionary["data"]["ogs_version"]
+            self.ogs_version = str(parameters_dictionary["data"]["ogs_version"])

         # TODO: catch blocks if key is absent in input
         self.genome_path = parameters_dictionary["data"]["genome_path"]
@@ -66,18 +66,27 @@ class SpeciesData:
         self.full_name_lowercase = self.full_name.lower()
         self.abbreviation = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"])
"_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"]) - self.genus_species = self.genus_lowercase + "_" + self.species + self.genus_species = "{0}_{1}".format(self.genus.lower(), self.species.lower()) + self.dataset_prefix = None + if self.sex is not None or self.sex != "": + self.dataset_prefix = self.genus[0].lower() + "_" + self.species.lower() + "_" + self.sex[0].lower() + else: + self.dataset_prefix = self.genus[0].lower() + "_" + self.species.lower() + + # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library + self.org_id = None + self.genome_analysis_id = None + self.ogs_analysis_id = None self.instance_url = None self.instance = None self.history_id = None self.library = None self.library_id = None + self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) self.main_dir = None self.species_dir = None - self.org_id = None - self.genome_analysis_id = None - self.ogs_analysis_id = None + self.tool_panel = None self.datasets = dict() self.datasets_name = dict() @@ -88,7 +97,9 @@ class SpeciesData: self.datasets = dict() self.config = None # Custom config used to set environment variables inside containers self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(), self.sex.lower()])["not_empty"]) - self.species_folder_name = self.species_folder_name .replace("-", "_") + self.species_folder_name = self.species_folder_name .replace("-", "_").replace('__', '_').replace("(", "_").replace(")", "_") + if self.species_folder_name.endswith("_"): + self.species_folder_name = self.species_folder_name[0:-2] self.existing_folders_cache = {} self.bam_metadata_cache = {} diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga b/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga new file mode 100644 index 0000000000000000000000000000000000000000..aa88764d3aba81740d81e0415838e148da4a372c --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga @@ -0,0 +1,907 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "chado_load_tripal_synchronize_jbrowse_2org_v2", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "genome org1" + } + ], + "label": "genome org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 204.8000030517578, + "height": 61.80000305175781, + "left": 215, + "right": 415, + "top": 143, + "width": 200, + "x": 215, + "y": 143 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "59f823c8-fd7c-441c-84b6-21c367ba12f6", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "95cc9784-0f9c-46df-9f62-4c342f6695e7" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "annotations org1" + } + ], + "label": "annotations org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 294.8000030517578, + "height": 61.80000305175781, + "left": 214, + "right": 414, + "top": 233, + "width": 200, + "x": 214, + "y": 233 + }, + "tool_id": null, + "tool_state": "{\"optional\": 
false}", + "tool_version": null, + "type": "data_input", + "uuid": "c82b756f-2ef5-41d5-9e14-eb9a7ef942c7", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "b67afacf-2d88-49f6-af15-02500f0ddc90" + } + ] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "proteins org1" + } + ], + "label": "proteins org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 389.8000030517578, + "height": 61.80000305175781, + "left": 215, + "right": 415, + "top": 328, + "width": 200, + "x": 215, + "y": 328 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "0a0ac416-68b8-4c48-980e-04c6230c721e", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "cc50348e-1372-406e-9b3e-d2cacfea81b2" + } + ] + }, + "3": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "genome org2" + } + ], + "label": "genome org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 479.8000030517578, + "height": 61.80000305175781, + "left": 214, + "right": 414, + "top": 418, + "width": 200, + "x": 214, + "y": 418 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "ddd5f0f8-9e14-42f0-a866-2971d02e5435", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "0f63af8f-7c12-4c7a-a491-4f55f65fff4e" + } + ] + }, + "4": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "annotations org2" + } + ], + "label": "annotations org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 573.8000030517578, + "height": 61.80000305175781, + "left": 216, + "right": 416, + "top": 512, + "width": 200, + "x": 216, + "y": 512 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "c7f3b553-5cd2-46e6-a792-e864ab0d2459", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "e5ec0e3f-c56a-4c4c-b905-26b7e85808d3" + } + ] + }, + "5": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 5, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "proteins org2" + } + ], + "label": "proteins org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 663.8000030517578, + "height": 61.80000305175781, + "left": 217, + "right": 417, + "top": 602, + "width": 200, + "x": 217, + "y": 602 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "b5b82a8a-f73c-4183-bc0a-6956e5ad9d0a", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "92d17a84-9137-4437-add9-5b3e551bb6b2" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "fasta": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load fasta", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "fasta" + }, + { + 
"description": "runtime parameter for tool Chado load fasta", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "wait_for" + } + ], + "label": "Chado load fasta org", + "name": "Chado load fasta", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 307.3999938964844, + "height": 164.39999389648438, + "left": 501, + "right": 701, + "top": 143, + "width": 200, + "x": 501, + "y": 143 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "ba4d07fbaf47", + "name": "chado_feature_load_fasta", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"do_update\": \"false\", \"ext_db\": {\"db\": \"\", \"re_db_accession\": \"\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"re_name\": \"\", \"re_uniquename\": \"\", \"relationships\": {\"rel_type\": \"none\", \"__current_case__\": 0}, \"sequence_type\": \"contig\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "d15253d8-a673-4bfb-8051-e06d6017288e", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "c5b2da92-4326-46c1-94cb-e0301e2629f8" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.8+galaxy1", + "errors": null, + "id": 7, + "input_connections": { + "reference_genome|genome": { + "id": 0, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": "JBrowse org1", + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 868.1999969482422, + "height": 205.1999969482422, + "left": 513, + "right": 713, + "top": 663, + "width": 200, + "x": 513, + "y": 663 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.8+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fd5dbf0f732e", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"ConnectedValue\"}}, \"standalone\": \"false\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"ConnectedValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": 
\"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"transcript\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": \"__MENU_URL_ORG1__\", \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.8+galaxy1", + "type": "tool", + "uuid": "16f56647-e3c0-4bda-a126-9011cd3115b8", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "0a865401-1041-44b8-bd20-2c422bc695fa" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.8+galaxy1", + "errors": null, + "id": 8, + "input_connections": { + "reference_genome|genome": { + "id": 3, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 4, + "output_name": "output" + } + }, + "inputs": [], + "label": "JBrowse org2", + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 1084.1999969482422, + "height": 205.1999969482422, + "left": 518, + "right": 718, + "top": 879, + "width": 200, + "x": 518, + "y": 879 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.8+galaxy1", + "tool_shed_repository": { + "changeset_revision": "fd5dbf0f732e", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"ConnectedValue\"}}, \"standalone\": \"false\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"ConnectedValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"transcript\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": 
\"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": \"__MENU_URL_ORG2__\", \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.8+galaxy1", + "type": "tool", + "uuid": "f4525181-f8b3-47ab-979f-fee0f46f13ba", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "4acd10c7-ef6d-4808-8aac-253d91733853" + } + ] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "errors": null, + "id": 9, + "input_connections": { + "fasta": { + "id": 2, + "output_name": "output" + }, + "gff": { + "id": 1, + "output_name": "output" + }, + "wait_for": { + "id": 6, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load gff", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "fasta" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "gff" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "wait_for" + } + ], + "label": "Chado load gff ", + "name": "Chado load gff", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 317.3999938964844, + "height": 174.39999389648438, + "left": 787, + "right": 987, + "top": 143, + "width": 200, + "x": 787, + "y": 143 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e9a6d7568817", + "name": "chado_feature_load_gff", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"gff\": {\"__class__\": \"RuntimeValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(_.+)$\", \"re_protein\": \"prot\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "281d98df-9a07-4beb-b3e8-a1ac72d8c0c9", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "e92a76f7-7b7e-429c-9aa7-cfc8ce5f0cd1" + } + ] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + "errors": null, + "id": 10, + "input_connections": { + "organisms_0|jbrowse": { + "id": 7, + "output_name": "output" + }, + "organisms_1|jbrowse": { + "id": 8, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Add organisms to JBrowse container", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 995.8000030517578, 
+ "height": 184.8000030517578, + "left": 883, + "right": 1083, + "top": 811, + "width": 200, + "x": 883, + "y": 811 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + "tool_shed_repository": { + "changeset_revision": "11033bdad2ca", + "name": "jbrowse_to_container", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organisms\": [{\"__index__\": 0, \"jbrowse\": {\"__class__\": \"ConnectedValue\"}, \"name\": \"__FULL_NAME_ORG1__\", \"advanced\": {\"unique_id\": \"__UNIQUE_ID_ORG1__\"}}, {\"__index__\": 1, \"jbrowse\": {\"__class__\": \"ConnectedValue\"}, \"name\": \"__FULL_NAME_ORG2__\", \"advanced\": {\"unique_id\": \"__UNIQUE_ID_ORG2__\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.5.1", + "type": "tool", + "uuid": "0f70a2d4-f599-4514-846d-16a558251a9a", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "0e8e64c8-7dc6-46f1-aca2-d4a6ffce638b" + } + ] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "errors": null, + "id": 11, + "input_connections": { + "wait_for": { + "id": 9, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize features", + "name": "organism_id" + } + ], + "label": "Synchronize features org1", + "name": "Synchronize features", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 300.3999938964844, + "height": 154.39999389648438, + "left": 1069, + "right": 1269, + "top": 146, + "width": 200, + "x": 1069, + "y": 146 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "64e36c3f0dd6", + "name": "tripal_feature_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"repeat_ids\": [], \"repeat_types\": [{\"__index__\": 0, \"types\": \"mRNA\"}, {\"__index__\": 1, \"types\": \"polypeptide\"}], \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "7ed6c0a0-f36b-4a57-9fd8-4900af93b39c", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "ddd66d56-b81b-4ad7-a96d-baf365d8b85a" + } + ] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "errors": null, + "id": 12, + "input_connections": { + "fasta": { + "id": 3, + "output_name": "output" + }, + "wait_for": { + "id": 11, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load fasta", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "fasta" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "wait_for" + } + ], + "label": "Chado load fasta org2", + "name": "Chado load fasta", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 562.3999938964844, + "height": 164.39999389648438, + "left": 514, + "right": 714, + "top": 398, + "width": 200, + "x": 514, + "y": 398 + }, + 
"post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "ba4d07fbaf47", + "name": "chado_feature_load_fasta", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"do_update\": \"false\", \"ext_db\": {\"db\": \"\", \"re_db_accession\": \"\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"re_name\": \"\", \"re_uniquename\": \"\", \"relationships\": {\"rel_type\": \"none\", \"__current_case__\": 0}, \"sequence_type\": \"contig\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "049f96ea-620a-434c-b32a-886cac9174ef", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "e67a8a18-2a9d-4906-8960-3d76455de221" + } + ] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "errors": null, + "id": 13, + "input_connections": { + "fasta": { + "id": 5, + "output_name": "output" + }, + "gff": { + "id": 4, + "output_name": "output" + }, + "wait_for": { + "id": 12, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load gff", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "fasta" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "gff" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "wait_for" + } + ], + "label": "Chado load gff org2", + "name": "Chado load gff", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 594.8000030517578, + "height": 194.8000030517578, + "left": 799, + "right": 999, + "top": 400, + "width": 200, + "x": 799, + "y": 400 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e9a6d7568817", + "name": "chado_feature_load_gff", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"gff\": {\"__class__\": \"RuntimeValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(_.+)$\", \"re_protein\": \"prot\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "9293d4b6-7cad-4d6b-8143-e134f58c75fe", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "975db068-46e0-4c0a-a74f-80c7d2137ac8" + } + ] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "errors": null, + "id": 
14, + "input_connections": { + "wait_for": { + "id": 13, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize features", + "name": "organism_id" + } + ], + "label": "Synchronize features org2", + "name": "Synchronize features", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 560.3999938964844, + "height": 154.39999389648438, + "left": 1078, + "right": 1278, + "top": 406, + "width": 200, + "x": 1078, + "y": 406 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "64e36c3f0dd6", + "name": "tripal_feature_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"repeat_ids\": [], \"repeat_types\": [{\"__index__\": 0, \"types\": \"mRNA\"}, {\"__index__\": 1, \"types\": \"polypeptide\"}], \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "090a12c8-f931-4f3c-8853-9ae581c7c091", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "f51aa0a5-e2f8-41af-bb56-dbfe220e7fc8" + } + ] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 15, + "input_connections": { + "wait_for": { + "id": 14, + "output_name": "results" + } + }, + "inputs": [], + "label": null, + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 456.3999938964844, + "height": 154.39999389648438, + "left": 1362, + "right": 1562, + "top": 302, + "width": 200, + "x": 1362, + "y": 302 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "0a3bd8c6-240b-45f6-a90d-e88ff3703fa7", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "c7229bc3-fbe7-4c7f-9a08-8ad631d57766" + } + ] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 16, + "input_connections": { + "wait_for": { + "id": 15, + "output_name": "results" + } + }, + "inputs": [], + "label": null, + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 416.6000061035156, + "height": 113.60000610351562, + "left": 1638, + "right": 1838, + "top": 303, + "width": 200, + "x": 1638, + "y": 303 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": 
\"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "b01a554e-a43a-494e-876c-ebe134c4c48f", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "0547bccc-451b-4de1-9452-83fe86d8bc2a" + } + ] + } + }, + "tags": [], + "uuid": "ef44b9b7-f9df-454e-bbba-070299d056d1", + "version": 1 +} \ No newline at end of file diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga b/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga new file mode 100644 index 0000000000000000000000000000000000000000..045e442219face7eed174cafcec206dd9de0ffa0 --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga @@ -0,0 +1,881 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "chado_load_tripal_synchronize_jbrowse_2org_v4", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "annotations org1" + } + ], + "label": "annotations org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 64.80000305175781, + "height": 61.80000305175781, + "left": 233, + "right": 433, + "top": 3, + "width": 200, + "x": 233, + "y": 3 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "c82b756f-2ef5-41d5-9e14-eb9a7ef942c7", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "cd6ee602-9669-4542-820c-c4655bc573b0" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "genome org1" + } + ], + "label": "genome org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": -25.199996948242188, + "height": 61.80000305175781, + "left": 234, + "right": 434, + "top": -87, + "width": 200, + "x": 234, + "y": -87 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "59f823c8-fd7c-441c-84b6-21c367ba12f6", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "6b57ff4d-58c7-4a22-a2c7-66acca4ebe33" + } + ] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "proteins org1" + } + ], + "label": "proteins org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 159.8000030517578, + "height": 61.80000305175781, + "left": 234, + "right": 434, + "top": 98, + "width": 200, + "x": 234, + "y": 98 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "0a0ac416-68b8-4c48-980e-04c6230c721e", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "64cd0c64-5d21-400f-a3c8-3a035978be06" + } + ] + }, + "3": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "genome org2" + } + ], + "label": "genome org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 249.8000030517578, + "height": 61.80000305175781, + "left": 233, + "right": 433, + "top": 
188, + "width": 200, + "x": 233, + "y": 188 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "ddd5f0f8-9e14-42f0-a866-2971d02e5435", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "bcf5bb6d-9796-4e37-bcf8-ac3d90efd7bf" + } + ] + }, + "4": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "annotations org2" + } + ], + "label": "annotations org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 343.8000030517578, + "height": 61.80000305175781, + "left": 235, + "right": 435, + "top": 282, + "width": 200, + "x": 235, + "y": 282 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "c7f3b553-5cd2-46e6-a792-e864ab0d2459", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "2de7390d-d075-4122-80ee-63217adf4f24" + } + ] + }, + "5": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 5, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "proteins org2" + } + ], + "label": "proteins org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 433.8000030517578, + "height": 61.80000305175781, + "left": 236, + "right": 436, + "top": 372, + "width": 200, + "x": 236, + "y": 372 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "b5b82a8a-f73c-4183-bc0a-6956e5ad9d0a", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "84bb8de5-0039-40bc-98b0-2524a8958443" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "fasta": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load fasta", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "organism" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "wait_for" + } + ], + "label": "Chado load fasta org", + "name": "Chado load fasta", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 77.39999389648438, + "height": 164.39999389648438, + "left": 519, + "right": 719, + "top": -87, + "width": 200, + "x": 519, + "y": -87 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "ba4d07fbaf47", + "name": "chado_feature_load_fasta", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"do_update\": \"false\", \"ext_db\": {\"db\": \"\", \"re_db_accession\": \"\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"re_name\": \"\", \"re_uniquename\": \"\", \"relationships\": {\"rel_type\": \"none\", \"__current_case__\": 0}, \"sequence_type\": \"contig\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": 
"2.3.4+galaxy0", + "type": "tool", + "uuid": "d706dd73-5dc7-4c06-8c56-825aa81394a1", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "b058f83f-8d18-4ef4-b3a6-bae86dc0f9f6" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", + "errors": null, + "id": 7, + "input_connections": { + "reference_genome|genome": { + "id": 1, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool JBrowse", + "name": "reference_genome" + } + ], + "label": "JBrowse org1", + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 638.1999969482422, + "height": 205.1999969482422, + "left": 532, + "right": 732, + "top": 433, + "width": 200, + "x": 532, + "y": 433 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", + "tool_shed_repository": { + "changeset_revision": "4542035c1075", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"RuntimeValue\"}}, \"standalone\": \"complete\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"RuntimeValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"transcript\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": \"__MENU_URL_ORG1__\", \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.11+galaxy0", + "type": "tool", + "uuid": "ab159045-e72e-46cc-9e9f-d7ad2b50687e", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "e93e82c7-7a3b-4219-a2f6-09afbe60b1e0" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", + "errors": 
null, + "id": 8, + "input_connections": { + "reference_genome|genome": { + "id": 3, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 4, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool JBrowse", + "name": "reference_genome" + } + ], + "label": "JBrowse org2", + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 854.1999969482422, + "height": 205.1999969482422, + "left": 537, + "right": 737, + "top": 649, + "width": 200, + "x": 537, + "y": 649 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", + "tool_shed_repository": { + "changeset_revision": "4542035c1075", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"RuntimeValue\"}}, \"standalone\": \"complete\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"RuntimeValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"transcript\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": \"__MENU_URL_ORG2__\", \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.11+galaxy0", + "type": "tool", + "uuid": "a06b4426-5546-4393-aa6a-23a0c490eb8f", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "a44c76ce-c7f4-4f24-bc96-be90aafc9efb" + } + ] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "errors": null, + "id": 9, + "input_connections": { + "fasta": { + "id": 2, + "output_name": "output" + }, + "gff": { + "id": 0, + "output_name": "output" + }, + "wait_for": { + "id": 6, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load gff", + "name": "analysis_id" + }, + { 
+ "description": "runtime parameter for tool Chado load gff", + "name": "organism" + } + ], + "label": "Chado load gff ", + "name": "Chado load gff", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 87.39999389648438, + "height": 174.39999389648438, + "left": 806, + "right": 1006, + "top": -87, + "width": 200, + "x": 806, + "y": -87 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e9a6d7568817", + "name": "chado_feature_load_gff", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"gff\": {\"__class__\": \"ConnectedValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(_.+)$\", \"re_protein\": \"prot\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "f93c9145-6484-4c19-ab15-97f042fa4f1e", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "4b935868-ea0a-4ccf-9fe4-90d8fbe280df" + } + ] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + "errors": null, + "id": 10, + "input_connections": { + "organisms_0|jbrowse": { + "id": 7, + "output_name": "output" + }, + "organisms_1|jbrowse": { + "id": 8, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Add organisms to JBrowse container", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "bottom": 765.8000030517578, + "height": 184.8000030517578, + "left": 902, + "right": 1102, + "top": 581, + "width": 200, + "x": 902, + "y": 581 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + "tool_shed_repository": { + "changeset_revision": "11033bdad2ca", + "name": "jbrowse_to_container", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organisms\": [{\"__index__\": 0, \"jbrowse\": {\"__class__\": \"RuntimeValue\"}, \"name\": \"__DISPLAY_NAME_ORG1__\", \"advanced\": {\"unique_id\": \"__UNIQUE_ID_ORG1__\"}}, {\"__index__\": 1, \"jbrowse\": {\"__class__\": \"RuntimeValue\"}, \"name\": \"__DISPLAY_NAME_ORG2__\", \"advanced\": {\"unique_id\": \"__UNIQUE_ID_ORG2__\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.5.1", + "type": "tool", + "uuid": "2c90cac2-d1ef-4d3b-bf43-d01e27059823", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "91511518-e85c-4a29-a929-77334cad6a1c" + } + ] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "errors": null, + "id": 11, + "input_connections": { + "wait_for": { + "id": 9, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize features", + "name": "organism_id" + } + ], + "label": "Synchronize features org1", + "name": "Synchronize features", + "outputs": [ + { 
+ "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 70.39999389648438, + "height": 154.39999389648438, + "left": 1088, + "right": 1288, + "top": -84, + "width": 200, + "x": 1088, + "y": -84 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "64e36c3f0dd6", + "name": "tripal_feature_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"repeat_ids\": [], \"repeat_types\": [{\"__index__\": 0, \"types\": \"mRNA\"}, {\"__index__\": 1, \"types\": \"polypeptide\"}], \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "7ed6c0a0-f36b-4a57-9fd8-4900af93b39c", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "ef7589b7-bd80-48c5-bd44-f275d49ea324" + } + ] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "errors": null, + "id": 12, + "input_connections": { + "fasta": { + "id": 3, + "output_name": "output" + }, + "wait_for": { + "id": 11, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load fasta", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load fasta", + "name": "organism" + } + ], + "label": "Chado load fasta org2", + "name": "Chado load fasta", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 332.3999938964844, + "height": 164.39999389648438, + "left": 533, + "right": 733, + "top": 168, + "width": 200, + "x": 533, + "y": 168 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "ba4d07fbaf47", + "name": "chado_feature_load_fasta", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"do_update\": \"false\", \"ext_db\": {\"db\": \"\", \"re_db_accession\": \"\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"re_name\": \"\", \"re_uniquename\": \"\", \"relationships\": {\"rel_type\": \"none\", \"__current_case__\": 0}, \"sequence_type\": \"contig\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "c860eb94-43cb-468e-ad5e-257a519b66ca", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "a742ac3c-b152-464e-bcfd-5139efd90599" + } + ] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "errors": null, + "id": 13, + "input_connections": { + "fasta": { + "id": 5, + "output_name": "output" + }, + "gff": { + "id": 4, + "output_name": "output" + }, + "wait_for": { + "id": 12, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load gff", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load gff", + "name": "organism" 
+ } + ], + "label": "Chado load gff org2", + "name": "Chado load gff", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 364.8000030517578, + "height": 194.8000030517578, + "left": 818, + "right": 1018, + "top": 170, + "width": 200, + "x": 818, + "y": 170 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e9a6d7568817", + "name": "chado_feature_load_gff", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"gff\": {\"__class__\": \"ConnectedValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(_.+)$\", \"re_protein\": \"prot\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.3.4+galaxy0", + "type": "tool", + "uuid": "a4d0333d-bcbc-44bb-88ff-7bba058a50d8", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "0d07b3da-3317-4d92-bb85-c301ee2d0753" + } + ] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "errors": null, + "id": 14, + "input_connections": { + "wait_for": { + "id": 13, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize features", + "name": "organism_id" + } + ], + "label": "Synchronize features org2", + "name": "Synchronize features", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 330.3999938964844, + "height": 154.39999389648438, + "left": 1097, + "right": 1297, + "top": 176, + "width": 200, + "x": 1097, + "y": 176 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "64e36c3f0dd6", + "name": "tripal_feature_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"repeat_ids\": [], \"repeat_types\": [{\"__index__\": 0, \"types\": \"mRNA\"}, {\"__index__\": 1, \"types\": \"polypeptide\"}], \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "090a12c8-f931-4f3c-8853-9ae581c7c091", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "b896a431-ea9b-42e7-a2e8-99c5c43a6069" + } + ] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 15, + "input_connections": { + "wait_for": { + "id": 14, + "output_name": "results" + } + }, + "inputs": [], + "label": null, + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 226.39999389648438, + "height": 154.39999389648438, + "left": 1381, + "right": 1581, + "top": 72, + "width": 200, + "x": 1381, + "y": 72 + }, + "post_job_actions": {}, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.0", + "type": "tool", + "uuid": "0a3bd8c6-240b-45f6-a90d-e88ff3703fa7", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "ba904db6-a017-4090-8d13-e3076f0052c4" + } + ] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 16, + "input_connections": { + "wait_for": { + "id": 15, + "output_name": "results" + } + }, + "inputs": [], + "label": null, + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 186.60000610351562, + "height": 113.60000610351562, + "left": 1657, + "right": 1857, + "top": 73, + "width": 200, + "x": 1657, + "y": 73 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.2.1.1", + "type": "tool", + "uuid": "b01a554e-a43a-494e-876c-ebe134c4c48f", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "f431f362-7ad4-484f-bce7-b5d290b782d6" + } + ] + } + }, + "tags": [], + "uuid": "43552d52-cef0-4601-8a5f-0a9e209c1426", + "version": 1 +} \ No newline at end of file