From 6ba67a53b4908cc004f6999067310b2d18463527 Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Fri, 30 Apr 2021 09:06:20 +0200
Subject: [PATCH] workflow 2org

---
 run_workflow_phaeoexplorer.py | 1173 +++++++++++++++++++++------
 speciesData.py                |   13 +-
 2 files changed, 763 insertions(+), 423 deletions(-)

diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 753803f..36c4de9 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -115,7 +115,7 @@ class RunWorkflow(speciesData.SpeciesData):
         """
 
-        logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
+        logging.debug("Connecting to the galaxy instance (%s)" % self.instance_url)
         self.instance = galaxy.GalaxyInstance(url=self.instance_url,
                                               email=self.config["galaxy_default_admin_email"],
                                               password=self.config["galaxy_default_admin_password"]
                                               )
@@ -128,54 +128,10 @@ class RunWorkflow(speciesData.SpeciesData):
             logging.critical("Cannot connect to galaxy instance (%s) " % self.instance_url)
             sys.exit()
         else:
-            logging.info("Successfully connected to galaxy instance (%s) " % self.instance_url)
+            logging.debug("Successfully connected to galaxy instance (%s) " % self.instance_url)
             return 1
 
-    def install_changesets_revisions_from_workflow(self, workflow_path):
-        """
-        Read a .ga file to extract the information about the different tools called.
-        Check if every tool is installed via a "show_tool".
-        If a tool is not installed (versions don't match), send a warning to the logger and install the required changeset (matching the tool version)
-        Doesn't do anything if versions match
-
-        :return:
-        """
-
-        logging.info("Validating that installed tools versions and changesets match workflow versions")
-
-        # Load the workflow file (.ga) in a buffer
-        with open(workflow_path, 'r') as ga_in_file:
-
-            # Then store the decoded json dictionary
-            workflow_dict = json.load(ga_in_file)
-
-            # Look up every "step_id" looking for tools
-            for k, v in workflow_dict["steps"].items():
-                if v["tool_id"]:
-
-                    # Get the descriptive dictionary of the installed tool (using the tool id in the workflow)
-                    show_tool = self.instance.tools.show_tool(v["tool_id"])
-
-                    # Check if an installed version matches the workflow tool version
-                    # (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0")
-                    if show_tool["version"] != v["tool_version"]:
-                        # If it doesn't match, proceed to install of the correct changeset revision
-                        print(show_tool)
-                        # logging.warning("Tool versions don't match for {0} (changeset installed: {1} | changeset required: {2}). Installing changeset revision {3}...".format(v["tool_shed_repository"]["name"], show_tool["changeset_revision"], v["tool_shed_repository"]["changeset_revision"], v["tool_shed_repository"]["changeset_revision"]))
-                        toolshed = "https://" + v["tool_shed_repository"]["tool_shed"]
-                        name = v["tool_shed_repository"]["name"]
-                        owner = v["tool_shed_repository"]["owner"]
-                        changeset_revision = v["tool_shed_repository"]["changeset_revision"]
-
-                        self.instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
-                                                                            changeset_revision=changeset_revision,
-                                                                            install_tool_dependencies=True,
-                                                                            install_repository_dependencies=False,
-                                                                            install_resolver_dependencies=True)
-
-        logging.info("Tools versions and changesets from workflow validated")
-
 
     def return_instance(self):
@@ -198,18 +154,18 @@ class RunWorkflow(speciesData.SpeciesData):
         logging.info("Validating installed individual tools versions and changesets")
 
         # Verify that the add_organism and add_analysis versions are correct in the toolshed
-        add_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.3")
-        add_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.3")
-        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3")
-        get_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3")
+        add_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0")
+        add_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.4+galaxy0")
+        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
+        get_analysis_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0")
 
-        # changeset for 2.3.3 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
+        # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
         # except for workflows (.ga) that already contain the changeset revisions inside the steps ids
 
-        if get_organism_tool["version"] != "2.3.3":
+        if get_organism_tool["version"] != "2.3.4+galaxy0":
             toolshed_dict = get_organism_tool["tool_shed_repository"]
             logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
-            changeset_revision = "b07279b5f3bf"
+            changeset_revision = "831229e6cda2"
             name = toolshed_dict["name"]
             owner = toolshed_dict["owner"]
             toolshed = "https://" + toolshed_dict["tool_shed"]
@@ -220,10 +176,10 @@ class RunWorkflow(speciesData.SpeciesData):
                                                                 install_repository_dependencies=False,
                                                                 install_resolver_dependencies=True)
 
-        if get_analysis_tool["version"] != "2.3.3":
+        if get_analysis_tool["version"] != "2.3.4+galaxy0":
            toolshed_dict = changeset_revision["tool_shed_repository"]
             logging.warning("Changeset for %s is not installed" % toolshed_dict["name"])
-            changeset_revision = "c7be2feafd73"
+            changeset_revision = "a867923f555e"
             name = toolshed_dict["name"]
             owner = toolshed_dict["owner"]
             toolshed = "https://" + 
toolshed_dict["tool_shed"] @@ -234,10 +190,10 @@ class RunWorkflow(speciesData.SpeciesData): install_repository_dependencies=False, install_resolver_dependencies=True) - if add_organism_tool["version"] != "2.3.3": + if add_organism_tool["version"] != "2.3.4+galaxy0": toolshed_dict = add_organism_tool["tool_shed_repository"] logging.warning("Changeset for %s is not installed" % toolshed_dict["name"]) - changeset_revision = "680a1fe3c266" + changeset_revision = "1f12b9650028" name = toolshed_dict["name"] owner = toolshed_dict["owner"] toolshed = "https://" + toolshed_dict["tool_shed"] @@ -248,10 +204,10 @@ class RunWorkflow(speciesData.SpeciesData): install_repository_dependencies=False, install_resolver_dependencies=True) - if add_analysis_tool["version"] != "2.3.3": + if add_analysis_tool["version"] != "2.3.4+galaxy0": toolshed_dict = add_analysis_tool["tool_shed_repository"] logging.warning("Changeset for %s is not installed" % toolshed_dict["name"]) - changeset_revision = "43c36801669f" + changeset_revision = "10b2b1c70e69" name = toolshed_dict["name"] owner = toolshed_dict["owner"] toolshed = "https://" + toolshed_dict["tool_shed"] @@ -290,8 +246,7 @@ class RunWorkflow(speciesData.SpeciesData): self.connect_to_instance() self.set_get_history() - # We want the tools version default to be 2.3.3 at the moment - tool_version = "2.3.3" + tool_version = "2.3.4+galaxy0" # Add organism (species) to chado logging.info("Adding organism to the instance's chado database") if self.common == "" or self.common is None: @@ -371,7 +326,7 @@ class RunWorkflow(speciesData.SpeciesData): # Get the ID for the genome analysis in chado genome_analysis = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3", + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0", history_id=self.history_id, tool_inputs={"name": self.full_name_lowercase + " genome v" + self.genome_version}) genome_analysis_job_out = genome_analysis["outputs"][0]["id"] @@ -391,7 +346,7 @@ class RunWorkflow(speciesData.SpeciesData): # Get the ID for the OGS analysis in chado ogs_analysis = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3", + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0", history_id=self.history_id, tool_inputs={"name": self.full_name_lowercase + " OGS" + self.ogs_version}) ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"] @@ -413,7 +368,7 @@ class RunWorkflow(speciesData.SpeciesData): # Add Interpro analysis to chado logging.info("Adding Interproscan analysis to the instance's chado database") self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.3", + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.4+galaxy0", history_id=self.history_id, tool_inputs={"name": "InterproScan on OGS%s" % self.ogs_version, "program": "InterproScan", @@ -428,7 +383,7 @@ class RunWorkflow(speciesData.SpeciesData): # Get interpro ID interpro_analysis = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.3", + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0", history_id=self.history_id, tool_inputs={"name": "InterproScan on OGS%s" 
% self.ogs_version}) interpro_analysis_job_out = interpro_analysis["outputs"][0]["id"] @@ -565,7 +520,7 @@ class RunWorkflow(speciesData.SpeciesData): return 0 - def import_datasets_into_history(self): + def import_datasets_into_history(self, imported_datasets_ids): """ Find datasets in a library, get their ID and import them into the current history if they are not already @@ -589,7 +544,6 @@ class RunWorkflow(speciesData.SpeciesData): for i in instance_source_data_folders: folders_ids[i["name"]] = i["id"] - # Iterating over the folders to find datasets and map datasets to their IDs logging.debug("Datasets IDs: ") for k, v in folders_ids.items(): @@ -598,7 +552,7 @@ class RunWorkflow(speciesData.SpeciesData): for k2, v2 in sub_folder_content.items(): for e in v2: if type(e) == dict: - if e["name"].endswith(".fa"): + if e["name"].endswith(".fasta"): self.datasets["genome_file"] = e["ldda_id"] self.datasets_name["genome_file"] = e["name"] logging.debug("\tGenome file:\t" + e["name"] + ": " + e["ldda_id"]) @@ -632,15 +586,15 @@ class RunWorkflow(speciesData.SpeciesData): logging.debug("Uploading datasets into history %s" % self.history_id) # Import each dataset into history if it is not imported - first_hda_ids = self.get_datasets_hda_ids() + first_hda_ids = self.get_datasets_hda_ids(imported_datasets_ids=imported_datasets_ids) - if first_hda_ids["genome_hda_id"] is None: + if first_hda_ids["genome_hda_id"] is not None: self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"]) - if first_hda_ids["gff_hda_id"] is None: + if first_hda_ids["gff_hda_id"] is not None: self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"]) - if first_hda_ids["transcripts_hda_id"] is None: + if first_hda_ids["transcripts_hda_id"] is not None: self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"]) - if first_hda_ids["proteins_hda_id"] is None: + if first_hda_ids["proteins_hda_id"] is not None: self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"]) if first_hda_ids["interproscan_hda_id"] is None: try: @@ -658,10 +612,10 @@ class RunWorkflow(speciesData.SpeciesData): # datasets_ids_outfile.write(str(_datasets)) # Return a dict made of the hda ids - return self.get_datasets_hda_ids() + return self.get_datasets_hda_ids(imported_datasets_ids=first_hda_ids["imported_datasets_ids"]) - def get_datasets_hda_ids(self): + def get_datasets_hda_ids(self, imported_datasets_ids): """ Get the hda IDs of the datasets imported into an history @@ -684,33 +638,43 @@ class RunWorkflow(speciesData.SpeciesData): # Match files imported in history names vs library datasets names to assign their respective hda_id for dataset_dict in history_datasets_li: if dataset_dict["history_id"] == self.history_id: - if dataset_dict["name"] == self.datasets_name["genome_file"]: + if dataset_dict["name"] == self.datasets_name["genome_file"] and dataset_dict["id"] not in imported_datasets_ids: genome_dataset_hda_id = dataset_dict["id"] - logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id) - elif dataset_dict["name"] == self.datasets_name["proteins_file"]: + logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id) + elif dataset_dict["name"] == self.datasets_name["proteins_file"] and dataset_dict["id"] not in imported_datasets_ids: proteins_datasets_hda_id 
= dataset_dict["id"] - logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id) - elif dataset_dict["name"] == self.datasets_name["transcripts_file"]: + logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id) + elif dataset_dict["name"] == self.datasets_name["transcripts_file"] and dataset_dict["id"] not in imported_datasets_ids: transcripts_dataset_hda_id = dataset_dict["id"] - logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id) - elif dataset_dict["name"] == self.datasets_name["gff_file"]: + logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id) + elif dataset_dict["name"] == self.datasets_name["gff_file"] and dataset_dict["id"] not in imported_datasets_ids: gff_dataset_hda_id = dataset_dict["id"] - logging.debug("GFF dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("GFF dataset hda ID: %s" % gff_dataset_hda_id) if "interproscan_file" in self.datasets_name.keys(): - if dataset_dict["name"] == self.datasets_name["interproscan_file"]: + if dataset_dict["name"] == self.datasets_name["interproscan_file"] and dataset_dict["id"] not in imported_datasets_ids: interproscan_dataset_hda_id = dataset_dict["id"] - logging.debug("InterproScan dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("InterproScan dataset hda ID: %s" % gff_dataset_hda_id) if "blast_diamond_file" in self.datasets_name.keys(): - if dataset_dict["name"] == self.datasets_name["blast_diamond_file"]: + if dataset_dict["name"] == self.datasets_name["blast_diamond_file"] and dataset_dict["id"] not in imported_datasets_ids: blast_diamond_dataset_hda_id = dataset_dict["id"] - logging.debug("Blast Diamond dataset hda ID: %s" % gff_dataset_hda_id) + logging.debug("Blast Diamond dataset hda ID: %s" % gff_dataset_hda_id) + + # Add datasets IDs to already imported IDs (so we don't assign all the wrong IDs to the next organism if there is one) + imported_datasets_ids.append(genome_dataset_hda_id) + imported_datasets_ids.append(transcripts_dataset_hda_id) + imported_datasets_ids.append(proteins_datasets_hda_id) + imported_datasets_ids.append(gff_dataset_hda_id) + imported_datasets_ids.append(interproscan_dataset_hda_id) + imported_datasets_ids.append(blast_diamond_dataset_hda_id) # Return a dict made of the hda ids return {"genome_hda_id": genome_dataset_hda_id, "transcripts_hda_id": transcripts_dataset_hda_id, "proteins_hda_id": proteins_datasets_hda_id, "gff_hda_id": gff_dataset_hda_id, "interproscan_hda_id": interproscan_dataset_hda_id, - "blast_diamond_hda_id": blast_diamond_dataset_hda_id} + "blast_diamond_hda_id": blast_diamond_dataset_hda_id, + "imported_datasets_ids": imported_datasets_ids} + def get_organism_id(self): """ @@ -720,7 +684,7 @@ class RunWorkflow(speciesData.SpeciesData): :return: """ - tool_version = "2.3.3" + tool_version = "2.3.4+galaxy0" time.sleep(3) # # Get the ID for the current organism in chado @@ -736,7 +700,7 @@ class RunWorkflow(speciesData.SpeciesData): # Run tool again (sometimes the tool doesn't return anything despite the organism already being in the db) org = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3", + tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0", history_id=self.history_id, tool_inputs={"abbr": self.abbreviation, "genus": self.genus_uppercase, @@ -768,7 +732,7 @@ class RunWorkflow(speciesData.SpeciesData): "common": self.common}) # Run tool again 
(sometimes the tool doesn't return anything despite the organism already being in the db)
             org = self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.3",
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0",
                 history_id=self.history_id,
                 tool_inputs={"abbr": self.abbreviation,
                              "genus": self.genus_uppercase,
@@ -787,6 +751,186 @@ class RunWorkflow(speciesData.SpeciesData):
 
         return self.org_id
 
+
+def run_workflow(workflow_path, workflow_parameters, datamap, config, input_species_number):
+    """
+    Run a workflow in galaxy
+    Requires the .ga file to be loaded as a dictionary (optionally could be uploaded as a raw file)
+
+    :param workflow_path:
+    :param workflow_parameters:
+    :param datamap:
+    :param config:
+    :param input_species_number:
+    :return:
+    """
+
+    logging.info("Importing workflow %s" % str(workflow_path))
+
+    # Load the workflow file (.ga) in a buffer
+    with open(workflow_path, 'r') as ga_in_file:
+
+        # Then store the decoded json dictionary
+        workflow_dict = json.load(ga_in_file)
+
+        # In case of the Jbrowse workflow, we unfortunately have to manually edit the parameters instead of setting them
+        # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
+        # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
+        if "menu_url" not in config.keys():
+            jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}")
+        else:
+            jbrowse_menu_url = config["menu_url"]
+        if workflow_name == "Jbrowse":
+            workflow_dict["steps"]["2"]["tool_state"] = workflow_dict["steps"]["2"]["tool_state"].replace("__MENU_URL__", jbrowse_menu_url)
+            # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
+            # in galaxy --> define a naming method for these workflows
+            workflow_dict["steps"]["3"]["tool_state"] = workflow_dict["steps"]["3"]["tool_state"].replace("__FULL_NAME__", self.full_name).replace("__UNIQUE_ID__", self.species_folder_name)
+
+        # Import the workflow in galaxy as a dict
+        self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+
+        # Get its attributes
+        workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name)
+        # Then get its ID (required to invoke the workflow)
+        workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+        show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
+        # Check if the workflow is found
+        try:
+            logging.debug("Workflow ID: %s" % workflow_id)
+        except bioblend.ConnectionError:
+            logging.warning("Error retrieving workflow attributes for workflow %s" % workflow_name)
+
+        # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+        self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
+                                                history_id=self.history_id,
+                                                params=workflow_parameters,
+                                                inputs=datamap,
+                                                allow_tool_state_corrections=True)
+
+        logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, self.instance_url))
+
+
+
+
+
+
+def create_sp_workflow_dict(sp_dict, main_dir, config, imported_datasets_ids):
+    """
+    """
+
+    sp_workflow_dict = {}
+    run_workflow_for_current_organism = 
RunWorkflow(parameters_dictionary=sp_dict) + + # Verifying the galaxy container is running + if utilities.check_galaxy_state(genus_lowercase=run_workflow_for_current_organism.genus_lowercase, + species=run_workflow_for_current_organism.species, + script_dir=run_workflow_for_current_organism.script_dir): + + # Starting + logging.info("run_workflow.py called for %s" % run_workflow_for_current_organism.full_name) + + # Setting some of the instance attributes + run_workflow_for_current_organism.main_dir = main_dir + run_workflow_for_current_organism.species_dir = os.path.join(run_workflow_for_current_organism.main_dir, + run_workflow_for_current_organism.genus_species + + "/") + + # Parse the config yaml file + run_workflow_for_current_organism.config = config + # Set the instance url attribute --> TODO: the localhost rule in the docker-compose still doesn't work on scratchgmodv1 + run_workflow_for_current_organism.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format( + run_workflow_for_current_organism.config["http_port"], + run_workflow_for_current_organism.genus_lowercase, + run_workflow_for_current_organism.species) + + + run_workflow_for_current_organism.connect_to_instance() + + history_id = run_workflow_for_current_organism.set_get_history() + + run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() + run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + + org_id = run_workflow_for_current_organism.get_organism_id() + genome_analysis_id = run_workflow_for_current_organism.get_genome_analysis_id() + ogs_analysis_id = run_workflow_for_current_organism.get_ogs_analysis_id() + instance_attributes = run_workflow_for_current_organism.get_instance_attributes() + hda_ids = run_workflow_for_current_organism.import_datasets_into_history(imported_datasets_ids=imported_datasets_ids) + + strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex) + genus_species = run_workflow_for_current_organism.genus_species + + # Create the dictionary holding all attributes needed to connect to the galaxy instance + attributes = {"genus": run_workflow_for_current_organism.genus, + "species": run_workflow_for_current_organism.species, + "genus_species": run_workflow_for_current_organism.genus_species, + "full_name": run_workflow_for_current_organism.full_name, + "species_folder_name": run_workflow_for_current_organism.species_folder_name, + "sex": run_workflow_for_current_organism.sex, + "strain": run_workflow_for_current_organism.strain, + "org_id": org_id, + "genome_analysis_id": genome_analysis_id, + "ogs_analysis_id": ogs_analysis_id, + "instance_attributes": instance_attributes, + "hda_ids": hda_ids, + "history_id": history_id, + "instance": run_workflow_for_current_organism.instance, + "instance_url": run_workflow_for_current_organism.instance_url, + "email": config["galaxy_default_admin_email"], + "password": config["galaxy_default_admin_password"]} + + sp_workflow_dict[genus_species] = {strain_sex: attributes} + + else: + logging.critical("The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.full_name) + sys.exit() + + + return sp_workflow_dict + + +def install_changesets_revisions_from_workflow(instance, workflow_path): + """ + Read a .ga file to extract the information about the different tools called. + Check if every tool is installed via a "show_tool". 
+ If a tool is not installed (versions don't match), send a warning to the logger and install the required changeset (matching the tool version) + Doesn't do anything if versions match + + :return: + """ + + logging.info("Validating that installed tools versions and changesets match workflow versions") + + # Load the workflow file (.ga) in a buffer + with open(workflow_path, 'r') as ga_in_file: + + # Then store the decoded json dictionary + workflow_dict = json.load(ga_in_file) + + # Look up every "step_id" looking for tools + for k, v in workflow_dict["steps"].items(): + if v["tool_id"]: + # Get the descriptive dictionary of the installed tool (using the tool id in the workflow) + show_tool = instance.tools.show_tool(v["tool_id"]) + # Check if an installed version matches the workflow tool version + # (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0") + if show_tool["version"] != v["tool_version"]: + # If it doesn't match, proceed to install of the correct changeset revision + # logging.warning("Tool versions don't match for {0} (changeset installed: {1} | changeset required: {2}). Installing changeset revision {3}...".format(v["tool_shed_repository"]["name"], show_tool["changeset_revision"], v["tool_shed_repository"]["changeset_revision"], v["tool_shed_repository"]["changeset_revision"])) + toolshed = "https://" + v["tool_shed_repository"]["tool_shed"] + name = v["tool_shed_repository"]["name"] + owner = v["tool_shed_repository"]["owner"] + changeset_revision = v["tool_shed_repository"]["changeset_revision"] + + logging.debug("Installing changeset {0} for tool {1}".format(changeset_revision, name)) + + # Install changeset + instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner, + changeset_revision=changeset_revision, + install_tool_dependencies=True, + install_repository_dependencies=False, + install_resolver_dependencies=True) + + logging.info("Tools versions and changesets from workflow validated") + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction " "with galaxy instances for GGA" @@ -835,365 +979,554 @@ if __name__ == "__main__": sp_dict_list = utilities.parse_input(args.input) + # # Checking if user specified a workflow to run + # if not args.workflow: + # logging.critical("No workflow specified, exiting") + # sys.exit() + # else: + # workflow = os.path.abspath(args.workflow) + - org_ids = [] - genome_analysis_ids = [] - ogs_analysis_ids = [] - hda_ids_list = [] - instance_attributes_list = [] + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + config = utilities.parse_config(args.config) + all_sp_workflow_dict = {} - instance_url_2org = None + # IDs of already imported datasets (useful in case there are several species within the same instance) + # TODO: Not a very smart way to filter datasets as the list will grow at every input species, whereas + # we only need to do the ID lookup for a single galaxy instance --> Possible issue where we encounter 2 identical IDs in + # different instances + imported_datasets_ids = [] for sp_dict in sp_dict_list: - # Creating an instance of the RunWorkflow object for the current organism - run_workflow_for_current_organism = RunWorkflow(parameters_dictionary=sp_dict) + # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary + current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, 
main_dir=args.main_directory, config=config, imported_datasets_ids=imported_datasets_ids) - # Checking if user specified a workflow to run - if not args.workflow: - logging.critical("No workflow specified, exiting") - sys.exit() + current_sp_key = list(current_sp_workflow_dict.keys())[0] + current_sp_value = list(current_sp_workflow_dict.values())[0] + current_sp_strain_sex_key = list(current_sp_value.keys())[0] + current_sp_strain_sex_value = list(current_sp_value.values())[0] + + # Add the species dictionary to the complete dictionary + # This dictionary contains every organism present in the input file + # Its structure is the following: + # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}} + if not current_sp_key in all_sp_workflow_dict.keys(): + all_sp_workflow_dict[current_sp_key] = current_sp_value else: - workflow = os.path.abspath(args.workflow) + all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value + + + for k, v in all_sp_workflow_dict.items(): + if len(list(v.keys())) == 1: + logging.info("Input organism %s: 1 species detected in input dictionary" % k) + + # Set workflow path (1 organism) + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga") + + # Set the galaxy instance variables + for k2, v2 in v.items(): + instance_url = v2["instance_url"] + email = v2["email"] + password = v2["password"] + + instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + + # Check if the versions of tools specified in the workflow are installed in galaxy + install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) + + # Set datamap (mapping of input files in the workflow) + datamap = {} + + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + if len(list(v.keys())) == 2: + + logging.info("Input organism %s: 2 species detected in input dictionary" % k) + + # Set workflow path (2 organisms) + workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v2.ga") + + # Instance object required variables + instance_url, email, password = None, None, None + + # Set the galaxy instance variables + for k2, v2 in v.items(): + instance_url = v2["instance_url"] + email = v2["email"] + password = v2["password"] + + instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + + # Check if the versions of tools specified in the workflow are installed in galaxy + install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) + + # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2) + organisms_key_names = list(v.keys()) + org1_dict = v[organisms_key_names[0]] + org2_dict = v[organisms_key_names[1]] + + history_id = org1_dict["history_id"] + + # Organism 1 attributes + org1_genus = org1_dict["genus"] + org1_species = org1_dict["species"] + org1_genus_species = org1_dict["genus_species"] + org1_species_folder_name = org1_dict["species_folder_name"] + org1_full_name = org1_dict["full_name"] + org1_strain = org1_dict["sex"] + org1_sex = org1_dict["strain"] + org1_org_id = org1_dict["org_id"] + org1_genome_analysis_id = org1_dict["genome_analysis_id"] + org1_ogs_analysis_id = org1_dict["ogs_analysis_id"] + org1_genome_hda_id = 
org1_dict["hda_ids"]["genome_hda_id"] + org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"] + org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"] + org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"] + + # Organism 2 attributes + org2_genus = org2_dict["genus"] + org2_species = org2_dict["species"] + org2_genus_species = org2_dict["genus_species"] + org2_species_folder_name = org2_dict["species_folder_name"] + org2_full_name = org2_dict["full_name"] + org2_strain = org2_dict["sex"] + org2_sex = org2_dict["strain"] + org2_org_id = org2_dict["org_id"] + org2_genome_analysis_id = org2_dict["genome_analysis_id"] + org2_ogs_analysis_id = org2_dict["ogs_analysis_id"] + org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"] + org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"] + org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"] + org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"] + + + # Source files association (ordered by their IDs in the workflow) + GENOME_FASTA_FILE_ORG1 = "0" + GFF_FILE_ORG1 = "1" + PROTEINS_FASTA_FILE_ORG1 = "2" + + GENOME_FASTA_FILE_ORG2 = "3" + GFF_FILE_ORG2 = "4" + PROTEINS_FASTA_FILE_ORG2 = "5" + + LOAD_FASTA_ORG1 = "6" + JBROWSE_ORG1 = "7" + JRBOWSE_ORG2 = "8" + + LOAD_GFF_ORG1 = "9" + JBROWSE_CONTAINER = "10" + SYNC_FEATURES_ORG1 = "11" + + LOAD_FASTA_ORG2 = "12" + LOAD_GFF_ORG2 = "13" + + SYNC_FEATURES_ORG2 = "14" + POPULATE_MAT_VIEWS = "15" + INDEX_TRIPAL_DATA = "16" + + # Set the workflow parameters (individual tools runtime parameters in the workflow) + workflow_parameters = {} + + # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) + workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} + workflow_parameters[GFF_FILE_ORG1] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} + workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} + workflow_parameters[GFF_FILE_ORG2] = {} + workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} + + # Organism 1 + workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id, + "analysis_id": org1_genome_analysis_id, + "do_update": "true"} + workflow_parameters[JBROWSE_ORG1] = {} + workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id} + workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id} + workflow_parameters[JBROWSE_CONTAINER] = {} + + + # Organism 2 + workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id, + "analysis_id": org2_genome_analysis_id, + "do_update": "true"} + workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id} + workflow_parameters[JRBOWSE_ORG2] = {} + workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id} + + + # POPULATE + INDEX DATA + workflow_parameters[POPULATE_MAT_VIEWS] = {} + workflow_parameters[INDEX_TRIPAL_DATA] = {} + + + # Set datamap (mapping of input files in the workflow) + datamap = {} + + # Organism 1 + datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id} + datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id} + + # Organism 2 + datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id} + datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id} + datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id} + + with open(workflow_path, 'r') as ga_in_file: + + # Store the decoded json 
dictionary + workflow_dict = json.load(ga_in_file) + workflow_name = workflow_dict["name"] + + # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them + # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) + # Scratchgmod test: need "http" (or "https"), the hostname (+ port) + if "menu_url" not in config.keys(): + jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}") + jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}") + else: + jbrowse_menu_url_org1 = config["menu_url"] + jbrowse_menu_url_org2 = jbrowse_menu_url_org1 + + # Replace values in the workflow dictionary + workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) + workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) + # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow + # in galaxy --> define a naming method for these workflows + workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name) + workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name) - # Verifying the galaxy container is running - if utilities.check_galaxy_state(genus_lowercase=run_workflow_for_current_organism.genus_lowercase, - species=run_workflow_for_current_organism.species, - script_dir=run_workflow_for_current_organism.script_dir): + # Import the workflow in galaxy as a dict + instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) - # Starting - logging.info("run_workflow.py called for %s" % run_workflow_for_current_organism.full_name) + # Get its attributes + workflow_attributes = instance.workflows.get_workflows(name=workflow_name) + # Then get its ID (required to invoke the workflow) + workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) + show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id) + # Check if the workflow is found + try: + logging.debug("Workflow ID: %s" % workflow_id) + except bioblend.ConnectionError: + logging.warning("Error finding workflow %s" % workflow_name) - # Setting some of the instance attributes - run_workflow_for_current_organism.main_dir = args.main_directory - run_workflow_for_current_organism.species_dir = os.path.join(run_workflow_for_current_organism.main_dir, - run_workflow_for_current_organism.genus_species + - "/") + # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it + instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) - # Parse the config yaml file - run_workflow_for_current_organism.config = 
utilities.parse_config(args.config) - # Set the instance url attribute --> TODO: the localhost rule in the docker-compose still doesn't work on scratchgmodv1 - run_workflow_for_current_organism.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format( - run_workflow_for_current_organism.config["http_port"], - run_workflow_for_current_organism.genus_lowercase, - run_workflow_for_current_organism.species) + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) - instance_url_2org = run_workflow_for_current_organism.instance_url # Get the instance attribute from the object for future connections - instance = run_workflow_for_current_organism.instance + # This is the GalaxyInstance object from bioblend (not the url!) + # instance = run_workflow_for_current_organism.instance - if "2org" in str(workflow): - logging.info("Executing workflow %s" % workflow) + # if "2org" in str(workflow): + # logging.info("Executing workflow %s" % workflow) - run_workflow_for_current_organism.connect_to_instance() - run_workflow_for_current_organism.set_get_history() - run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - run_workflow_for_current_organism.add_organism_ogs_genome_analyses() - org_id = run_workflow_for_current_organism.get_organism_id() - org_ids.append(org_id) - genome_analysis_id = run_workflow_for_current_organism.get_genome_analysis_id() - genome_analysis_ids.append(genome_analysis_id) - ogs_analysis_id = run_workflow_for_current_organism.get_ogs_analysis_id() - ogs_analysis_ids.append(ogs_analysis_id) - instance_attributes = run_workflow_for_current_organism.get_instance_attributes() - instance_attributes_list.append(instance_attributes) + # run_workflow_for_current_organism.connect_to_instance() + # run_workflow_for_current_organism.set_get_history() + # # TODO: only do this once per instance (not at each iteration!) + # run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() + # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) + # run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + # org_id = run_workflow_for_current_organism.get_organism_id() + # genome_analysis_id = run_workflow_for_current_organism.get_genome_analysis_id() + # ogs_analysis_id = run_workflow_for_current_organism.get_ogs_analysis_id() + # instance_attributes = run_workflow_for_current_organism.get_instance_attributes() - # Import datasets into history and retrieve their hda IDs - # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) - hda_ids = run_workflow_for_current_organism.import_datasets_into_history() - hda_ids_list.append(hda_ids) + # # Import datasets into history and retrieve their hda IDs + # # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) + # hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + # hda_ids_list.append(hda_ids) - # TODO: Exlcude the workflow invocation from the loop - # Extract instance url from one, attributes from both in lists ? + # # TODO: Exlcude the workflow invocation from the loop + # # Extract instance url from one, attributes from both in lists ? 
- # Source files association (ordered by their IDs in the workflow) - GENOME_FASTA_FILE_ORG1 = "0" - GFF_FILE_ORG1 = "1" - PROTEINS_FASTA_FILE_ORG1 = "2" - GENOME_FASTA_FILE_ORG2 = "3" - GFF_FILE_ORG2 = "4" - PROTEINS_FASTA_FILE_ORG2 = "5" - LOAD_FASTA_ORG1 = "6" - JBROWSE_ORG1 = "7" - JRBOWSE_ORG2 = "8" + # # Source files association (ordered by their IDs in the workflow) + # GENOME_FASTA_FILE_ORG1 = "0" + # GFF_FILE_ORG1 = "1" + # PROTEINS_FASTA_FILE_ORG1 = "2" + # GENOME_FASTA_FILE_ORG2 = "3" + # GFF_FILE_ORG2 = "4" + # PROTEINS_FASTA_FILE_ORG2 = "5" - LOAD_GFF_ORG1 = "9" - JBROWSE_CONTAINER = "10" - SYNC_FEATURES_ORG1 = "11" + # LOAD_FASTA_ORG1 = "6" + # JBROWSE_ORG1 = "7" + # JRBOWSE_ORG2 = "8" - LOAD_FASTA_ORG2 = "12" - LOAD_GFF_ORG2 = "13" + # LOAD_GFF_ORG1 = "9" + # JBROWSE_CONTAINER = "10" + # SYNC_FEATURES_ORG1 = "11" - SYNC_FEATURES_ORG2 = "14" - POPULATE_MAT_VIEWS = "15" - INDEX_TRIPAL_DATA = "16" + # LOAD_FASTA_ORG2 = "12" + # LOAD_GFF_ORG2 = "13" - workflow_parameters = {} + # SYNC_FEATURES_ORG2 = "14" + # POPULATE_MAT_VIEWS = "15" + # INDEX_TRIPAL_DATA = "16" - workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} - workflow_parameters[GFF_FILE_ORG1] = {} - workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} - workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} - workflow_parameters[GFF_FILE_ORG2] = {} - workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} + # workflow_parameters = {} - - # ORGANISM 1 - workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org_ids[0], - "analysis_id": genome_analysis_ids[0], - "do_update": "true"} - # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - workflow_parameters[JBROWSE_ORG1] = {} - workflow_parameters[LOAD_GFF_ORG1] = {"organism": org_ids[0], "analysis_id": ogs_analysis_ids[0]} - workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org_ids[0]} - workflow_parameters[JBROWSE_CONTAINER] = {} - - - # ORGANISM 2 - workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org_ids[1], - "analysis_id": genome_analysis_ids[1], - "do_update": "true"} - # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - workflow_parameters[LOAD_GFF_ORG2] = {"organism": org_ids[1], "analysis_id": ogs_analysis_ids[1]} - workflow_parameters[JRBOWSE_ORG2] = {} - workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org_ids[1]} - - workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": ogs_analysis_ids[0]} - workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": genome_analysis_ids[0]} - workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": org_ids[0]} - - # POPULATE + INDEX DATA - workflow_parameters[POPULATE_MAT_VIEWS] = {} - workflow_parameters[INDEX_TRIPAL_DATA] = {} - - # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) - run_workflow_for_current_organism.datamap = {} - - run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["genome_hda_id"]} - run_workflow_for_current_organism.datamap[GFF_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["gff_hda_id"]} - run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["proteins_hda_id"]} - - 
run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["genome_hda_id"]} - run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["gff_hda_id"]} - run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["proteins_hda_id"]} - - - - logging.info("OK: Workflow invoked") - - - - # If input workflow is Chado_load_Tripal_synchronize.ga - if "Chado_load_Tripal_synchronize" in str(workflow): - - logging.info("Executing workflow 'Chado_load_Tripal_synchronize'") - - run_workflow_for_current_organism.connect_to_instance() - run_workflow_for_current_organism.set_get_history() - # run_workflow_for_current_organism.get_species_history_id() - - run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - run_workflow_for_current_organism.add_organism_ogs_genome_analyses() - run_workflow_for_current_organism.get_organism_id() - run_workflow_for_current_organism.get_genome_analysis_id() - run_workflow_for_current_organism.get_ogs_analysis_id() - - # run_workflow_for_current_organism.tripal_synchronize_organism_analyses() - - # Get the attributes of the instance and project data files - run_workflow_for_current_organism.get_instance_attributes() - - # Import datasets into history and retrieve their hda IDs - # TODO: can be simplified with direct access to the folder contents via the full path (no loop required) - hda_ids = run_workflow_for_current_organism.import_datasets_into_history() - - # DEBUG - # run_workflow_for_current_organism.get_invocation_report(workflow_name="Chado load Tripal synchronize") - - # Explicit workflow parameter names - GENOME_FASTA_FILE = "0" - GFF_FILE = "1" - PROTEINS_FASTA_FILE = "2" - TRANSCRIPTS_FASTA_FILE = "3" - - LOAD_FASTA_IN_CHADO = "4" - LOAD_GFF_IN_CHADO = "5" - SYNC_ORGANISM_INTO_TRIPAL = "6" - SYNC_GENOME_ANALYSIS_INTO_TRIPAL = "7" - SYNC_OGS_ANALYSIS_INTO_TRIPAL = "8" - SYNC_FEATURES_INTO_TRIPAL = "9" - - workflow_parameters = {} - - workflow_parameters[GENOME_FASTA_FILE] = {} - workflow_parameters[GFF_FILE] = {} - workflow_parameters[PROTEINS_FASTA_FILE] = {} - workflow_parameters[TRANSCRIPTS_FASTA_FILE] = {} - - workflow_parameters[LOAD_FASTA_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - "analysis_id": run_workflow_for_current_organism.genome_analysis_id, - "do_update": "true"} - # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado - # WARNING: It is safer to never update it and just change the genome/ogs versions in the config - workflow_parameters[LOAD_GFF_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - "analysis_id": run_workflow_for_current_organism.ogs_analysis_id} - workflow_parameters[SYNC_ORGANISM_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id} - workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.ogs_analysis_id} - workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.genome_analysis_id} - workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id} - - # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) - run_workflow_for_current_organism.datamap = {} - 
run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} - run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} - run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE] = {"src": "hda", "id": hda_ids["proteins_hda_id"]} - run_workflow_for_current_organism.datamap[TRANSCRIPTS_FASTA_FILE] = {"src": "hda", "id": hda_ids["transcripts_hda_id"]} - - # run_workflow_for_current_organism.datamap = {} - # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": - # run_workflow_for_current_organism.datasets["genome_file"]} - # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", - # "id": hda_ids["gff_hda_id"]} - - # Ensures galaxy has had time to retrieve datasets - time.sleep(60) - # Run the Chado load Tripal sync workflow with the parameters set above - run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - workflow_parameters=workflow_parameters, - datamap=run_workflow_for_current_organism.datamap, - workflow_name="Chado load Tripal synchronize") - - # Jbrowse creation workflow - elif "Jbrowse" in str(workflow): - - logging.info("Executing workflow 'Jbrowse'") - - run_workflow_for_current_organism.connect_to_instance() - run_workflow_for_current_organism.set_get_history() - run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - run_workflow_for_current_organism.get_organism_id() - # Import datasets into history and get their hda IDs - run_workflow_for_current_organism.import_datasets_into_history() - hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() # Note: only call this function AFTER calling "import_datasets_into_history()" - - # Debugging - # run_workflow_for_current_organism.get_invocation_report(workflow_name="Jbrowse") - - GENOME_FASTA_FILE = "0" - GFF_FILE = "1" - ADD_JBROWSE = "2" - ADD_ORGANISM_TO_JBROWSE = "3" - - workflow_parameters = {} - workflow_parameters[GENOME_FASTA_FILE] = {} - workflow_parameters[GFF_FILE] = {} - workflow_parameters[ADD_JBROWSE] = {} - workflow_parameters[ADD_ORGANISM_TO_JBROWSE] = {} - - run_workflow_for_current_organism.datamap = {} - run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} - run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} - - # Run the jbrowse creation workflow - run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - workflow_parameters=workflow_parameters, - datamap=run_workflow_for_current_organism.datamap, - workflow_name="Jbrowse") - - elif "Interpro" in str(workflow): - - logging.info("Executing workflow 'Interproscan") - - run_workflow_for_current_organism.connect_to_instance() - run_workflow_for_current_organism.set_get_history() - run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.get_species_history_id() - - # Get the attributes of the instance and project data files - run_workflow_for_current_organism.get_instance_attributes() - run_workflow.add_interproscan_analysis() - run_workflow_for_current_organism.get_interpro_analysis_id() - - # Import datasets into history and retrieve their hda IDs - run_workflow_for_current_organism.import_datasets_into_history() - hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() - - INTERPRO_FILE = "0" - LOAD_INTERPRO_IN_CHADO = "1" - 
SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL = "2" - SYNC_FEATURES_INTO_TRIPAL = "3" - POPULATE_MAT_VIEWS = "4" - INDEX_TRIPAL_DATA = "5" - - workflow_parameters = {} - workflow_parameters[INTERPRO_FILE] = {} - workflow_parameters[LOAD_INTERPRO_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - "analysis_id": run_workflow_for_current_organism.interpro_analysis_id} - workflow_parameters[SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.interpro_analysis_id} - - - run_workflow_for_current_organism.datamap = {} - run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": run_workflow_for_current_organism.hda_ids["interproscan_hda_id"]} - - # Run Interproscan workflow - run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - workflow_parameters=workflow_parameters, - datamap=run_workflow_for_current_organism.datamap, - workflow_name="Interproscan") - - elif "Blast" in str(workflow): - - logging.info("Executing workflow 'Blast_Diamond") - - run_workflow_for_current_organism.connect_to_instance() - run_workflow_for_current_organism.set_get_history() - run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) - # run_workflow_for_current_organism.get_species_history_id() - - # Get the attributes of the instance and project data files - run_workflow_for_current_organism.get_instance_attributes() - run_workflow_for_current_organism.add_blastp_diamond_analysis() - run_workflow_for_current_organism.get_blastp_diamond_analysis_id() - - # Import datasets into history and retrieve their hda IDs - run_workflow_for_current_organism.import_datasets_into_history() - hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() - - BLAST_FILE = "0" - LOAD_BLAST_IN_CHADO = "1" - SYNC_BLAST_ANALYSIS_INTO_TRIPAL = "2" - SYNC_FEATURES_INTO_TRIPAL = "3" - POPULATE_MAT_VIEWS = "4" - INDEX_TRIPAL_DATA = "5" - - workflow_parameters = {} - workflow_parameters[INTERPRO_FILE] = {} - workflow_parameters[LOAD_BLAST_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, - "analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} - workflow_parameters[SYNC_BLAST_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} - - run_workflow_for_current_organism.datamap = {} - run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": hda_ids["interproscan_hda_id"]} - - # Run Interproscan workflow - run_workflow_for_current_organism.run_workflow(workflow_path=workflow, - workflow_parameters=workflow_parameters, - datamap=run_workflow_for_current_organism.datamap, - workflow_name="Interproscan") + # workflow_parameters[GENOME_FASTA_FILE_ORG1] = {} + # workflow_parameters[GFF_FILE_ORG1] = {} + # workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {} + # workflow_parameters[GENOME_FASTA_FILE_ORG2] = {} + # workflow_parameters[GFF_FILE_ORG2] = {} + # workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {} - - else: - logging.critical("The galaxy container for %s is not ready yet!" 
% run_workflow_for_current_organism.full_name) - sys.exit() + + # # ORGANISM 1 + # workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org_ids[0], + # "analysis_id": genome_analysis_ids[0], + # "do_update": "true"} + # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado + # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config + # workflow_parameters[JBROWSE_ORG1] = {} + # workflow_parameters[LOAD_GFF_ORG1] = {"organism": org_ids[0], "analysis_id": ogs_analysis_ids[0]} + # workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org_ids[0]} + # workflow_parameters[JBROWSE_CONTAINER] = {} + + + # # ORGANISM 2 + # workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org_ids[1], + # "analysis_id": genome_analysis_ids[1], + # "do_update": "true"} + # # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado + # # WARNING: It is safer to never update it and just change the genome/ogs versions in the config + # workflow_parameters[LOAD_GFF_ORG2] = {"organism": org_ids[1], "analysis_id": ogs_analysis_ids[1]} + # workflow_parameters[JRBOWSE_ORG2] = {} + # workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org_ids[1]} + + # workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": ogs_analysis_ids[0]} + # workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": genome_analysis_ids[0]} + # workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": org_ids[0]} + + # # POPULATE + INDEX DATA + # workflow_parameters[POPULATE_MAT_VIEWS] = {} + # workflow_parameters[INDEX_TRIPAL_DATA] = {} + + # # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation) + # run_workflow_for_current_organism.datamap = {} + + # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["genome_hda_id"]} + # run_workflow_for_current_organism.datamap[GFF_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["gff_hda_id"]} + # run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": hda_ids_list[0]["proteins_hda_id"]} + + # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["genome_hda_id"]} + # run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["gff_hda_id"]} + # run_workflow_for_current_organism.datamap[GFF_FILE_ORG2] = {"src": "hda", "id": hda_ids_list[1]["proteins_hda_id"]} + + + + # logging.info("OK: Workflow invoked") + + + + # # If input workflow is Chado_load_Tripal_synchronize.ga + # if "Chado_load_Tripal_synchronize" in str(workflow): + + # logging.info("Executing workflow 'Chado_load_Tripal_synchronize'") + + # run_workflow_for_current_organism.connect_to_instance() + # run_workflow_for_current_organism.set_get_history() + # # run_workflow_for_current_organism.get_species_history_id() + + # run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() + # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) + # run_workflow_for_current_organism.add_organism_ogs_genome_analyses() + # run_workflow_for_current_organism.get_organism_id() + # run_workflow_for_current_organism.get_genome_analysis_id() + # run_workflow_for_current_organism.get_ogs_analysis_id() + + # # 
+            #     # run_workflow_for_current_organism.tripal_synchronize_organism_analyses()
+
+            #     # Get the attributes of the instance and project data files
+            #     run_workflow_for_current_organism.get_instance_attributes()
+
+            #     # Import datasets into history and retrieve their hda IDs
+            #     # TODO: can be simplified with direct access to the folder contents via the full path (no loop required)
+            #     hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
+
+            #     # DEBUG
+            #     # run_workflow_for_current_organism.get_invocation_report(workflow_name="Chado load Tripal synchronize")
+
+            #     # Explicit workflow parameter names
+            #     GENOME_FASTA_FILE = "0"
+            #     GFF_FILE = "1"
+            #     PROTEINS_FASTA_FILE = "2"
+            #     TRANSCRIPTS_FASTA_FILE = "3"
+
+            #     LOAD_FASTA_IN_CHADO = "4"
+            #     LOAD_GFF_IN_CHADO = "5"
+            #     SYNC_ORGANISM_INTO_TRIPAL = "6"
+            #     SYNC_GENOME_ANALYSIS_INTO_TRIPAL = "7"
+            #     SYNC_OGS_ANALYSIS_INTO_TRIPAL = "8"
+            #     SYNC_FEATURES_INTO_TRIPAL = "9"
+
+            #     workflow_parameters = {}
+
+            #     workflow_parameters[GENOME_FASTA_FILE] = {}
+            #     workflow_parameters[GFF_FILE] = {}
+            #     workflow_parameters[PROTEINS_FASTA_FILE] = {}
+            #     workflow_parameters[TRANSCRIPTS_FASTA_FILE] = {}
+
+            #     workflow_parameters[LOAD_FASTA_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id,
+            #                                                 "analysis_id": run_workflow_for_current_organism.genome_analysis_id,
+            #                                                 "do_update": "true"}
+            #     # Change "do_update": "true" to "do_update": "false" in above parameters to prevent appending/updates to the fasta file in chado
+            #     # WARNING: It is safer to never update it and just change the genome/ogs versions in the config
+            #     workflow_parameters[LOAD_GFF_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id,
+            #                                               "analysis_id": run_workflow_for_current_organism.ogs_analysis_id}
+            #     workflow_parameters[SYNC_ORGANISM_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id}
+            #     workflow_parameters[SYNC_GENOME_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.ogs_analysis_id}
+            #     workflow_parameters[SYNC_OGS_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.genome_analysis_id}
+            #     workflow_parameters[SYNC_FEATURES_INTO_TRIPAL] = {"organism_id": run_workflow_for_current_organism.org_id}
+
+            #     # Datamap for input datasets - dataset source (type): ldda (LibraryDatasetDatasetAssociation)
+            #     run_workflow_for_current_organism.datamap = {}
+            #     run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]}
+            #     run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]}
+            #     run_workflow_for_current_organism.datamap[PROTEINS_FASTA_FILE] = {"src": "hda", "id": hda_ids["proteins_hda_id"]}
+            #     run_workflow_for_current_organism.datamap[TRANSCRIPTS_FASTA_FILE] = {"src": "hda", "id": hda_ids["transcripts_hda_id"]}
+
+            #     # run_workflow_for_current_organism.datamap = {}
+            #     # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id":
+            #     #                                                                 run_workflow_for_current_organism.datasets["genome_file"]}
+            #     # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda",
+            #     #                                                        "id": hda_ids["gff_hda_id"]}
+
+            #     # Ensures galaxy has had time to retrieve datasets
+            #     time.sleep(60)
+            #     # Run the Chado load Tripal sync workflow with the parameters set above
+            #     run_workflow_for_current_organism.run_workflow(workflow_path=workflow,
+            #                                                    workflow_parameters=workflow_parameters,
+            #                                                    datamap=run_workflow_for_current_organism.datamap,
+            #                                                    workflow_name="Chado load Tripal synchronize")
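run_workflow() wraps the actual Galaxy invocation elsewhere in this script; a minimal sketch of the BioBlend calls it presumably relies on (importing the .ga file, then invoking it with the step-indexed parameter dict and the datamap built above) could look like the following. The helper name and argument order are illustrative only, not this repository's API:

    def invoke_ga_workflow(gi, workflow_path, datamap, workflow_parameters, history_id):
        """Hypothetical wrapper: import a .ga file and invoke it with the datamap and
        the step-indexed parameters built above (keys are step indices as strings).
        gi is a bioblend.galaxy.GalaxyInstance."""
        wf = gi.workflows.import_workflow_from_local_path(workflow_path)
        return gi.workflows.invoke_workflow(wf["id"],
                                            inputs=datamap,
                                            params=workflow_parameters,
                                            history_id=history_id)

invoke_workflow() returns the invocation record, whose id can later be passed to show_invocation() to follow the run's progress.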
synchronize") + + # # Jbrowse creation workflow + # elif "Jbrowse" in str(workflow): + + # logging.info("Executing workflow 'Jbrowse'") + + # run_workflow_for_current_organism.connect_to_instance() + # run_workflow_for_current_organism.set_get_history() + # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) + # run_workflow_for_current_organism.get_organism_id() + # # Import datasets into history and get their hda IDs + # run_workflow_for_current_organism.import_datasets_into_history() + # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() # Note: only call this function AFTER calling "import_datasets_into_history()" + + # # Debugging + # # run_workflow_for_current_organism.get_invocation_report(workflow_name="Jbrowse") + + # GENOME_FASTA_FILE = "0" + # GFF_FILE = "1" + # ADD_JBROWSE = "2" + # ADD_ORGANISM_TO_JBROWSE = "3" + + # workflow_parameters = {} + # workflow_parameters[GENOME_FASTA_FILE] = {} + # workflow_parameters[GFF_FILE] = {} + # workflow_parameters[ADD_JBROWSE] = {} + # workflow_parameters[ADD_ORGANISM_TO_JBROWSE] = {} + + # run_workflow_for_current_organism.datamap = {} + # run_workflow_for_current_organism.datamap[GENOME_FASTA_FILE] = {"src": "hda", "id": hda_ids["genome_hda_id"]} + # run_workflow_for_current_organism.datamap[GFF_FILE] = {"src": "hda", "id": hda_ids["gff_hda_id"]} + + # # Run the jbrowse creation workflow + # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, + # workflow_parameters=workflow_parameters, + # datamap=run_workflow_for_current_organism.datamap, + # workflow_name="Jbrowse") + + # elif "Interpro" in str(workflow): + + # logging.info("Executing workflow 'Interproscan") + + # run_workflow_for_current_organism.connect_to_instance() + # run_workflow_for_current_organism.set_get_history() + # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) + # # run_workflow_for_current_organism.get_species_history_id() + + # # Get the attributes of the instance and project data files + # run_workflow_for_current_organism.get_instance_attributes() + # run_workflow.add_interproscan_analysis() + # run_workflow_for_current_organism.get_interpro_analysis_id() + + # # Import datasets into history and retrieve their hda IDs + # run_workflow_for_current_organism.import_datasets_into_history() + # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() + + # INTERPRO_FILE = "0" + # LOAD_INTERPRO_IN_CHADO = "1" + # SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL = "2" + # SYNC_FEATURES_INTO_TRIPAL = "3" + # POPULATE_MAT_VIEWS = "4" + # INDEX_TRIPAL_DATA = "5" + + # workflow_parameters = {} + # workflow_parameters[INTERPRO_FILE] = {} + # workflow_parameters[LOAD_INTERPRO_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, + # "analysis_id": run_workflow_for_current_organism.interpro_analysis_id} + # workflow_parameters[SYNC_INTERPRO_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.interpro_analysis_id} + + + # run_workflow_for_current_organism.datamap = {} + # run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": run_workflow_for_current_organism.hda_ids["interproscan_hda_id"]} + + # # Run Interproscan workflow + # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, + # workflow_parameters=workflow_parameters, + # datamap=run_workflow_for_current_organism.datamap, + # workflow_name="Interproscan") + + # elif "Blast" in str(workflow): + + # 
logging.info("Executing workflow 'Blast_Diamond") + + # run_workflow_for_current_organism.connect_to_instance() + # run_workflow_for_current_organism.set_get_history() + # run_workflow_for_current_organism.install_changesets_revisions_from_workflow(workflow_path=workflow) + # # run_workflow_for_current_organism.get_species_history_id() + + # # Get the attributes of the instance and project data files + # run_workflow_for_current_organism.get_instance_attributes() + # run_workflow_for_current_organism.add_blastp_diamond_analysis() + # run_workflow_for_current_organism.get_blastp_diamond_analysis_id() + + # # Import datasets into history and retrieve their hda IDs + # run_workflow_for_current_organism.import_datasets_into_history() + # hda_ids = run_workflow_for_current_organism.get_datasets_hda_ids() + + # BLAST_FILE = "0" + # LOAD_BLAST_IN_CHADO = "1" + # SYNC_BLAST_ANALYSIS_INTO_TRIPAL = "2" + # SYNC_FEATURES_INTO_TRIPAL = "3" + # POPULATE_MAT_VIEWS = "4" + # INDEX_TRIPAL_DATA = "5" + + # workflow_parameters = {} + # workflow_parameters[INTERPRO_FILE] = {} + # workflow_parameters[LOAD_BLAST_IN_CHADO] = {"organism": run_workflow_for_current_organism.org_id, + # "analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} + # workflow_parameters[SYNC_BLAST_ANALYSIS_INTO_TRIPAL] = {"analysis_id": run_workflow_for_current_organism.blast_diamond_analysis_id} + + # run_workflow_for_current_organism.datamap = {} + # run_workflow_for_current_organism.datamap[INTERPRO_FILE] = {"src": "hda", "id": hda_ids["interproscan_hda_id"]} + + # # Run Interproscan workflow + # run_workflow_for_current_organism.run_workflow(workflow_path=workflow, + # workflow_parameters=workflow_parameters, + # datamap=run_workflow_for_current_organism.datamap, + # workflow_name="Interproscan") diff --git a/speciesData.py b/speciesData.py index caccfda..2dbd952 100755 --- a/speciesData.py +++ b/speciesData.py @@ -59,17 +59,24 @@ class SpeciesData: self.full_name_lowercase = self.full_name.lower() self.abbreviation = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"]) + # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library + self.org_id = None + self.genome_analysis_id = None + self.ogs_analysis_id = None + self.interproscan_analysis_id = None + self.blastp_diamond_analysis_id = None + self.blastx_diamond_analysis_id = None + self.hda_ids = {} self.instance_url = None self.instance = None self.history_id = None self.library = None self.library_id = None + self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) self.main_dir = None self.species_dir = None - self.org_id = None - self.genome_analysis_id = None - self.ogs_analysis_id = None + self.tool_panel = None self.datasets = dict() self.datasets_name = dict() -- GitLab