From ca829bee8283713ac1125ecb9bd10efdefa0b354 Mon Sep 17 00:00:00 2001
From: Troubardours <arthur.lebars@gmail.com>
Date: Mon, 26 Jul 2021 17:10:36 +0200
Subject: [PATCH] New blast+interproscan branch (merging issues with refactor_blast_interproscan_workflow)

---
 constants_phaeo.py                            |   80 +-
 gga_run_workflow_phaeo_blast_interpro.py      | 1607 ++++-------------
 ...load_blast_interproscan_results_1org_v1.ga |  611 +++++++
 ...load_blast_interproscan_results_2org_v1.ga | 1063 +++++++++++
 4 files changed, 2032 insertions(+), 1329 deletions(-)
 create mode 100644 workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_1org_v1.ga
 create mode 100644 workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_2org_v1.ga

diff --git a/constants_phaeo.py b/constants_phaeo.py
index 57d2bd5..74afc10 100644
--- a/constants_phaeo.py
+++ b/constants_phaeo.py
@@ -8,7 +8,8 @@ import constants

 WORKFLOWS_PATH = "workflows_phaeoexplorer/"

-WF_LOAD_GFF_JB_1_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v7.ga"
+# Workflow to load the genome and annotations into chado, create/update organisms in a jbrowse instance
+WF_LOAD_GFF_JB_1_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v6.ga"
 WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME = "0"
 WF_LOAD_GFF_JB_1_ORG_INPUT_GFF = "1"
 WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS = "2"
@@ -16,37 +17,64 @@ WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA = "3"
 WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE = "4"
 WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF = "5"
 WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER = "6"
-WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_FEATURE = "7"
-WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG = "8"
-WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS = "9"
-WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS = "10"
-WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS = "11"
-WF_LOAD_GFF_JB_1_ORG_STEP_INDEX = "12"
+WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC = "7"
+WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS = "8"
+WF_LOAD_GFF_JB_1_ORG_STEP_INDEX = "9"

-WF_LOAD_GFF_JB_2_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v7.ga"
-WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1 = "4"
-WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1 = "2"
-WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1 = "0"
-WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2 = "1"
-WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2 = "3"
+WF_LOAD_GFF_JB_2_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v6.ga"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1 = "0"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1 = "1"
+WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1 = "2"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2 = "3"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2 = "4"
 WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2 = "5"
-WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1 = "7"
-WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1 = "8"
-WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2 = "6"
+WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1 = "6"
+WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1 = "7"
+WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2 = "8"
 WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1 = "9"
 WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER = "10"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG1 = "11"
+WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG1 = "11"
 WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2 = "12"
 WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2 = "13"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG2 = "14"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG1 = "15"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG2 = "16"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1 = "17"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1 = "18"
-WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2 = "19"
"19" -WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2 = "20" -WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "21" -WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "22" +WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG2 = "14" +WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "15" +WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "16" + +# Workflow to load blastp, blastx and interproscan into chado and tripal +WF_BLAST_INTERPRO_1_ORG_FILE = "Galaxy-Workflow-load_blast_interproscan_results_1org_v1.ga" +WF_BLAST_INTERPRO_1_ORG_INPUT_BLASTP = "0" +WF_BLAST_INTERPRO_1_ORG_INPUT_BLASTX = "1" +WF_BLAST_INTERPRO_1_ORG_INPUT_INTERPRO = "2" +WF_BLAST_INTERPRO_1_ORG_LOAD_BLASTP = "3" +WF_BLAST_INTERPRO_1_ORG_LOAD_BLASTX = "4" +WF_BLAST_INTERPRO_1_ORG_LOAD_INTERPRO = "5" +WF_BLAST_INTERPRO_1_ORG_POPULATE_MAT_VIEWS = "6" +WF_BLAST_INTERPRO_1_ORG_INDEX_TRIPAL_DATA = "7" +WF_BLAST_INTERPRO_1_ORG_SYNC_BLASTP_ANALYSIS = "8" +WF_BLAST_INTERPRO_1_ORG_SYNC_BLASTX_ANALYSIS = "9" +WF_BLAST_INTERPRO_1_ORG_SYNC_INTERPRO_ANALYSIS = "10" + +WF_BLAST_INTERPRO_2_ORG_FILE = "Galaxy-Workflow-load_blast_interproscan_results_2org_v1.ga" +WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTP_ORG1 = "0" +WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTP_ORG2 = "1" +WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTX_ORG1 = "2" +WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTX_ORG2 = "3" +WF_BLAST_INTERPRO_2_ORG_INPUT_INTERPRO_ORG1 = "4" +WF_BLAST_INTERPRO_2_ORG_INPUT_INTERPRO_ORG2 = "5" +WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTP_ORG1 = "6" +WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTP_ORG2 = "7" +WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTX_ORG1 = "8" +WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTX_ORG2 = "9" +WF_BLAST_INTERPRO_2_ORG_LOAD_INTERPRO_ORG1 = "10" +WF_BLAST_INTERPRO_2_ORG_LOAD_INTERPRO_ORG2 = "11" +WF_BLAST_INTERPRO_2_ORG_POPULATE_MAT_VIEWS = "12" +WF_BLAST_INTERPRO_2_ORG_INDEX_TRIPAL_DATA = "13" +WF_BLAST_INTERPRO_2_ORG_SYNC_BLASTP_ANALYSIS_ORG1 = "14" +WF_BLAST_INTERPRO_2_ORG_SYNC_BLASTX_ANALYSIS_ORG1 = "15" +WF_BLAST_INTERPRO_2_ORG_SYNC_INTERPRO_ANALYSIS_ORG1 = "16" +WF_BLAST_INTERPRO_2_ORG_SYNC_BLASTP_ANALYSIS_ORG2 = "17" +WF_BLAST_INTERPRO_2_ORG_SYNC_BLASTX_ANALYSIS_ORG2 = "18" +WF_BLAST_INTERPRO_2_ORG_SYNC_INTERPRO_ANALYSIS_ORG2 = "19" ### Galaxy tools diff --git a/gga_run_workflow_phaeo_blast_interpro.py b/gga_run_workflow_phaeo_blast_interpro.py index 85a565b..30579fc 100644 --- a/gga_run_workflow_phaeo_blast_interpro.py +++ b/gga_run_workflow_phaeo_blast_interpro.py @@ -7,74 +7,42 @@ import os import logging import sys import json -import time - -from bioblend import galaxy +from bioblend.galaxy.objects import GalaxyInstance import utilities import utilities_bioblend -import species_data import constants import constants_phaeo import gga_run_workflow_phaeo -class OrgWorkflowParamJbrowse(gga_run_workflow_phaeo.OrgWorkflowParam): - - def __init__(self, genus_species, strain_sex, genus_uppercase, chado_species_name, full_name, species_folder_name, - org_id, history_id, instance, genome_analysis_id=None, ogs_analysis_id=None, blastp_analysis_id=None, interpro_analysis_id=None, - genome_hda_id=None, gff_hda_id=None, transcripts_hda_id=None, proteins_hda_id=None, blastp_hda_id=None, blastx_hda_id=None, interproscan_hda_id=None): - self.genus_species = genus_species - self.strain_sex = strain_sex - self.genus_uppercase = genus_uppercase - self.chado_species_name = chado_species_name, - self.full_name = full_name - self.species_folder_name = species_folder_name - self.org_id = org_id - self.genome_analysis_id = genome_analysis_id - self.ogs_analysis_id = ogs_analysis_id +class 
diff --git a/gga_run_workflow_phaeo_blast_interpro.py b/gga_run_workflow_phaeo_blast_interpro.py
index 85a565b..30579fc 100644
--- a/gga_run_workflow_phaeo_blast_interpro.py
+++ b/gga_run_workflow_phaeo_blast_interpro.py
@@ -7,74 +7,43 @@ import os
 import logging
 import sys
 import json
-import time
-
-from bioblend import galaxy
+import bioblend
+from bioblend.galaxy.objects import GalaxyInstance

 import utilities
 import utilities_bioblend
-import species_data
 import constants
 import constants_phaeo
 import gga_run_workflow_phaeo


-class OrgWorkflowParamJbrowse(gga_run_workflow_phaeo.OrgWorkflowParam):
-
-    def __init__(self, genus_species, strain_sex, genus_uppercase, chado_species_name, full_name, species_folder_name,
-                 org_id, history_id, instance, genome_analysis_id=None, ogs_analysis_id=None, blastp_analysis_id=None, interpro_analysis_id=None,
-                 genome_hda_id=None, gff_hda_id=None, transcripts_hda_id=None, proteins_hda_id=None, blastp_hda_id=None, blastx_hda_id=None, interproscan_hda_id=None):
-        self.genus_species = genus_species
-        self.strain_sex = strain_sex
-        self.genus_uppercase = genus_uppercase
-        self.chado_species_name = chado_species_name,
-        self.full_name = full_name
-        self.species_folder_name = species_folder_name
-        self.org_id = org_id
-        self.genome_analysis_id = genome_analysis_id
-        self.ogs_analysis_id = ogs_analysis_id
+class OrgWorkflowParamBlastInterproscan(gga_run_workflow_phaeo.OrgWorkflowParam):
+
+    def __init__(self, genus_uppercase, chado_species_name, full_name, species_folder_name, org_id, history_id,
+                 instance, blastp_analysis_id=None, blastx_analysis_id=None, interpro_analysis_id=None,
+                 blastp_hda_id=None, blastx_hda_id=None, interproscan_hda_id=None):
         self.blastp_analysis_id = blastp_analysis_id
+        self.blastx_analysis_id = blastx_analysis_id
         self.interpro_analysis_id = interpro_analysis_id
-        self.history_id = history_id
-        self.instance = instance
-        self.genome_hda_id = genome_hda_id
-        self.gff_hda_id = gff_hda_id
-        self.transcripts_hda_id = transcripts_hda_id
-        self.proteins_hda_id = proteins_hda_id
         self.blastp_hda_id = blastp_hda_id
         self.blastx_hda_id = blastx_hda_id
         self.interproscan_hda_id = interproscan_hda_id
-        super().__init__(genus_species, strain_sex, genus_uppercase, chado_species_name, full_name, species_folder_name,
-                         org_id, history_id, instance)
-
-    def check_param_for_workflow_load_fasta_gff_jbrowse(self):
-        params = [self.genus_species, self.strain_sex, self.genus_uppercase,
-                  self.chado_species_name, self.full_name,
-                  self.species_folder_name, self.org_id,
-                  self.history_id, self.instance,
-                  self.genome_analysis_id, self.ogs_analysis_id,
-                  self.genome_hda_id, self.gff_hda_id, self.transcripts_hda_id, self.proteins_hda_id]
-        utilities_bioblend.check_wf_param(self.full_name, params)
-
-    def check_param_for_workflow_blastp(self):
-        params = [self.genus_species, self.strain_sex, self.genus_uppercase,
-                  self.chado_species_name, self.full_name,
-                  self.species_folder_name, self.org_id,
-                  self.history_id, self.instance,
+        super().__init__(genus_uppercase, chado_species_name, full_name, species_folder_name,
+                         org_id, history_id, instance)
+
+    def check_param(self):
+        params = [self.genus_uppercase,
+                  self.chado_species_name,
+                  self.full_name,
+                  self.species_folder_name,
+                  self.org_id,
+                  self.history_id,
+                  self.instance,
                   self.blastp_analysis_id,
                   self.blastp_hda_id]
         utilities_bioblend.check_wf_param(self.full_name, params)

-    def check_param_for_workflow_interpro(self):
-        params = [self.genus_species, self.strain_sex, self.genus_uppercase,
-                  self.chado_species_name, self.full_name,
-                  self.species_folder_name, self.org_id,
-                  self.history_id, self.instance,
-                  self.interpro_analysis_id,
-                  self.interproscan_hda_id]
-        utilities_bioblend.check_wf_param(self.full_name, params)
-

-class RunWorkflow(species_data.SpeciesData):
+class RunWorkflowBlastInterpro(gga_run_workflow_phaeo.RunWorkflow):
     """
     Run a workflow into the galaxy instance's history of a given species
@@ -89,29 +57,30 @@ class RunWorkflow(species_data.SpeciesData):

         super().__init__(parameters_dictionary)

-        self.chado_species_name = " ".join(utilities.filter_empty_not_empty_items(
-            [self.species, self.strain, self.sex])["not_empty"])
+        self.chado_species_name = " ".join(utilities.filter_empty_not_empty_items([self.species,
+                                                                                   self.strain,
+                                                                                   self.sex])["not_empty"])
         self.abbreviation = self.genus_uppercase[0] + ". " + self.chado_species_name
" + self.chado_species_name self.common = self.name - if not self.common_name is None and self.common_name != "": + if self.common_name is not None and self.common_name != "": self.common = self.common_name self.history_name = str(self.genus_species) - self.genome_analysis_name = "genome v{0} of {1}".format(self.genome_version, self.full_name) - self.genome_analysis_programversion = "genome v{0}".format(self.genome_version) - self.genome_analysis_sourcename = self.full_name + self.blastp_analysis_name = "Diamond blastp on OGS{0} of {1}".format(self.ogs_version, self.full_name) + self.blastp_analysis_programversion = "Diamond blastp on OGS{0}".format(self.ogs_version) + self.blastp_analysis_sourcename = self.full_name + + self.blastx_analysis_name = "Diamond blastx on OGS{0} of {1}".format(self.ogs_version, self.full_name) + self.blastx_analysis_programversion = "Diamond blastx on OGS{0}".format(self.ogs_version) + self.blastx_analysis_sourcename = self.full_name - self.ogs_analysis_name = "OGS{0} of {1}".format(self.ogs_version, self.full_name) - self.ogs_analysis_programversion = "OGS{0}".format(self.ogs_version) - self.ogs_analysis_sourcename = self.full_name + self.interpro_analysis_name = "InterProScan on OGS{0} of {1}".format(self.ogs_version, self.full_name) + self.interpro_analysis_programversion = "InterProScan on OGS{0}".format(self.ogs_version) + self.interpro_analysis_sourcename = self.full_name - self.genome_hda_id = None - self.gff_hda_id = None - self.transcripts_hda_id = None - self.proteins_hda_id = None self.blastp_hda_id = None self.blastx_hda_id = None self.interproscan_hda_id = None @@ -128,8 +97,9 @@ class RunWorkflow(species_data.SpeciesData): logging.info("Validating installed individual tools versions and changesets") # Verify that the add_organism and add_analysis versions are correct in the instance - # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend - # except for workflows (.ga) that already contain the changeset revisions inside the steps ids + # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get + # the wanted changeset of a non installed tool via bioblend except for workflows (.ga) + # that already contain the changeset revisions inside the steps ids utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.GET_ORGANISMS_TOOL_ID, version=constants_phaeo.GET_ORGANISMS_TOOL_VERSION, @@ -141,11 +111,6 @@ class RunWorkflow(species_data.SpeciesData): changeset_revision=constants_phaeo.GET_ANALYSES_TOOL_CHANGESET_REVISION, instance=self.instance) - utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.ADD_ORGANISM_TOOL_ID, - version=constants_phaeo.ADD_ORGANISM_TOOL_VERSION, - changeset_revision=constants_phaeo.ADD_ORGANISM_TOOL_CHANGESET_REVISION, - instance=self.instance) - utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.ADD_ANALYSIS_TOOL_ID, version=constants_phaeo.ADD_ANALYSIS_TOOL_VERSION, changeset_revision=constants_phaeo.ADD_ANALYSIS_TOOL_CHANGESET_REVISION, @@ -179,16 +144,54 @@ class RunWorkflow(species_data.SpeciesData): return analysis_id - def sync_analysis(self, analysis_id): + def install_individual_tools(self): + """ + This function is used to verify that installed tools called outside workflows have the correct versions and changesets + If it finds versions don't match, will install the correct version + changeset in the instance + Doesn't do anything if versions match - 
-        time.sleep(60)
-        utilities_bioblend.run_tool(
-            instance=self.instance,
-            tool_id=constants_phaeo.ANALYSIS_SYNC_TOOL_ID,
-            history_id=self.history_id,
-            tool_inputs={"analysis_id": analysis_id})
+
+        :return:
+        """
+
+        logging.info("Validating installed individual tools versions and changesets")
+
+        # Verify that the add_organism and add_analysis versions are correct in the instance
+        # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
+        # except for workflows (.ga) that already contain the changeset revisions inside the steps ids
+
+        utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.GET_ORGANISMS_TOOL_ID,
+                                                       version=constants_phaeo.GET_ORGANISMS_TOOL_VERSION,
+                                                       changeset_revision=constants_phaeo.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
+                                                       instance=self.instance)
+
+        utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.GET_ANALYSES_TOOL_ID,
+                                                       version=constants_phaeo.GET_ANALYSES_TOOL_VERSION,
+                                                       changeset_revision=constants_phaeo.GET_ANALYSES_TOOL_CHANGESET_REVISION,
+                                                       instance=self.instance)
+
+        utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.ADD_ORGANISM_TOOL_ID,
+                                                       version=constants_phaeo.ADD_ORGANISM_TOOL_VERSION,
+                                                       changeset_revision=constants_phaeo.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
+                                                       instance=self.instance)
+
+        utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.ADD_ANALYSIS_TOOL_ID,
+                                                       version=constants_phaeo.ADD_ANALYSIS_TOOL_VERSION,
+                                                       changeset_revision=constants_phaeo.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
+                                                       instance=self.instance)
+
+        utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.ANALYSIS_SYNC_TOOL_ID,
+                                                       version=constants_phaeo.ANALYSIS_SYNC_TOOL_VERSION,
+                                                       changeset_revision=constants_phaeo.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
+                                                       instance=self.instance)
+
+        utilities_bioblend.install_repository_revision(tool_id=constants_phaeo.ORGANISM_SYNC_TOOL_ID,
+                                                       version=constants_phaeo.ORGANISM_SYNC_TOOL_VERSION,
+                                                       changeset_revision=constants_phaeo.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
+                                                       instance=self.instance)

-    def add_organism_and_sync(self):
+        logging.info("Success: individual tools versions and changesets validated")
+
+    def get_organism(self):

         get_organisms_tool_dataset = utilities_bioblend.run_tool_and_download_single_output_dataset(
             instance=self.instance,
@@ -207,25 +210,8 @@ class RunWorkflow(species_data.SpeciesData):
             org_id = str(org_dict["organism_id"])  # id needs to be a str to be recognized by chado tools

         if org_id is None:
-            add_organism_tool_dataset = utilities_bioblend.run_tool_and_download_single_output_dataset(
-                instance=self.instance,
-                tool_id=constants_phaeo.ADD_ORGANISM_TOOL_ID,
-                history_id=self.history_id,
-                tool_inputs={"abbr": self.abbreviation,
-                             "genus": self.genus_uppercase,
-                             "species": self.chado_species_name,
-                             "common": self.common})
-            organism_dict = json.loads(add_organism_tool_dataset)
-            org_id = str(organism_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
-
-            # Synchronize newly added organism in Tripal
-            logging.info("Synchronizing organism %s in Tripal" % self.full_name)
-            time.sleep(60)
-            utilities_bioblend.run_tool(
-                instance=self.instance,
-                tool_id=constants_phaeo.ORGANISM_SYNC_TOOL_ID,
-                history_id=self.history_id,
-                tool_inputs={"organism_id": org_id})
+            logging.critical("Cannot find required organism in Chado database for %s" % self.full_name)
+            sys.exit()

         return org_id

@@ -262,283 +248,34 @@ class RunWorkflow(species_data.SpeciesData):
                 sourcename=analysis_sourcename
             )

-        # Synchronize analysis in Tripal
-        logging.info("Synchronizing analysis %s in Tripal" % analysis_name)
-        self.sync_analysis(analysis_id=analysis_id)
-
         return analysis_id

-    def add_organism_blastp_analysis(self):
-        """
-        Add OGS and genome vX analyses to Chado database
-        Required for Chado Load Tripal Synchronize workflow (which should be ran as the first workflow)
-        Called outside workflow for practical reasons (Chado add doesn't have an input link for analysis or organism)
-
-        :return:
-
-        """
-
-        self.set_galaxy_instance()
-        self.set_history()
-
-        tool_version = "2.3.4+galaxy0"
-
-        get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
-
-        get_organisms = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
-            history_id=self.history_id,
-            tool_inputs={})
-
-        time.sleep(10)  # Ensure the tool has had time to complete
-        org_outputs = get_organisms["outputs"]  # Outputs from the get_organism tool
-        org_job_out_id = org_outputs[0]["id"]  # ID of the get_organism output dataset (list of dicts)
-        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)  # Download the dataset
-        org_output = json.loads(org_json_output)  # Turn the dataset into a list for parsing
-
-        org_id = None
-
-        # Look up list of outputs (dictionaries)
-        for organism_output_dict in org_output:
-            if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex):
-                correct_organism_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
-                org_id = str(correct_organism_id)
-
-
-        if org_id is None:
-            add_org_job = self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
-                history_id=self.history_id,
-                tool_inputs={"abbr": self.abbreviation,
-                             "genus": self.genus_uppercase,
-                             "species": self.chado_species_name,
-                             "common": self.common})
-            org_job_out_id = add_org_job["outputs"][0]["id"]
-            org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
-            org_output = json.loads(org_json_output)
-            org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
-
-        # Synchronize newly added organism in Tripal
-        logging.info("Synchronizing organism %s in Tripal" % self.full_name)
-        time.sleep(60)
-        org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
-                                                history_id=self.history_id,
-                                                tool_inputs={"organism_id": org_id})
-
-
-        get_analyses = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version,
-            history_id=self.history_id,
-            tool_inputs={})
-
-        time.sleep(10)
-        analysis_outputs = get_analyses["outputs"]
-        analysis_job_out_id = analysis_outputs[0]["id"]
-        analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
-        analysis_output = json.loads(analysis_json_output)
-
-        blastp_analysis_id = None
-
-        # Look up list of outputs (dictionaries)
-        for analysis_output_dict in analysis_output:
-            if analysis_output_dict["name"] == "Diamond on " + self.full_name_lowercase + " OGS" + self.ogs_version:
-                blastp_analysis_id = str(analysis_output_dict["analysis_id"])
str(analysis_output_dict["analysis_id"]) - - - if blastp_analysis_id is None: - add_blast_analysis_job = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version, - history_id=self.history_id, - tool_inputs={"name": "Diamond on " + self.full_name_lowercase + " OGS" + self.ogs_version, - "program": "Performed by Genoscope", - "programversion": str(self.sex + " OGS" + self.ogs_version), - "sourcename": "Genoscope", - "date_executed": self.date}) - analysis_outputs = add_blast_analysis_job["outputs"] - analysis_job_out_id = analysis_outputs[0]["id"] - analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) - analysis_output = json.loads(analysis_json_output) - blastp_analysis_id = str(analysis_output["analysis_id"]) - - # Synchronize blastp analysis - logging.info("Synchronizing Diamong blastp OGS%s analysis in Tripal" % self.ogs_version) - time.sleep(60) - blastp_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", - history_id=self.history_id, - tool_inputs={"analysis_id": blastp_analysis_id}) - - # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) - return {"org_id": org_id, "blastp_analysis_id": blastp_analysis_id} - - def add_organism_interproscan_analysis(self): - """ - Add OGS and genome vX analyses to Chado database - Required for Chado Load Tripal Synchronize workflow (which should be ran as the first workflow) - Called outside workflow for practical reasons (Chado add doesn't have an input link for analysis or organism) - - :return: - - """ - - self.set_galaxy_instance() - self.set_history() - - tool_version = "2.3.4+galaxy0" - - get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0") - - get_organisms = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version, - history_id=self.history_id, - tool_inputs={}) - - time.sleep(10) # Ensure the tool has had time to complete - org_outputs = get_organisms["outputs"] # Outputs from the get_organism tool - org_job_out_id = org_outputs[0]["id"] # ID of the get_organism output dataset (list of dicts) - org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) # Download the dataset - org_output = json.loads(org_json_output) # Turn the dataset into a list for parsing - - org_id = None - - # Look up list of outputs (dictionaries) - for organism_output_dict in org_output: - if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex): - correct_organism_id = str(organism_output_dict["organism_id"]) # id needs to be a str to be recognized by chado tools - org_id = str(correct_organism_id) - - - if org_id is None: - add_org_job = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version, - history_id=self.history_id, - tool_inputs={"abbr": self.abbreviation, - "genus": self.genus_uppercase, - "species": self.chado_species_name, - "common": self.common}) - org_job_out_id = add_org_job["outputs"][0]["id"] - org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) - org_output = json.loads(org_json_output) - org_id 
= str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools - - # Synchronize newly added organism in Tripal - logging.info("Synchronizing organism %s in Tripal" % self.full_name) - time.sleep(60) - org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0", - history_id=self.history_id, - tool_inputs={"organism_id": org_id}) - - - get_analyses = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version, - history_id=self.history_id, - tool_inputs={}) - - time.sleep(10) - analysis_outputs = get_analyses["outputs"] - analysis_job_out_id = analysis_outputs[0]["id"] - analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) - analysis_output = json.loads(analysis_json_output) - - interpro_analysis_id = None - - # Look up list of outputs (dictionaries) - for analysis_output_dict in analysis_output: - if analysis_output_dict["name"] == "Interproscan on " + self.full_name_lowercase + " OGS" + self.ogs_version: - interpro_analysis_id = str(analysis_output_dict["analysis_id"]) - - - if interpro_analysis_id is None: - add_interproscan_analysis_job = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version, - history_id=self.history_id, - tool_inputs={"name": "Interproscan on " + self.full_name_lowercase + " OGS" + self.ogs_version, - "program": "Performed by Genoscope", - "programversion": str(self.sex + " OGS" + self.ogs_version), - "sourcename": "Genoscope", - "date_executed": self.date}) - analysis_outputs = add_interproscan_analysis_job["outputs"] - analysis_job_out_id = analysis_outputs[0]["id"] - analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id) - analysis_output = json.loads(analysis_json_output) - interpro_analysis_id = str(analysis_output["analysis_id"]) - - # Synchronize blastp analysis - logging.info("Synchronizing Diamong blastp OGS%s analysis in Tripal" % self.ogs_version) - time.sleep(60) - interproscan_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", - history_id=self.history_id, - tool_inputs={"analysis_id": interpro_analysis_id}) - - # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) - return({"org_id": org_id, "interpro_analysis_id": interpro_analysis_id}) - - - def get_interpro_analysis_id(self): - """ - """ - - # Get interpro ID - interpro_analysis = self.instance.tools.run_tool( - tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.4+galaxy0", - history_id=self.history_id, - tool_inputs={"name": "InterproScan on OGS%s" % self.ogs_version}) - interpro_analysis_job_out = interpro_analysis["outputs"][0]["id"] - interpro_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=interpro_analysis_job_out) - try: - interpro_analysis_output = json.loads(interpro_analysis_json_output)[0] - self.interpro_analysis_id = str(interpro_analysis_output["analysis_id"]) - except IndexError as exc: - logging.critical("No matching InterproScan analysis exists in the instance's chado database") - sys.exit(exc) - - return self.interpro_analysis_id - - - def get_invocation_report(self, workflow_name): - """ - Debugging method for workflows - - Simply logs and 
-        the instance via the API)
-
-        :param workflow_name:
-        :return:
-        """
-
-        workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name)
-        workflow_id = workflow_attributes[1]["id"]  # Most recently imported workflow (index 1 in the list)
-        invocations = self.instance.workflows.get_invocations(workflow_id=workflow_id)
-        invocation_id = invocations[1]["id"]  # Most recent invocation
-        invocation_report = self.instance.invocations.get_invocation_report(invocation_id=invocation_id)
-
-        logging.debug(invocation_report)
-
-        return invocation_report
-
-    def import_datasets_into_history(self):
+    def import_datasets_into_history(self, config):
         """
         Find datasets in a library, get their ID and import them into the current history if they are not already

         :return:
         """

-        genome_ldda_id = None
-        transcripts_ldda_id = None
-        proteins_ldda_id = None
-        gff_ldda_id = None
         interpro_ldda_id = None
         blastp_ldda_id = None
         blastx_ldda_id = None

-        genome_hda_id = None
-        gff_hda_id = None
-        transcripts_hda_id = None
-        proteins_hda_id = None
         blastp_hda_id = None
         blastx_hda_id = None
         interproscan_hda_id = None

-        folder_dict_list = self.instance.libraries.get_folders(library_id=str(self.library_id))
+        gio = GalaxyInstance(url=self.instance_url,
+                             email=config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+                             password=config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
+                             )
+        prj_lib = gio.libraries.get_previews(constants.GALAXY_LIBRARY_NAME)
+        if len(prj_lib) == 1:
+            library_id = prj_lib[0].id
+        else:
+            logging.critical("Found no library (or several libraries) named '%s'" % constants.GALAXY_LIBRARY_NAME)
+            sys.exit()
+        folder_dict_list = self.instance.libraries.get_folders(library_id=str(library_id))

         folders_id_dict = {}

@@ -548,14 +285,6 @@ class RunWorkflow(species_data.SpeciesData):
         # Iterating over the folders to find datasets and map datasets to their IDs
         for folder_name, folder_id in folders_id_dict.items():
-            if folder_name == "/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version):
-                sub_folder_content = self.instance.folders.show_folder(folder_id=folder_id, contents=True)
-                for value in sub_folder_content.values():
-                    for e in value:
-                        if type(e) == dict:
-                            if e["name"].endswith(self.genome_filename):
-                                genome_ldda_id = e["ldda_id"]
-
             if folder_name == "/annotation/{0}/OGS{1}".format(self.species_folder_name, self.ogs_version):
                 sub_folder_content = self.instance.folders.show_folder(folder_id=folder_id, contents=True)
                 for value in sub_folder_content.values():
                     for e in value:
                         if type(e) == dict:
                             ldda_name = e["name"]
                             ldda_id = e["ldda_id"]
-                            if ldda_name.endswith(self.transcripts_filename):
-                                transcripts_ldda_id = ldda_id
-                            elif ldda_name.endswith(self.proteins_filename):
-                                proteins_ldda_id = ldda_id
-                            elif ldda_name.endswith(self.gff_filename):
-                                gff_ldda_id = ldda_id
-                            elif ldda_name.endswith(self.interpro_filename):
+                            if ldda_name.endswith(self.interpro_filename):
                                 interpro_ldda_id = ldda_id
                             elif ldda_name.endswith(self.blastp_filename):
                                 blastp_ldda_id = ldda_id
@@ -583,7 +306,7 @@ class RunWorkflow(species_data.SpeciesData):
             hda_id = hda["id"]
             if hda_name == self.genome_filename:
                 genome_hda_id = hda_id
-            if hda_name ==  self.gff_filename:
+            if hda_name == self.gff_filename:
                 gff_hda_id = hda_id
             if hda_name == self.transcripts_filename:
                 transcripts_hda_id = hda_id
@@ -599,220 +322,117 @@ class RunWorkflow(species_data.SpeciesData):
         # Import each dataset into history if it is not imported
         logging.debug("Uploading datasets into history %s" % self.history_id)

-        if genome_hda_id is None:
-            genome_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=genome_ldda_id)
-            genome_hda_id = genome_dataset_upload["id"]
-        if gff_hda_id is None:
-            gff_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=gff_ldda_id)
-            gff_hda_id = gff_dataset_upload["id"]
-        if proteins_hda_id is None:
-            proteins_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=proteins_ldda_id)
-            proteins_hda_id = proteins_dataset_upload["id"]
-        if transcripts_hda_id is None:
-            transcripts_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=transcripts_ldda_id)
-            transcripts_hda_id = transcripts_dataset_upload["id"]
         if interproscan_hda_id is None:
             try:
-                interproscan_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=interpro_ldda_id)
+                interproscan_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id,
+                                                                                                  lib_dataset_id=interpro_ldda_id)
                 interproscan_hda_id = interproscan_dataset_upload["id"]
-            except Exception as exc:
-                logging.debug("Interproscan file not found in library (history: {0})".format(self.history_id))
+            except bioblend.ConnectionError as exc:
+                logging.debug("Interproscan file not found in library (error: {0})".format(exc))
         if blastp_hda_id is None:
             try:
-                blastp_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=blastp_ldda_id)
+                blastp_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id,
+                                                                                            lib_dataset_id=blastp_ldda_id)
                 blastp_hda_id = blastp_dataset_upload["id"]
-            except Exception as exc:
-                logging.debug("blastp file not found in library (history: {0})".format(self.history_id))
+            except bioblend.ConnectionError as exc:
+                logging.debug("blastp file not found in library (error: {0})".format(exc))
         if blastx_hda_id is None:
             try:
-                blastx_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=blastx_ldda_id)
+                blastx_dataset_upload = self.instance.histories.upload_dataset_from_library(history_id=self.history_id,
+                                                                                            lib_dataset_id=blastx_ldda_id)
                 blastx_hda_id = blastx_dataset_upload["id"]
-            except Exception as exc:
-                logging.debug("blastp file not found in library (history: {0})".format(self.history_id))
+            except bioblend.ConnectionError as exc:
+                logging.debug("blastx file not found in library (error: {0})".format(exc))

-        self.genome_hda_id = genome_hda_id
-        self.gff_hda_id = gff_hda_id
-        self.transcripts_hda_id = transcripts_hda_id
-        self.proteins_hda_id = proteins_hda_id
         self.blastp_hda_id = blastp_hda_id
         self.blastx_hda_id = blastx_hda_id
         self.interproscan_hda_id = interproscan_hda_id
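The three guarded uploads above share one pattern; condensed here as a sketch (the helper name upload_if_missing is the editor's, not the patch's):

    import logging

    import bioblend

    def upload_if_missing(instance, history_id, hda_id, ldda_id, label):
        """Import a library dataset into the history unless it is already there.

        Returns the history dataset ID, or None when the library has no such
        dataset (e.g. no InterProScan results exist for this organism).
        """
        if hda_id is not None:
            return hda_id  # already present in the history
        try:
            upload = instance.histories.upload_dataset_from_library(history_id=history_id,
                                                                    lib_dataset_id=ldda_id)
            return upload["id"]
        except bioblend.ConnectionError as exc:
            logging.debug("%s file not found in library (error: %s)", label, exc)
            return None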


-def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
-    """
+
+def prepare_history_and_get_wf_param(sp_dict_list, main_dir, config):
+    """
+    """

-    run_workflow_for_current_organism = RunWorkflow(parameters_dictionary=sp_dict)
+    all_org_wf_param_dict = {}
+    for sp_dict in sp_dict_list:

-    # Verifying the galaxy container is running
-    if not utilities_bioblend.check_galaxy_state(network_name=run_workflow_for_current_organism.genus_species,
-                                                 script_dir=run_workflow_for_current_organism.script_dir):
-        logging.critical("The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.genus_species)
-        sys.exit()
+        run_workflow_for_current_organism = RunWorkflowBlastInterpro(parameters_dictionary=sp_dict)

-    else:
+        # Verifying the galaxy container is running
+        if not utilities_bioblend.check_galaxy_state(network_name=run_workflow_for_current_organism.genus_species,
+                                                     script_dir=run_workflow_for_current_organism.script_dir):
+            logging.critical(
+                "The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.genus_species)
+            sys.exit()

-        # Setting some of the instance attributes
-        run_workflow_for_current_organism.main_dir = main_dir
-        run_workflow_for_current_organism.species_dir = os.path.join(run_workflow_for_current_organism.main_dir,
-                                                                     run_workflow_for_current_organism.genus_species +
-                                                                     "/")
-
-        # Parse the config yaml file
-        run_workflow_for_current_organism.config = config
-        # Set the instance url attribute --> TODO: the localhost rule in the docker-compose still doesn't work on scratchgmodv1
-        run_workflow_for_current_organism.instance_url = "http://localhost:{0}/sp/{1}/galaxy/".format(
-            run_workflow_for_current_organism.config[constants.CONF_ALL_HTTP_PORT],
-            run_workflow_for_current_organism.genus_species)
-
-        run_workflow_for_current_organism.instance = utilities_bioblend.get_galaxy_instance(
-            instance_url=run_workflow_for_current_organism.instance_url,
-            email=run_workflow_for_current_organism.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
-            password=run_workflow_for_current_organism.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD],
-        )
-        history_id = utilities_bioblend.get_history(
-            instance=run_workflow_for_current_organism.instance,
-            history_name=run_workflow_for_current_organism.history_name)
-        run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
+        else:
+
+            # Setting some of the instance attributes
+            run_workflow_for_current_organism.main_dir = main_dir

-    if workflow_type == constants_phaeo.WF_LOAD_GFF_JB:
+            run_workflow_for_current_organism.set_galaxy_instance(config)
+            run_workflow_for_current_organism.set_history()
+            run_workflow_for_current_organism.install_individual_tools()
+            run_workflow_for_current_organism.import_datasets_into_history(config)

             analyses_dict_list = run_workflow_for_current_organism.get_analyses()

-        org_id = run_workflow_for_current_organism.add_organism_and_sync()
-        genome_analysis_id = run_workflow_for_current_organism.add_analysis_and_sync(
+            org_id = run_workflow_for_current_organism.get_organism()
+            blastp_analysis_id = run_workflow_for_current_organism.add_analysis_if_needed(
                 analyses_dict_list=analyses_dict_list,
-            analysis_name=run_workflow_for_current_organism.genome_analysis_name,
-            analysis_programversion=run_workflow_for_current_organism.genome_analysis_programversion,
-            analysis_sourcename=run_workflow_for_current_organism.genome_analysis_sourcename
+                analysis_name=run_workflow_for_current_organism.blastp_analysis_name,
+                analysis_programversion=run_workflow_for_current_organism.blastp_analysis_programversion,
+                analysis_sourcename=run_workflow_for_current_organism.blastp_analysis_sourcename
             )
-        ogs_analysis_id = run_workflow_for_current_organism.add_analysis_and_sync(
+            blastx_analysis_id = run_workflow_for_current_organism.add_analysis_if_needed(
                 analyses_dict_list=analyses_dict_list,
-            analysis_name=run_workflow_for_current_organism.ogs_analysis_name,
-            analysis_programversion=run_workflow_for_current_organism.ogs_analysis_programversion,
-            analysis_sourcename=run_workflow_for_current_organism.ogs_analysis_sourcename
+                analysis_name=run_workflow_for_current_organism.blastx_analysis_name,
+                analysis_programversion=run_workflow_for_current_organism.blastx_analysis_programversion,
+                analysis_sourcename=run_workflow_for_current_organism.blastx_analysis_sourcename
             )
-
-        run_workflow_for_current_organism.import_datasets_into_history()
-
-        # Create the StrainWorkflowParam object holding all attributes needed for the workflow
-        sp_wf_param = OrgWorkflowParamJbrowse(
-            genus_species=run_workflow_for_current_organism.genus_species,
-            strain_sex=run_workflow_for_current_organism.strain_sex,
-            genus_uppercase = run_workflow_for_current_organism.genus_uppercase,
-            full_name=run_workflow_for_current_organism.full_name,
-            species_folder_name=run_workflow_for_current_organism.species_folder_name,
-            chado_species_name=run_workflow_for_current_organism.chado_species_name,
-            org_id=org_id,
-            genome_analysis_id=genome_analysis_id,
-            ogs_analysis_id=ogs_analysis_id,
-            genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
-            gff_hda_id=run_workflow_for_current_organism.gff_hda_id,
-            transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id,
-            proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id,
-            blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id,
-            blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id,
-            interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id,
-            history_id=history_id,
-            instance=run_workflow_for_current_organism.instance
+            interpro_analysis_id = run_workflow_for_current_organism.add_analysis_if_needed(
+                analyses_dict_list=analyses_dict_list,
+                analysis_name=run_workflow_for_current_organism.interpro_analysis_name,
+                analysis_programversion=run_workflow_for_current_organism.interpro_analysis_programversion,
+                analysis_sourcename=run_workflow_for_current_organism.interpro_analysis_sourcename
             )
-        sp_wf_param.check_param_for_workflow_load_fasta_gff_jbrowse()
-
-    if workflow_type == "blast":
-
-        ids = run_workflow_for_current_organism.add_organism_blastp_analysis()
-
-        org_id = ids["org_id"]
-        blastp_analysis_id = ids["blastp_analysis_id"]
-        run_workflow_for_current_organism.import_datasets_into_history()

             # Create the StrainWorkflowParam object holding all attributes needed for the workflow
-        sp_wf_param = OrgWorkflowParamJbrowse(
-            genus_species=run_workflow_for_current_organism.genus_species,
-            strain_sex=run_workflow_for_current_organism.strain_sex,
-            genus_uppercase = run_workflow_for_current_organism.genus_uppercase,
+            org_wf_param = OrgWorkflowParamBlastInterproscan(
+                genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
                 full_name=run_workflow_for_current_organism.full_name,
                 species_folder_name=run_workflow_for_current_organism.species_folder_name,
                 chado_species_name=run_workflow_for_current_organism.chado_species_name,
                 org_id=org_id,
                 blastp_analysis_id=blastp_analysis_id,
-            genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
-            gff_hda_id=run_workflow_for_current_organism.gff_hda_id,
-            transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id,
-            proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id,
-            blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id,
-            blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id,
-            interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id,
-            history_id=history_id,
-            instance=run_workflow_for_current_organism.instance
-        )
-        sp_wf_param.check_param_for_workflow_blastp()
-
-    if workflow_type == "interpro":
-
-        ids = run_workflow_for_current_organism.add_organism_interproscan_analysis()
-
-        org_id = ids["org_id"]
-        interpro_analysis_id = ids["interpro_analysis_id"]
-        run_workflow_for_current_organism.import_datasets_into_history()
-
-        # Create the StrainWorkflowParam object holding all attributes needed for the workflow
-        sp_wf_param = OrgWorkflowParamJbrowse(
-            genus_species=run_workflow_for_current_organism.genus_species,
-            strain_sex=run_workflow_for_current_organism.strain_sex,
-            genus_uppercase = run_workflow_for_current_organism.genus_uppercase,
-            full_name=run_workflow_for_current_organism.full_name,
-            species_folder_name=run_workflow_for_current_organism.species_folder_name,
-            chado_species_name=run_workflow_for_current_organism.chado_species_name,
-            org_id=org_id,
+                blastx_analysis_id=blastx_analysis_id,
                 interpro_analysis_id=interpro_analysis_id,
-            genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
-            gff_hda_id=run_workflow_for_current_organism.gff_hda_id,
-            transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id,
-            proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id,
-            blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id,
-            blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id,
-            interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id,
-            history_id=history_id,
+                history_id=run_workflow_for_current_organism.history_id,
                 instance=run_workflow_for_current_organism.instance
             )
-        sp_wf_param.check_param_for_workflow_interpro()
-
-    return sp_wf_param
-
-
-def install_changesets_revisions_from_workflow(instance, workflow_path):
-    """
-    Read a .ga file to extract the information about the different tools called.
-    Check if every tool is installed via a "show_tool".
-    If a tool is not installed (versions don't match), send a warning to the logger and install the required changeset (matching the tool version)
-    Doesn't do anything if versions match
-
-    :return:
-    """
-
-    logging.info("Validating that installed tools versions and changesets match workflow versions")
-
-    # Load the workflow file (.ga) in a buffer
-    with open(workflow_path, 'r') as ga_in_file:
+            org_wf_param.check_param()

-        # Then store the decoded json dictionary
-        workflow_dict = json.load(ga_in_file)
+            # Add the species dictionary to the complete dictionary
+            # This dictionary contains every organism present in the input file
+            # Its structure is the following:
+            # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
+            if run_workflow_for_current_organism.genus_species not in all_org_wf_param_dict.keys():
+                all_org_wf_param_dict[run_workflow_for_current_organism.genus_species] = {
+                    run_workflow_for_current_organism.strain_sex: org_wf_param}
+            else:
+                if run_workflow_for_current_organism.strain_sex not in all_org_wf_param_dict[run_workflow_for_current_organism.genus_species].keys():
+                    all_org_wf_param_dict[run_workflow_for_current_organism.genus_species][
+                        run_workflow_for_current_organism.strain_sex] = org_wf_param
+                else:
+                    logging.error("Duplicate organism with 'genus_species' = '{0}' and 'strain_sex' = '{1}'".format(
+                        run_workflow_for_current_organism.genus_species, run_workflow_for_current_organism.strain_sex))

-        # Look up every "step_id" looking for tools
-        for step in workflow_dict["steps"].values():
-            if step["tool_id"]:
-                # Check if an installed version matches the workflow tool version
-                # (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0")
-                utilities_bioblend.install_repository_revision(tool_id=step["tool_id"],
-                                                               version=step["tool_version"],
-                                                               changeset_revision=step["tool_shed_repository"]["changeset_revision"],
-                                                               instance=instance)
+    return all_org_wf_param_dict

-    logging.info("Tools versions and changeset_revisions from workflow validated")
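For reference, the mapping returned by prepare_history_and_get_wf_param has the following shape (organism and strain names are invented for illustration; values are OrgWorkflowParamBlastInterproscan instances):

    all_org_wf_param_dict = {
        "undaria_pinnatifida": {
            "kr2015_male": org_wf_param_male,      # one entry per strain/sex
            "kr2015_female": org_wf_param_female,
        },
    }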


 if __name__ == "__main__":

-    parser = argparse.ArgumentParser(description="Run Galaxy workflows, specific to Phaeoexplorer data")
+    parser = argparse.ArgumentParser(description="Run Galaxy workflow to load Blast and Interproscan data in GGA, specific to Phaeoexplorer data")

     parser.add_argument("input",
                         type=str,
@@ -822,6 +442,10 @@ if __name__ == "__main__":
                         help="Increase output verbosity",
                         action="store_true")

+    parser.add_argument("-vv", "--very_verbose",
+                        help="Increase output verbosity further (also enable debug output from bioblend and urllib3)",
+                        action="store_true")
+
     parser.add_argument("--config",
                         type=str,
                         help="Config path, default to the 'config' file inside the script repository")
@@ -830,26 +454,22 @@ if __name__ == "__main__":
                         type=str,
                         help="Where the stack containers will be located, defaults to working directory")

-    parser.add_argument("--workflow", "-w",
-                        type=str,
-                        help="Worfklow to run. Available options: load_fasta_gff_jbrowse, blast, interpro")
-
     args = parser.parse_args()

-    bioblend_logger = logging.getLogger("bioblend")
-    if args.verbose:
+    if args.verbose or args.very_verbose:
         logging.basicConfig(level=logging.DEBUG)
-        bioblend_logger.setLevel(logging.DEBUG)
     else:
         logging.basicConfig(level=logging.INFO)
-        bioblend_logger.setLevel(logging.INFO)
+
+    if not args.very_verbose:
+        logging.getLogger("urllib3").setLevel(logging.INFO)
+        logging.getLogger("bioblend").setLevel(logging.INFO)

     # Parsing the config file if provided, using the default config otherwise
     if args.config:
         config_file = os.path.abspath(args.config)
     else:
         config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
-    config = utilities.parse_config(config_file)

     main_dir = None
     if not args.main_directory:
@@ -857,811 +477,192 @@ if __name__ == "__main__":
     else:
         main_dir = os.path.abspath(args.main_directory)

+    config = utilities.parse_config(config_file)
     sp_dict_list = utilities.parse_input(args.input)
-
-    workflow_type = None
-    # Checking if user specified a workflow to run
-    if not args.workflow:
-        logging.critical("No workflow type specified, exiting")
-        sys.exit()
-    elif args.workflow in constants_phaeo.WORKFLOW_VALID_TYPES:
-        workflow_type = args.workflow
-        logging.info("Workflow type set to '%s'" % workflow_type)

     script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
-    all_sp_workflow_dict = {}
-
-    if workflow_type == constants_phaeo.WF_LOAD_GFF_JB:
-        for sp_dict in sp_dict_list:
-
-            # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
-            sp_wf_param = get_sp_workflow_param(
-                sp_dict,
-                main_dir=main_dir,
-                config=config,
-                workflow_type=constants_phaeo.WF_LOAD_GFF_JB)
-
-            current_sp_genus_species = sp_wf_param.genus_species
-            current_sp_strain_sex = sp_wf_param.strain_sex
-
-            # Add the species dictionary to the complete dictionary
-            # This dictionary contains every organism present in the input file
-            # Its structure is the following:
-            # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
-            if not current_sp_genus_species in all_sp_workflow_dict.keys():
-                all_sp_workflow_dict[current_sp_genus_species] = {current_sp_strain_sex: sp_wf_param}
-            else:
-                if not current_sp_strain_sex in all_sp_workflow_dict[current_sp_genus_species].keys():
-                    all_sp_workflow_dict[current_sp_genus_species][current_sp_strain_sex] = sp_wf_param
-                else:
-                    logging.error("Duplicate organism with 'genus_species' = '{0}' and 'strain_sex' = '{1}'".format(current_sp_genus_species, current_sp_strain_sex))
-
-        for species, strains in all_sp_workflow_dict.items():
-            strains_list = list(strains.keys())
-            strains_count = len(strains_list)
-
-            if strains_count == 1:
-                logging.info("Input species %s: 1 strain detected in input dictionary" % species)
-                strain_sex = list(strains.keys())[0]
-                sp_wf_param = strains[strain_sex]
-
-                # Set workflow path (1 organism)
-                workflow_path = os.path.join(os.path.abspath(script_dir), constants_phaeo.WORKFLOWS_PATH, constants_phaeo.WF_LOAD_GFF_JB_1_ORG_FILE)
-
-                # Check if the versions of tools specified in the workflow are installed in galaxy
-                install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_wf_param.instance)
-
-                # Set the workflow parameters (individual tools runtime parameters in the workflow)
-                workflow_parameters = {}
-                # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
-                    "organism": sp_wf_param.org_id,
-                    "analysis_id": sp_wf_param.genome_analysis_id,
-                    "do_update": "true"}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
-                    "organism": sp_wf_param.org_id,
-                    "analysis_id": sp_wf_param.ogs_analysis_id}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_FEATURE] = {
-                    "organism_id": sp_wf_param.org_id}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
-
-                # Set datamap (mapping of input files in the workflow)
-                datamap = {}
-                datamap[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": sp_wf_param.genome_hda_id}
-                datamap[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": sp_wf_param.gff_hda_id}
-                datamap[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": sp_wf_param.proteins_hda_id}
-
-                with open(workflow_path, 'r') as ga_in_file:
-
-                    # Store the decoded json dictionary
-                    workflow_dict = json.load(ga_in_file)
-                    workflow_name = workflow_dict["name"]
-
-                    # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
-                    # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
-                    # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-                    if constants.CONF_JBROWSE_MENU_URL not in config.keys():
-                        # default
-                        root_url = "https://{0}".format(config[constants.CONF_ALL_HOSTNAME])
-                    else:
-                        root_url = config[constants.CONF_JBROWSE_MENU_URL]
-                    species_strain_sex = sp_wf_param.chado_species_name.replace(" ", "-")
-                    jbrowse_menu_url = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format(
-                        root_url=root_url,
-                        genus_sp=sp_wf_param.genus_species,
-                        Genus=sp_wf_param.genus_uppercase,
-                        species_strain_sex=species_strain_sex,
-                        id="{id}")
-
-                    # Replace values in the workflow dictionary
-                    jbrowse_tool_state = workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE]["tool_state"]
-                    jbrowse_tool_state = jbrowse_tool_state.replace("__MENU_URL_ORG__", jbrowse_menu_url)
-                    jb_to_container_tool_state = workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER]["tool_state"]
-                    jb_to_container_tool_state = jb_to_container_tool_state\
-                        .replace("__DISPLAY_NAME_ORG__", sp_wf_param.full_name)\
-                        .replace("__UNIQUE_ID_ORG__", sp_wf_param.species_folder_name)
-
-                    # Import the workflow in galaxy as a dict
-                    sp_wf_param.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-
-                    # Get its attributes
-                    workflow_attributes = sp_wf_param.instance.workflows.get_workflows(name=workflow_name)
-                    # Then get its ID (required to invoke the workflow)
-                    workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                    logging.debug("Workflow ID: %s" % workflow_id)
-                    # Check if the workflow is found
-                    try:
-                        show_workflow = sp_wf_param.instance.workflows.show_workflow(workflow_id=workflow_id)
-                    except bioblend.ConnectionError:
-                        logging.warning("Error finding workflow %s" % workflow_name)
-
-                    # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
-                    sp_wf_param.instance.workflows.invoke_workflow(
-                        workflow_id=workflow_id,
-                        history_id=sp_wf_param.history_id,
-                        params=workflow_parameters,
-                        inputs=datamap,
-                        allow_tool_state_corrections=True)
-
-                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance for the jobs state".format(workflow_name))
-
-            if strains_count == 2:
-
-                logging.info("Input organism %s: 2 species detected in input dictionary" % species)
-                strain_sex_org1 = strains_list[0]
-                strain_sex_org2 = strains_list[1]
-                sp_wf_param_org1 = strains[strain_sex_org1]
-                sp_wf_param_org2 = strains[strain_sex_org2]
-
-                # Set workflow path (2 organisms)
-                workflow_path = os.path.join(os.path.abspath(script_dir), constants_phaeo.WORKFLOWS_PATH, constants_phaeo.WF_LOAD_GFF_JB_2_ORG_FILE)
-
-                # Check if the versions of tools specified in the workflow are installed in galaxy
-                install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_wf_param_org1.instance)
-
-                # Set the workflow parameters (individual tools runtime parameters in the workflow)
-                workflow_parameters = {}
-
-                # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {}
-
-                # Organism 1
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1] = {
-                    "organism": sp_wf_param_org1.org_id,
-                    "analysis_id": sp_wf_param_org1.genome_analysis_id,
-                    "do_update": "true"}
-                # workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1] = {
-                    "organism": sp_wf_param_org1.org_id,
-                    "analysis_id": sp_wf_param_org1.ogs_analysis_id}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG1] = {
-                    "organism_id": sp_wf_param_org1.org_id}
-                # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {}
-
-                # Organism 2
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2] = {
-                    "organism": sp_wf_param_org2.org_id,
-                    "analysis_id": sp_wf_param_org2.genome_analysis_id,
-                    "do_update": "true"}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2] = {
-                    "organism": sp_wf_param_org2.org_id,
-                    "analysis_id": sp_wf_param_org2.ogs_analysis_id}
-                # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2] = {}
-                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG2] = {
-                    "organism_id": sp_wf_param_org2.org_id}
sp_wf_param_org2.org_id} - - # POPULATE + INDEX DATA - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {} - - # Set datamap (mapping of input files in the workflow) - datamap = {} - - # Organism 1 - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": sp_wf_param_org1.genome_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": sp_wf_param_org1.gff_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": sp_wf_param_org1.proteins_hda_id} - - # Organism 2 - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": sp_wf_param_org2.genome_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": sp_wf_param_org2.gff_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": sp_wf_param_org2.proteins_hda_id} - - with open(workflow_path, 'r') as ga_in_file: - - # Store the decoded json dictionary - workflow_dict = json.load(ga_in_file) - workflow_name = workflow_dict["name"] - - # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them - # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) - # Scratchgmod test: need "http" (or "https"), the hostname (+ port) - if constants.CONF_JBROWSE_MENU_URL not in config.keys(): - # default - root_url = "https://{0}".format(config[constants.CONF_ALL_HOSTNAME]) - else: - root_url = config[constants.CONF_JBROWSE_MENU_URL] - species_strain_sex_org1 = sp_wf_param_org1.chado_species_name.replace(" ", "-") - species_strain_sex_org2 = sp_wf_param_org2.chado_species_name.replace(" ", "-") - jbrowse_menu_url_org1 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format( - root_url=root_url, - genus_sp=sp_wf_param_org1.genus_species, - Genus=sp_wf_param_org1.genus_uppercase, - species_strain_sex=species_strain_sex_org1, - id="{id}") - jbrowse_menu_url_org2 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format( - root_url=root_url, - genus_sp=sp_wf_param_org2.genus_species, - Genus=sp_wf_param_org2.genus_uppercase, - species_strain_sex=species_strain_sex_org2, - id="{id}") - - # Replace values in the workflow dictionary - jbrowse_tool_state_org1 = workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]["tool_state"] - jbrowse_tool_state_org1 = jbrowse_tool_state_org1.replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) - jbrowse_tool_state_org2 = workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]["tool_state"] - jbrowse_tool_state_org2 = jbrowse_tool_state_org2.replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) - # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow - # in galaxy --> define a naming method for these workflows - jb_to_container_tool_state = workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]["tool_state"] - jb_to_container_tool_state = jb_to_container_tool_state\ - .replace("__DISPLAY_NAME_ORG1__", sp_wf_param_org1.full_name)\ - .replace("__UNIQUE_ID_ORG1__", sp_wf_param_org1.species_folder_name)\ - .replace("__DISPLAY_NAME_ORG2__", sp_wf_param_org2.full_name)\ - .replace("__UNIQUE_ID_ORG2__", 
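Because these placeholder substitutions operate on the raw tool_state JSON string, and str.replace() returns a new string, the edited value has to end up back inside workflow_dict before import_workflow_dict is called for the change to take effect. A minimal sketch of the pattern, with an illustrative step index and placeholder name:

# Sketch of placeholder substitution in a .ga step's tool_state.
# "4" and "__MENU_URL_ORG__" are illustrative, not taken from the real workflows.
import json

with open("Galaxy-Workflow-example.ga", "r") as ga_in_file:
    workflow_dict = json.load(ga_in_file)

step_id = "4"  # JBrowse step index in the .ga file
tool_state = workflow_dict["steps"][step_id]["tool_state"]
# tool_state is itself a JSON-encoded string, so a plain text replace works on it;
# assigning the result back into the dict is what makes the edit stick
workflow_dict["steps"][step_id]["tool_state"] = tool_state.replace(
    "__MENU_URL_ORG__", "https://hostname/sp/genus_species/feature/Genus/species/mRNA/{id}")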
-
-                # Import the workflow in galaxy as a dict
-                sp_wf_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-
-                # Get its attributes
-                workflow_attributes = sp_wf_param_org1.instance.workflows.get_workflows(name=workflow_name)
-                # Then get its ID (required to invoke the workflow)
-                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                logging.debug("Workflow ID: %s" % workflow_id)
-                # Check if the workflow is found
-                try:
-                    show_workflow = sp_wf_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id)
-                except bioblend.ConnectionError:
-                    logging.warning("Error finding workflow %s" % workflow_name)
-
-                # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-                sp_wf_param_org1.instance.workflows.invoke_workflow(
-                    workflow_id=workflow_id,
-                    history_id=sp_wf_param_org1.history_id,
-                    params=workflow_parameters,
-                    inputs=datamap,
-                    allow_tool_state_corrections=True)
-
-                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance for the jobs state".format(workflow_name))
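The removed code above relies on get_workflows() returning the most recently imported workflow at index 0, and its comments note that every genus_species_strain_sex combination should eventually get its own uniquely named workflow. One possible naming scheme (purely illustrative, not something this patch implements) is to suffix the name in the decoded dict before import, so the name lookup becomes unambiguous:

# Illustrative: give each per-organism workflow copy a unique name before import,
# assuming workflow_dict has already been loaded from the .ga file. The suffix
# format is an assumption, not part of this patch.
workflow_dict["name"] = "{0}_{1}".format(workflow_dict["name"], "Genus_species_strain_sex")
# get_workflows(name=...) then matches exactly one workflow per combination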
-
     if workflow_type == constants_phaeo.WORKFLOW_BLAST:
-        for sp_dict in sp_dict_list:
-
-            # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
-            sp_wf_param = get_sp_workflow_param(sp_dict, main_dir=args.main_directory, config=config, workflow_type=constants_phaeo.WORKFLOW_BLAST)
-
-            current_sp_genus_species = list(sp_wf_param.keys())[0]
-            current_sp_genus_species_dict = list(sp_wf_param.values())[0]
-            current_sp_strain_sex = list(current_sp_genus_species_dict.keys())[0]
-            current_sp_strain_sex_attributes_dict = list(current_sp_genus_species_dict.values())[0]
+        all_org_wf_param_dict = prepare_history_and_get_wf_param(
+            sp_dict_list=sp_dict_list,
+            main_dir=main_dir,
+            config=config)
-
-            # Add the species dictionary to the complete dictionary
-            # This dictionary contains every organism present in the input file
-            # Its structure is the following:
-            # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
-            if not current_sp_genus_species in all_sp_workflow_dict.keys():
-                all_sp_workflow_dict[current_sp_genus_species] = current_sp_genus_species_dict
-            else:
-                all_sp_workflow_dict[current_sp_genus_species][current_sp_strain_sex] = current_sp_strain_sex_attributes_dict
+        for genus_species, strains in all_org_wf_param_dict.items():
+            strains_list = list(strains.keys())
+            strains_count = len(strains_list)
-
-        if len(list(strains.keys())) == 1:
-            logging.info("Input organism %s: 1 species detected in input dictionary" % species)
+            if strains_count == 1:
+                logging.info("Input species %s: 1 strain detected in input dictionary" % genus_species)
+                strain_sex = list(strains.keys())[0]
+                org_wf_param = strains[strain_sex]

             # Set workflow path (1 organism)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga")
-
-            # Instance object required variables
-            instance_url, email, password = None, None, None
-
-            # Set the galaxy instance variables
-            for k2, v2 in strains.items():
-                instance_url = v2["instance_url"]
-                email = v2["email"]
-                password = v2["password"]
-
-            instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
+                workflow_path = os.path.join(os.path.abspath(script_dir), constants_phaeo.WORKFLOWS_PATH,
+                                             constants_phaeo.WF_LOAD_GFF_JB_1_ORG_FILE)

             # Check if the versions of tools specified in the workflow are installed in galaxy
-            install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
-
-            organisms_key_name = list(strains.keys())
-            org_dict = strains[organisms_key_name[0]]
-
-            history_id = org_dict["history_id"]
-
-            # Organism attributes
-            org_genus = org_dict["genus"]
-            org_species = org_dict["species"]
-            org_genus_species = org_dict["genus_species"]
-            org_species_folder_name = org_dict["species_folder_name"]
-            org_full_name = org_dict["full_name"]
-            org_strain = org_dict["sex"]
-            org_sex = org_dict["strain"]
-            org_org_id = org_dict["org_id"]
-            org_blastp_analysis_id = org_dict["blastp_analysis_id"]
-            org_blastp_hda_id = org_dict["hda_ids"]["blastp_hda_id"]
-
-            # Store these values into a dict for parameters logging/validation
-            org_parameters_dict = {
-                "org_genus": org_genus,
-                "org_species": org_species,
-                "org_genus_species": org_genus_species,
-                "org_species_folder_name": org_species_folder_name,
-                "org_full_name": org_full_name,
-                "org_strain": org_strain,
-                "org_sex": org_sex,
-                "org_org_id": org_org_id,
-                "org_blast_analysis_id": org_blastp_analysis_id,
-                "org_blastp_hda_id": org_blastp_hda_id,
-            }
-
-            # Look for empty parameters values, throw a critical error if a parameter value is invalid
-            for param_name, param_value in org_parameters_dict.items():
-                if param_value is None or param_value == "":
-                    logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value))
-                    sys.exit()
-
-            BLASTP_FILE = "0"
-            LOAD_BLASTP_FILE = "1"
-            WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "2"
-            WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "3"
+                utilities_bioblend.install_workflow_tools(workflow_path=workflow_path, instance=org_wf_param.instance)

             # Set the workflow parameters (individual tools runtime parameters in the workflow)
-            workflow_parameters = {}
-            workflow_parameters[BLASTP_FILE] = {}
-            workflow_parameters[LOAD_BLASTP_FILE] = {"analysis_id": org_blastp_analysis_id, "organism_id": org_org_id}
-            workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
-            workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
-
-            datamap = {}
-            datamap[BLASTP_FILE] = {"src": "hda", "id": org_blastp_hda_id}
-
-            with open(workflow_path, 'r') as ga_in_file:
-                # Store the decoded json dictionary
-                workflow_dict = json.load(ga_in_file)
-                workflow_name = workflow_dict["name"]
+                workflow_parameters = dict()
-
-                # Import the workflow in galaxy as a dict
-                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-                # Get its attributes
-                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
-                # Then get its ID (required to invoke the workflow)
-                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
-                # Check if the workflow is found
-                try:
-                    logging.debug("Workflow ID: %s" % workflow_id)
-                except bioblend.ConnectionError:
-                    logging.warning("Error finding workflow %s" % workflow_name)
-
-                # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-
-                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
-
-
-
-        if len(list(strains.keys())) == 2:
-
-            logging.info("Input organism %s: 2 species detected in input dictionary" % species)
-
-            # Set workflow path (2 organisms)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga")
-
-            # Instance object required variables
-            instance_url, email, password = None, None, None
-
-            # Set the galaxy instance variables
-            for k2, v2 in strains.items():
-                instance_url = v2["instance_url"]
-                email = v2["email"]
-                password = v2["password"]
-
-            instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
-
-            # Check if the versions of tools specified in the workflow are installed in galaxy
-            install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
-
-            organisms_key_names = list(strains.keys())
-            org1_dict = strains[organisms_key_names[0]]
-            org2_dict = strains[organisms_key_names[1]]
-
-            history_id = org1_dict["history_id"]
-
-            # Organism 1 attributes
-            org1_genus = org1_dict["genus"]
-            org1_species = org1_dict["species"]
-            org1_genus_species = org1_dict["genus_species"]
-            org1_species_folder_name = org1_dict["species_folder_name"]
-            org1_full_name = org1_dict["full_name"]
-            org1_strain = org1_dict["sex"]
-            org1_sex = org1_dict["strain"]
-            org1_org_id = org1_dict["org_id"]
-            org1_blastp_analysis_id = org1_dict["blastp_analysis_id"]
-            org1_blastp_hda_id = org1_dict["hda_ids"]["blastp_hda_id"]
-
-            # Store these values into a dict for parameters logging/validation
-            org1_parameters_dict = {
-                "org1_genus": org1_genus,
-                "org1_species": org1_species,
-                "org1_genus_species": org1_genus_species,
-                "org1_species_folder_name": org1_species_folder_name,
-                "org1_full_name": org1_full_name,
-                "org1_strain": org1_strain,
-                "org1_sex": org1_sex,
-                "org1_org_id": org1_org_id,
-                "org1_blast_analysis_id": org1_blastp_analysis_id,
-                "org1_blastp_hda_id": org1_blastp_hda_id,
+                # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_INPUT_BLASTP] = {}
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_INPUT_BLASTX] = {}
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_INPUT_INTERPRO] = {}
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_LOAD_BLASTP] = {
+                    "organism_id": org_wf_param.org_id,
+                    "analysis_id": org_wf_param.blastp_analysis_id}
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_LOAD_BLASTX] = {
+                    "organism_id": org_wf_param.org_id,
+                    "analysis_id": org_wf_param.blastx_analysis_id}
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_LOAD_INTERPRO] = {
+                    "organism_id": org_wf_param.org_id,
+                    "analysis_id": org_wf_param.interpro_analysis_id}
+                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
+                workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_SYNC_BLASTP_ANALYSIS] = {
+                    "analysis_id": org_wf_param.blastp_analysis_id
             }
-
-
-            # Look for empty parameters values, throw a critical error if a parameter value is invalid
-            for param_name, param_value in org1_parameters_dict.items():
-                if param_value is None or param_value == "":
-                    logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value))
-                    sys.exit()
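This empty-value check is duplicated verbatim for every organism block in the removed code. A compact helper with the same behavior could look like the following (an illustrative refactoring sketch, not something this patch adds):

# Illustrative helper equivalent to the repeated empty-parameter checks above.
import logging
import sys

def check_wf_param(full_name, params):
    # Abort the run if any required organism attribute is missing or empty
    for param_name, param_value in params.items():
        if param_value is None or param_value == "":
            logging.critical(
                "Empty parameter value found for organism {0} (parameter: {1})".format(
                    full_name, param_name))
            sys.exit()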
-
-            # Organism 2 attributes
-            org2_genus = org2_dict["genus"]
-            org2_species = org2_dict["species"]
-            org2_genus_species = org2_dict["genus_species"]
-            org2_species_folder_name = org2_dict["species_folder_name"]
-            org2_full_name = org2_dict["full_name"]
-            org2_strain = org2_dict["sex"]
-            org2_sex = org2_dict["strain"]
-            org2_org_id = org2_dict["org_id"]
-            org2_blastp_analysis_id = org2_dict["blastp_analysis_id"]
-            org2_blastp_hda_id = org2_dict["hda_ids"]["blastp_hda_id"]
-
-            # Store these values into a dict for parameters logging/validation
-            org2_parameters_dict = {
-                "org2_genus": org2_genus,
-                "org2_species": org2_species,
-                "org2_genus_species": org2_genus_species,
-                "org2_species_folder_name": org_species_folder_name,
-                "org2_full_name": org2_full_name,
-                "org2_strain": org2_strain,
-                "org2_sex": org2_sex,
-                "org2_org_id": org2_org_id,
-                "org2_blast_analysis_id": org2_blastp_analysis_id,
-                "org2_blastp_hda_id": org2_blastp_hda_id,
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_SYNC_BLASTX_ANALYSIS] = {
+                    "analysis_id": org_wf_param.blastx_analysis_id
+                }
+                workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_SYNC_INTERPRO_ANALYSIS] = {
+                    "analysis_id": org_wf_param.interpro_analysis_id
             }
-
-
-            # Look for empty parameters values, throw a critical error if a parameter value is invalid
-            for param_name, param_value in org2_parameters_dict.items():
-                if param_value is None or param_value == "":
-                    logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
-                    sys.exit()
-
-            # Source files association (ordered by their IDs in the workflow)
-            # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error)
-            BLASTP_FILE_ORG1 = "0"
-            BLASTP_FILE_ORG2 = "1"
-            LOAD_BLASTP_FILE_ORG1 = "2"
-            LOAD_BLASTP_FILE_ORG2 = "3"
-            WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4"
-            WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5"
-
-            # Set the workflow parameters (individual tools runtime parameters in the workflow)
-            workflow_parameters = {}
-
-            # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-            workflow_parameters[BLASTP_FILE_ORG1] = {}
-            workflow_parameters[BLASTP_FILE_ORG2] = {}
-
-            # Organism 1
-            workflow_parameters[LOAD_BLASTP_FILE_ORG1] = {"organism_id": org1_org_id,
-                                                          "analysis_id": org1_blastp_analysis_id}
-
-            # Organism 2
-            workflow_parameters[LOAD_BLASTP_FILE_ORG2] = {"organism_id": org2_org_id,
-                                                          "analysis_id": org2_blastp_analysis_id}
-
-            workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
-            workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}

             # Set datamap (mapping of input files in the workflow)
-            datamap = {}
-
-            # Organism 1
-            datamap[BLASTP_FILE_ORG1] = {"src": "hda", "id": org1_blastp_hda_id}
-
-            # Organism 2
-            datamap[BLASTP_FILE_ORG2] = {"src": "hda", "id": org2_blastp_hda_id}
+                datamap = dict()
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_INPUT_BLASTP] = {"src": "hda",
+                                                                                 "id": org_wf_param.blastp_hda_id}
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_INPUT_BLASTX] = {"src": "hda",
+                                                                                 "id": org_wf_param.blastx_hda_id}
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_1_ORG_INPUT_INTERPRO] = {"src": "hda",
+                                                                                   "id": org_wf_param.interproscan_hda_id}
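By the time invoke_workflow is called, both of these dicts are plain mappings keyed by step index strings from the .ga file. With made-up step indices and IDs, their final shape looks like this:

# Shape of the two dicts handed to invoke_workflow. Step indices ("0", "3", "9")
# and IDs are made up for illustration and depend entirely on the .ga file layout.
workflow_parameters = {
    "0": {},                                         # input dataset step: no tool params
    "3": {"organism_id": "2", "analysis_id": "57"},  # Chado load step: runtime params
    "9": {},                                         # populate mat views: defaults only
}
datamap = {
    "0": {"src": "hda", "id": "f2db41e1fa331b3e"},   # maps input step 0 to a history dataset
}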
             with open(workflow_path, 'r') as ga_in_file:
-                # Store the decoded json dictionary
-                workflow_dict = json.load(ga_in_file)
-                workflow_name = workflow_dict["name"]
-                # Import the workflow in galaxy as a dict
-                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-                # Get its attributes
-                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
-                # Then get its ID (required to invoke the workflow)
-                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
-                # Check if the workflow is found
-                try:
-                    logging.debug("Workflow ID: %s" % workflow_id)
-                except bioblend.ConnectionError:
-                    logging.warning("Error finding workflow %s" % workflow_name)
-
-                # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-
-                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
-
-
-    if workflow_type == "interpro":
-        for sp_dict in sp_dict_list:
-
-            # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
-            sp_wf_param = get_sp_workflow_param(sp_dict, main_dir=args.main_directory, config=config, workfow_type="blast")
-
-            current_sp_genus_species = list(sp_wf_param.keys())[0]
-            current_sp_genus_species_dict = list(sp_wf_param.values())[0]
-            current_sp_strain_sex = list(current_sp_genus_species_dict.keys())[0]
-            current_sp_strain_sex_attributes_dict = list(current_sp_genus_species_dict.values())[0]
-
-            # Add the species dictionary to the complete dictionary
-            # This dictionary contains every organism present in the input file
-            # Its structure is the following:
-            # {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
-            if not current_sp_genus_species in all_sp_workflow_dict.keys():
-                all_sp_workflow_dict[current_sp_genus_species] = current_sp_genus_species_dict
-            else:
-                all_sp_workflow_dict[current_sp_genus_species][current_sp_strain_sex] = current_sp_strain_sex_attributes_dict
-
-        if len(list(strains.keys())) == 1:
-            logging.info("Input organism %s: 1 species detected in input dictionary" % species)
-
-            # Set workflow path (1 organism)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_1org_v1.ga")
-
-            # Instance object required variables
-            instance_url, email, password = None, None, None
-
-            # Set the galaxy instance variables
-            for k2, v2 in strains.items():
-                instance_url = v2["instance_url"]
-                email = v2["email"]
-                password = v2["password"]
-
-            instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
-
-            # Check if the versions of tools specified in the workflow are installed in galaxy
-            install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
-
-            organism_key_name = list(strains.keys())
-            org_dict = strains[organisms_key_name[0]]
-
-            history_id = org_dict["history_id"]
-
-            # Organism attributes
-            org_genus = org_dict["genus"]
-            org_species = org_dict["species"]
-            org_genus_species = org_dict["genus_species"]
-            org_species_folder_name = org_dict["species_folder_name"]
-            org_full_name = org_dict["full_name"]
-            org_strain = org_dict["sex"]
-            org_sex = org_dict["strain"]
-            org_org_id = org_dict["org_id"]
-            org_inteproscan_analysis_id = org_dict["inteproscan_analysis_id"]
-            org_interproscan_hda_id = org_dict["hda_ids"]["interproscan_hda_id"]
-
-            # Store these values into a dict for parameters logging/validation
-            org_parameters_dict = {
-                "org_genus": org_genus,
-                "org_species": org_species,
-                "org_genus_species": org_genus_species,
-                "org_species_folder_name": org_species_folder_name,
-                "org_full_name": org_full_name,
-                "org_strain": org_strain,
-                "org_sex": org_sex,
-                "org_org_id": org_org_id,
-                "org_inteproscan_analysis_id": org_inteproscan_analysis_id,
-                "org_interproscan_hda_id": org_interproscan_hda_id,
-            }
-
-            # Look for empty parameters values, throw a critical error if a parameter value is invalid
-            for param_name, param_value in org_parameters_dict.items():
-                if param_value is None or param_value == "":
-                    logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value))
-                    sys.exit()
-
-            INTEPRO_FILE = "0"
-            LOAD_INTERPRO_FILE = "1"
-            WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "2"
-            WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "3"
-
-            # Set the workflow parameters (individual tools runtime parameters in the workflow)
-            workflow_parameters = {}
-            workflow_parameters[INTEPRO_FILE] = {}
-            workflow_parameters[LOAD_INTERPRO_FILE] = {"analysis_id": org_inteproscan_analysis_id, "organism_id": org_org_id}
-            workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
-            workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
-
-            datamap = {}
-            datamap[INTEPRO_FILE] = {"src": "hda", "id": org_interproscan_hda_id}
-
-            with open(workflow_path, 'r') as ga_in_file:

                 # Store the decoded json dictionary
                 workflow_dict = json.load(ga_in_file)
                 workflow_name = workflow_dict["name"]

                 # Import the workflow in galaxy as a dict
-                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+                org_wf_param.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+
                 # Get its attributes
-                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                workflow_dict_list = org_wf_param.instance.workflows.get_workflows(name=workflow_name)
                 # Then get its ID (required to invoke the workflow)
-                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                workflow_id = workflow_dict_list[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+                logging.debug("Workflow ID: %s" % workflow_id)
                 # Check if the workflow is found
                 try:
-                    logging.debug("Workflow ID: %s" % workflow_id)
+                    show_workflow = org_wf_param.instance.workflows.show_workflow(workflow_id=workflow_id)
                 except bioblend.ConnectionError:
                     logging.warning("Error finding workflow %s" % workflow_name)

-                # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-
-                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
+                # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                org_wf_param.instance.workflows.invoke_workflow(
+                    workflow_id=workflow_id,
+                    history_id=org_wf_param.history_id,
+                    params=workflow_parameters,
+                    inputs=datamap,
+                    allow_tool_state_corrections=True)
+
+                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance for the jobs state".format(workflow_name))
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance for the jobs state".format(workflow_name)) + if strains_count == 2: - if len(list(strains.keys())) == 2: - - logging.info("Input organism %s: 2 species detected in input dictionary" % species) + logging.info("Input organism %s: 2 species detected in input dictionary" % genus_species) + strain_sex_org1 = strains_list[0] + strain_sex_org2 = strains_list[1] + sp_wf_param_org1 = strains[strain_sex_org1] + sp_wf_param_org2 = strains[strain_sex_org2] # Set workflow path (2 organisms) - workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_blast_results_2org_v1.ga") - - # Instance object required variables - instance_url, email, password = None, None, None - - # Set the galaxy instance variables - for k2, v2 in strains.items(): - instance_url = v2["instance_url"] - email = v2["email"] - password = v2["password"] - - instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password) + workflow_path = os.path.join(os.path.abspath(script_dir), constants_phaeo.WORKFLOWS_PATH, constants_phaeo.WF_BLAST_INTERPRO_2_ORG_FILE) # Check if the versions of tools specified in the workflow are installed in galaxy - install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) - - organisms_key_names = list(strains.keys()) - org1_dict = strains[organisms_key_names[0]] - org2_dict = strains[organisms_key_names[1]] - - history_id = org1_dict["history_id"] - - # Organism 1 attributes - org1_genus = org1_dict["genus"] - org1_species = org1_dict["species"] - org1_genus_species = org1_dict["genus_species"] - org1_species_folder_name = org1_dict["species_folder_name"] - org1_full_name = org1_dict["full_name"] - org1_strain = org1_dict["sex"] - org1_sex = org1_dict["strain"] - org1_org_id = org1_dict["org_id"] - org1_interproscan_analysis_id = org1_dict["interproscan_analysis_id"] - org1_interproscan_hda_id = org1_dict["hda_ids"]["interproscan_hda_id"] - - # Store these values into a dict for parameters logging/validation - org1_parameters_dict = { - "org1_genus": org1_genus, - "org1_species": org1_species, - "org1_genus_species": org1_genus_species, - "org1_species_folder_name": org1_species_folder_name, - "org1_full_name": org1_full_name, - "org1_strain": org1_strain, - "org1_sex": org1_sex, - "org1_org_id": org1_org_id, - "org1_interproscan_analysis_id": org1_interproscan_analysis_id, - "org1_interproscan_hda_id": org1_interproscan_hda_id, - } - - - # Look for empty parameters values, throw a critical error if a parameter value is invalid - for param_name, param_value in org1_parameters_dict.items(): - if param_value is None or param_value == "": - logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value)) - sys.exit() - - # Organism 2 attributes - org2_genus = org2_dict["genus"] - org2_species = org2_dict["species"] - org2_genus_species = org2_dict["genus_species"] - org2_species_folder_name = org2_dict["species_folder_name"] - org2_full_name = org2_dict["full_name"] - org2_strain = org2_dict["sex"] - org2_sex = org2_dict["strain"] - org2_org_id = org2_dict["org_id"] - org2_interproscan_analysis_id = org2_dict["interproscan_analysis_id"] - org2_interproscan_hda_id = org2_dict["hda_ids"]["interproscan_hda_id"] - - # Store these values into a dict for parameters logging/validation - org2_parameters_dict = { - "org2_genus": org2_genus, - 
"org2_species": org2_species, - "org2_genus_species": org2_genus_species, - "org2_species_folder_name": org_species_folder_name, - "org2_full_name": org2_full_name, - "org2_strain": org2_strain, - "org2_sex": org2_sex, - "org2_org_id": org2_org_id, - "org2_interproscan_analysis_id": org2_interproscan_analysis_id, - "org2_interproscan_hda_id": org2_interproscan_hda_id, - } - - - # Look for empty parameters values, throw a critical error if a parameter value is invalid - for param_name, param_value in org2_parameters_dict.items(): - if param_value is None or param_value == "": - logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value)) - sys.exit() - - # Source files association (ordered by their IDs in the workflow) - # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error) - INTERPRO_FILE_ORG1 = "0" - INTERPRO_FILE_ORG2 = "1" - LOAD_INTERPRO_FILE_ORG1 = "2" - LOAD_INTERPRO_FILE_ORG2 = "3" - WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4" - WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5" + utilities_bioblend.install_workflow_tools(workflow_path=workflow_path, instance=sp_wf_param_org1.instance) # Set the workflow parameters (individual tools runtime parameters in the workflow) - workflow_parameters = {} + workflow_parameters = dict() # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) - workflow_parameters[INTERPRO_FILE_ORG1] = {} - workflow_parameters[INTERPRO_FILE_ORG2] = {} - - # Organism 1 - workflow_parameters[LOAD_INTERPRO_FILE_ORG1] = {"organism_id": org1_org_id, - "analysis_id": org1_interproscan_analysis_id} - - # Organism 2 - workflow_parameters[LOAD_INTERPRO_FILE_ORG2] = {"organism_id": org2_org_id, - "analysis_id": org2_interproscan_analysis_id} - - workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {} - workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTP_ORG1] = {} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTP_ORG2] = {} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTX_ORG1] = {} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTX_ORG2] = {} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_INTERPRO_ORG1] = {} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_INTERPRO_ORG2] = {} + + # Load xml results + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTP_ORG1] = {"organism_id": sp_wf_param_org1.org_id, + "analysis_id": sp_wf_param_org1.blastp_analysis_id} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTP_ORG2] = {"organism_id": sp_wf_param_org2.org_id, + "analysis_id": sp_wf_param_org2.blastp_analysis_id} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTX_ORG1] = {"organism_id": sp_wf_param_org1.org_id, + "analysis_id": sp_wf_param_org1.blastx_analysis_id} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_LOAD_BLASTX_ORG2] = {"organism_id": sp_wf_param_org2.org_id, + "analysis_id": sp_wf_param_org2.blastx_analysis_id} + workflow_parameters[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_LOAD_INTERPRO_ORG1] = {"organism_id": sp_wf_param_org1.org_id, + "analysis_id": sp_wf_param_org1.interpro_analysis_id} + 
             # Set datamap (mapping of input files in the workflow)
-            datamap = {}
-
-            # Organism 1
-            datamap[BLASTP_FILE_ORG1] = {"src": "hda", "id": org1_interproscan_hda_id}
-
-            # Organism 2
-            datamap[BLASTP_FILE_ORG2] = {"src": "hda", "id": org2_interproscan_hda_id}
+                datamap = dict()
+
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_INTERPRO_ORG1] = {"src": "hda",
+                                                                                        "id": sp_wf_param_org1.interproscan_hda_id}
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTP_ORG1] = {"src": "hda",
+                                                                                      "id": sp_wf_param_org1.blastp_hda_id}
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTX_ORG1] = {"src": "hda",
+                                                                                      "id": sp_wf_param_org1.blastx_hda_id}
+
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_INTERPRO_ORG2] = {"src": "hda",
+                                                                                        "id": sp_wf_param_org2.interproscan_hda_id}
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTP_ORG2] = {"src": "hda",
+                                                                                      "id": sp_wf_param_org2.blastp_hda_id}
+                datamap[constants_phaeo.WF_BLAST_INTERPRO_2_ORG_INPUT_BLASTX_ORG2] = {"src": "hda",
+                                                                                      "id": sp_wf_param_org2.blastx_hda_id}

             with open(workflow_path, 'r') as ga_in_file:
+
                 # Store the decoded json dictionary
                 workflow_dict = json.load(ga_in_file)
                 workflow_name = workflow_dict["name"]

                 # Import the workflow in galaxy as a dict
-                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+                sp_wf_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+
                 # Get its attributes
-                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                workflow_dict_list = sp_wf_param_org1.instance.workflows.get_workflows(name=workflow_name)
                 # Then get its ID (required to invoke the workflow)
-                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                workflow_id = workflow_dict_list[0][
+                    "id"]  # Index 0 is the most recently imported workflow (the one we want)
+                logging.debug("Workflow ID: %s" % workflow_id)
                 # Check if the workflow is found
                 try:
-                    logging.debug("Workflow ID: %s" % workflow_id)
+                    show_workflow = sp_wf_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id)
                 except bioblend.ConnectionError:
                     logging.warning("Error finding workflow %s" % workflow_name)

                 # Finally, invoke the workflow alogn with its
datamap, parameters and the history in which to invoke it - instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) - - logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) + sp_wf_param_org1.instance.workflows.invoke_workflow( + workflow_id=workflow_id, + history_id=sp_wf_param_org1.history_id, + params=workflow_parameters, + inputs=datamap, + allow_tool_state_corrections=True) + + logging.info( + "Successfully imported and invoked workflow {0}, check the galaxy instance for the jobs state".format( + workflow_name)) diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_1org_v1.ga b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_1org_v1.ga new file mode 100644 index 0000000..be675e5 --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_1org_v1.ga @@ -0,0 +1,611 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "load_blast_interproscan_results_1org_v1", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blastp file org" + } + ], + "label": "blastp file org", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 343.8000030517578, + "height": 61.80000305175781, + "left": 381, + "right": 581, + "top": 282, + "width": 200, + "x": 381, + "y": 282 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "8327075f-dcdf-429b-9d35-6c899e5096b5", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "6c6461d5-7752-4169-86aa-a1fdc6a59490" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blastx file org" + } + ], + "label": "blastx file org", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 524.8000030517578, + "height": 61.80000305175781, + "left": 659, + "right": 859, + "top": 463, + "width": 200, + "x": 659, + "y": 463 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "435d2bbe-746f-4e68-991e-8822834ba8c0", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "4634cf97-8960-4ebb-9f55-79244c042e22" + } + ] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "interproscan file org" + } + ], + "label": "interproscan file org", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 575.1999969482422, + "height": 82.19999694824219, + "left": 937, + "right": 1137, + "top": 493, + "width": 200, + "x": 937, + "y": 493 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "1b9e1874-f200-49a9-8bda-9d4d14ff046d", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "cd5eccc5-f364-4476-b5ea-afc4534a3e11" + } + ] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 3, + 
"input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blastp results org", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 425.3999938964844, + "height": 164.39999389648438, + "left": 659, + "right": 859, + "top": 261, + "width": 200, + "x": 659, + "y": 261 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"176\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "9a4b7425-f3dd-4a37-a259-d2dd37b6d9c3", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "b532b247-339e-4c21-ac4b-bf15226981f7" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + }, + "wait_for": { + "id": 3, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blastx results org", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 456.3999938964844, + "height": 164.39999389648438, + "left": 939, + "right": 1139, + "top": 292, + "width": 200, + "x": 939, + "y": 292 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"176\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": 
null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "bc71b4f2-3b55-41ab-b4cd-d171c880978c", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "7d106bd3-779b-473c-b487-b1e6b4080e02" + } + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.4+galaxy0", + "errors": null, + "id": 5, + "input_connections": { + "input": { + "id": 2, + "output_name": "output" + }, + "wait_for": { + "id": 4, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "wait_for" + } + ], + "label": "load interproscan results org", + "name": "Chado load InterProScan results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 486.3999938964844, + "height": 164.39999389648438, + "left": 1216, + "right": 1416, + "top": 322, + "width": 200, + "x": 1216, + "y": 322 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3284a0c7570e", + "name": "chado_load_interpro", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "4cd2a7bd-aaa7-4cb4-bedf-fb5d2c161901", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "2f2cab43-a0e0-43b0-9b7c-3513354dbb98" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 6, + "input_connections": { + "wait_for": { + "id": 5, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "wait_for" + } + ], + "label": "sync blastp analysis org", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 480.3999938964844, + "height": 154.39999389648438, + "left": 1493, + "right": 1693, + "top": 326, + "width": 200, + "x": 1493, + "y": 326 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, 
\"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "096dc1ba-28fa-4ac7-8077-5b0d7dfd50a9", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "c79f9855-071c-4b69-ba7e-517722a2bbee" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 7, + "input_connections": { + "wait_for": { + "id": 6, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "wait_for" + } + ], + "label": "sync blastx analysis org", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 480.3999938964844, + "height": 154.39999389648438, + "left": 1771, + "right": 1971, + "top": 326, + "width": 200, + "x": 1771, + "y": 326 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "beb6de1d-e0ef-477d-a2d3-1156ee80e299", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "b8e8a0cc-1966-4263-9f45-97e08734dccb" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 8, + "input_connections": { + "wait_for": { + "id": 7, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "wait_for" + } + ], + "label": "sync interproscan analysis org", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 480.3999938964844, + "height": 154.39999389648438, + "left": 2049, + "right": 2249, + "top": 326, + "width": 200, + "x": 2049, + "y": 326 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "48eaf070-3d21-486c-a098-7bb459598a8b", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "e42bc9c9-7fa4-4934-bf64-9928e019ef51" + } + ] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 9, + "input_connections": { + "wait_for": { + "id": 8, + "output_name": "results" + } + }, + "inputs": [], + 
"label": "populate mat views", + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 480.3999938964844, + "height": 154.39999389648438, + "left": 2327, + "right": 2527, + "top": 326, + "width": 200, + "x": 2327, + "y": 326 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "18f15132-1161-49b0-809f-eab40e945db4", + "workflow_outputs": [ + { + "label": "Populate Tripal materialized view(s)", + "output_name": "results", + "uuid": "36a6b38e-7def-4092-a3d6-8d4df216a45b" + } + ] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 10, + "input_connections": { + "wait_for": { + "id": 9, + "output_name": "results" + } + }, + "inputs": [], + "label": "index tripal data", + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 481.6000061035156, + "height": 113.60000610351562, + "left": 2605, + "right": 2805, + "top": 368, + "width": 200, + "x": 2605, + "y": 368 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "1c8c2f38-7c44-44fd-bc37-b4ff151440e8", + "workflow_outputs": [ + { + "label": "Index Tripal data", + "output_name": "results", + "uuid": "c5e7ffe8-f322-4957-a7c1-9b92a3cf2fa1" + } + ] + } + }, + "tags": [], + "uuid": "0a48b57a-2c27-4d25-a942-31bf4497d2f6", + "version": 3 +} \ No newline at end of file diff --git a/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_2org_v1.ga b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_2org_v1.ga new file mode 100644 index 0000000..9cf37bf --- /dev/null +++ b/workflows_phaeoexplorer/Galaxy-Workflow-load_blast_interproscan_results_2org_v1.ga @@ -0,0 +1,1063 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "load_blast_interproscan_results_2org_v1", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blastp file org1" + } + ], + "label": "blastp file org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 198.3000030517578, + "height": 61.80000305175781, + "left": -280, + "right": -80, + "top": 136.5, + "width": 200, + "x": -280, + "y": 136.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": 
"c7230bbe-4603-4f3a-b841-157daf9e4b86", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "77e22768-c8ed-488a-ac74-5bd1f0fdad26" + } + ] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blastp file org2" + } + ], + "label": "blastp file org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 379.3000030517578, + "height": 61.80000305175781, + "left": -2, + "right": 198, + "top": 317.5, + "width": 200, + "x": -2, + "y": 317.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "99dc8355-cd87-4eab-9170-cded45dc962a", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "f6dac312-112c-4a28-a999-acfea4522e40" + } + ] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blastx file org1" + } + ], + "label": "blastx file org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 409.3000030517578, + "height": 61.80000305175781, + "left": 276, + "right": 476, + "top": 347.5, + "width": 200, + "x": 276, + "y": 347.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "3f7f7682-46cb-4a61-a649-7a2a43174ae2", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "b623f9b8-6258-4a02-a6d7-f38cc6b51467" + } + ] + }, + "3": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "blastx file org2" + } + ], + "label": "blastx file org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 439.3000030517578, + "height": 61.80000305175781, + "left": 554, + "right": 754, + "top": 377.5, + "width": 200, + "x": 554, + "y": 377.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "07615e25-392b-4533-8efd-6a400de9a2b6", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "216b7b96-8fa0-4c03-82d7-496a398e04ef" + } + ] + }, + "4": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "interproscan file org1" + } + ], + "label": "interproscan file org1", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 489.6999969482422, + "height": 82.19999694824219, + "left": 832, + "right": 1032, + "top": 407.5, + "width": 200, + "x": 832, + "y": 407.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "fe4a1d12-ac12-435b-acd3-10a66d6eee89", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "17e2b6a0-8cd5-4cf9-974b-d843ee35d237" + } + ] + }, + "5": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 5, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "interproscan file org2" + } + ], + "label": "interproscan file org2", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 519.6999969482422, + "height": 82.19999694824219, + "left": 1110, + "right": 1310, + "top": 437.5, + "width": 200, + "x": 1110, + "y": 437.5 + }, + "tool_id": null, + "tool_state": 
"{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "7dc39a56-6afa-46b1-a49c-3103ca847405", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "030a9365-6810-484b-ac90-8d236262a931" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blastp results org1", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 279.8999938964844, + "height": 164.39999389648438, + "left": -2, + "right": 198, + "top": 115.5, + "width": 200, + "x": -2, + "y": 115.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"176\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "da2ce96c-d7e7-40ba-b5e3-da001c7e8ea7", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "f9fe5c14-42c7-4d13-a6d0-796e6534c466" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 7, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + }, + "wait_for": { + "id": 6, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + } + ], + "label": "load blastp results org2", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 309.8999938964844, + "height": 164.39999389648438, + "left": 276, + "right": 476, + "top": 145.5, + "width": 200, + "x": 276, + "y": 145.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"176\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": 
\"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "e570180a-d503-4ab1-bb5f-7916e32b974a", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "37193d90-4908-4dc4-87e1-12320c9012f2" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 8, + "input_connections": { + "input": { + "id": 2, + "output_name": "output" + }, + "wait_for": { + "id": 7, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blastx results org1", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 339.8999938964844, + "height": 164.39999389648438, + "left": 554, + "right": 754, + "top": 175.5, + "width": 200, + "x": 554, + "y": 175.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"176\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "3fd25463-269d-454f-8467-72deb1788fdc", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "c3f95842-f8e6-4088-9255-175aebb90ce1" + } + ] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "errors": null, + "id": 9, + "input_connections": { + "input": { + "id": 3, + "output_name": "output" + }, + "wait_for": { + "id": 8, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "input" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "organism_id" + }, + { + "description": "runtime parameter for tool Chado load Blast results", + "name": "wait_for" + } + ], + "label": "load blastx results org2", + "name": "Chado load Blast results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 369.8999938964844, + "height": 164.39999389648438, + "left": 832, + "right": 1032, + "top": 205.5, + "width": 200, + "x": 832, + "y": 205.5 + }, + "post_job_actions": {}, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/gga/chado_load_blast/load_blast/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "08ae8b27b193", + "name": "chado_load_blast", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"blastdb_id\": \"176\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "e5d9b775-8d0c-468f-b32a-067228bc1e05", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "e2461d5a-918e-42e4-9be8-9d64d0624de7" + } + ] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.4+galaxy0", + "errors": null, + "id": 10, + "input_connections": { + "input": { + "id": 4, + "output_name": "output" + }, + "wait_for": { + "id": 9, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "organism_id" + } + ], + "label": "load interproscan results org1", + "name": "Chado load InterProScan results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 399.8999938964844, + "height": 164.39999389648438, + "left": 1110, + "right": 1310, + "top": 235.5, + "width": 200, + "x": 1110, + "y": 235.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3284a0c7570e", + "name": "chado_load_interpro", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "93acf95c-2f49-4424-8d6a-3aaa50dff8c0", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "d9d0fde7-38f3-43ff-9f3d-6c26dcde83aa" + } + ] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.4+galaxy0", + "errors": null, + "id": 11, + "input_connections": { + "input": { + "id": 5, + "output_name": "output" + }, + "wait_for": { + "id": 10, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "analysis_id" + }, + { + "description": "runtime parameter for tool Chado load InterProScan results", + "name": "organism_id" + } + ], + "label": "load interproscan results org2", + "name": "Chado load InterProScan results", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 
429.8999938964844, + "height": 164.39999389648438, + "left": 1388, + "right": 1588, + "top": 265.5, + "width": 200, + "x": 1388, + "y": 265.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3284a0c7570e", + "name": "chado_load_interpro", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "a6f4ea32-821c-4d8f-92b8-133dedad1cf3", + "workflow_outputs": [ + { + "label": null, + "output_name": "results", + "uuid": "1116bc6e-ffde-404d-8f94-f90310babad4" + } + ] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 12, + "input_connections": { + "wait_for": { + "id": 11, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blastp analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 1666, + "right": 1866, + "top": 270.5, + "width": 200, + "x": 1666, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "ded3538e-88bb-4958-bab2-59236702a55a", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "872ba62f-d5b2-4be6-8e04-312fc323b66e" + } + ] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 13, + "input_connections": { + "wait_for": { + "id": 12, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blastp analysis org2", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 1944, + "right": 2144, + "top": 270.5, + "width": 200, + "x": 1944, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": 
{\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "856b071e-d134-401a-8083-c937ef1e9a82", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "dbb5ca07-fc4b-4792-8b0a-225799e35ed9" + } + ] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 14, + "input_connections": { + "wait_for": { + "id": 13, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blastx analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 2222, + "right": 2422, + "top": 270.5, + "width": 200, + "x": 2222, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "9f737334-00a3-40e4-8d1d-2c82e43ea41c", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "fce3de08-8796-43e0-922d-7f6fbf35c20c" + } + ] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 15, + "input_connections": { + "wait_for": { + "id": 14, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync blastx analysis org2", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 2500, + "right": 2700, + "top": 270.5, + "width": 200, + "x": 2500, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "c9579d14-f8e8-4d98-8c15-55242f6ab970", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "163dfb0c-e4fc-406e-aff3-0377deb0808b" + } + ] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 16, + "input_connections": { + "wait_for": { + "id": 15, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": 
"analysis_id" + } + ], + "label": "sync interproscan analysis org1", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 2778, + "right": 2978, + "top": 270.5, + "width": 200, + "x": 2778, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "78622cfb-78f2-468d-8427-cf3763b28630", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "ca3e8b73-686d-4975-88b6-388decfb63da" + } + ] + }, + "17": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "errors": null, + "id": 17, + "input_connections": { + "wait_for": { + "id": 16, + "output_name": "results" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Synchronize an analysis", + "name": "analysis_id" + } + ], + "label": "sync interproscan analysis org2", + "name": "Synchronize an analysis", + "outputs": [ + { + "name": "results", + "type": "json" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 3056, + "right": 3256, + "top": 270.5, + "width": 200, + "x": 3056, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "f487ff676088", + "name": "tripal_analysis_sync", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "191af79d-80cb-4da3-b6b8-47a3f651ba4b", + "workflow_outputs": [ + { + "label": "Synchronize Analysis into Tripal", + "output_name": "results", + "uuid": "cfc30f51-85f5-4bda-9ae0-9550805a306e" + } + ] + }, + "18": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "errors": null, + "id": 18, + "input_connections": { + "wait_for": { + "id": 17, + "output_name": "results" + } + }, + "inputs": [], + "label": "populate mat views", + "name": "Populate materialized views", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 424.8999938964844, + "height": 154.39999389648438, + "left": 3334, + "right": 3534, + "top": 270.5, + "width": 200, + "x": 3334, + "y": 270.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0", + "tool_shed_repository": { + "changeset_revision": "3c08f32a3dc1", + "name": "tripal_db_populate_mviews", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": 
"b2c9b425-3df6-40a8-b83f-d4719c2c925f", + "workflow_outputs": [ + { + "label": "Populate Tripal materialized view(s)", + "output_name": "results", + "uuid": "7942bda1-54d6-4ae7-9694-e8946d089d9b" + } + ] + }, + "19": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "errors": null, + "id": 19, + "input_connections": { + "wait_for": { + "id": 18, + "output_name": "results" + } + }, + "inputs": [], + "label": "index tripal data", + "name": "Index Tripal data", + "outputs": [ + { + "name": "results", + "type": "txt" + } + ], + "position": { + "bottom": 426.1000061035156, + "height": 113.60000610351562, + "left": 3612, + "right": 3812, + "top": 312.5, + "width": 200, + "x": 3612, + "y": 312.5 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1", + "tool_shed_repository": { + "changeset_revision": "d55a39f12dda", + "name": "tripal_db_index", + "owner": "gga", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": null, + "type": "tool", + "uuid": "7bce942d-6087-43a9-aa07-fd64d1a7079c", + "workflow_outputs": [ + { + "label": "Index Tripal data", + "output_name": "results", + "uuid": "87687ee1-8319-47b1-a94c-2ea95ac88511" + } + ] + } + }, + "tags": [], + "uuid": "419ce581-8a2f-4376-a0ce-df72321bf11d", + "version": 10 +} \ No newline at end of file -- GitLab