From 5bbc05a7b44bac7f06f2354bc04888314491e915 Mon Sep 17 00:00:00 2001 From: Loraine Gueguen <loraine.gueguen@sb-roscoff.fr> Date: Sun, 30 May 2021 13:01:25 +0200 Subject: [PATCH] Refactor run_wf. Fix linting errors and warnings. --- gga_get_data.py | 8 -- gga_init.py | 20 ++--- gga_load_data.py | 11 ++- phaeo_constants.py | 4 +- run_workflow_phaeoexplorer.py | 142 +++++++++++++++++++--------------- speciesData.py | 81 +++++++++---------- utilities.py | 16 ++-- 7 files changed, 137 insertions(+), 145 deletions(-) diff --git a/gga_get_data.py b/gga_get_data.py index f46d733..89788e2 100755 --- a/gga_get_data.py +++ b/gga_get_data.py @@ -5,7 +5,6 @@ import argparse import os import logging import sys -import time import shutil import utilities @@ -65,13 +64,6 @@ class GetData(speciesData.SpeciesData): logging.info("src_data directory tree generated for %s" % self.full_name) - def get_last_modified_time_string(self, filePath): - # give the last modification date for the file, with format '20190130' - lastModifiedTimestamp = os.path.getmtime(filePath) - lastModifiedTimeStructure = time.localtime(lastModifiedTimestamp) - lastModifiedDate = time.strftime("%Y%m%d", lastModifiedTimeStructure) - return lastModifiedDate - def get_source_data_files_from_path(self): """ Find source data files and copy them into the src_data dir tree diff --git a/gga_init.py b/gga_init.py index eee4968..0f7d7bf 100755 --- a/gga_init.py +++ b/gga_init.py @@ -37,6 +37,9 @@ class DeploySpeciesStack(speciesData.SpeciesData): the organism's directory tree to create the required docker-compose files and stack deployment """ + def __init__(self, parameters_dictionary): + self.picture_path = None + super().__init__(parameters_dictionary) def make_directory_tree(self): """ @@ -131,7 +134,7 @@ class DeploySpeciesStack(speciesData.SpeciesData): "genus_species_sex": "{0}_{1}_{2}".format(self.genus_lowercase, self.species_lowercase, self.sex), "strain": self.strain, "sex": self.sex, "Genus_species": "{0} {1}".format(self.genus_uppercase, self.species_lowercase), "blast": self.blast, "go": self.go, "picture_path": self.picture_path} - if (len(self.config.keys()) == 0): + if len(self.config.keys()) == 0: logging.error("Empty config dictionary") # Merge the two dicts render_vars = {**self.config, **input_vars} @@ -161,19 +164,6 @@ class DeploySpeciesStack(speciesData.SpeciesData): os.chdir(self.main_dir) - def make_orthology_compose_files(self): - """ - Create/update orthology compose files - - :return: - """ - - os.chdir(self.main_dir) - - make_dirs["./orthology", "./orthology/src_data", "./orthology/src_data/genomes", - "./orthology/src_data/gff", "./orthology/src_data/newicks", "./orthology/src_data/proteomes"] - - def make_dirs(dir_paths_li): """ Recursively create directories from a list of paths with a try-catch condition @@ -386,7 +376,7 @@ if __name__ == "__main__": else: config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG) config = utilities.parse_config(config_file) - if (len(config.keys()) == 0): + if len(config.keys()) == 0: logging.error("Empty config dictionary") main_dir = None diff --git a/gga_load_data.py b/gga_load_data.py index d1bb064..d0ef036 100755 --- a/gga_load_data.py +++ b/gga_load_data.py @@ -10,7 +10,6 @@ import sys import time import json import yaml -import subprocess from bioblend import galaxy from bioblend.galaxy.objects import GalaxyInstance @@ -52,7 +51,7 @@ class LoadData(speciesData.SpeciesData): self.history_id = histories[0]["id"] logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id)) else: - logging.critical("Multiple histories exists for {1}: {2}".format(self.genus, self.species)) + logging.critical("Multiple histories exists for {0}: {1}".format(self.genus, self.species)) except IndexError: logging.info("Creating history for {0} {1}".format(self.genus, self.species)) hist_dict = self.instance.histories.create_history(name=str(self.genus_species)) @@ -114,9 +113,9 @@ class LoadData(speciesData.SpeciesData): data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR) gio = GalaxyInstance(url=self.instance_url, - email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL], - password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD] - ) + email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL], + password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD] + ) logging.info("Looking for project data in %s" % data_dir_root) folders = dict() @@ -141,7 +140,7 @@ class LoadData(speciesData.SpeciesData): for fname, files in folders.items(): if fname and files: - folder_name = re.sub(data_dir_root + "/", "", fname) + folder_name = re.sub(re.compile(data_dir_root + "/"), "", str(fname)) logging.info("Creating folder: %s" % folder_name) folder = self.create_deep_folder(prj_lib, folder_name) diff --git a/phaeo_constants.py b/phaeo_constants.py index 1cca9b6..0457cc2 100644 --- a/phaeo_constants.py +++ b/phaeo_constants.py @@ -47,8 +47,8 @@ WORKFLOW_VALID_TYPES = [WF_LOAD_GFF_JB, WORKFLOW_BLAST, WORKFLOW_INTERPRO] ### Galaxy tools ADD_ORGANISM_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/" -ADD_ORGANISM_TOOL_NAME = "2.3.4+galaxy0" -ADD_ORGANISM_TOOL_ID = ADD_ORGANISM_TOOL_NAME + ADD_ORGANISM_TOOL_NAME +ADD_ORGANISM_TOOL_VERSION = "2.3.4+galaxy0" +ADD_ORGANISM_TOOL_ID = ADD_ORGANISM_TOOL_NAME + ADD_ORGANISM_TOOL_VERSION ADD_ORGANISM_TOOL_CHANGESET_REVISION = "1f12b9650028" ADD_ANALYSIS_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/" diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py index 94c6477..9d9b12e 100755 --- a/run_workflow_phaeoexplorer.py +++ b/run_workflow_phaeoexplorer.py @@ -28,8 +28,9 @@ class StrainWorkflowParam: def __init__(self, genus_species, strain_sex, genus, genus_uppercase, species, chado_species_name, sex, strain, full_name, species_folder_name, org_id, - genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, history_id, - instance, instance_url, email, password): + history_id, instance, instance_url, email, password, + genome_analysis_id=None, ogs_analysis_id=None, blastp_analysis_id=None, interpro_analysis_id=None, + genome_hda_id=None, gff_hda_id=None, transcripts_hda_id=None, proteins_hda_id=None, blastp_hda_id=None, blastx_hda_id=None, interproscan_hda_id=None): self.genus_species = genus_species self.strain_sex = strain_sex self.genus = genus @@ -50,36 +51,40 @@ class StrainWorkflowParam: self.instance_url = instance_url self.email = email self.password = password - - def check_param(self, params): - if not utilities.no_empty_items(params): - logging.critical( - "One empty workflow parameter found for organism {0}: {1})".format(org_full_name, params)) - sys.exit() - - def check_param_for_workflow_load_fasta_gff_jbrowse(self, params): + self.genome_hda_id = genome_hda_id, + self.gff_hda_id = gff_hda_id, + self.transcripts_hda_id = transcripts_hda_id, + self.proteins_hda_id = proteins_hda_id, + self.blastp_hda_id = blastp_hda_id, + self.blastx_hda_id = blastx_hda_id, + self.interproscan_hda_id = interproscan_hda_id, + + def check_param_for_workflow_load_fasta_gff_jbrowse(self): params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species, self.sex, self.strain, self.chado_species_name, self.full_name, self.species_folder_name, self.org_id, - self.genome_analysis_id, self.ogs_analysis_id, self.history_id, - self.instance, self.instance_url, self.email, self.password] - self.check_param(params) + self.history_id, self.instance, self.instance_url, self.email, self.password, + self.genome_analysis_id, self.ogs_analysis_id, + self.genome_hda_id, self.gff_hda_id, self.transcripts_hda_id, self.proteins_hda_id] + utilities.check_wf_param(self.full_name, params) - def check_param_for_workflow_blast(self, params): + def check_param_for_workflow_blastp(self): params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species, self.sex, self.strain, self.chado_species_name, self.full_name, self.species_folder_name, self.org_id, - self.blastp_analysis_id, self.history_id, - self.instance, self.instance_url, self.email, self.password] - self.check_param(params) + self.history_id, self.instance, self.instance_url, self.email, self.password, + self.blastp_analysis_id, + self.blastp_hda_id] + utilities.check_wf_param(self.full_name, params) - def check_param_for_workflow_interpro(self, params): + def check_param_for_workflow_interpro(self): params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species, self.sex, self.strain, self.chado_species_name, self.full_name, self.species_folder_name, self.org_id, - self.interpro_analysis_id, self.history_id, - self.instance, self.instance_url, self.email, self.password] - self.check_param(params) + self.history_id, self.instance, self.instance_url, self.email, self.password, + self.interpro_analysis_id, + self.interproscan_hda_id] + utilities.check_wf_param(self.full_name, params) class RunWorkflow(speciesData.SpeciesData): @@ -329,7 +334,7 @@ class RunWorkflow(speciesData.SpeciesData): logging.info("Synchronizing analysis %s in Tripal" % analysis_name) self.sync_analysis(analysis_id=analysis_id) - return(analysis_id) + return analysis_id def add_organism_blastp_analysis(self): """ @@ -431,7 +436,7 @@ class RunWorkflow(speciesData.SpeciesData): tool_inputs={"analysis_id": blastp_analysis_id}) # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id}) - return({"org_id": org_id, "blastp_analysis_id": blastp_analysis_id}) + return {"org_id": org_id, "blastp_analysis_id": blastp_analysis_id} def add_organism_interproscan_analysis(self): """ @@ -590,9 +595,7 @@ class RunWorkflow(speciesData.SpeciesData): email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL], password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]) - prj_lib = gio.libraries.get_previews(name=constants.GALAXY_LIBRARY_NAME) - library_id = prj_lib[0].id - folder_dict_list = self.instance.libraries.get_folders(library_id=str(library_id)) + folder_dict_list = self.instance.libraries.get_folders(library_id=str(self.library_id)) folders_id_dict = {} @@ -809,7 +812,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): email=config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL], password=config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD] ) - sp_wf_param.check_param_for_workflow_blast() + sp_wf_param.check_param_for_workflow_blastp() if workflow_type == "interpro": @@ -970,10 +973,12 @@ if __name__ == "__main__": logging.error("Duplicate organism with 'genus_species' = '{0}' and 'strain_sex' = '{1}'".format(current_sp_genus_species, current_sp_strain_sex)) for species, strains in all_sp_workflow_dict.items(): - strains_count = len(list(strains.keys())) + strains_list = list(strains.keys()) + strains_count = len(strains_list) + if strains_count == 1: logging.info("Input species %s: 1 strain detected in input dictionary" % species) - strain_sex = strains.keys()[0] + strain_sex = list(strains.keys())[0] sp_workflow_param = strains[strain_sex] # Set workflow path (1 organism) @@ -1064,8 +1069,8 @@ if __name__ == "__main__": if strains_count == 2: logging.info("Input organism %s: 2 species detected in input dictionary" % species) - strain_sex_org1 = strains.keys()[0] - strain_sex_org2 = strains.keys()[1] + strain_sex_org1 = strains_list[0] + strain_sex_org2 = strains_list[1] sp_workflow_param_org1 = strains[strain_sex_org1] sp_workflow_param_org2 = strains[strain_sex_org2] @@ -1140,54 +1145,69 @@ if __name__ == "__main__": # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) # Scratchgmod test: need "http" (or "https"), the hostname (+ port) - jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - if "jbrowse_menu_url" not in config.keys(): - jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") + if constants.CONF_JBROWSE_MENU_URL not in config.keys(): + # default + root_url = "https://{0}".format(config[constants.CONF_ALL_HOSTNAME]) else: - jbrowse_menu_url_org1 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}") - jbrowse_menu_url_org2 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}") - - # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True) - # print(show_tool_add_organism) - # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True) - # print(show_jbrowse_tool) - # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True) - # print(show_jbrowse_container_tool) + root_url = config[constants.CONF_JBROWSE_MENU_URL] + species_strain_sex_org1 = sp_workflow_param_org1.chado_species_name.replace(" ", "-") + species_strain_sex_org2 = sp_workflow_param_org2.chado_species_name.replace(" ", "-") + jbrowse_menu_url_org1 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format( + root_url=root_url, + genus_sp=sp_workflow_param_org1.genus_species, + Genus=sp_workflow_param_org1.genus_uppercase, + species_strain_sex=species_strain_sex_org1, + id="{id}") + jbrowse_menu_url_org2 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format( + root_url=root_url, + genus_sp=sp_workflow_param_org2.genus_species, + Genus=sp_workflow_param_org2.genus_uppercase, + species_strain_sex=species_strain_sex_org2, + id="{id}") # Replace values in the workflow dictionary - workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) - workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) + jbrowse_tool_state_org1 = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]["tool_state"] + jbrowse_tool_state_org1 = jbrowse_tool_state_org1.replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) + jbrowse_tool_state_org2 = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]["tool_state"] + jbrowse_tool_state_org2 = jbrowse_tool_state_org2.replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow # in galaxy --> define a naming method for these workflows - workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name) - workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name) + jb_to_container_tool_state = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]["tool_state"] + jb_to_container_tool_state = jb_to_container_tool_state\ + .replace("__DISPLAY_NAME_ORG1__", sp_workflow_param_org1.full_name)\ + .replace("__UNIQUE_ID_ORG1__", sp_workflow_param_org1.species_folder_name)\ + .replace("__DISPLAY_NAME_ORG2__", sp_workflow_param_org2.full_name)\ + .replace("__UNIQUE_ID_ORG2__", sp_workflow_param_org2.species_folder_name) # Import the workflow in galaxy as a dict - instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) + sp_workflow_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) # Get its attributes - workflow_attributes = instance.workflows.get_workflows(name=workflow_name) + workflow_attributes = sp_workflow_param_org1.instance.workflows.get_workflows(name=workflow_name) # Then get its ID (required to invoke the workflow) workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) - show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id) + logging.debug("Workflow ID: %s" % workflow_id) # Check if the workflow is found try: - logging.debug("Workflow ID: %s" % workflow_id) + show_workflow = sp_workflow_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id) except bioblend.ConnectionError: logging.warning("Error finding workflow %s" % workflow_name) # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) + sp_workflow_param_org1.instance.workflows.invoke_workflow( + workflow_id=workflow_id, + history_id=sp_workflow_param_org1.history_id, + params=workflow_parameters, + inputs=datamap, + allow_tool_state_corrections=True) - logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url)) + logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, sp_workflow_param_org1.instance_url)) - if workflow_type == "blast": + if workflow_type == phaeo_constants.WORKFLOW_BLAST: for sp_dict in sp_dict_list: # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary - sp_workflow_param = get_sp_workflow_param(sp_dict, main_dir=args.main_directory, config=config, workfow_type="blast") + sp_workflow_param = get_sp_workflow_param(sp_dict, main_dir=args.main_directory, config=config, workflow_type=phaeo_constants.WORKFLOW_BLAST) current_sp_genus_species = list(sp_workflow_param.keys())[0] current_sp_genus_species_dict = list(sp_workflow_param.values())[0] @@ -1223,7 +1243,7 @@ if __name__ == "__main__": # Check if the versions of tools specified in the workflow are installed in galaxy install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance) - organism_key_name = list(strains.keys()) + organisms_key_name = list(strains.keys()) org_dict = strains[organisms_key_name[0]] history_id = org_dict["history_id"] @@ -1377,7 +1397,7 @@ if __name__ == "__main__": "org2_genus": org2_genus, "org2_species": org2_species, "org2_genus_species": org2_genus_species, - "org2_species_folder_name": orgé_species_folder_name, + "org2_species_folder_name": org_species_folder_name, "org2_full_name": org2_full_name, "org2_strain": org2_strain, "org2_sex": org2_sex, @@ -1398,7 +1418,7 @@ if __name__ == "__main__": BLASTP_FILE_ORG1 = "0" BLASTP_FILE_ORG2 = "1" LOAD_BLASTP_FILE_ORG1 = "2" - LOAD_BLASTP_FILE_ORG1 = "3" + LOAD_BLASTP_FILE_ORG2 = "3" WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4" WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5" @@ -1647,7 +1667,7 @@ if __name__ == "__main__": "org2_genus": org2_genus, "org2_species": org2_species, "org2_genus_species": org2_genus_species, - "org2_species_folder_name": orgé_species_folder_name, + "org2_species_folder_name": org_species_folder_name, "org2_full_name": org2_full_name, "org2_strain": org2_strain, "org2_sex": org2_sex, diff --git a/speciesData.py b/speciesData.py index cae48c4..1e2a9f3 100755 --- a/speciesData.py +++ b/speciesData.py @@ -9,6 +9,27 @@ import constants from _datetime import datetime +def clean_string(string): + if not string is None and string != "": + cleaned_string = string \ + .replace(" ", "_") \ + .replace("-", "_") \ + .replace("(", "") \ + .replace(")", "") \ + .replace("'", "").strip() + return cleaned_string + else: + return string + +def set_service_attribute(service, parameters_dictionary): + parameters_dictionary_services = parameters_dictionary[constants.ORG_PARAM_SERVICES] + service_value = "0" + if (service in parameters_dictionary_services.keys() + and parameters_dictionary_services[service] is not None + and parameters_dictionary_services[service] != ""): + service_value = parameters_dictionary_services[service] + return service_value + class SpeciesData: """ This class contains attributes and functions to interact with the galaxy container of the GGA environment @@ -40,38 +61,17 @@ class SpeciesData: sys.exit(0) return 1 - def clean_string(self, string): - if not string is None and string != "": - clean_string = string\ - .replace(" ", "_")\ - .replace("-", "_")\ - .replace("(", "")\ - .replace(")", "")\ - .replace("'", "").strip() - return clean_string - else: - return string - - def set_service_attribute(self, service, parameters_dictionary): - parameters_dictionary_services = parameters_dictionary[constants.ORG_PARAM_SERVICES] - service_value = "0" - if(service in parameters_dictionary_services.keys() - and parameters_dictionary_services[service] is not None - and parameters_dictionary_services[service] != ""): - service_value = parameters_dictionary_services[service] - return service_value - def __init__(self, parameters_dictionary): self.parameters_dictionary = parameters_dictionary self.name = parameters_dictionary[constants.ORG_PARAM_NAME] parameters_dictionary_description=parameters_dictionary[constants.ORG_PARAM_DESC] parameters_dictionary_data = parameters_dictionary[constants.ORG_PARAM_DATA] - self.species = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES]) - self.genus = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS]) - self.strain = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN]) - self.sex = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX]) - self.common_name = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME]) + self.species = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES]) + self.genus = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS]) + self.strain = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN]) + self.sex = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX]) + self.common_name = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME]) self.date = datetime.today().strftime("%Y-%m-%d") self.origin = parameters_dictionary_description[constants.ORG_PARAM_DESC_ORIGIN] @@ -96,8 +96,8 @@ class SpeciesData: self.blastx_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTX_PATH] self.orthofinder_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_ORTHOFINDER_PATH] - self.blast = self.set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary) - self.go = self.set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary) + self.blast = set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary) + self.go = set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary) self.genus_lowercase = self.genus.lower() self.species_lowercase = self.species.lower() @@ -123,31 +123,20 @@ class SpeciesData: self.genome_filename = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version) self.gff_filename = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, constants.DATA_DATE) - self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.TRANSCRIPTS_FILENAME_SUFFIX) + self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_TRANSCRIPTS) self.proteins_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_PROTEINS) self.interpro_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_INTERPRO) self.blastp_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTP) self.blastx_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTX) self.orthofinder_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_ORTHOFINDER) - # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library - self.org_id = None - self.genome_analysis_id = None - self.ogs_analysis_id = None - self.instance_url = None - self.instance = None - self.history_id = None - self.library = None - self.library_id = None - self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) self.main_dir = None self.species_dir = None - - self.tool_panel = None - self.source_files = dict() - self.workflow_name = None - self.metadata = dict() - self.api_key = None # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions - self.datasets = dict() self.config = None # Custom config used to set environment variables inside containers + + # # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library + self.instance_url = None + self.instance = None + self.history_id = None + self.library_id = None diff --git a/utilities.py b/utilities.py index d1bb089..14d8510 100755 --- a/utilities.py +++ b/utilities.py @@ -84,14 +84,16 @@ def no_empty_items(li): empty = False return empty +def check_wf_param(full_name, params): + + if not no_empty_items(params): + logging.critical( + "One empty workflow parameter found for organism {0}: {1})".format(full_name, params)) + sys.exit() + def check_galaxy_state(network_name, script_dir): """ Read the logs of the galaxy container for the current species to check if the service is "ready" - - :param genus_lowercase: - :param species: - :param script_dir: - :return: """ # Run supervisorctl status in the galaxy container via serexec @@ -207,10 +209,10 @@ def run_tool(instance, tool_id, history_id, tool_inputs): return output_dict -def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep): +def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep = 0): output_dict = run_tool(instance, tool_id, history_id, tool_inputs) - if (not time_sleep is None): + if not time_sleep is None: time.sleep(time_sleep) single_output_dataset_id = output_dict["outputs"][0]["id"] dataset = instance.datasets.download_dataset(dataset_id=single_output_dataset_id) -- GitLab