diff --git a/phaeoexplorer_constants.py b/phaeo_constants.py
similarity index 71%
rename from phaeoexplorer_constants.py
rename to phaeo_constants.py
index 2773e2ba468adcce44ebbc083f2b535166d4029e..1cca9b60a3c9a814cf12649829f1add8a929289c 100644
--- a/phaeoexplorer_constants.py
+++ b/phaeo_constants.py
@@ -4,6 +4,7 @@ import constants
 
 ### Workflows
 
+# WARNING: The step/dataset IDs below depend on how the workflow is organized (i.e. the order of its steps/datasets); if a step fails, check the IDs against the .ga file
 WORKFLOWS_PATH = "workflows_phaeoexplorer/"
 
 WF_LOAD_GFF_JB = "load_fasta_gff_jbrowse"
@@ -20,6 +21,25 @@ WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC = "7"
 WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS = "8"
 WF_LOAD_GFF_JB_1_ORG_STEP_INDEX = "9"
 
+WF_LOAD_GFF_JB_2_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1 = "0"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1 = "1"
+WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1 = "2"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2 = "3"
+WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2 = "4"
+WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2 = "5"
+WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1 = "6"
+WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1 = "7"
+WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2 = "8"
+WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1 = "9"
+WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER = "10"
+WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG1 = "11"
+WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2 = "12"
+WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2 = "13"
+WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG2 = "14"
+WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "15"
+WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "16"
+
 WORKFLOW_BLAST = "blast"
 WORKFLOW_INTERPRO = "interpro"
 WORKFLOW_VALID_TYPES = [WF_LOAD_GFF_JB, WORKFLOW_BLAST, WORKFLOW_INTERPRO]
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 60c0ed3803231a59389d40c6638cb65cec676052..94c647749370db8a29b16902393cad2f80620a49 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -16,7 +16,7 @@ from bioblend import galaxy
 import utilities
 import speciesData
 import constants
-import phaeoexplorer_constants
+import phaeo_constants
 
 """
 gga_init.py
@@ -26,13 +26,16 @@ Usage: $ python3 gga_init.py -i input_example.yml --config [config file] [OPTION
 
 class StrainWorkflowParam:
 
-    def __init__(self, genus_species, strain_sex, genus, species, sex, strain, full_name, species_folder_name, org_id,
+    def __init__(self, genus_species, strain_sex, genus, genus_uppercase, species, chado_species_name, sex,
+                 strain, full_name, species_folder_name, org_id,
                  genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, history_id,
                  instance, instance_url, email, password):
         self.genus_species = genus_species
         self.strain_sex = strain_sex
         self.genus = genus
+        self.genus_uppercase = genus_uppercase
         self.species = species
+        self.chado_species_name = chado_species_name
         self.full_name = full_name
         self.species_folder_name = species_folder_name
         self.sex = sex
@@ -55,21 +58,24 @@ class StrainWorkflowParam:
                 sys.exit()
 
     def check_param_for_workflow_load_fasta_gff_jbrowse(self, params):
-        params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
+        params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
+                  self.sex, self.strain, self.chado_species_name, self.full_name,
                   self.species_folder_name, self.org_id, self.genome_analysis_id, self.ogs_analysis_id,
                   self.history_id, self.instance, self.instance_url, self.email,
                  self.password]
         self.check_param(params)
 
     def check_param_for_workflow_blast(self, params):
-        params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
+        params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
+                  self.sex, self.strain, self.chado_species_name, self.full_name,
                   self.species_folder_name, self.org_id, self.blastp_analysis_id,
                   self.history_id, self.instance, self.instance_url, self.email,
                   self.password]
         self.check_param(params)
 
     def check_param_for_workflow_interpro(self, params):
-        params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
+        params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
+                  self.sex, self.strain, self.chado_species_name, self.full_name,
                   self.species_folder_name, self.org_id, self.interpro_analysis_id,
                   self.history_id, self.instance, self.instance_url, self.email,
                   self.password]
@@ -172,50 +178,50 @@ class RunWorkflow(speciesData.SpeciesData):
 
         # Verify that the add_organism and add_analysis versions are correct in the instance
-        add_organism_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID)
-        add_analysis_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID)
-        get_organisms_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID)
-        get_analyses_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ANALYSES_TOOL_ID)
-        analysis_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID)
-        organism_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID)
+        add_organism_tool = self.instance.tools.show_tool(phaeo_constants.ADD_ORGANISM_TOOL_ID)
+        add_analysis_tool = self.instance.tools.show_tool(phaeo_constants.ADD_ANALYSIS_TOOL_ID)
+        get_organisms_tool = self.instance.tools.show_tool(phaeo_constants.GET_ORGANISMS_TOOL_ID)
+        get_analyses_tool = self.instance.tools.show_tool(phaeo_constants.GET_ANALYSES_TOOL_ID)
+        analysis_sync_tool = self.instance.tools.show_tool(phaeo_constants.ANALYSIS_SYNC_TOOL_ID)
+        organism_sync_tool = self.instance.tools.show_tool(phaeo_constants.ORGANISM_SYNC_TOOL_ID)
 
         # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend
         # except for workflows (.ga) that already contain the changeset revisions inside the steps ids
         utilities.install_repository_revision(current_version=get_organisms_tool["version"],
                                               toolshed_dict=get_organisms_tool["tool_shed_repository"],
-                                              version_to_install=phaeoexplorer_constants.GET_ORGANISMS_TOOL_VERSION,
-                                              changeset_revision=phaeoexplorer_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeo_constants.GET_ORGANISMS_TOOL_VERSION,
+                                              changeset_revision=phaeo_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=get_analyses_tool["version"],
                                               toolshed_dict=get_analyses_tool["tool_shed_repository"],
-                                              version_to_install=phaeoexplorer_constants.GET_ANALYSES_TOOL_VERSION,
-                                              changeset_revision=phaeoexplorer_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeo_constants.GET_ANALYSES_TOOL_VERSION,
+                                              changeset_revision=phaeo_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=add_organism_tool["version"],
                                              toolshed_dict=add_organism_tool["tool_shed_repository"],
-                                              version_to_install=phaeoexplorer_constants.ADD_ORGANISM_TOOL_VERSION,
-                                              changeset_revision=phaeoexplorer_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeo_constants.ADD_ORGANISM_TOOL_VERSION,
+                                              changeset_revision=phaeo_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=add_analysis_tool["version"],
                                               toolshed_dict=add_analysis_tool["tool_shed_repository"],
-                                              version_to_install=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_VERSION,
-                                              changeset_revision=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeo_constants.ADD_ANALYSIS_TOOL_VERSION,
+                                              changeset_revision=phaeo_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=analysis_sync_tool["version"],
                                               toolshed_dict=analysis_sync_tool["tool_shed_repository"],
-                                              version_to_install=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_VERSION,
-                                              changeset_revision=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeo_constants.ANALYSIS_SYNC_TOOL_VERSION,
+                                              changeset_revision=phaeo_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         utilities.install_repository_revision(current_version=organism_sync_tool["version"],
                                               toolshed_dict=organism_sync_tool["tool_shed_repository"],
-                                              version_to_install=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_VERSION,
-                                              changeset_revision=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
+                                              version_to_install=phaeo_constants.ORGANISM_SYNC_TOOL_VERSION,
+                                              changeset_revision=phaeo_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
                                               instance=self.instance)
 
         logging.info("Success: individual tools versions and changesets validated")
@@ -224,13 +230,13 @@ class RunWorkflow(speciesData.SpeciesData):
 
         add_analysis_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
             instance=self.instance,
-            tool_id=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID,
+            tool_id=phaeo_constants.ADD_ANALYSIS_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={"name": name,
-                         "program": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM,
+                         "program": phaeo_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM,
                          "programversion": programversion,
                          "sourcename": sourcename,
-                         "date_executed": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_DATE})
+                         "date_executed": phaeo_constants.ADD_ANALYSIS_TOOL_PARAM_DATE})
 
         analysis_dict = json.loads(add_analysis_tool_dataset)
         analysis_id = str(analysis_dict["analysis_id"])
@@ -241,7 +247,7 @@ class RunWorkflow(speciesData.SpeciesData):
         time.sleep(60)
         utilities.run_tool(
             instance=self.instance,
-            tool_id=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID,
+            tool_id=phaeo_constants.ANALYSIS_SYNC_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={"analysis_id": analysis_id})
 
@@ -249,7 +255,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         get_organisms_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
             instance=self.instance,
-            tool_id=phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID,
+            tool_id=phaeo_constants.GET_ORGANISMS_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={},
             time_sleep=10
@@ -266,7 +272,7 @@ class RunWorkflow(speciesData.SpeciesData):
         if org_id is None:
             add_organism_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
                 instance=self.instance,
-                tool_id=phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID,
+                tool_id=phaeo_constants.ADD_ORGANISM_TOOL_ID,
                history_id=self.history_id,
                 tool_inputs={"abbr": self.abbreviation,
                              "genus": self.genus_uppercase,
@@ -280,7 +286,7 @@ class RunWorkflow(speciesData.SpeciesData):
         time.sleep(60)
         utilities.run_tool(
             instance=self.instance,
-            tool_id=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID,
+            tool_id=phaeo_constants.ORGANISM_SYNC_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={"organism_id": org_id})
 
@@ -290,7 +296,7 @@ class RunWorkflow(speciesData.SpeciesData):
 
         get_analyses_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
             instance=self.instance,
-            tool_id=phaeoexplorer_constants.GET_ANALYSES_TOOL_ID,
+            tool_id=phaeo_constants.GET_ANALYSES_TOOL_ID,
             history_id=self.history_id,
             tool_inputs={},
             time_sleep=10
@@ -718,7 +724,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
     history_id = run_workflow_for_current_organism.set_history()
     run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
 
-    if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB:
+    if workflow_type == phaeo_constants.WF_LOAD_GFF_JB:
 
         analyses_dict_list = run_workflow_for_current_organism.get_analyses()
 
@@ -743,11 +749,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
             genus_species=run_workflow_for_current_organism.genus_species,
             strain_sex=run_workflow_for_current_organism.strain_sex,
             genus=run_workflow_for_current_organism.genus,
+            genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
             species=species,
             full_name=run_workflow_for_current_organism.full_name,
             species_folder_name=run_workflow_for_current_organism.species_folder_name,
             sex=run_workflow_for_current_organism.sex,
             strain=run_workflow_for_current_organism.strain,
+            chado_species_name=run_workflow_for_current_organism.chado_species_name,
             org_id=org_id,
             genome_analysis_id=genome_analysis_id,
             ogs_analysis_id=ogs_analysis_id,
@@ -779,11 +787,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
             genus_species=run_workflow_for_current_organism.genus_species,
             strain_sex=run_workflow_for_current_organism.strain_sex,
             genus=run_workflow_for_current_organism.genus,
+            genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
             species=species,
             full_name=run_workflow_for_current_organism.full_name,
             species_folder_name=run_workflow_for_current_organism.species_folder_name,
             sex=run_workflow_for_current_organism.sex,
             strain=run_workflow_for_current_organism.strain,
+            chado_species_name=run_workflow_for_current_organism.chado_species_name,
             org_id=org_id,
             blastp_analysis_id=genome_analysis_id,
             genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
@@ -814,11 +824,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
             genus_species=run_workflow_for_current_organism.genus_species,
             strain_sex=run_workflow_for_current_organism.strain_sex,
             genus=run_workflow_for_current_organism.genus,
+            genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
             species=species,
             full_name=run_workflow_for_current_organism.full_name,
             species_folder_name=run_workflow_for_current_organism.species_folder_name,
             sex=run_workflow_for_current_organism.sex,
             strain=run_workflow_for_current_organism.strain,
+            chado_species_name=run_workflow_for_current_organism.chado_species_name,
             org_id=org_id,
             interpro_analysis_id=interpro_analysis_id,
             genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
@@ -858,36 +870,19 @@ def install_changesets_revisions_from_workflow(instance, workflow_path):
         workflow_dict = json.load(ga_in_file)
 
         # Look up every "step_id" looking for tools
"step_id" looking for tools - for k, v in workflow_dict["steps"].items(): - if v["tool_id"]: + for step in workflow_dict["steps"].values(): + if step["tool_id"]: # Get the descriptive dictionary of the installed tool (using the tool id in the workflow) - show_tool = instance.tools.show_tool(v["tool_id"]) + show_tool = instance.tools.show_tool(step["tool_id"]) # Check if an installed version matches the workflow tool version # (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0") - if show_tool["version"] != v["tool_version"]: - # If it doesn't match, proceed to install of the correct changeset revision - toolshed = "https://" + v["tool_shed_repository"]["tool_shed"] - name = v["tool_shed_repository"]["name"] - owner = v["tool_shed_repository"]["owner"] - changeset_revision = v["tool_shed_repository"]["changeset_revision"] - - logging.warning("Installed tool versions for tool {0} do not match the version required by the specified workflow, installing changeset {1}".format(name, changeset_revision)) - - # Install changeset - instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner, - changeset_revision=changeset_revision, - install_tool_dependencies=True, - install_repository_dependencies=False, - install_resolver_dependencies=True) - else: - toolshed = "https://" + v["tool_shed_repository"]["tool_shed"] - name = v["tool_shed_repository"]["name"] - owner = v["tool_shed_repository"]["owner"] - changeset_revision = v["tool_shed_repository"]["changeset_revision"] - logging.debug("Installed tool versions for tool {0} match the version in the specified workflow (changeset {1})".format(name, changeset_revision)) - - logging.info("Tools versions and changesets from workflow validated") + utilities.install_repository_revision(current_version=show_tool["version"], + toolshed_dict=show_tool["tool_shed_repository"], + version_to_install=step["tool_version"], + changeset_revision=step["tool_shed_repository"]["changeset_revision"], + instance=instance) + logging.info("Tools versions and changeset_revisions from workflow validated") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run Galaxy workflows, specific to Phaeoexplorer data") @@ -942,14 +937,14 @@ if __name__ == "__main__": if not args.workflow: logging.critical("No workflow type specified, exiting") sys.exit() - elif args.workflow in phaeoexplorer_constants.WORKFLOW_VALID_TYPES: + elif args.workflow in phaeo_constants.WORKFLOW_VALID_TYPES: workflow_type = args.workflow logging.info("Workflow type set to '%s'" % workflow_type) script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) all_sp_workflow_dict = {} - if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB: + if workflow_type == phaeo_constants.WF_LOAD_GFF_JB: for sp_dict in sp_dict_list: # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary @@ -957,7 +952,7 @@ if __name__ == "__main__": sp_dict, main_dir=main_dir, config=config, - workflow_type=phaeoexplorer_constants.WF_LOAD_GFF_JB) + workflow_type=phaeo_constants.WF_LOAD_GFF_JB) current_sp_genus_species = sp_workflow_param.genus_species current_sp_strain_sex = sp_workflow_param.strain_sex @@ -982,55 +977,35 @@ if __name__ == "__main__": sp_workflow_param = strains[strain_sex] # Set workflow path (1 organism) - workflow_path = os.path.join(os.path.abspath(script_dir), phaeoexplorer_constants.WORKFLOWS_PATH, phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_FILE) + 
 
                 # Check if the versions of tools specified in the workflow are installed in galaxy
                 install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_workflow_param.instance)
 
-                history_id = sp_workflow_param.history_id
-
-                # Organism 1 attributes
-                org_genus = sp_workflow_param.genus
-                org_species = sp_workflow_param.species
-                org_genus_species = sp_workflow_param.genus_species
-                org_species_folder_name = sp_workflow_param.species_folder_name
-                org_full_name = sp_workflow_param.full_name
-                org_strain = sp_workflow_param.strain
-                org_sex = sp_workflow_param.strain
-                org_org_id = sp_workflow_param.org_id
-                org_genome_analysis_id = sp_workflow_param.genome_analysis_id
-                org_ogs_analysis_id = sp_workflow_param.ogs_analysis_id
-                org_genome_hda_id = sp_workflow_param.genome_hda_id
-                org_transcripts_hda_id = sp_workflow_param.transcripts_hda_id
-                org_proteins_hda_id = sp_workflow_param.proteins_hda_id
-                org_gff_hda_id = sp_workflow_param.gff_hda_id
-
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
 
-                # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
-                    "organism": org_org_id,
-                    "analysis_id": org_genome_analysis_id,
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
+                    "organism": sp_workflow_param.org_id,
+                    "analysis_id": sp_workflow_param.genome_analysis_id,
                     "do_update": "true"}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
-                    "organism": org_org_id,
-                    "analysis_id": org_ogs_analysis_id}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC] = {
-                    "organism_id": org_org_id}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
-                workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
+                    "organism": sp_workflow_param.org_id,
+                    "analysis_id": sp_workflow_param.ogs_analysis_id}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC] = {
+                    "organism_id": sp_workflow_param.org_id}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
 
                 # Set datamap (mapping of input files in the workflow)
                 datamap = {}
-
-                datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": org_genome_hda_id}
-                datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": org_gff_hda_id}
-                datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": org_proteins_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": sp_workflow_param.genome_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": sp_workflow_param.gff_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": sp_workflow_param.proteins_hda_id}
 
                 with open(workflow_path, 'r') as ga_in_file:
@@ -1041,218 +1016,122 @@ if __name__ == "__main__":
 
                     # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
                     # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
                     # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-                    jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
-                    if "jbrowse_menu_url" not in config.keys():
-                        jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
+                    if constants.CONF_JBROWSE_MENU_URL not in config.keys():
+                        # default
+                        root_url = "https://{0}".format(config[constants.CONF_ALL_HOSTNAME])
                     else:
-                        jbrowse_menu_url_org = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
+                        root_url = config[constants.CONF_JBROWSE_MENU_URL]
+                    species_strain_sex = sp_workflow_param.chado_species_name.replace(" ", "-")
+                    jbrowse_menu_url = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format(
+                        root_url=root_url,
+                        genus_sp=sp_workflow_param.genus_species,
+                        Genus=sp_workflow_param.genus_uppercase,
+                        species_strain_sex=species_strain_sex,
+                        id="{id}")
 
                     # Replace values in the workflow dictionary
-                    workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org)
-                    workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name)
+                    jbrowse_tool_state = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE]["tool_state"]
+                    jbrowse_tool_state = jbrowse_tool_state.replace("__MENU_URL_ORG__", jbrowse_menu_url)
+                    workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE]["tool_state"] = jbrowse_tool_state
+                    jb_to_container_tool_state = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER]["tool_state"]
+                    jb_to_container_tool_state = jb_to_container_tool_state\
+                        .replace("__DISPLAY_NAME_ORG__", sp_workflow_param.full_name)\
+                        .replace("__UNIQUE_ID_ORG__", sp_workflow_param.species_folder_name)
+                    workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER]["tool_state"] = jb_to_container_tool_state
 
                     # Import the workflow in galaxy as a dict
-                    instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+                    sp_workflow_param.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
 
                     # Get its attributes
-                    workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                    workflow_attributes = sp_workflow_param.instance.workflows.get_workflows(name=workflow_name)
                     # Then get its ID (required to invoke the workflow)
                     workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                    show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                    logging.debug("Workflow ID: %s" % workflow_id)
 
                     # Check if the workflow is found
                     try:
-                        logging.debug("Workflow ID: %s" % workflow_id)
+                        show_workflow = sp_workflow_param.instance.workflows.show_workflow(workflow_id=workflow_id)
                     except bioblend.ConnectionError:
                         logging.warning("Error finding workflow %s" % workflow_name)
 
-                    # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-                    instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-
-                    logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
+                    # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                    sp_workflow_param.instance.workflows.invoke_workflow(
+                        workflow_id=workflow_id,
+                        history_id=sp_workflow_param.history_id,
+                        params=workflow_parameters,
+                        inputs=datamap,
+                        allow_tool_state_corrections=True)
+                    logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, sp_workflow_param.instance_url))
 
-            if len(list(strains.keys())) == 2:
+            if strains_count == 2:
                 logging.info("Input organism %s: 2 species detected in input dictionary" % species)
+                strain_sex_org1 = list(strains.keys())[0]
+                strain_sex_org2 = list(strains.keys())[1]
+                sp_workflow_param_org1 = strains[strain_sex_org1]
+                sp_workflow_param_org2 = strains[strain_sex_org2]
 
                 # Set workflow path (2 organisms)
-                workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga")
-
-                # Instance object required variables
-                instance_url, email, password = None, None, None
-
-                # Set the galaxy instance variables
-                for k2, v2 in strains.items():
-                    instance_url = v2["instance_url"]
-                    email = v2["email"]
-                    password = v2["password"]
-
-                instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
+                workflow_path = os.path.join(os.path.abspath(script_dir), phaeo_constants.WORKFLOWS_PATH, phaeo_constants.WF_LOAD_GFF_JB_2_ORG_FILE)
 
                 # Check if the versions of tools specified in the workflow are installed in galaxy
-                install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
-
-                # Get key names from the current organism (item 1 = organism 1, item 2 = organism 2)
-                organisms_key_names = list(strains.keys())
-                org1_dict = strains[organisms_key_names[0]]
-                org2_dict = strains[organisms_key_names[1]]
-
-                history_id = org1_dict["history_id"]
-
-                # Organism 1 attributes
-                org1_genus = org1_dict["genus"]
-                org1_species = org1_dict["species"]
-                org1_genus_species = org1_dict["genus_species"]
-                org1_species_folder_name = org1_dict["species_folder_name"]
-                org1_full_name = org1_dict["full_name"]
-                org1_strain = org1_dict["sex"]
-                org1_sex = org1_dict["strain"]
-                org1_org_id = org1_dict["org_id"]
-                org1_genome_analysis_id = org1_dict["genome_analysis_id"]
-                org1_ogs_analysis_id = org1_dict["ogs_analysis_id"]
-                org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"]
-                org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"]
-                org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"]
-                org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"]
-
-                # Store these values into a dict for parameters logging/validation
-                org1_parameters_dict = {
-                    "org1_genus": org1_genus,
-                    "org1_species": org1_species,
-                    "org1_genus_species": org1_genus_species,
-                    "org1_species_folder_name": org1_species_folder_name,
-                    "org1_full_name": org1_full_name,
-                    "org1_strain": org1_strain,
-                    "org1_sex": org1_sex,
-                    "org1_org_id": org1_org_id,
-                    "org1_genome_analysis_id": org1_genome_analysis_id,
-                    "org1_ogs_analysis_id": org1_ogs_analysis_id,
-                    "org1_genome_hda_id": org1_genome_hda_id,
-                    "org1_transcripts_hda_id": org1_transcripts_hda_id,
-                    "org1_proteins_hda_id": org1_proteins_hda_id,
-                    "org1_gff_hda_id": org1_gff_hda_id,
-                }
-
-                # Look for empty parameters values, throw a critical error if a parameter value is invalid
-                for param_name, param_value in org1_parameters_dict.items():
-                    if param_value is None or param_value == "":
-                        logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value))
-                        sys.exit()
-
-                # Organism 2 attributes
-                org2_genus = org2_dict["genus"]
-                org2_species = org2_dict["species"]
-                org2_genus_species = org2_dict["genus_species"]
-                org2_species_folder_name = org2_dict["species_folder_name"]
-                org2_full_name = org2_dict["full_name"]
-                org2_strain = org2_dict["sex"]
-                org2_sex = org2_dict["strain"]
-                org2_org_id = org2_dict["org_id"]
-                org2_genome_analysis_id = org2_dict["genome_analysis_id"]
-                org2_ogs_analysis_id = org2_dict["ogs_analysis_id"]
-                org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"]
-                org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"]
-                org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"]
-                org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"]
-
-                # Store these values into a dict for parameters logging/validation
-                org2_parameters_dict = {
-                    "org2_genus": org2_genus,
-                    "org2_species": org2_species,
-                    "org2_genus_species": org2_genus_species,
-                    "org2_species_folder_name": org2_species_folder_name,
-                    "org2_full_name": org2_full_name,
-                    "org2_strain": org2_strain,
-                    "org2_sex": org2_sex,
-                    "org2_org_id": org2_org_id,
-                    "org2_genome_analysis_id": org2_genome_analysis_id,
-                    "org2_ogs_analysis_id": org2_ogs_analysis_id,
-                    "org2_genome_hda_id": org2_genome_hda_id,
-                    "org2_transcripts_hda_id": org2_transcripts_hda_id,
-                    "org2_proteins_hda_id": org2_proteins_hda_id,
-                    "org2_gff_hda_id": org2_gff_hda_id,
-                }
-
-                # Look for empty parameters values, throw a critical error if a parameter value is invalid
-                for param_name, param_value in org2_parameters_dict.items():
-                    if param_value is None or param_value == "":
-                        logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
-                        sys.exit()
-
-                # Source files association (ordered by their IDs in the workflow)
-                # WARNING: Be very careful about how the workflow is "organized" (i.e the order of the steps/datasets, check the .ga if there is any error)
-                GFF_FILE_ORG1 = "0"
-                GENOME_FASTA_FILE_ORG1 = "1"
-                PROTEINS_FASTA_FILE_ORG1 = "2"
-
-                GENOME_FASTA_FILE_ORG2 = "3"
-                GFF_FILE_ORG2 = "4"
-                PROTEINS_FASTA_FILE_ORG2 = "5"
-
-                LOAD_FASTA_ORG1 = "6"
-                JBROWSE_ORG1 = "7"
-                JRBOWSE_ORG2 = "8"
-
-                LOAD_GFF_ORG1 = "9"
-                JBROWSE_CONTAINER = "10"
-                SYNC_FEATURES_ORG1 = "11"
-
-                LOAD_FASTA_ORG2 = "12"
-                LOAD_GFF_ORG2 = "13"
-
-                SYNC_FEATURES_ORG2 = "14"
-                POPULATE_MAT_VIEWS = "15"
-                INDEX_TRIPAL_DATA = "16"
+                install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_workflow_param_org1.instance)
 
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
 
                 # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
-                workflow_parameters[GENOME_FASTA_FILE_ORG1] = {}
-                workflow_parameters[GFF_FILE_ORG1] = {}
-                workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {}
-                workflow_parameters[GENOME_FASTA_FILE_ORG2] = {}
-                workflow_parameters[GFF_FILE_ORG2] = {}
-                workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {}
 
                 # Organism 1
-                workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id,
-                                                        "analysis_id": org1_genome_analysis_id,
-                                                        "do_update": "true"}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1] = {
+                    "organism": sp_workflow_param_org1.org_id,
+                    "analysis_id": sp_workflow_param_org1.genome_analysis_id,
+                    "do_update": "true"}
                 # workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1}
-                workflow_parameters[JBROWSE_ORG1] = {}
-                workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id}
-                workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1] = {
+                    "organism": sp_workflow_param_org1.org_id,
+                    "analysis_id": sp_workflow_param_org1.ogs_analysis_id}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG1] = {
+                    "organism_id": sp_workflow_param_org1.org_id}
                 # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]}
-                workflow_parameters[JBROWSE_CONTAINER] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {}
 
                 # Organism 2
-                workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id,
-                                                        "analysis_id": org2_genome_analysis_id,
-                                                        "do_update": "true"}
-                workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2] = {
+                    "organism": sp_workflow_param_org2.org_id,
+                    "analysis_id": sp_workflow_param_org2.genome_analysis_id,
+                    "do_update": "true"}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2] = {
+                    "organism": sp_workflow_param_org2.org_id,
+                    "analysis_id": sp_workflow_param_org2.ogs_analysis_id}
                 # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2}
-                workflow_parameters[JRBOWSE_ORG2] = {}
-                workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id}
-
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG2] = {
+                    "organism_id": sp_workflow_param_org2.org_id}
 
                 # POPULATE + INDEX DATA
-                workflow_parameters[POPULATE_MAT_VIEWS] = {}
-                workflow_parameters[INDEX_TRIPAL_DATA] = {}
-
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
+                workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
 
                 # Set datamap (mapping of input files in the workflow)
                 datamap = {}
 
                 # Organism 1
-                datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id}
-                datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id}
-                datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": sp_workflow_param_org1.genome_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": sp_workflow_param_org1.gff_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": sp_workflow_param_org1.proteins_hda_id}
 
                 # Organism 2
-                datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id}
-                datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id}
-                datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": sp_workflow_param_org2.genome_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": sp_workflow_param_org2.gff_hda_id}
+                datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": sp_workflow_param_org2.proteins_hda_id}
 
                 with open(workflow_path, 'r') as ga_in_file:
@@ -1385,15 +1262,15 @@ if __name__ == "__main__":
 
                 BLASTP_FILE = "0"
                 LOAD_BLASTP_FILE = "1"
-                POPULATE_MAT_VIEWS = "2"
-                INDEX_TRIPAL_DATA = "3"
+                WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "2"
+                WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "3"
 
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
                 workflow_parameters[BLASTP_FILE] = {}
                 workflow_parameters[LOAD_BLASTP_FILE] = {"analysis_id": org_blastp_analysis_id, "organism_id": org_org_id}
-                workflow_parameters[POPULATE_MAT_VIEWS] = {}
-                workflow_parameters[INDEX_TRIPAL_DATA] = {}
+                workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
+                workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
 
                 datamap = {}
                 datamap[BLASTP_FILE] = {"src": "hda", "id": org_blastp_hda_id}
@@ -1522,8 +1399,8 @@ if __name__ == "__main__":
                 BLASTP_FILE_ORG2 = "1"
                 LOAD_BLASTP_FILE_ORG1 = "2"
                 LOAD_BLASTP_FILE_ORG1 = "3"
-                POPULATE_MAT_VIEWS = "4"
-                INDEX_TRIPAL_DATA = "5"
+                WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4"
+                WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5"
 
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
@@ -1540,8 +1417,8 @@ if __name__ == "__main__":
                 workflow_parameters[LOAD_BLASTP_FILE_ORG2] = {"organism_id": org2_org_id,
                                                               "analysis_id": org2_blastp_analysis_id}
-                workflow_parameters[POPULATE_MAT_VIEWS] = {}
-                workflow_parameters[INDEX_TRIPAL_DATA] = {}
+                workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
+                workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
 
                 # Set datamap (mapping of input files in the workflow)
                 datamap = {}
@@ -1655,15 +1532,15 @@ if __name__ == "__main__":
 
                 INTEPRO_FILE = "0"
                 LOAD_INTERPRO_FILE = "1"
-                POPULATE_MAT_VIEWS = "2"
-                INDEX_TRIPAL_DATA = "3"
+                WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "2"
+                WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "3"
 
                 # Set the workflow parameters (individual tools runtime parameters in the workflow)
                 workflow_parameters = {}
                 workflow_parameters[INTEPRO_FILE] = {}
                 workflow_parameters[LOAD_INTERPRO_FILE] = {"analysis_id": org_inteproscan_analysis_id, "organism_id": org_org_id}
= {"analysis_id": org_inteproscan_analysis_id, "organism_id": org_org_id} - workflow_parameters[POPULATE_MAT_VIEWS] = {} - workflow_parameters[INDEX_TRIPAL_DATA] = {} + workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {} + workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {} datamap = {} datamap[INTEPRO_FILE] = {"src": "hda", "id": org_interproscan_hda_id} @@ -1792,8 +1669,8 @@ if __name__ == "__main__": INTERPRO_FILE_ORG2 = "1" LOAD_INTERPRO_FILE_ORG1 = "2" LOAD_INTERPRO_FILE_ORG2 = "3" - POPULATE_MAT_VIEWS = "4" - INDEX_TRIPAL_DATA = "5" + WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4" + WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5" # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} @@ -1810,8 +1687,8 @@ if __name__ == "__main__": workflow_parameters[LOAD_INTERPRO_FILE_ORG2] = {"organism_id": org2_org_id, "analysis_id": org2_interproscan_analysis_id} - workflow_parameters[POPULATE_MAT_VIEWS] = {} - workflow_parameters[INDEX_TRIPAL_DATA] = {} + workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {} + workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {} # Set datamap (mapping of input files in the workflow) datamap = {} diff --git a/speciesData.py b/speciesData.py index 6cc5bb0e2941ec83e0dfd002080d39086b46b990..cae48c4f11eb491ca1c54bfb0e940409ff581856 100755 --- a/speciesData.py +++ b/speciesData.py @@ -42,7 +42,12 @@ class SpeciesData: def clean_string(self, string): if not string is None and string != "": - clean_string = string.replace(" ", "_").replace("-", "_").replace("(", "").replace(")", "").replace("'", "").strip() + clean_string = string\ + .replace(" ", "_")\ + .replace("-", "_")\ + .replace("(", "")\ + .replace(")", "")\ + .replace("'", "").strip() return clean_string else: return string