Commit 0cffb203 authored by Loraine Gueguen

Refactor run_wf (WIP)

parent 49f5add2
2 merge requests: !24 Run wf, !18 Release v2.1.0
@@ -4,6 +4,7 @@
import constants
### Workflows
# WARNING: Be very careful about how the workflow is "organized" (i.e. the order of the steps/datasets); check the .ga file if there is any error
WORKFLOWS_PATH = "workflows_phaeoexplorer/"
WF_LOAD_GFF_JB = "load_fasta_gff_jbrowse"
@@ -20,6 +21,25 @@ WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC = "7"
WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS = "8"
WF_LOAD_GFF_JB_1_ORG_STEP_INDEX = "9"
WF_LOAD_GFF_JB_2_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga"
WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1 = "0"
WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1 = "1"
WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1 = "2"
WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2 = "3"
WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2 = "4"
WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2 = "5"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1 = "6"
WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1 = "7"
WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2 = "8"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1 = "9"
WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER = "10"
WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG1 = "11"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2 = "12"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2 = "13"
WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG2 = "14"
WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "15"
WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "16"
WORKFLOW_BLAST = "blast"
WORKFLOW_INTERPRO = "interpro"
WORKFLOW_VALID_TYPES = [WF_LOAD_GFF_JB, WORKFLOW_BLAST, WORKFLOW_INTERPRO]
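The step IDs above are plain strings because bioblend keys a workflow's runtime parameters and its input datamap by step ID. A minimal sketch of how the 2-organism constants are meant to be consumed (`instance`, `workflow_id`, `history_id` and the `org1_*` values are hypothetical placeholders, not part of this commit):

```python
import phaeo_constants

# Runtime parameters, keyed by the step IDs defined above.
workflow_parameters = {
    # Input steps take no runtime parameters; their files come from the datamap.
    phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1: {},
    # Tool steps receive their tool's runtime parameters.
    phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1: {
        "organism": org1_org_id,                  # hypothetical placeholder
        "analysis_id": org1_genome_analysis_id,   # hypothetical placeholder
        "do_update": "true",
    },
}

# Input datasets, also keyed by step ID.
datamap = {
    phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1: {"src": "hda", "id": org1_genome_hda_id},
}

# `instance` is a bioblend galaxy.GalaxyInstance; `workflow_id` and
# `history_id` come from the import/setup steps shown later in the diff.
instance.workflows.invoke_workflow(workflow_id=workflow_id,
                                   history_id=history_id,
                                   params=workflow_parameters,
                                   inputs=datamap,
                                   allow_tool_state_corrections=True)
```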
......
@@ -16,7 +16,7 @@ from bioblend import galaxy
import utilities
import speciesData
import constants
import phaeoexplorer_constants
import phaeo_constants
"""
gga_init.py
@@ -26,13 +26,16 @@ Usage: $ python3 gga_init.py -i input_example.yml --config [config file] [OPTION
class StrainWorkflowParam:
def __init__(self, genus_species, strain_sex, genus, species, sex, strain, full_name, species_folder_name, org_id,
def __init__(self, genus_species, strain_sex, genus, genus_uppercase, species, chado_species_name, sex,
strain, full_name, species_folder_name, org_id,
genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, history_id,
instance, instance_url, email, password):
self.genus_species = genus_species
self.strain_sex = strain_sex
self.genus = genus
self.genus_uppercase = genus_uppercase
self.species = species
self.chado_species_name = chado_species_name
self.full_name = full_name
self.species_folder_name = species_folder_name
self.sex = sex
@@ -55,21 +58,24 @@ class StrainWorkflowParam:
sys.exit()
def check_param_for_workflow_load_fasta_gff_jbrowse(self, params):
params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
self.sex, self.strain, self.chado_species_name, self.full_name,
self.species_folder_name, self.org_id,
self.genome_analysis_id, self.ogs_analysis_id, self.history_id,
self.instance, self.instance_url, self.email, self.password]
self.check_param(params)
def check_param_for_workflow_blast(self, params):
params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
self.sex, self.strain, self.chado_species_name, self.full_name,
self.species_folder_name, self.org_id,
self.blastp_analysis_id, self.history_id,
self.instance, self.instance_url, self.email, self.password]
self.check_param(params)
def check_param_for_workflow_interpro(self, params):
params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name,
params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
self.sex, self.strain, self.chado_species_name, self.full_name,
self.species_folder_name, self.org_id,
self.interpro_analysis_id, self.history_id,
self.instance, self.instance_url, self.email, self.password]
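The shared check_param() helper these three methods call is outside this diff; judging from the empty-value validation loop used further down in this script, a plausible sketch is:

```python
# Hypothetical reconstruction of StrainWorkflowParam.check_param(); the real
# method is not shown in this diff. It mirrors the empty-value loop used later
# in the __main__ block.
def check_param(self, params):
    for param in params:
        if param is None or param == "":
            logging.critical("Empty parameter value found for organism {0} "
                             "(parameter value: {1})".format(self.full_name, param))
            sys.exit()
```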
@@ -172,50 +178,50 @@ class RunWorkflow(speciesData.SpeciesData):
# Verify that the add_organism and add_analysis versions are correct in the instance
add_organism_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID)
add_analysis_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID)
get_organisms_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID)
get_analyses_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ANALYSES_TOOL_ID)
analysis_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID)
organism_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID)
add_organism_tool = self.instance.tools.show_tool(phaeo_constants.ADD_ORGANISM_TOOL_ID)
add_analysis_tool = self.instance.tools.show_tool(phaeo_constants.ADD_ANALYSIS_TOOL_ID)
get_organisms_tool = self.instance.tools.show_tool(phaeo_constants.GET_ORGANISMS_TOOL_ID)
get_analyses_tool = self.instance.tools.show_tool(phaeo_constants.GET_ANALYSES_TOOL_ID)
analysis_sync_tool = self.instance.tools.show_tool(phaeo_constants.ANALYSIS_SYNC_TOOL_ID)
organism_sync_tool = self.instance.tools.show_tool(phaeo_constants.ORGANISM_SYNC_TOOL_ID)
# The changeset for 2.3.4+galaxy0 has to be found manually, because bioblend provides no way to get the desired changeset of a tool that is not installed
# (except for workflows (.ga files), which already contain the changeset revisions inside their step IDs)
utilities.install_repository_revision(current_version=get_organisms_tool["version"],
toolshed_dict=get_organisms_tool["tool_shed_repository"],
version_to_install=phaeoexplorer_constants.GET_ORGANISMS_TOOL_VERSION,
changeset_revision=phaeoexplorer_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
version_to_install=phaeo_constants.GET_ORGANISMS_TOOL_VERSION,
changeset_revision=phaeo_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION,
instance=self.instance)
utilities.install_repository_revision(current_version=get_analyses_tool["version"],
toolshed_dict=get_analyses_tool["tool_shed_repository"],
version_to_install=phaeoexplorer_constants.GET_ANALYSES_TOOL_VERSION,
changeset_revision=phaeoexplorer_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION,
version_to_install=phaeo_constants.GET_ANALYSES_TOOL_VERSION,
changeset_revision=phaeo_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION,
instance=self.instance)
utilities.install_repository_revision(current_version=add_organism_tool["version"],
toolshed_dict=add_organism_tool["tool_shed_repository"],
version_to_install=phaeoexplorer_constants.ADD_ORGANISM_TOOL_VERSION,
changeset_revision=phaeoexplorer_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
version_to_install=phaeo_constants.ADD_ORGANISM_TOOL_VERSION,
changeset_revision=phaeo_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION,
instance=self.instance)
utilities.install_repository_revision(current_version=add_analysis_tool["version"],
toolshed_dict=add_analysis_tool["tool_shed_repository"],
version_to_install=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_VERSION,
changeset_revision=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
version_to_install=phaeo_constants.ADD_ANALYSIS_TOOL_VERSION,
changeset_revision=phaeo_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION,
instance=self.instance)
utilities.install_repository_revision(current_version=analysis_sync_tool["version"],
toolshed_dict=analysis_sync_tool["tool_shed_repository"],
version_to_install=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_VERSION,
changeset_revision=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
version_to_install=phaeo_constants.ANALYSIS_SYNC_TOOL_VERSION,
changeset_revision=phaeo_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION,
instance=self.instance)
utilities.install_repository_revision(current_version=organism_sync_tool["version"],
toolshed_dict=organism_sync_tool["tool_shed_repository"],
version_to_install=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_VERSION,
changeset_revision=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
version_to_install=phaeo_constants.ORGANISM_SYNC_TOOL_VERSION,
changeset_revision=phaeo_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION,
instance=self.instance)
logging.info("Success: individual tools versions and changesets validated")
@@ -224,13 +230,13 @@ class RunWorkflow(speciesData.SpeciesData):
add_analysis_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
instance=self.instance,
tool_id=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID,
tool_id=phaeo_constants.ADD_ANALYSIS_TOOL_ID,
history_id=self.history_id,
tool_inputs={"name": name,
"program": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM,
"program": phaeo_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM,
"programversion": programversion,
"sourcename": sourcename,
"date_executed": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_DATE})
"date_executed": phaeo_constants.ADD_ANALYSIS_TOOL_PARAM_DATE})
analysis_dict = json.loads(add_analysis_tool_dataset)
analysis_id = str(analysis_dict["analysis_id"])
@@ -241,7 +247,7 @@ class RunWorkflow(speciesData.SpeciesData):
time.sleep(60)
utilities.run_tool(
instance=self.instance,
tool_id=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID,
tool_id=phaeo_constants.ANALYSIS_SYNC_TOOL_ID,
history_id=self.history_id,
tool_inputs={"analysis_id": analysis_id})
@@ -249,7 +255,7 @@ class RunWorkflow(speciesData.SpeciesData):
get_organisms_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
instance=self.instance,
tool_id=phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID,
tool_id=phaeo_constants.GET_ORGANISMS_TOOL_ID,
history_id=self.history_id,
tool_inputs={},
time_sleep=10
@@ -266,7 +272,7 @@ class RunWorkflow(speciesData.SpeciesData):
if org_id is None:
add_organism_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
instance=self.instance,
tool_id=phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID,
tool_id=phaeo_constants.ADD_ORGANISM_TOOL_ID,
history_id=self.history_id,
tool_inputs={"abbr": self.abbreviation,
"genus": self.genus_uppercase,
@@ -280,7 +286,7 @@ class RunWorkflow(speciesData.SpeciesData):
time.sleep(60)
utilities.run_tool(
instance=self.instance,
tool_id=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID,
tool_id=phaeo_constants.ORGANISM_SYNC_TOOL_ID,
history_id=self.history_id,
tool_inputs={"organism_id": org_id})
@@ -290,7 +296,7 @@ class RunWorkflow(speciesData.SpeciesData):
get_analyses_tool_dataset = utilities.run_tool_and_download_single_output_dataset(
instance=self.instance,
tool_id=phaeoexplorer_constants.GET_ANALYSES_TOOL_ID,
tool_id=phaeo_constants.GET_ANALYSES_TOOL_ID,
history_id=self.history_id,
tool_inputs={},
time_sleep=10
@@ -718,7 +724,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
history_id = run_workflow_for_current_organism.set_history()
run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB:
if workflow_type == phaeo_constants.WF_LOAD_GFF_JB:
analyses_dict_list = run_workflow_for_current_organism.get_analyses()
@@ -743,11 +749,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
genus_species=run_workflow_for_current_organism.genus_species,
strain_sex=run_workflow_for_current_organism.strain_sex,
genus=run_workflow_for_current_organism.genus,
genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
species=species,
full_name=run_workflow_for_current_organism.full_name,
species_folder_name=run_workflow_for_current_organism.species_folder_name,
sex=run_workflow_for_current_organism.sex,
strain=run_workflow_for_current_organism.strain,
chado_species_name=run_workflow_for_current_organism.chado_species_name,
org_id=org_id,
genome_analysis_id=genome_analysis_id,
ogs_analysis_id=ogs_analysis_id,
@@ -779,11 +787,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
genus_species=run_workflow_for_current_organism.genus_species,
strain_sex=run_workflow_for_current_organism.strain_sex,
genus=run_workflow_for_current_organism.genus,
genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
species=species,
full_name=run_workflow_for_current_organism.full_name,
species_folder_name=run_workflow_for_current_organism.species_folder_name,
sex=run_workflow_for_current_organism.sex,
strain=run_workflow_for_current_organism.strain,
chado_species_name=run_workflow_for_current_organism.chado_species_name,
org_id=org_id,
blastp_analysis_id=blastp_analysis_id,
genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
@@ -814,11 +824,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
genus_species=run_workflow_for_current_organism.genus_species,
strain_sex=run_workflow_for_current_organism.strain_sex,
genus=run_workflow_for_current_organism.genus,
genus_uppercase=run_workflow_for_current_organism.genus_uppercase,
species=species,
full_name=run_workflow_for_current_organism.full_name,
species_folder_name=run_workflow_for_current_organism.species_folder_name,
sex=run_workflow_for_current_organism.sex,
strain=run_workflow_for_current_organism.strain,
chado_species_name=run_workflow_for_current_organism.chado_species_name,
org_id=org_id,
interpro_analysis_id=interpro_analysis_id,
genome_hda_id=run_workflow_for_current_organism.genome_hda_id,
@@ -858,36 +870,19 @@ def install_changesets_revisions_from_workflow(instance, workflow_path):
workflow_dict = json.load(ga_in_file)
# Iterate over the workflow steps, looking for tool steps
for k, v in workflow_dict["steps"].items():
if v["tool_id"]:
for step in workflow_dict["steps"].values():
if step["tool_id"]:
# Get the descriptive dictionary of the installed tool (using the tool id in the workflow)
show_tool = instance.tools.show_tool(v["tool_id"])
show_tool = instance.tools.show_tool(step["tool_id"])
# Check if an installed version matches the workflow tool version
# (If it's not installed, the show_tool version returned will be a default version with the suffix "XXXX+0")
if show_tool["version"] != v["tool_version"]:
# If it doesn't match, proceed to install of the correct changeset revision
toolshed = "https://" + v["tool_shed_repository"]["tool_shed"]
name = v["tool_shed_repository"]["name"]
owner = v["tool_shed_repository"]["owner"]
changeset_revision = v["tool_shed_repository"]["changeset_revision"]
logging.warning("Installed tool versions for tool {0} do not match the version required by the specified workflow, installing changeset {1}".format(name, changeset_revision))
# Install changeset
instance.toolshed.install_repository_revision(tool_shed_url=toolshed, name=name, owner=owner,
changeset_revision=changeset_revision,
install_tool_dependencies=True,
install_repository_dependencies=False,
install_resolver_dependencies=True)
else:
toolshed = "https://" + v["tool_shed_repository"]["tool_shed"]
name = v["tool_shed_repository"]["name"]
owner = v["tool_shed_repository"]["owner"]
changeset_revision = v["tool_shed_repository"]["changeset_revision"]
logging.debug("Installed tool versions for tool {0} match the version in the specified workflow (changeset {1})".format(name, changeset_revision))
logging.info("Tools versions and changesets from workflow validated")
utilities.install_repository_revision(current_version=show_tool["version"],
toolshed_dict=show_tool["tool_shed_repository"],
version_to_install=step["tool_version"],
changeset_revision=step["tool_shed_repository"]["changeset_revision"],
instance=instance)
logging.info("Tools versions and changeset_revisions from workflow validated")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run Galaxy workflows, specific to Phaeoexplorer data")
@@ -942,14 +937,14 @@ if __name__ == "__main__":
if not args.workflow:
logging.critical("No workflow type specified, exiting")
sys.exit()
elif args.workflow in phaeoexplorer_constants.WORKFLOW_VALID_TYPES:
elif args.workflow in phaeo_constants.WORKFLOW_VALID_TYPES:
workflow_type = args.workflow
logging.info("Workflow type set to '%s'" % workflow_type)
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
all_sp_workflow_dict = {}
if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB:
if workflow_type == phaeo_constants.WF_LOAD_GFF_JB:
for sp_dict in sp_dict_list:
# Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
@@ -957,7 +952,7 @@ if __name__ == "__main__":
sp_dict,
main_dir=main_dir,
config=config,
workflow_type=phaeoexplorer_constants.WF_LOAD_GFF_JB)
workflow_type=phaeo_constants.WF_LOAD_GFF_JB)
current_sp_genus_species = sp_workflow_param.genus_species
current_sp_strain_sex = sp_workflow_param.strain_sex
@@ -982,55 +977,35 @@ if __name__ == "__main__":
sp_workflow_param = strains[strain_sex]
# Set workflow path (1 organism)
workflow_path = os.path.join(os.path.abspath(script_dir), phaeoexplorer_constants.WORKFLOWS_PATH, phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_FILE)
workflow_path = os.path.join(os.path.abspath(script_dir), phaeo_constants.WORKFLOWS_PATH, phaeo_constants.WF_LOAD_GFF_JB_1_ORG_FILE)
# Check if the versions of tools specified in the workflow are installed in galaxy
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_workflow_param.instance)
history_id = sp_workflow_param.history_id
# Organism 1 attributes
org_genus = sp_workflow_param.genus
org_species = sp_workflow_param.species
org_genus_species = sp_workflow_param.genus_species
org_species_folder_name = sp_workflow_param.species_folder_name
org_full_name = sp_workflow_param.full_name
org_strain = sp_workflow_param.strain
org_sex = sp_workflow_param.sex
org_org_id = sp_workflow_param.org_id
org_genome_analysis_id = sp_workflow_param.genome_analysis_id
org_ogs_analysis_id = sp_workflow_param.ogs_analysis_id
org_genome_hda_id = sp_workflow_param.genome_hda_id
org_transcripts_hda_id = sp_workflow_param.transcripts_hda_id
org_proteins_hda_id = sp_workflow_param.proteins_hda_id
org_gff_hda_id = sp_workflow_param.gff_hda_id
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
# Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
"organism": org_org_id,
"analysis_id": org_genome_analysis_id,
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
"organism": sp_workflow_param.org_id,
"analysis_id": sp_workflow_param.genome_analysis_id,
"do_update": "true"}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
"organism": org_org_id,
"analysis_id": org_ogs_analysis_id}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC] = {
"organism_id": org_org_id}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
"organism": sp_workflow_param.org_id,
"analysis_id": sp_workflow_param.ogs_analysis_id}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC] = {
"organism_id": sp_workflow_param.org_id}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
# Set datamap (mapping of input files in the workflow)
datamap = {}
datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": org_genome_hda_id}
datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": org_gff_hda_id}
datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": org_proteins_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": sp_workflow_param.genome_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": sp_workflow_param.gff_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": sp_workflow_param.proteins_hda_id}
with open(workflow_path, 'r') as ga_in_file:
@@ -1041,218 +1016,120 @@ if __name__ == "__main__":
# For the JBrowse tool, we unfortunately have to edit the parameters manually instead of setting them
# as runtime values: using runtime parameters makes the tool throw an internal critical error ("replace not found")
# Scratchgmod test: need "http" (or "https"), the hostname (+ port)
jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
if "jbrowse_menu_url" not in config.keys():
jbrowse_menu_url_org = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
if constants.CONF_JBROWSE_MENU_URL not in config.keys():
# default
root_url = "https://{0}".format(config[constants.CONF_ALL_HOSTNAME])
else:
jbrowse_menu_url_org = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
root_url = config[constants.CONF_JBROWSE_MENU_URL]
species_strain_sex = sp_workflow_param.chado_species_name.replace(" ", "-")
jbrowse_menu_url = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format(
root_url=root_url,
genus_sp=sp_workflow_param.genus_species,
Genus=sp_workflow_param.genus_uppercase,
species_strain_sex=species_strain_sex,
id="{id}")
# Replace values in the workflow dictionary
workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org)
workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name)
jbrowse_tool_state = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE]["tool_state"]
jbrowse_tool_state = jbrowse_tool_state.replace("__MENU_URL_ORG__", jbrowse_menu_url)
jb_to_container_tool_state = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER]["tool_state"]
jb_to_container_tool_state = jb_to_container_tool_state\
.replace("__DISPLAY_NAME_ORG__", sp_workflow_param.full_name)\
.replace("__UNIQUE_ID_ORG__", sp_workflow_param.species_folder_name)
# Import the workflow in galaxy as a dict
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
sp_workflow_param.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
# Get its attributes
workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
workflow_attributes = sp_workflow_param.instance.workflows.get_workflows(name=workflow_name)
# Then get its ID (required to invoke the workflow)
workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
logging.debug("Workflow ID: %s" % workflow_id)
# Check if the workflow is found
try:
logging.debug("Workflow ID: %s" % workflow_id)
show_workflow = sp_workflow_param.instance.workflows.show_workflow(workflow_id=workflow_id)
except bioblend.ConnectionError:
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
sp_workflow_param.instance.workflows.invoke_workflow(
workflow_id=workflow_id,
history_id=sp_workflow_param.history_id,
params=workflow_parameters,
inputs=datamap,
allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, sp_workflow_param.instance_url))
if len(list(strains.keys())) == 2:
if strains_count == 2:
logging.info("Input organism %s: 2 species detected in input dictionary" % species)
strain_sex_org1 = list(strains.keys())[0]
strain_sex_org2 = list(strains.keys())[1]
sp_workflow_param_org1 = strains[strain_sex_org1]
sp_workflow_param_org2 = strains[strain_sex_org2]
# Set workflow path (2 organisms)
workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v4.ga")
# Instance object required variables
instance_url, email, password = None, None, None
# Set the galaxy instance variables
for k2, v2 in strains.items():
instance_url = v2["instance_url"]
email = v2["email"]
password = v2["password"]
instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
workflow_path = os.path.join(os.path.abspath(script_dir), phaeo_constants.WORKFLOWS_PATH, phaeo_constants.WF_LOAD_GFF_JB_2_ORG_FILE)
# Check if the versions of tools specified in the workflow are installed in galaxy
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
# Get key names from the current organism (item 1 = organism 1, item 2 = organism 2)
organisms_key_names = list(strains.keys())
org1_dict = strains[organisms_key_names[0]]
org2_dict = strains[organisms_key_names[1]]
history_id = org1_dict["history_id"]
# Organism 1 attributes
org1_genus = org1_dict["genus"]
org1_species = org1_dict["species"]
org1_genus_species = org1_dict["genus_species"]
org1_species_folder_name = org1_dict["species_folder_name"]
org1_full_name = org1_dict["full_name"]
org1_strain = org1_dict["sex"]
org1_sex = org1_dict["strain"]
org1_org_id = org1_dict["org_id"]
org1_genome_analysis_id = org1_dict["genome_analysis_id"]
org1_ogs_analysis_id = org1_dict["ogs_analysis_id"]
org1_genome_hda_id = org1_dict["hda_ids"]["genome_hda_id"]
org1_transcripts_hda_id = org1_dict["hda_ids"]["transcripts_hda_id"]
org1_proteins_hda_id = org1_dict["hda_ids"]["proteins_hda_id"]
org1_gff_hda_id = org1_dict["hda_ids"]["gff_hda_id"]
# Store these values into a dict for parameters logging/validation
org1_parameters_dict = {
"org1_genus": org1_genus,
"org1_species": org1_species,
"org1_genus_species": org1_genus_species,
"org1_species_folder_name": org1_species_folder_name,
"org1_full_name": org1_full_name,
"org1_strain": org1_strain,
"org1_sex": org1_sex,
"org1_org_id": org1_org_id,
"org1_genome_analysis_id": org1_genome_analysis_id,
"org1_ogs_analysis_id": org1_ogs_analysis_id,
"org1_genome_hda_id": org1_genome_hda_id,
"org1_transcripts_hda_id": org1_transcripts_hda_id,
"org1_proteins_hda_id": org1_proteins_hda_id,
"org1_gff_hda_id": org1_gff_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org1_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value))
sys.exit()
# Organism 2 attributes
org2_genus = org2_dict["genus"]
org2_species = org2_dict["species"]
org2_genus_species = org2_dict["genus_species"]
org2_species_folder_name = org2_dict["species_folder_name"]
org2_full_name = org2_dict["full_name"]
org2_strain = org2_dict["sex"]
org2_sex = org2_dict["strain"]
org2_org_id = org2_dict["org_id"]
org2_genome_analysis_id = org2_dict["genome_analysis_id"]
org2_ogs_analysis_id = org2_dict["ogs_analysis_id"]
org2_genome_hda_id = org2_dict["hda_ids"]["genome_hda_id"]
org2_transcripts_hda_id = org2_dict["hda_ids"]["transcripts_hda_id"]
org2_proteins_hda_id = org2_dict["hda_ids"]["proteins_hda_id"]
org2_gff_hda_id = org2_dict["hda_ids"]["gff_hda_id"]
# Store these values into a dict for parameters logging/validation
org2_parameters_dict = {
"org2_genus": org2_genus,
"org2_species": org2_species,
"org2_genus_species": org2_genus_species,
"org2_species_folder_name": org2_species_folder_name,
"org2_full_name": org2_full_name,
"org2_strain": org2_strain,
"org2_sex": org2_sex,
"org2_org_id": org2_org_id,
"org2_genome_analysis_id": org2_genome_analysis_id,
"org2_ogs_analysis_id": org2_ogs_analysis_id,
"org2_genome_hda_id": org2_genome_hda_id,
"org2_transcripts_hda_id": org2_transcripts_hda_id,
"org2_proteins_hda_id": org2_proteins_hda_id,
"org2_gff_hda_id": org2_gff_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org2_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
sys.exit()
# Source files association (ordered by their IDs in the workflow)
# WARNING: Be very careful about how the workflow is "organized" (i.e. the order of the steps/datasets); check the .ga file if there is any error
GFF_FILE_ORG1 = "0"
GENOME_FASTA_FILE_ORG1 = "1"
PROTEINS_FASTA_FILE_ORG1 = "2"
GENOME_FASTA_FILE_ORG2 = "3"
GFF_FILE_ORG2 = "4"
PROTEINS_FASTA_FILE_ORG2 = "5"
LOAD_FASTA_ORG1 = "6"
JBROWSE_ORG1 = "7"
JRBOWSE_ORG2 = "8"
LOAD_GFF_ORG1 = "9"
JBROWSE_CONTAINER = "10"
SYNC_FEATURES_ORG1 = "11"
LOAD_FASTA_ORG2 = "12"
LOAD_GFF_ORG2 = "13"
SYNC_FEATURES_ORG2 = "14"
POPULATE_MAT_VIEWS = "15"
INDEX_TRIPAL_DATA = "16"
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_workflow_param_org1.instance)
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
# Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
workflow_parameters[GENOME_FASTA_FILE_ORG1] = {}
workflow_parameters[GFF_FILE_ORG1] = {}
workflow_parameters[PROTEINS_FASTA_FILE_ORG1] = {}
workflow_parameters[GENOME_FASTA_FILE_ORG2] = {}
workflow_parameters[GFF_FILE_ORG2] = {}
workflow_parameters[PROTEINS_FASTA_FILE_ORG2] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {}
# Organism 1
workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id,
"analysis_id": org1_genome_analysis_id,
"do_update": "true"}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1] = {
"organism": sp_workflow_param_org1.org_id,
"analysis_id": sp_workflow_param_org1.genome_analysis_id,
"do_update": "true"}
# workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1}
workflow_parameters[JBROWSE_ORG1] = {}
workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id}
workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id": org1_org_id}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1] = {
"organism": sp_workflow_param_org1.org_id,
"analysis_id": sp_workflow_param_org1.ogs_analysis_id}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG1] = {
"organism_id": sp_workflow_param_org1.org_id}
# workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]}
workflow_parameters[JBROWSE_CONTAINER] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {}
# Organism 2
workflow_parameters[LOAD_FASTA_ORG2] = {"organism": org2_org_id,
"analysis_id": org2_genome_analysis_id,
"do_update": "true"}
workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2] = {
"organism": sp_workflow_param_org2.org_id,
"analysis_id": sp_workflow_param_org2.genome_analysis_id,
"do_update": "true"}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2] = {
"organism": sp_workflow_param_org2.org_id,
"analysis_id": sp_workflow_param_org2.ogs_analysis_id}
# workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2}
workflow_parameters[JRBOWSE_ORG2] = {}
workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id": org2_org_id}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_FEATURE_SYNC_ORG2] = {
"organism_id": sp_workflow_param_org2.org_id}
# POPULATE + INDEX DATA
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
# Set datamap (mapping of input files in the workflow)
datamap = {}
# Organism 1
datamap[GENOME_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_genome_hda_id}
datamap[GFF_FILE_ORG1] = {"src": "hda", "id": org1_gff_hda_id}
datamap[PROTEINS_FASTA_FILE_ORG1] = {"src": "hda", "id": org1_proteins_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": sp_workflow_param_org1.genome_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": sp_workflow_param_org1.gff_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": sp_workflow_param_org1.proteins_hda_id}
# Organism 2
datamap[GENOME_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_genome_hda_id}
datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id}
datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": sp_workflow_param_org2.genome_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": sp_workflow_param_org2.gff_hda_id}
datamap[phaeo_constants.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": sp_workflow_param_org2.proteins_hda_id}
with open(workflow_path, 'r') as ga_in_file:
@@ -1385,15 +1262,15 @@ if __name__ == "__main__":
BLASTP_FILE = "0"
LOAD_BLASTP_FILE = "1"
POPULATE_MAT_VIEWS = "2"
INDEX_TRIPAL_DATA = "3"
WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "2"
WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "3"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
workflow_parameters[BLASTP_FILE] = {}
workflow_parameters[LOAD_BLASTP_FILE] = {"analysis_id": org_blastp_analysis_id, "organism_id": org_org_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
datamap = {}
datamap[BLASTP_FILE] = {"src": "hda", "id": org_blastp_hda_id}
@@ -1522,8 +1399,8 @@ if __name__ == "__main__":
BLASTP_FILE_ORG2 = "1"
LOAD_BLASTP_FILE_ORG1 = "2"
LOAD_BLASTP_FILE_ORG1 = "3"
POPULATE_MAT_VIEWS = "4"
INDEX_TRIPAL_DATA = "5"
WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4"
WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
@@ -1540,8 +1417,8 @@ if __name__ == "__main__":
workflow_parameters[LOAD_BLASTP_FILE_ORG2] = {"organism_id": org2_org_id,
"analysis_id": org2_blastp_analysis_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
# Set datamap (mapping of input files in the workflow)
datamap = {}
@@ -1655,15 +1532,15 @@ if __name__ == "__main__":
INTEPRO_FILE = "0"
LOAD_INTERPRO_FILE = "1"
POPULATE_MAT_VIEWS = "2"
INDEX_TRIPAL_DATA = "3"
WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "2"
WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "3"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
workflow_parameters[INTERPRO_FILE] = {}
workflow_parameters[LOAD_INTERPRO_FILE] = {"analysis_id": org_inteproscan_analysis_id, "organism_id": org_org_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
datamap = {}
datamap[INTERPRO_FILE] = {"src": "hda", "id": org_interproscan_hda_id}
@@ -1792,8 +1669,8 @@ if __name__ == "__main__":
INTERPRO_FILE_ORG2 = "1"
LOAD_INTERPRO_FILE_ORG1 = "2"
LOAD_INTERPRO_FILE_ORG2 = "3"
POPULATE_MAT_VIEWS = "4"
INDEX_TRIPAL_DATA = "5"
WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4"
WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
@@ -1810,8 +1687,8 @@ if __name__ == "__main__":
workflow_parameters[LOAD_INTERPRO_FILE_ORG2] = {"organism_id": org2_org_id,
"analysis_id": org2_interproscan_analysis_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
# Set datamap (mapping of input files in the workflow)
datamap = {}
......
@@ -42,7 +42,12 @@ class SpeciesData:
def clean_string(self, string):
if string is not None and string != "":
clean_string = string.replace(" ", "_").replace("-", "_").replace("(", "").replace(")", "").replace("'", "").strip()
clean_string = string\
.replace(" ", "_")\
.replace("-", "_")\
.replace("(", "")\
.replace(")", "")\
.replace("'", "").strip()
return clean_string
else:
return string
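# For example (hypothetical input): clean_string("Ectocarpus sp. (male)")
# returns "Ectocarpus_sp._male".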
......