From 5bbc05a7b44bac7f06f2354bc04888314491e915 Mon Sep 17 00:00:00 2001
From: Loraine Gueguen <loraine.gueguen@sb-roscoff.fr>
Date: Sun, 30 May 2021 13:01:25 +0200
Subject: [PATCH] Refactor run_wf. Fix linting errors and warnings.

---
 gga_get_data.py               |   8 --
 gga_init.py                   |  20 ++---
 gga_load_data.py              |  11 ++-
 phaeo_constants.py            |   4 +-
 run_workflow_phaeoexplorer.py | 142 +++++++++++++++++++---------------
 speciesData.py                |  81 +++++++++----------
 utilities.py                  |  16 ++--
 7 files changed, 137 insertions(+), 145 deletions(-)

diff --git a/gga_get_data.py b/gga_get_data.py
index f46d733..89788e2 100755
--- a/gga_get_data.py
+++ b/gga_get_data.py
@@ -5,7 +5,6 @@ import argparse
 import os
 import logging
 import sys
-import time
 import shutil
 
 import utilities
@@ -65,13 +64,6 @@ class GetData(speciesData.SpeciesData):
 
         logging.info("src_data directory tree generated for %s" % self.full_name)
 
-    def get_last_modified_time_string(self, filePath):
-        # give the last modification date for the file, with format '20190130'
-        lastModifiedTimestamp = os.path.getmtime(filePath)
-        lastModifiedTimeStructure = time.localtime(lastModifiedTimestamp)
-        lastModifiedDate = time.strftime("%Y%m%d", lastModifiedTimeStructure)
-        return lastModifiedDate
-
     def get_source_data_files_from_path(self):
         """
         Find source data files and copy them into the src_data dir tree
diff --git a/gga_init.py b/gga_init.py
index eee4968..0f7d7bf 100755
--- a/gga_init.py
+++ b/gga_init.py
@@ -37,6 +37,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
     the organism's directory tree to create the required docker-compose files and stack deployment
 
     """
+    def __init__(self, parameters_dictionary):
+        self.picture_path = None
+        super().__init__(parameters_dictionary)
 
     def make_directory_tree(self):
         """
@@ -131,7 +134,7 @@ class DeploySpeciesStack(speciesData.SpeciesData):
                         "genus_species_sex": "{0}_{1}_{2}".format(self.genus_lowercase, self.species_lowercase, self.sex),
                         "strain": self.strain, "sex": self.sex, "Genus_species": "{0} {1}".format(self.genus_uppercase, self.species_lowercase),
                         "blast": self.blast, "go": self.go, "picture_path": self.picture_path}
-        if (len(self.config.keys()) == 0):
+        if len(self.config.keys()) == 0:
             logging.error("Empty config dictionary")
         # Merge the two dicts
         render_vars = {**self.config, **input_vars}
@@ -161,19 +164,6 @@ class DeploySpeciesStack(speciesData.SpeciesData):
         os.chdir(self.main_dir)
 
 
-    def make_orthology_compose_files(self):
-        """
-        Create/update orthology compose files
-
-        :return:
-        """
-
-        os.chdir(self.main_dir)
-
-        make_dirs["./orthology", "./orthology/src_data", "./orthology/src_data/genomes", 
-                       "./orthology/src_data/gff", "./orthology/src_data/newicks", "./orthology/src_data/proteomes"]
-
-
 def make_dirs(dir_paths_li):
     """
     Recursively create directories from a list of paths with a try-catch condition
@@ -386,7 +376,7 @@ if __name__ == "__main__":
     else:
         config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
     config = utilities.parse_config(config_file)
-    if (len(config.keys()) == 0):
+    if len(config.keys()) == 0:
         logging.error("Empty config dictionary")
 
     main_dir = None
diff --git a/gga_load_data.py b/gga_load_data.py
index d1bb064..d0ef036 100755
--- a/gga_load_data.py
+++ b/gga_load_data.py
@@ -10,7 +10,6 @@ import sys
 import time
 import json
 import yaml
-import subprocess
 from bioblend import galaxy
 from bioblend.galaxy.objects import GalaxyInstance
 
@@ -52,7 +51,7 @@ class LoadData(speciesData.SpeciesData):
                 self.history_id = histories[0]["id"]
                 logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
             else:
-                logging.critical("Multiple histories exists for {1}: {2}".format(self.genus, self.species))
+                logging.critical("Multiple histories exists for {0}: {1}".format(self.genus, self.species))
         except IndexError:
             logging.info("Creating history for {0} {1}".format(self.genus, self.species))
             hist_dict = self.instance.histories.create_history(name=str(self.genus_species))
@@ -114,9 +113,9 @@ class LoadData(speciesData.SpeciesData):
         data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR)
 
         gio = GalaxyInstance(url=self.instance_url,
-                                              email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
-                                              password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
-                                              )
+                             email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+                             password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
+                             )
 
         logging.info("Looking for project data in %s" % data_dir_root)
         folders = dict()
@@ -141,7 +140,7 @@ class LoadData(speciesData.SpeciesData):
 
             for fname, files in folders.items():
                 if fname and files:
-                    folder_name = re.sub(data_dir_root + "/", "", fname)
+                    folder_name = re.sub(re.escape(data_dir_root) + "/", "", str(fname))  # the path is literal text, not a regex
                     logging.info("Creating folder: %s" % folder_name)
                     folder = self.create_deep_folder(prj_lib, folder_name)
 
diff --git a/phaeo_constants.py b/phaeo_constants.py
index 1cca9b6..0457cc2 100644
--- a/phaeo_constants.py
+++ b/phaeo_constants.py
@@ -47,8 +47,8 @@ WORKFLOW_VALID_TYPES = [WF_LOAD_GFF_JB, WORKFLOW_BLAST, WORKFLOW_INTERPRO]
 ### Galaxy tools
 
 ADD_ORGANISM_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/"
-ADD_ORGANISM_TOOL_NAME = "2.3.4+galaxy0"
-ADD_ORGANISM_TOOL_ID = ADD_ORGANISM_TOOL_NAME + ADD_ORGANISM_TOOL_NAME
+ADD_ORGANISM_TOOL_VERSION = "2.3.4+galaxy0"
+ADD_ORGANISM_TOOL_ID = ADD_ORGANISM_TOOL_NAME + ADD_ORGANISM_TOOL_VERSION
 ADD_ORGANISM_TOOL_CHANGESET_REVISION = "1f12b9650028"
 
 ADD_ANALYSIS_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/"
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 94c6477..9d9b12e 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -28,8 +28,9 @@ class StrainWorkflowParam:
 
     def __init__(self, genus_species, strain_sex, genus, genus_uppercase, species, chado_species_name, sex,
                  strain, full_name, species_folder_name, org_id,
-                 genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, history_id,
-                 instance, instance_url, email, password):
+                 history_id, instance, instance_url, email, password,
+                 genome_analysis_id=None, ogs_analysis_id=None, blastp_analysis_id=None, interpro_analysis_id=None,
+                 genome_hda_id=None, gff_hda_id=None, transcripts_hda_id=None, proteins_hda_id=None, blastp_hda_id=None, blastx_hda_id=None, interproscan_hda_id=None):
         self.genus_species = genus_species
         self.strain_sex = strain_sex
         self.genus = genus
@@ -50,36 +51,40 @@ class StrainWorkflowParam:
         self.instance_url = instance_url
         self.email = email
         self.password = password
-
-    def check_param(self, params):
-        if not utilities.no_empty_items(params):
-            logging.critical(
-                "One empty workflow parameter found for organism {0}: {1})".format(org_full_name, params))
-            sys.exit()
-
-    def check_param_for_workflow_load_fasta_gff_jbrowse(self, params):
+        self.genome_hda_id = genome_hda_id
+        self.gff_hda_id = gff_hda_id
+        self.transcripts_hda_id = transcripts_hda_id
+        self.proteins_hda_id = proteins_hda_id
+        self.blastp_hda_id = blastp_hda_id
+        self.blastx_hda_id = blastx_hda_id
+        self.interproscan_hda_id = interproscan_hda_id
+
+    def check_param_for_workflow_load_fasta_gff_jbrowse(self):
         params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
                   self.sex, self.strain, self.chado_species_name, self.full_name,
                   self.species_folder_name, self.org_id,
-                  self.genome_analysis_id, self.ogs_analysis_id, self.history_id,
-                  self.instance, self.instance_url, self.email, self.password]
-        self.check_param(params)
+                  self.history_id, self.instance, self.instance_url, self.email, self.password,
+                  self.genome_analysis_id, self.ogs_analysis_id,
+                  self.genome_hda_id, self.gff_hda_id, self.transcripts_hda_id, self.proteins_hda_id]
+        utilities.check_wf_param(self.full_name, params)
 
-    def check_param_for_workflow_blast(self, params):
+    def check_param_for_workflow_blastp(self):
         params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
                   self.sex, self.strain, self.chado_species_name, self.full_name,
                   self.species_folder_name, self.org_id,
-                  self.blastp_analysis_id, self.history_id,
-                  self.instance, self.instance_url, self.email, self.password]
-        self.check_param(params)
+                  self.history_id, self.instance, self.instance_url, self.email, self.password,
+                  self.blastp_analysis_id,
+                  self.blastp_hda_id]
+        utilities.check_wf_param(self.full_name, params)
 
-    def check_param_for_workflow_interpro(self, params):
+    def check_param_for_workflow_interpro(self):
         params = [self.genus_species, self.strain_sex, self.genus, self.genus_uppercase, self.species,
                   self.sex, self.strain, self.chado_species_name, self.full_name,
                   self.species_folder_name, self.org_id,
-                  self.interpro_analysis_id, self.history_id,
-                  self.instance, self.instance_url, self.email, self.password]
-        self.check_param(params)
+                  self.history_id, self.instance, self.instance_url, self.email, self.password,
+                  self.interpro_analysis_id,
+                  self.interproscan_hda_id]
+        utilities.check_wf_param(self.full_name, params)
 
 
 class RunWorkflow(speciesData.SpeciesData):
@@ -329,7 +334,7 @@ class RunWorkflow(speciesData.SpeciesData):
         logging.info("Synchronizing analysis %s in Tripal" % analysis_name)
         self.sync_analysis(analysis_id=analysis_id)
 
-        return(analysis_id)
+        return analysis_id
 
     def add_organism_blastp_analysis(self):
         """
@@ -431,7 +436,7 @@ class RunWorkflow(speciesData.SpeciesData):
                                                             tool_inputs={"analysis_id": blastp_analysis_id})
 
         # print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
-        return({"org_id": org_id, "blastp_analysis_id": blastp_analysis_id})
+        return {"org_id": org_id, "blastp_analysis_id": blastp_analysis_id}
 
     def add_organism_interproscan_analysis(self):
         """
@@ -590,9 +595,7 @@ class RunWorkflow(speciesData.SpeciesData):
                              email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
                              password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD])
 
-        prj_lib = gio.libraries.get_previews(name=constants.GALAXY_LIBRARY_NAME)
-        library_id = prj_lib[0].id
-        folder_dict_list = self.instance.libraries.get_folders(library_id=str(library_id))
+        folder_dict_list = self.instance.libraries.get_folders(library_id=str(self.library_id))
 
         folders_id_dict = {}
 
@@ -809,7 +812,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type):
                 email=config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
                 password=config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
             )
-            sp_wf_param.check_param_for_workflow_blast()
+            sp_wf_param.check_param_for_workflow_blastp()
 
         if workflow_type == "interpro":
 
@@ -970,10 +973,12 @@ if __name__ == "__main__":
                     logging.error("Duplicate organism with 'genus_species' = '{0}' and 'strain_sex' = '{1}'".format(current_sp_genus_species, current_sp_strain_sex))
 
         for species, strains in all_sp_workflow_dict.items():
-            strains_count = len(list(strains.keys()))
+            strains_list = list(strains.keys())
+            strains_count = len(strains_list)
+
             if strains_count == 1:
                 logging.info("Input species %s: 1 strain detected in input dictionary" % species)
-                strain_sex = strains.keys()[0]
+                strain_sex = strains_list[0]
                 sp_workflow_param = strains[strain_sex]
 
                 # Set workflow path (1 organism)
@@ -1064,8 +1069,8 @@ if __name__ == "__main__":
             if strains_count == 2:
 
                 logging.info("Input organism %s: 2 species detected in input dictionary" % species)
-                strain_sex_org1 = strains.keys()[0]
-                strain_sex_org2 = strains.keys()[1]
+                strain_sex_org1 = strains_list[0]
+                strain_sex_org2 = strains_list[1]
                 sp_workflow_param_org1 = strains[strain_sex_org1]
                 sp_workflow_param_org2 = strains[strain_sex_org2]
 
@@ -1140,54 +1145,69 @@ if __name__ == "__main__":
                     # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
                     # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
                     # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-                    jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
-                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
-                    if "jbrowse_menu_url" not in config.keys():
-                        jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
-                        jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
+                    if constants.CONF_JBROWSE_MENU_URL not in config.keys():
+                        # default
+                        root_url = "https://{0}".format(config[constants.CONF_ALL_HOSTNAME])
                     else:
-                        jbrowse_menu_url_org1 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
-                        jbrowse_menu_url_org2 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
-
-                    # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True)
-                    # print(show_tool_add_organism)
-                    # show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True)
-                    # print(show_jbrowse_tool)
-                    # show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True)
-                    # print(show_jbrowse_container_tool)
+                        root_url = config[constants.CONF_JBROWSE_MENU_URL]
+                    species_strain_sex_org1 = sp_workflow_param_org1.chado_species_name.replace(" ", "-")
+                    species_strain_sex_org2 = sp_workflow_param_org2.chado_species_name.replace(" ", "-")
+                    jbrowse_menu_url_org1 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format(
+                        root_url=root_url,
+                        genus_sp=sp_workflow_param_org1.genus_species,
+                        Genus=sp_workflow_param_org1.genus_uppercase,
+                        species_strain_sex=species_strain_sex_org1,
+                        id="{id}")
+                    jbrowse_menu_url_org2 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species_strain_sex}/mRNA/{id}".format(
+                        root_url=root_url,
+                        genus_sp=sp_workflow_param_org2.genus_species,
+                        Genus=sp_workflow_param_org2.genus_uppercase,
+                        species_strain_sex=species_strain_sex_org2,
+                        id="{id}")
 
                     # Replace values in the workflow dictionary
-                    workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
-                    workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
+                    jbrowse_step_org1 = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]  # str.replace returns a new string: write it back into the step
+                    jbrowse_step_org1["tool_state"] = jbrowse_step_org1["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
+                    jbrowse_step_org2 = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]
+                    jbrowse_step_org2["tool_state"] = jbrowse_step_org2["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
                     # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
                     # in galaxy --> define a naming method for these workflows
-                    workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
-                    workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__DISPLAY_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
+                    jb_to_container_step = workflow_dict["steps"][phaeo_constants.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]  # keep the step dict so the edited state can be stored back
+                    jb_to_container_step["tool_state"] = jb_to_container_step["tool_state"]\
+                        .replace("__DISPLAY_NAME_ORG1__", sp_workflow_param_org1.full_name)\
+                        .replace("__UNIQUE_ID_ORG1__", sp_workflow_param_org1.species_folder_name)\
+                        .replace("__DISPLAY_NAME_ORG2__", sp_workflow_param_org2.full_name)\
+                        .replace("__UNIQUE_ID_ORG2__", sp_workflow_param_org2.species_folder_name)
 
                     # Import the workflow in galaxy as a dict
-                    instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+                    sp_workflow_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
 
                     # Get its attributes
-                    workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                    workflow_attributes = sp_workflow_param_org1.instance.workflows.get_workflows(name=workflow_name)
                     # Then get its ID (required to invoke the workflow)
                     workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-                    show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                    logging.debug("Workflow ID: %s" % workflow_id)
                     # Check if the workflow is found
                     try:
-                        logging.debug("Workflow ID: %s" % workflow_id)
+                        show_workflow = sp_workflow_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id)
                     except bioblend.ConnectionError:
                         logging.warning("Error finding workflow %s" % workflow_name)
 
                     # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-                    instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
+                    sp_workflow_param_org1.instance.workflows.invoke_workflow(
+                        workflow_id=workflow_id,
+                        history_id=sp_workflow_param_org1.history_id,
+                        params=workflow_parameters,
+                        inputs=datamap,
+                        allow_tool_state_corrections=True)
 
-                    logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
+                    logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, sp_workflow_param_org1.instance_url))
 
-    if workflow_type == "blast":
+    if workflow_type == phaeo_constants.WORKFLOW_BLAST:
         for sp_dict in sp_dict_list:
 
             # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
-            sp_workflow_param = get_sp_workflow_param(sp_dict, main_dir=args.main_directory, config=config, workfow_type="blast")
+            sp_workflow_param = get_sp_workflow_param(sp_dict, main_dir=args.main_directory, config=config, workflow_type=phaeo_constants.WORKFLOW_BLAST)
 
             current_sp_genus_species = list(sp_workflow_param.keys())[0]
             current_sp_genus_species_dict = list(sp_workflow_param.values())[0]
@@ -1223,7 +1243,7 @@ if __name__ == "__main__":
             # Check if the versions of tools specified in the workflow are installed in galaxy
             install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
 
-            organism_key_name = list(strains.keys())
+            organisms_key_name = list(strains.keys())
             org_dict = strains[organisms_key_name[0]]
 
             history_id = org_dict["history_id"]
@@ -1377,7 +1397,7 @@ if __name__ == "__main__":
                 "org2_genus": org2_genus,
                 "org2_species": org2_species,
                 "org2_genus_species": org2_genus_species,
-                "org2_species_folder_name": orgÃ©_species_folder_name,
+                "org2_species_folder_name": org2_species_folder_name,
                 "org2_full_name": org2_full_name,
                 "org2_strain": org2_strain,
                 "org2_sex": org2_sex,
@@ -1398,7 +1418,7 @@ if __name__ == "__main__":
             BLASTP_FILE_ORG1 = "0"
             BLASTP_FILE_ORG2 = "1"
             LOAD_BLASTP_FILE_ORG1 = "2"
-            LOAD_BLASTP_FILE_ORG1 = "3"
+            LOAD_BLASTP_FILE_ORG2 = "3"
             WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "4"
             WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "5"
 
@@ -1647,7 +1667,7 @@ if __name__ == "__main__":
                 "org2_genus": org2_genus,
                 "org2_species": org2_species,
                 "org2_genus_species": org2_genus_species,
-                "org2_species_folder_name": orgÃ©_species_folder_name,
+                "org2_species_folder_name": org2_species_folder_name,
                 "org2_full_name": org2_full_name,
                 "org2_strain": org2_strain,
                 "org2_sex": org2_sex,
diff --git a/speciesData.py b/speciesData.py
index cae48c4..1e2a9f3 100755
--- a/speciesData.py
+++ b/speciesData.py
@@ -9,6 +9,27 @@ import constants
 
 from _datetime import datetime
 
+def clean_string(string):
+    """
+    Replace spaces and hyphens with underscores, remove parentheses and single quotes,
+    then strip what whitespace remains at both ends. None and "" are returned unchanged.
+    """
+    if string is None or string == "":
+        return string
+    cleaned_string = string.replace(" ", "_").replace("-", "_")
+    for unwanted_char in ("(", ")", "'"):
+        cleaned_string = cleaned_string.replace(unwanted_char, "")
+    return cleaned_string.strip()
+
+def set_service_attribute(service, parameters_dictionary):
+    """Return the value configured for `service`, or "0" when it is missing, None or empty."""
+    services = parameters_dictionary[constants.ORG_PARAM_SERVICES]
+    # dict.get() yields None for a missing key, which is handled like an explicit None below
+    service_value = services.get(service)
+    if service_value is None or service_value == "":
+        return "0"
+    return service_value
+
 class SpeciesData:
     """
     This class contains attributes and functions to interact with the galaxy container of the GGA environment
@@ -40,38 +61,17 @@ class SpeciesData:
             sys.exit(0)
         return 1
 
-    def clean_string(self, string):
-        if not string is None and string != "":
-            clean_string = string\
-                .replace(" ", "_")\
-                .replace("-", "_")\
-                .replace("(", "")\
-                .replace(")", "")\
-                .replace("'", "").strip()
-            return clean_string
-        else:
-            return string
-
-    def set_service_attribute(self, service, parameters_dictionary):
-        parameters_dictionary_services = parameters_dictionary[constants.ORG_PARAM_SERVICES]
-        service_value = "0"
-        if(service in parameters_dictionary_services.keys()
-                and parameters_dictionary_services[service] is not None
-                and parameters_dictionary_services[service] != ""):
-            service_value = parameters_dictionary_services[service]
-        return service_value
-
     def __init__(self, parameters_dictionary):
         self.parameters_dictionary = parameters_dictionary
         self.name = parameters_dictionary[constants.ORG_PARAM_NAME]
         parameters_dictionary_description=parameters_dictionary[constants.ORG_PARAM_DESC]
         parameters_dictionary_data = parameters_dictionary[constants.ORG_PARAM_DATA]
 
-        self.species = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES])
-        self.genus = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS])
-        self.strain = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN])
-        self.sex = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX])
-        self.common_name = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME])
+        self.species = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES])
+        self.genus = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS])
+        self.strain = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN])
+        self.sex = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX])
+        self.common_name = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME])
 
         self.date = datetime.today().strftime("%Y-%m-%d")
         self.origin = parameters_dictionary_description[constants.ORG_PARAM_DESC_ORIGIN]
@@ -96,8 +96,8 @@ class SpeciesData:
         self.blastx_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTX_PATH]
         self.orthofinder_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_ORTHOFINDER_PATH]
 
-        self.blast = self.set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary)
-        self.go = self.set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary)
+        self.blast = set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary)
+        self.go = set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary)
 
         self.genus_lowercase = self.genus.lower()
         self.species_lowercase = self.species.lower()
@@ -123,31 +123,20 @@ class SpeciesData:
 
         self.genome_filename = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
         self.gff_filename = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, constants.DATA_DATE)
-        self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.TRANSCRIPTS_FILENAME_SUFFIX)
+        self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_TRANSCRIPTS)
         self.proteins_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_PROTEINS)
         self.interpro_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_INTERPRO)
         self.blastp_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTP)
         self.blastx_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTX)
         self.orthofinder_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_ORTHOFINDER)
 
-        # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
-        self.org_id = None
-        self.genome_analysis_id = None
-        self.ogs_analysis_id = None
-        self.instance_url = None
-        self.instance = None
-        self.history_id = None
-        self.library = None
-        self.library_id = None
-
         self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
         self.main_dir = None
         self.species_dir = None
-
-        self.tool_panel = None
-        self.source_files = dict()
-        self.workflow_name = None
-        self.metadata = dict()
-        self.api_key = None  # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
-        self.datasets = dict()
         self.config = None  # Custom config used to set environment variables inside containers
+
+        # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
+        self.instance_url = None
+        self.instance = None
+        self.history_id = None
+        self.library_id = None
diff --git a/utilities.py b/utilities.py
index d1bb089..14d8510 100755
--- a/utilities.py
+++ b/utilities.py
@@ -84,14 +84,16 @@ def no_empty_items(li):
             empty = False
     return empty
 
+def check_wf_param(full_name, params):
+    """Log a critical message and exit when no_empty_items() returns a falsy value for params."""
+    if no_empty_items(params):
+        return
+    logging.critical("One empty workflow parameter found for organism {0}: {1})".format(full_name, params))
+    sys.exit()
+
 def check_galaxy_state(network_name, script_dir):
     """
     Read the logs of the galaxy container for the current species to check if the service is "ready"
-
-    :param genus_lowercase:
-    :param species:
-    :param script_dir:
-    :return:
     """
 
     # Run supervisorctl status in the galaxy container via serexec
@@ -207,10 +209,10 @@ def run_tool(instance, tool_id, history_id, tool_inputs):
 
     return output_dict
 
-def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep):
+def run_tool_and_download_single_output_dataset(instance, tool_id, history_id, tool_inputs, time_sleep=0):
 
     output_dict = run_tool(instance, tool_id, history_id, tool_inputs)
-    if (not time_sleep is None):
+    if time_sleep is not None:
         time.sleep(time_sleep)
     single_output_dataset_id = output_dict["outputs"][0]["id"]
     dataset = instance.datasets.download_dataset(dataset_id=single_output_dataset_id)
-- 
GitLab