From f5989e1f484a2a25c6001c2b9a11f29e58fc521a Mon Sep 17 00:00:00 2001
From: Loraine Gueguen <loraine.gueguen@sb-roscoff.fr>
Date: Fri, 14 May 2021 17:49:52 +0200
Subject: [PATCH] Move constants to file constants.py. Change config root to
 dict and update parse_config(). Change some config parameters name
 (galaxy_persist_data, tripal_banner_path, jbrowse_menu_url), update example
 config file and compose templates. Remove apollo config variables
 (webapollo_user, webapollo_password) in config file and in gspecies compose
 template, because apollo service not deployed here. Factorize parse_input()
 and parse_config(). Update description of scripts in main. Set default config
 file in scripts. Remove useless variable datasets_to_get in gga_get_data.py.
 Update README.md.

---
 README.md                         | 28 +++++-----
 constants.py                      | 53 +++++++++++++++++++
 examples/config.yml               | 47 ++++++++---------
 gga_get_data.py                   | 87 ++++++++++++++-----------------
 gga_init.py                       | 63 ++++++++++++----------
 gga_load_data.py                  | 46 ++++++++--------
 run_workflow_phaeoexplorer.py     | 25 ++++-----
 speciesData.py                    | 49 ++++++++---------
 templates/gspecies_compose.yml.j2 |  8 +--
 templates/traefik_compose.yml.j2  |  2 +-
 utilities.py                      | 68 +++++++++++-------------
 11 files changed, 256 insertions(+), 220 deletions(-)
 create mode 100644 constants.py

diff --git a/README.md b/README.md
index fd0b624..4601866 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # gga_load_data tools
 
 The gga_load_data tools allow automated deployment of GMOD visualisation tools (Chado, Tripal, JBrowse, Galaxy) for a bunch of genomes and datasets. 
-They are based on the Galaxy Genome Annotation (GGA) project (https://galaxy-genome-annotation.github.io). 
+They are based on the [Galaxy Genome Annotation (GGA) project](https://galaxy-genome-annotation.github.io). 
 
 A stack of Docker services is deployed for each organism, from an input yaml file describing the data.
 See `examples/example.yml` for an example of what information can be described and the correct formatting of this input file.
@@ -33,18 +33,17 @@ Note that Authelia needs a secured connexion (no self-signed certificate) betwee
 The "gga_load_data" tools are composed of 4 scripts:
 
 - gga_init: Create directory tree for organisms and deploy stacks for the input organisms as well as Traefik and optionally Authelia stacks
-- gga_get_data: Create `src_data` directory tree for organisms and copy datasets for the input organisms into the organisms directory tree
+- gga_get_data: Create `src_data` directory tree for organisms and copy datasets for the input organisms into `src_data`
 - gga_load_data: Load the datasets of the input organisms into their Galaxy library
-- run_workflow_phaeoexplorer: Remotely run a custom workflow in Galaxy, proposed as an "example script" to take inspiration from as workflow parameters are specific to Phaeoexplorer data
+- run_workflow_phaeoexplorer: Remotely run a custom workflow in Galaxy, proposed as an "example script" to take inspiration from as workflow parameters are specific to the [Phaeoexplorer](https://phaeoexplorer.sb-roscoff.fr) data
 
 ## Usage:
 
 For all scripts one input file is required, that describes the species and their associated data.
-(see `examples/example.yml`). Every dataset path in this file must be an absolute path.
+(see `examples/citrus_sinensis.yml`). Every dataset path in this file must be an absolute path.
 
 Another yaml file is required, the config file, with configuration variables (Galaxy and Tripal passwords, etc..) that
-the scripts need to create the different services and to access the Galaxy container. By default, the config file
-inside the repository root will be used if none is precised in the command line. An example of this config file is available
+the scripts need to create the different services and to access the Galaxy container. An example of this config file is available
 in the `examples` folder.
 
 **The input file and config file have to be the same for all scripts!**
@@ -52,7 +51,7 @@ in the `examples` folder.
 - Deploy stacks part: 
 
 ```bash
-$ python3 /path/to/repo/gga_init.py input_file.yml -c/--config config_file [-v/--verbose] [OPTIONS]
+$ python3 /path/to/repo/gga_init.py input_file.yml -c/--config config_file.yml [-v/--verbose] [OPTIONS]
 		--main-directory $PATH (Path where to create/update stacks; default=current directory)
 		--force-traefik (If specified, will overwrite traefik and authelia files; default=False)
 ```
@@ -67,28 +66,27 @@ $ python3 /path/to/repo/gga_get_data.py input_file.yml [-v/--verbose] [OPTIONS]
 - Load data in Galaxy library and prepare Galaxy instance: 
 
 ```bash
-$ python3 /path/to/repo/gga_load_data.py input_file.yml -c/--config config_file [-v/--verbose]
+$ python3 /path/to/repo/gga_load_data.py input_file.yml -c/--config config_file.yml [-v/--verbose]
 		--main-directory $PATH (Path where to access stacks; default=current directory)
 ```
 
 - Run a workflow in galaxy: 
  
 ```bash
-$ python3 /path/to/repo/gga_load_data.py input_file.yml -c/--config config_file --workflow /path/to/workflow.ga [-v/--verbose] [OPTIONS]
-		--workflow $WORKFLOW (Path to the workflow to run in galaxy. A couple of preset workflows are available in the "workflows" folder of the repository)
+$ python3 /path/to/repo/run_workflow_phaeoexplorer.py input_file.yml -c/--config config_file.yml --workflow /path/to/workflow.ga [-v/--verbose] [OPTIONS]
+		--workflow $WORKFLOW (Path to the workflow to run in galaxy. A couple of preset workflows are available in the "workflows_phaeoexplorer" folder)
 		--main-directory $PATH (Path where to access stacks; default=current directory)
 ```
 
-## Limitations
-
-The stacks deployment and the data loading into Galaxy should be run separately and only once the Galaxy service is ready.
-The `gga_load_data.py` script check that the Galaxy service is ready before loading the data and exit with a notification if it is not.
+The data loading into Galaxy with `gga_load_data.py` should be run only once the Galaxy service deployed with `gga_init.py` is ready.
+The `gga_load_data.py` script checks that the Galaxy service is ready before loading the data and exits with a notification if it is not.
 
 The status of the Galaxy service can be checked manually with `$ docker service logs -f genus_species_galaxy` or 
 `./serexec genus_species_galaxy supervisorctl status`.
 
+**Note**:
 When deploying the stack of services, the Galaxy service can take a long time to be ready, because of the data persistence. 
-In development mode only, this can be disabled by setting the variable `persist_galaxy_data` to `False` in the config file.
+In development mode only, this can be disabled by setting the variable `galaxy_persist_data` to `False` in the config file.
 
 ## Directory tree:
 
diff --git a/constants.py b/constants.py
new file mode 100644
index 0000000..9fa3876
--- /dev/null
+++ b/constants.py
@@ -0,0 +1,53 @@
+# Constants used in the input yaml
+ORG_PARAM_NAME = "name"
+ORG_PARAM_DESC = "description"
+ORG_PARAM_DESC_GENUS = "genus"
+ORG_PARAM_DESC_SPECIES = "species"
+ORG_PARAM_DESC_SEX = "sex"
+ORG_PARAM_DESC_STRAIN = "strain"
+ORG_PARAM_DESC_COMMON_NAME = "common_name"
+ORG_PARAM_DESC_ORIGIN = "origin"
+ORG_PARAM_DESC_MAIN_SPECIES = "main_species"
+ORG_PARAM_DATA = "data"
+ORG_PARAM_DATA_GENOME_PATH = "genome_path"
+ORG_PARAM_DATA_TRANSCRIPTS_PATH = "transcripts_path"
+ORG_PARAM_DATA_PROTEINS_PATH = "proteins_path"
+ORG_PARAM_DATA_GFF_PATH = "gff_path"
+ORG_PARAM_DATA_INTERPRO_PATH = "interpro_path"
+ORG_PARAM_DATA_ORTHOFINDER_PATH = "orthofinder_path"
+ORG_PARAM_DATA_BLASTP_PATH = "blastp_path"
+ORG_PARAM_DATA_BLASTX_PATH = "blastx_path"
+ORG_PARAM_DATA_GENOME_VERSION = "genome_version"
+ORG_PARAM_DATA_OGS_VERSION = "ogs_version"
+ORG_PARAM_DATA_PERFORMED_BY = "performed_by"
+ORG_PARAM_SERVICES = "services"
+ORG_PARAM_SERVICES_BLAST = "blast"
+
+# Constants used in the config yaml file
+CONF_ALL_HOSTNAME = "hostname"
+CONF_ALL_HTTP_PORT = "http_port"
+CONF_ALL_HTTPS_PORT = "https_port"
+CONF_ALL_PROXY_IP = "proxy_ip"
+CONF_ALL_AUTH_DOMAIN_NAME = "authentication_domain_name"
+CONF_ALL_AUTHELIA_CONFIG_PATH = "authelia_config_path"
+CONF_ALL_AUTHELIA_SECRETS_ENV_PATH = "authelia_secrets_env_path"
+CONF_ALL_AUTHELIA_DB_POSTGRES_PASSWORD = "authelia_db_postgres_password"
+CONF_GALAXY_DEFAULT_ADMIN_EMAIL = "galaxy_default_admin_email"
+CONF_GALAXY_DEFAULT_ADMIN_USER = "galaxy_defaut_admin_user"
+CONF_GALAXY_DEFAULT_ADMIN_PASSWORD = "galaxy_default_admin_password"
+CONF_GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN = "galaxy_config_remote_user_maildomain"
+CONF_GALAXY_PERSIST_DATA = "galaxy_persist_data"
+CONF_TRIPAL_PASSWORD = "tripal_password"
+CONF_TRIPAL_BANNER_PATH = "tripal_banner_path"
+CONF_TRIPAL_THEME_NAME = "tripal_theme_name"
+CONF_TRIPAL_THEME_GIT_CLONE = "tripal_theme_git_clone"
+CONF_JBROWSE_MENU_URL = "jbrowse_menu_url"
+
+# default config file
+DEFAULT_CONFIG = "examples/config.yml"
+
+GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
+DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"
+
+HOST_DATA_DIR='src_data'
+CONTAINER_DATA_DIR_ROOT='/project_data'
diff --git a/examples/config.yml b/examples/config.yml
index b34b323..c1d5d28 100644
--- a/examples/config.yml
+++ b/examples/config.yml
@@ -1,30 +1,27 @@
 # This is the configuration template file used by the gga_init.py, gga_load_data.py and run_workflow.py scripts
 
-# "all" section contains variables used by several services at once or the paths to import sensitive files
-all:
-      hostname: localhost  # Required. The hosting machine name
-      http_port: 8888  # Required. The HTTP port docker will use on the hosting machine
-      https_port: 8889  # Required for Authelia. The HTTPS port docker will use on the hosting machine
-      proxy_ip: XXX.XXX.XXX.XXX  # Required. IP of the upstream proxy (used by Traefik)
-      authentication_domain_name: XXXXXXXX  #  Required for Authelia. The authentication domain name.
-      authelia_config_path: /path/to/authelia_config.yml #  Required for Authelia. Path to the Authelia configuration file
-      authelia_secrets_env_path: /path/to/authelia/secrets.env #  Required for Authelia. Path to the env file containing passwords and secrets needed for Authelia
-      authelia-db_postgres_password: XXXXXXXX #  Required for Authelia.
+# These variables are used by several services at once or the paths to import sensitive files
+hostname: localhost  # Required. The hosting machine name
+http_port: 8888  # Required. The HTTP port docker will use on the hosting machine
+https_port: 8889  # Required for Authelia. The HTTPS port docker will use on the hosting machine
+proxy_ip: XXX.XXX.XXX.XXX  # Required. IP of the upstream proxy (used by Traefik)
+authentication_domain_name: XXXXXXXX  #  Required for Authelia. The authentication domain name.
+authelia_config_path: /path/to/authelia_config.yml #  Required for Authelia. Path to the Authelia configuration file
+authelia_secrets_env_path: /path/to/authelia/secrets.env #  Required for Authelia. Path to the env file containing passwords and secrets needed for Authelia
+authelia_db_postgres_password: XXXXXXXX #  Required for Authelia.
+
 # galaxy-specific variables
-galaxy:
-      galaxy_default_admin_email: gga@galaxy.org  # Required
-      galaxy_defaut_admin_user: gga  # Required
-      galaxy_default_admin_password: password  # Required
-      webapollo_user: admin_apollo@galaxy.org  # Required
-      webapollo_password: apollopass  # Required
-      galaxy_config_remote_user_maildomain: mydomain.com  # Required. The maildomain used by Galaxy authentication
-      persist_galaxy_data: "True"  # Optional (default: True). If False, docker data will NOT be persisted on your host's file system and will be lost any time the galaxy container is recreated. Do not set this variable to "False" for production
+galaxy_default_admin_email: gga@galaxy.org  # Required
+galaxy_defaut_admin_user: gga  # Required
+galaxy_default_admin_password: password  # Required
+galaxy_config_remote_user_maildomain: mydomain.com  # Required. The maildomain used by Galaxy authentication
+galaxy_persist_data: "True"  # Optional (default: True). If False, docker data will NOT be persisted on your host's file system and will be lost any time the galaxy container is recreated. Do not set this variable to "False" for production
+
 # tripal-specific variables
-tripal:
-      tripal_password: tripalpass  # Required. Tripal database password (also used by galaxy as an environment variable)
-      banner_path: /path/to/banner.png  # Optional. Use this to change the top banner in Tripal
-      tripal_theme_name: tripal_gga   # Optional. Use this to use another theme
-      tripal_theme_git_clone: http://gitlab.sb-roscoff.fr/abims/e-infra/tripal_gga.git  # Optional. Use this to install another theme.
+tripal_password: tripalpass  # Required. Tripal database password (also used by galaxy as an environment variable)
+tripal_banner_path: /path/to/banner.png  # Optional. Use this to change the top banner in Tripal
+tripal_theme_name: tripal_gga   # Optional. Use this to use another theme
+tripal_theme_git_clone: http://gitlab.sb-roscoff.fr/abims/e-infra/tripal_gga.git  # Optional. Use this to install another theme.
+
 # jbrowse-specific variables
-jbrowse:
-      menu_url: "http://localhost:8888/" # Optional. Used with run_workflow_phaeoexplorer.py: if present, this variable is used to define JBrowse menu_url (to define the template url for the JBrowse feature's link to Tripal), if absent, will use default "https://hostname"
\ No newline at end of file
+jbrowse_menu_url: "http://localhost:8888/" # Optional. Used with run_workflow_phaeoexplorer.py: if present, this variable is used to define JBrowse menu_url (to define the template url for the JBrowse feature's link to Tripal), if absent, will use default "https://hostname"
\ No newline at end of file
diff --git a/gga_get_data.py b/gga_get_data.py
index 1c14ed3..9950a86 100755
--- a/gga_get_data.py
+++ b/gga_get_data.py
@@ -10,6 +10,7 @@ import shutil
 
 import utilities
 import speciesData
+import constants
 
 """ 
 gga_get_data.py
@@ -87,55 +88,51 @@ class GetData(speciesData.SpeciesData):
         organism_annotation_dir = os.path.abspath("./src_data/annotation/{0}/OGS{1}".format(self.species_folder_name, self.ogs_version))
         organism_genome_dir = os.path.abspath("./src_data/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version))
 
-        datasets_to_get = {"genome_path": self.genome_path,
-                           "gff_path": self.gff_path, 
-                           "transcripts_path": self.transcripts_path,
-                           "proteins_path": self.proteins_path,
-                           "interpro_path": self.interpro_path, 
-                           "orthofinder_path": self.orthofinder_path, 
-                           "blastp_path": self.blastp_path, 
-                           "blastx_path": self.blastx_path}
-
-        genome_datasets = ["genome_path"]
-        annotation_datasets = ["gff_path", "transcripts_path", "proteins_path", "orthofinder_path", "interpro_path", "blastp_path", "blastx_path"]  
+        genome_datasets = {constants.ORG_PARAM_DATA_GENOME_PATH: self.genome_path}
+        annotation_datasets = {constants.ORG_PARAM_DATA_GFF_PATH: self.gff_path,
+                               constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH: self.transcripts_path,
+                               constants.ORG_PARAM_DATA_PROTEINS_PATH: self.proteins_path,
+                               constants.ORG_PARAM_DATA_INTERPRO_PATH: self.interpro_path,
+                               constants.ORG_PARAM_DATA_ORTHOFINDER_PATH: self.orthofinder_path,
+                               constants.ORG_PARAM_DATA_BLASTP_PATH: self.blastp_path,
+                               constants.ORG_PARAM_DATA_BLASTX_PATH: self.blastx_path}
         # Where to store blast results?
 
         # search_excluded_datasets = ["interpro_path", "orthofinder_path", "blastp_path", "blastx_path"]  
         # These datasets will not be searched if missing in the input file
 
         # Copy datasets in the organism src_data dir tree correct folder
-        for k, v in datasets_to_get.items():
+        for k, v in genome_datasets.items():
             if v:  # If dataset is not present in input file, skip copy
-                if k in genome_datasets:
-                    logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir))
-                    genome_fname = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
-                    try:
-                        shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname))
-                    except Exception as exc:
-                        logging.warning("Could not copy {0} ({1}) - Exit Code: {2})".format(k, v, exc))
-                elif k in annotation_datasets:
-                    dataset_fname = ""
-                    if k == "gff_path":
-                        dataset_fname = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, self.get_last_modified_time_string(os.path.abspath(v)))
-                    elif k == "transcripts_path":
-                        dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version)
-                    elif k == "proteins_path":
-                        dataset_fname = "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version)
-                    elif k == "orthofinder_path":
-                        dataset_fname = "{0}_OGS{1}_orthofinder.tsv".format(self.dataset_prefix, self.ogs_version)
-                    elif k == "interpro_path":
-                        dataset_fname = "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version)
-                    elif k == "blastp_path":
-                        dataset_fname = "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version)
-                    elif k == "blastx_path":
-                        dataset_fname = "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version)
-                    logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir))
-                    try:
-                        shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname))
-                    except Exception as exc:
-                        logging.warning("Could not copy {0} ({1}) - Exit Code: {2}".format(k, v, exc))
-                else:
-                    pass
+                logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir))
+                genome_fname = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
+                try:
+                    shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname))
+                except Exception as exc:
+                    logging.warning("Could not copy {0} ({1}) - Exit Code: {2}".format(k, v, exc))
+
+        for k, v in annotation_datasets.items():
+            if v:  # If dataset is not present in input file, skip copy
+                dataset_fname = ""
+                if k == constants.ORG_PARAM_DATA_GFF_PATH:
+                    dataset_fname = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, self.get_last_modified_time_string(os.path.abspath(v)))
+                elif k == constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH:
+                    dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version)
+                elif k == constants.ORG_PARAM_DATA_PROTEINS_PATH:
+                    dataset_fname = "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version)
+                elif k == constants.ORG_PARAM_DATA_ORTHOFINDER_PATH:
+                    dataset_fname = "{0}_OGS{1}_orthofinder.tsv".format(self.dataset_prefix, self.ogs_version)
+                elif k == constants.ORG_PARAM_DATA_INTERPRO_PATH:
+                    dataset_fname = "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version)
+                elif k == constants.ORG_PARAM_DATA_BLASTP_PATH:
+                    dataset_fname = "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version)
+                elif k == constants.ORG_PARAM_DATA_BLASTX_PATH:
+                    dataset_fname = "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version)
+                logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir))
+                try:
+                    shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname))
+                except Exception as exc:
+                    logging.warning("Could not copy {0} ({1}) - Exit Code: {2}".format(k, v, exc))
 
         os.chdir(self.main_dir)
 
@@ -162,10 +159,7 @@ def make_dirs(dir_paths_li):
     return created_dir_paths_li
     
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
-                                                 "with galaxy instances for GGA"
-                                                 ", following the protocol @ "
-                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+    parser = argparse.ArgumentParser(description="Create 'src_data' tree and add data files")
 
     parser.add_argument("input",
                         type=str,
@@ -212,4 +206,3 @@ if __name__ == "__main__":
         logging.info("Finding and copying datasets for %s" % get_data_for_current_species.full_name)
         get_data_for_current_species.get_source_data_files_from_path()
         logging.info("Sucessfully copied datasets for %s" % get_data_for_current_species.full_name)
-
diff --git a/gga_init.py b/gga_init.py
index 8ec255f..42215e6 100755
--- a/gga_init.py
+++ b/gga_init.py
@@ -9,12 +9,13 @@ import logging
 import sys
 import yaml
 import shutil
-
 from pathlib import Path
 from jinja2 import Environment, FileSystemLoader
 
 import utilities
 import speciesData
+import constants
+
 
 """ 
 gga_init.py
@@ -60,18 +61,18 @@ class DeploySpeciesStack(speciesData.SpeciesData):
         # Copy the custom banner to the species dir (banner used in tripal pages)
         # If the path specified is invalid (because it's empty or is still the default demo one),
         # use the default banner instead
-        if "banner_path" in self.config.keys():
-            if not config["banner_path"] == "" and os.path.isfile(os.path.abspath(config["banner_path"])):
+        if constants.CONF_TRIPAL_BANNER_PATH in self.config.keys():
+            if not config[constants.CONF_TRIPAL_BANNER_PATH] == "" and os.path.isfile(os.path.abspath(config[constants.CONF_TRIPAL_BANNER_PATH])):
                 banner_dest_path = os.path.join(self.species_dir, os.path.abspath("banner.png"))
-                if not os.path.isfile(banner_dest_path) or not os.path.samefile(os.path.abspath(config["banner_path"]),banner_dest_path):
-                    os.symlink(os.path.abspath(self.config["banner_path"]), banner_dest_path)
-                    logging.info("Custom banner added: symlink from %s" % self.config["banner_path"])
+                if not os.path.isfile(banner_dest_path) or not os.path.samefile(os.path.abspath(config[constants.CONF_TRIPAL_BANNER_PATH]),banner_dest_path):
+                    os.symlink(os.path.abspath(self.config[constants.CONF_TRIPAL_BANNER_PATH]), banner_dest_path)
+                    logging.info("Custom banner added: symlink from %s" % self.config[constants.CONF_TRIPAL_BANNER_PATH])
             else:
-                logging.debug("Using default banner for Tripal pages")
-                self.config.pop("banner_path", None)
+                logging.debug("Using default banner for Tripal pages because %s is not valid in 'config' file" % constants.CONF_TRIPAL_BANNER_PATH)
+                self.config.pop(constants.CONF_TRIPAL_BANNER_PATH, None)
         else:
             logging.debug("Using default banner for Tripal pages")
-            self.config.pop("banner_path", None)
+            self.config.pop(constants.CONF_TRIPAL_BANNER_PATH, None)
 
         # Create nginx dirs and write/re-write nginx conf
         make_dirs(dir_paths_li=["./nginx", "./nginx/conf"])
@@ -206,22 +207,26 @@ def make_traefik_compose_files(config, main_dir):
                 traefik_compose_file.truncate(0)
                 traefik_compose_file.write(traefik_compose_output)
 
-        if "authelia_config_path" in config.keys():
-            if not config["authelia_config_path"] == "" and os.path.isfile(os.path.abspath(config["authelia_config_path"])):
-                try:
-                    shutil.copy(os.path.abspath(config["authelia_config_path"]), "./traefik/authelia/configuration.yml")
-                except Exception as exc:
-                    logging.critical("Could not copy authelia configuration file")
-                    sys.exit(exc)
-            else:
-                logging.critical("Invalid authelia configuration path (%s)" % config["authelia_config_path"])
-                sys.exit()
-
-        # Path to the authelia users in the repo
-        authelia_users_path = script_dir + "/files/authelia_users.yml"
-        # Copy authelia "users" file
-        if not os.path.isfile("./traefik/authelia/users.yml"):
-            shutil.copy(authelia_users_path, "./traefik/authelia/users.yml")
+        if constants.CONF_ALL_HTTPS_PORT in config.keys():
+            logging.info("HTTPS mode (with Authelia)")
+            if constants.CONF_ALL_AUTHELIA_CONFIG_PATH in config.keys():
+                if not config[constants.CONF_ALL_AUTHELIA_CONFIG_PATH] == "" and os.path.isfile(os.path.abspath(config[constants.CONF_ALL_AUTHELIA_CONFIG_PATH])):
+                    try:
+                        shutil.copy(os.path.abspath(config[constants.CONF_ALL_AUTHELIA_CONFIG_PATH]), "./traefik/authelia/configuration.yml")
+                    except Exception as exc:
+                        logging.critical("Could not copy authelia configuration file")
+                        sys.exit(exc)
+                else:
+                    logging.critical("Invalid authelia configuration path (%s)" % config[constants.CONF_ALL_AUTHELIA_CONFIG_PATH])
+                    sys.exit()
+
+            # Path to the authelia users in the repo
+            authelia_users_path = script_dir + "/files/authelia_users.yml"
+            # Copy authelia "users" file
+            if not os.path.isfile("./traefik/authelia/users.yml"):
+                shutil.copy(authelia_users_path, "./traefik/authelia/users.yml")
+        else:
+            logging.info("HTTP mode (without Authelia)")
 
         # Create the mounts for the traefik and authelia services
         traefik_dir = os.path.abspath(os.path.join(main_dir, "traefik"))
@@ -340,7 +345,7 @@ if __name__ == "__main__":
 
     parser.add_argument("--config",
                         type=str,
-                        help="Config path, default to the 'config' file inside the script repository")
+                        help="Config path, default to 'examples/config.yml'")
 
     parser.add_argument("--main-directory",
                         type=str,
@@ -357,7 +362,11 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(level=logging.INFO)
 
-    config_file = os.path.abspath(args.config)
+    # Parsing the config file if provided, using the default config otherwise
+    if args.config:
+        config_file = os.path.abspath(args.config)
+    else:
+        config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
     config = utilities.parse_config(config_file)
 
     main_dir = None
diff --git a/gga_load_data.py b/gga_load_data.py
index 0dc2c79..1bb7a36 100755
--- a/gga_load_data.py
+++ b/gga_load_data.py
@@ -11,12 +11,12 @@ import time
 import json
 import yaml
 import subprocess
-
 from bioblend import galaxy
 from bioblend.galaxy.objects import GalaxyInstance
 
 import utilities
 import speciesData
+import constants
 
 """ 
 gga_load_data.py
@@ -26,13 +26,6 @@ Usage: $ python3 gga_load_data.py -i input_example.yml --config config.yml [OPTI
 Do not call this script before the galaxy container is ready
 """
 
-# If this version if not found, Galaxy will use the one that is found
-GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
-DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"
-
-HOST_DATA_DIR='src_data'
-CONTAINER_DATA_DIR_ROOT='/project_data'
-
 class LoadData(speciesData.SpeciesData):
     """
     Child of SpeciesData
@@ -42,6 +35,10 @@ class LoadData(speciesData.SpeciesData):
     Optional data file formatting
 
     """
+    def __init__(self, parameters_dictionary):
+        self.existing_folders_cache = {}
+        self.bam_metadata_cache = {}
+        super().__init__(parameters_dictionary)
 
     def get_history(self):
         """
@@ -73,7 +70,7 @@ class LoadData(speciesData.SpeciesData):
 
         logging.debug("Getting 'Homo sapiens' ID in chado database")
         get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(self.instance,
-            tool_id=GET_ORGANISMS_TOOL,
+            tool_id=constants.GET_ORGANISMS_TOOL, # If this version if not found, Galaxy will use the one that is found
             history_id=self.history_id,
             tool_inputs={"genus": "Homo", "species": "sapiens"})
         get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output_dataset_id)
@@ -83,7 +80,7 @@ class LoadData(speciesData.SpeciesData):
             get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
             sapiens_id = str(get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
             utilities.run_tool(
-                tool_id=DELETE_ORGANISMS_TOOL,
+                tool_id=constants.DELETE_ORGANISMS_TOOL,
                 history_id=self.history_id,
                 tool_inputs={"organism": sapiens_id})
         except IndexError:
@@ -113,11 +110,11 @@ class LoadData(speciesData.SpeciesData):
         :return:
         """
 
-        data_dir_root=os.path.join(self.get_species_dir(), HOST_DATA_DIR)
+        data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR)
 
         instance = GalaxyInstance(url=self.instance_url,
-                                              email=self.config["galaxy_default_admin_email"],
-                                              password=self.config["galaxy_default_admin_password"]
+                                              email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+                                              password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
                                               )
 
         logging.info("Looking for project data in %s" % data_dir_root)
@@ -185,8 +182,8 @@ class LoadData(speciesData.SpeciesData):
                             logging.info("Skipping useless file '%s'" % single_file)
                             continue
 
-                        single_file_relative_path = re.sub(data_dir_root, CONTAINER_DATA_DIR_ROOT, single_file)
-                        single_file_path_in_container=os.path.join(CONTAINER_DATA_DIR_ROOT, single_file_relative_path)
+                        single_file_relative_path = re.sub(data_dir_root, constants.CONTAINER_DATA_DIR_ROOT, single_file)
+                        single_file_path_in_container=os.path.join(constants.CONTAINER_DATA_DIR_ROOT, single_file_relative_path)
 
                         logging.info("Adding file '%s' with type '%s' and name '%s'" % (single_file_path_in_container, ftype, clean_name))
                         datasets = prj_lib.upload_from_galaxy_fs(
@@ -290,8 +287,8 @@ class LoadData(speciesData.SpeciesData):
 
         logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
         self.instance = galaxy.GalaxyInstance(url=self.instance_url,
-                                              email=self.config["galaxy_default_admin_email"],
-                                              password=self.config["galaxy_default_admin_password"]
+                                              email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+                                              password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
                                               )
 
         try:
@@ -306,10 +303,7 @@ class LoadData(speciesData.SpeciesData):
 
     
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
-                                                 "with galaxy instances for GGA"
-                                                 ", following the protocol @ "
-                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+    parser = argparse.ArgumentParser(description="Load data into Galaxy library")
 
     parser.add_argument("input",
                         type=str,
@@ -321,7 +315,7 @@ if __name__ == "__main__":
 
     parser.add_argument("--config",
                         type=str,
-                        help="Config path, default to the 'config' file inside the script repository")
+                        help="Config path, default to 'examples/config.yml'")
 
     parser.add_argument("--main-directory",
                         type=str,
@@ -334,7 +328,11 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(level=logging.INFO)
 
-    config_file = os.path.abspath(args.config)
+    # Parsing the config file if provided, using the default config otherwise
+    if args.config:
+        config_file = os.path.abspath(args.config)
+    else:
+        config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
     config = utilities.parse_config(config_file)
 
     main_dir = None
@@ -365,7 +363,7 @@ if __name__ == "__main__":
         load_data_for_current_species.config = config
         # Set the instance url attribute -- Does not work with localhost on scratch (ALB)
         load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(
-                load_data_for_current_species.config["http_port"],
+                load_data_for_current_species.config[constants.CONF_ALL_HTTP_PORT],
                 load_data_for_current_species.genus_lowercase,
                 load_data_for_current_species.species)
 
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 2c385d4..71fc25f 100755
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -452,10 +452,10 @@ class RunWorkflow(speciesData.SpeciesData):
             # In case of the Jbrowse workflow, we unfortunately have to manually edit the parameters instead of setting them
             # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
             # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-            if "menu_url" not in self.config.keys():
+            if "jbrowse_menu_url" not in self.config.keys():
                 jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}")
             else:
-                jbrowse_menu_url = self.config["menu_url"]
+                jbrowse_menu_url = self.config["jbrowse_menu_url"]
             if workflow_name == "Jbrowse":
                 workflow_dict["steps"]["2"]["tool_state"] = workflow_dict["steps"]["2"]["tool_state"].replace("__MENU_URL__", jbrowse_menu_url)
                 # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
@@ -712,7 +712,7 @@ def run_workflow(workflow_path, workflow_parameters, datamap, config, input_spec
         # In case of the Jbrowse workflow, we unfortunately have to manually edit the parameters instead of setting them
         # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
         # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-        if "menu_url" not in config.keys():
+        if "jbrowse_menu_url" not in config.keys():
             jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}")
         else:
-            jbrowse_menu_url = config["menu_url"]
+            jbrowse_menu_url = config["jbrowse_menu_url"]
@@ -877,10 +877,7 @@ def install_changesets_revisions_from_workflow(instance, workflow_path):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
-                                                 "with galaxy instances for GGA"
-                                                 ", following the protocol @ "
-                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+    parser = argparse.ArgumentParser(description="Run Galaxy workflows, specific to Phaeoexplorer data")
 
     parser.add_argument("input",
                         type=str,
@@ -1092,11 +1089,11 @@ if __name__ == "__main__":
 
             jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
             jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-            if "menu_url" not in config.keys():
+            if "jbrowse_menu_url" not in config.keys():
                 jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
                 jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
             else:
-                jbrowse_menu_url_org1 = config["menu_url"]
+                jbrowse_menu_url_org1 = config["jbrowse_menu_url"]
                 jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
 
 
@@ -1140,7 +1137,7 @@ if __name__ == "__main__":
             workflow_parameters[LOAD_FASTA_ORG1] = {"organism": org1_org_id,
                                                     "analysis_id": org1_genome_analysis_id,
                                                     "do_update": "true"}
-            # workflow_parameters[JBROWSE_ORG1] = {"menu_url": jbrowse_menu_url_org1}
+            # workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1}
             workflow_parameters[JBROWSE_ORG1] = {}  
             workflow_parameters[LOAD_GFF_ORG1] = {"organism": org1_org_id, "analysis_id": org1_ogs_analysis_id}
             workflow_parameters[SYNC_FEATURES_ORG1] = {"organism_id":  org1_org_id}
@@ -1152,7 +1149,7 @@ if __name__ == "__main__":
                                                     "analysis_id": org2_genome_analysis_id,
                                                     "do_update": "true"}
             workflow_parameters[LOAD_GFF_ORG2] = {"organism": org2_org_id, "analysis_id": org2_ogs_analysis_id}
-            # workflow_parameters[JRBOWSE_ORG2] = {"menu_url": jbrowse_menu_url_org2}
+            # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2}
             workflow_parameters[JRBOWSE_ORG2] = {}
             workflow_parameters[SYNC_FEATURES_ORG2] = {"organism_id":  org2_org_id}
 
@@ -1187,12 +1184,12 @@ if __name__ == "__main__":
                 # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
                 jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
                 jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
-                if "menu_url" not in config.keys():
+                if "jbrowse_menu_url" not in config.keys():
                     jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
                     jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
                 else:
-                    jbrowse_menu_url_org1 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
-                    jbrowse_menu_url_org2 = config["menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
+                    jbrowse_menu_url_org1 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="{id}")
+                    jbrowse_menu_url_org2 = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="{id}")
 
                 # show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True)
                 # print(show_tool_add_organism)
diff --git a/speciesData.py b/speciesData.py
index bb13d11..312c750 100755
--- a/speciesData.py
+++ b/speciesData.py
@@ -5,10 +5,10 @@ import os
 import sys
 import utilities
 import logging
+import constants
 
 from _datetime import datetime
 
-
 class SpeciesData:
     """
     This class contains attributes and functions to interact with the galaxy container of the GGA environment
@@ -48,37 +48,40 @@ class SpeciesData:
             return string
 
     def __init__(self, parameters_dictionary):
-        self.name = parameters_dictionary["name"]
         self.parameters_dictionary = parameters_dictionary
-        parameters_dictionary_description=parameters_dictionary["description"]
-        self.species = self.clean_string(parameters_dictionary_description["species"])
-        self.genus = self.clean_string(parameters_dictionary_description["genus"])
-        self.strain = self.clean_string(parameters_dictionary_description["strain"])
-        self.sex = self.clean_string(parameters_dictionary_description["sex"])
-        self.common = self.clean_string(parameters_dictionary_description["common_name"])
+        self.name = parameters_dictionary[constants.ORG_PARAM_NAME]
+        parameters_dictionary_description=parameters_dictionary[constants.ORG_PARAM_DESC]
+        parameters_dictionary_data = parameters_dictionary[constants.ORG_PARAM_DATA]
+        parameters_dictionary_services = parameters_dictionary[constants.ORG_PARAM_SERVICES]
+
+        self.species = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES])
+        self.genus = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS])
+        self.strain = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN])
+        self.sex = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX])
+        self.common = self.clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME])
 
         self.date = datetime.today().strftime("%Y-%m-%d")
-        self.origin = parameters_dictionary["description"]["origin"]
-        self.performed = parameters_dictionary["data"]["performed_by"]
+        self.origin = parameters_dictionary_description[constants.ORG_PARAM_DESC_ORIGIN]
+        self.performed = parameters_dictionary_data[constants.ORG_PARAM_DATA_PERFORMED_BY]
 
-        if parameters_dictionary["data"]["genome_version"] == "":
+        if parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_VERSION] == "":
             self.genome_version = "1.0"
         else:
-            self.genome_version = str(parameters_dictionary["data"]["genome_version"])
-        if parameters_dictionary["data"]["ogs_version"] == "":
+            self.genome_version = str(parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_VERSION])
+        if parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_VERSION] == "":
             self.ogs_version = "1.0"
         else:
-            self.ogs_version = str(parameters_dictionary["data"]["ogs_version"])
+            self.ogs_version = str(parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_VERSION])
 
         # TODO: catch blocks if key is absent in input
-        self.genome_path = parameters_dictionary["data"]["genome_path"]
-        self.transcripts_path = parameters_dictionary["data"]["transcripts_path"]
-        self.proteins_path = parameters_dictionary["data"]["proteins_path"]
-        self.gff_path = parameters_dictionary["data"]["gff_path"]
-        self.interpro_path = parameters_dictionary["data"]["interpro_path"]
-        self.blastp_path = parameters_dictionary["data"]["blastp_path"]
-        self.blastx_path = parameters_dictionary["data"]["blastx_path"]
-        self.orthofinder_path = parameters_dictionary["data"]["orthofinder_path"]
+        self.genome_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_PATH]
+        self.transcripts_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH]
+        self.proteins_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_PROTEINS_PATH]
+        self.gff_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GFF_PATH]
+        self.interpro_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_INTERPRO_PATH]
+        self.blastp_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTP_PATH]
+        self.blastx_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTX_PATH]
+        self.orthofinder_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_ORTHOFINDER_PATH]
 
         self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
         self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
@@ -121,5 +124,3 @@ class SpeciesData:
         self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items(
             [self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(),
              self.sex.lower()])["not_empty"])
-        self.existing_folders_cache = {}
-        self.bam_metadata_cache = {}
diff --git a/templates/gspecies_compose.yml.j2 b/templates/gspecies_compose.yml.j2
index e1fc127..65f5364 100644
--- a/templates/gspecies_compose.yml.j2
+++ b/templates/gspecies_compose.yml.j2
@@ -38,7 +38,7 @@ services:
             - ./docker_data/galaxy/:/export/:ro
             - ./src_data/:/project_data/:ro
             - ./src_data:/data:ro
-          {% if 'banner_path' is defined %}
+          {% if tripal_banner_path is defined %}
             - ./banner.png:/var/www/html/banner.png:ro
           {% endif %}
             #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
@@ -115,7 +115,7 @@ services:
     galaxy:
         image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
         volumes:
-            {% if (persist_galaxy_data is defined) and (persist_galaxy_data == "False") %}
+            {% if (galaxy_persist_data is defined) and (galaxy_persist_data == "False") %}
             #- ./docker_data/galaxy/:/export/
             {% else %}
             - ./docker_data/galaxy/:/export/
@@ -140,10 +140,6 @@ services:
             PROXY_PREFIX: /sp/{{ genus_species }}/galaxy
             GALAXY_TRIPAL_URL: http://tripal.{{ genus_species }}/tripal/
             GALAXY_TRIPAL_PASSWORD: {{ tripal_password }}  # See tripal config above
-            GALAXY_WEBAPOLLO_URL: http://one-of-the-swarm-node:8888/apollo/
-            GALAXY_WEBAPOLLO_USER: "{{ webapollo_user }}"
-            GALAXY_WEBAPOLLO_PASSWORD: "{{ webapollo_password }}"  # See tripal config below
-            GALAXY_WEBAPOLLO_EXT_URL: /apollo/
             GALAXY_CHADO_DBHOST: tripal-db.{{ genus_species }}
             GALAXY_CHADO_DBSCHEMA: chado
             GALAXY_AUTO_UPDATE_DB: 1
diff --git a/templates/traefik_compose.yml.j2 b/templates/traefik_compose.yml.j2
index a707d1a..c72595e 100644
--- a/templates/traefik_compose.yml.j2
+++ b/templates/traefik_compose.yml.j2
@@ -115,7 +115,7 @@ services:
   authelia-db:
       image: postgres:12.2-alpine
       environment:
-          POSTGRES_PASSWORD: {{ authelia-db_postgres_password }}
+          POSTGRES_PASSWORD: {{ authelia_db_postgres_password }}
       volumes:
           - ./docker_data/authelia_db/:/var/lib/postgresql/data/
       networks:
diff --git a/utilities.py b/utilities.py
index e633843..139d083 100755
--- a/utilities.py
+++ b/utilities.py
@@ -7,7 +7,25 @@ import sys
 import os
 import subprocess
 import bioblend
+import constants
 
+def load_yaml(yaml_file):
+
+    try:
+        with open(yaml_file, 'r') as stream:
+            try:
+                data = yaml.safe_load(stream)
+            except yaml.YAMLError as err:
+                logging.critical("Input file %s is not in YAML format" % yaml_file)
+                sys.exit(err)
+    except FileNotFoundError:
+        logging.critical("Input file doesn't exist (%s)" % yaml_file)
+        sys.exit()
+    except OSError:
+        logging.critical("Input file cannot be read (%s)" % yaml_file)
+        sys.exit()
+
+    return data
 
 def parse_config(config_file):
     """
@@ -17,25 +35,13 @@ def parse_config(config_file):
     :return:
     """
 
-    config_variables = {}
-    logging.debug("Using config: %s" % os.path.abspath(config_file))
-    try:
-        with open(config_file, 'r') as stream:
-            yaml_dict = yaml.safe_load(stream)
-            for k, v in yaml_dict.items():
-                for k2, v2 in v.items():
-                    config_variables[k2] = v2  # Add a key:value pair to variables for replacement in the compose template file
-
-    except FileNotFoundError:
-        logging.critical("The config file specified doesn't exist (%s)" % config_file)
-        sys.exit()
-    except OSError:
-        logging.critical("The config file specified cannot be read (%s)" % config_file)
+    config_dict = load_yaml(config_file)
+    if isinstance(config_dict, dict):
+        return config_dict
+    else:
+        logging.critical("Config yaml file is not a dictionary (%s)" % config_file)
         sys.exit()
 
-    return config_variables
-
-
 def parse_input(input_file):
     """
     Parse the yml input file to extract data to create the SpeciesData objects
@@ -45,25 +51,13 @@ def parse_input(input_file):
     :return:
     """
 
-    sp_dict_list = []
-
-    try:
-        with open(input_file, 'r') as stream:
-            try:
-                sp_dict_list = yaml.safe_load(stream)
-            except yaml.YAMLError as err:
-                logging.critical("Input file is not in YAML format")
-                sys.exit(err)
-    except FileNotFoundError:
-        logging.critical("The specified input file doesn't exist (%s)" % input_file)
-        sys.exit()
-    except OSError:
-        logging.critical("The specified input file cannot be read (%s)" % input_file)
+    sp_dict_list = load_yaml(input_file)
+    if isinstance(sp_dict_list, list):
+        return sp_dict_list
+    else:
+        logging.critical("Input organisms yaml file is not a list (%s)" % input_file)
         sys.exit()
 
-    return sp_dict_list
-
-
 def filter_empty_not_empty_items(li):
     """
     Separate a list between empty items and non empty items.
@@ -124,8 +118,8 @@ def get_species_history_id(instance, full_name):
 
 def get_gspecies_string_from_sp_dict(sp_dict):
 
-    genus = sp_dict["description"]["genus"]
-    species = sp_dict["description"]["species"]
+    genus = sp_dict[constants.ORG_PARAM_DESC][constants.ORG_PARAM_DESC_GENUS]
+    species = sp_dict[constants.ORG_PARAM_DESC][constants.ORG_PARAM_DESC_SPECIES]
     gspecies = genus.lower() + "_" + species.lower()
     return gspecies
 
@@ -164,7 +158,7 @@ def get_unique_species_dict_list(sp_dict_list):
 
     for sp in sp_dict_list:
         gspecies = get_gspecies_string_from_sp_dict(sp)
-        if gspecies not in unique_species_dict.keys() or sp["description"]["main_species"] == True :
+        if gspecies not in unique_species_dict.keys() or sp[constants.ORG_PARAM_DESC][constants.ORG_PARAM_DESC_MAIN_SPECIES] == True :
             unique_species_dict[gspecies] = sp
         else:
             continue
-- 
GitLab