Skip to content
Snippets Groups Projects
Commit 350b0e4f authored by Arthur Le Bars's avatar Arthur Le Bars
Browse files

More templating options for gga_init, cleaned up some deprecated code and added some more comments

parent 10ddfb28
Branches
Tags
1 merge request!1Release 1.0
......@@ -16,11 +16,10 @@ from jinja2 import Template, Environment, FileSystemLoader
import utilities
import speciesData
"""
gga_init.py
Usage: $ python3 gga_init.py -i input_example.yml [OPTIONS]
Usage: $ python3 gga_init.py -i input_example.yml --config config.yml [OPTIONS]
"""
......@@ -30,7 +29,8 @@ class DeploySpeciesStack(speciesData.SpeciesData):
Child of SpeciesData
Contains methods and attributes to deploy a stack of services for a given organism, from creating/updating
the organism's directory tree to create the required docker-compose files
the organism's directory tree to create the required docker-compose files and stack deployment
"""
def make_directory_tree(self):
......@@ -42,88 +42,87 @@ class DeploySpeciesStack(speciesData.SpeciesData):
os.chdir(self.main_dir)
# Create the species main directory (name of the dir: genus_species)
try:
os.mkdir(self.species_dir)
logging.info("Making directory tree for %s" % self.full_name)
except FileExistsError:
logging.info("Updating directory tree for %s" % self.full_name)
logging.info("Updating directory tree of %s" % self.genus_species)
try:
os.chdir(self.species_dir)
except OSError:
logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
sys.exit()
except OSError as exc:
logging.critical("Cannot access %s" % self.genus_species)
sys.exit(exc)
# Copy the custom banner to the species dir (banner used in tripal pages)
# if "custom_banner" not in self.config.keys() or not self.config["custom_banner"] == "/path/to/banner" or not self.config["custom_banner"] == "":
# try:
# logging.debug("Custom banner path: %s" % self.config["custom_banner"])
# if os.path.isfile(os.path.abspath(self.config["custom_banner"])):
# shutil.copy(os.path.abspath(self.config["custom_banner"]), "%s/banner.png" % self.species_dir)
# except FileNotFoundError:
# logging.warning("Specified banner not found (%s), skipping" % self.config["custom_banner"])
# Write nginx conf
# If the path specified is invalid (because it's empty or is still the default demo one),
# use the default banner instead
if "banner_path" in self.config.keys():
if self.config["banner_path"] != "/path/to/banner" or self.config["banner_path"] != "":
try:
logging.debug("Custom banner path: %s" % self.config["banner_path"])
if os.path.isfile(os.path.abspath(self.config["banner_path"])):
shutil.copy(os.path.abspath(self.config["banner_path"]), "%s/banner.png" % self.species_dir)
except FileNotFoundError:
logging.warning("Specified banner not found (%s), using default banner instead" % self.config["banner_path"])
self.config.pop("banner_path", None)
else:
logging.debug("Using default banner for Tripal pages")
self.config.pop("banner_path", None)
# Create nginx dirs and write/re-write nginx conf
self.make_dirs(dir_paths_li=["./nginx", "./nginx/conf"])
try:
os.mkdir("./nginx/")
os.mkdir("./nginx/conf")
with open(os.path.abspath("./nginx/conf/default.conf"), 'w') as conf:
conf.write("server {\n\tlisten 80;\n\tserver_name ~.;\n\tlocation /download/ {\n\t\talias /project_data/; \n\t\tautoindex on;\n\t}\n}") # The species nginx conf
except FileExistsError:
logging.debug("Nginx conf already exists, skipping")
organism_annotation_dir, organism_genome_dir = None, None
except OSError as exc:
logging.critical("Cannot edit NginX conf file")
sys.exit(exc)
# Creation (or updating) of the src_data directory tree
# Depth 0
try:
os.mkdir("./src_data")
except FileExistsError:
logging.debug("src_data folder already exist for %s" % self.full_name)
except PermissionError:
logging.critical("Insufficient permission to create src_data directory tree")
sys.exit()
# Depth 1
try:
os.mkdir("./src_data/annotation")
os.mkdir("./src_data/genome")
os.mkdir("./src_data/tracks")
except FileExistsError:
logging.debug("Depth 1 src_data folder(s) already exist for %s" % self.full_name)
except PermissionError as exc:
logging.critical("Insufficient permission to create src_data directory tree")
sys.exit(exc)
# Depth 2
try:
os.mkdir("./src_data/annotation/" + self.species_folder_name)
os.mkdir("./src_data/genome/" + self.species_folder_name)
except FileExistsError:
logging.debug("Depth 2 src_data folder(s) already exist for %s" % self.full_name)
except PermissionError as exc:
logging.critical("Insufficient permission to create src_data directory tree")
sys.exit(exc)
# List of all the directories to create in src_data
src_data_dirs_li = ["./src_data", "./src_data/annotation", "./src_data/genome", "./src_data/tracks",
"./src_data/annotation/%s" % self.species_folder_name,
"./src_data/genome/%s" % self.species_folder_name,
"./src_data/annotation/{0}/OGS{1}/".format(self.species_folder_name, self.ogs_version),
"./src_data/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version)]
self.make_dirs(dir_paths_li=src_data_dirs_li)
# Return to main directory
os.chdir(self.main_dir)
# Depth 3
logging.info("Directory tree generated for %s" % self.full_name)
@staticmethod
def make_dirs(dir_paths_li):
"""
Recursively create directories from a list of paths with a try-catch condition
:param dir_paths_li:
:return:
"""
created_dir_paths_li = []
for dir_path in dir_paths_li:
try:
os.mkdir("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version)
os.mkdir("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
os.mkdir(dir_path)
except FileExistsError:
logging.debug("Depth 3 src_data folder(s) already exist for %s" % self.full_name)
logging.debug("%s directory already exists" % dir_path)
except PermissionError as exc:
logging.critical("Insufficient permission to create src_data directory tree")
logging.critical("Insufficient permission to create %s" % dir_path)
sys.exit(exc)
created_dir_paths_li.append(dir_path)
# Return to main_dir
os.chdir(self.main_dir)
logging.info("Directory tree generated for %s" % self.full_name)
return created_dir_paths_li
def make_compose_files(self):
def make_compose_files(self, force=False):
"""
Create a formatted copy of the template compose file inside a species directory tree
......@@ -133,60 +132,13 @@ class DeploySpeciesStack(speciesData.SpeciesData):
os.chdir(self.main_dir)
try:
os.chdir(self.species_dir)
except OSError:
logging.critical("Cannot access " + self.species_dir)
sys.exit(0)
# Path to the templates used to generate the custom docker-compose files for an input species and the traefik+authelia services
gspecies_template_path = self.script_dir + "/templates/gspecies_compose_template.yml.j2" # Jinja template path
traefik_template_path = self.script_dir + "/templates/traefik_compose_template.yml"
# authelia_config_path = self.script_dir + "/templates/authelia_config_example.yml" # Do not copy the authelia config!
authelia_users_path = self.script_dir + "/templates/authelia_users_template.yml"
except OSError as exc:
logging.critical("Cannot access %s" % self.species_dir)
sys.exit(exc)
# Set the genus_species_strain_sex value for replacing template
genus_species_strain_sex = "{0}_{1}".format(self.genus.lower(), self.specieslower())
if self.sex and self.strain:
genus_species_strain_sex = "_".join([self.genus.lower(), self.species.lower(), self.strain, self.sex])
elif self.sex and not self.strain:
genus_species_strain_sex = "_".join([self.genus.lower(), self.specieslower(), self.sex])
elif self.genus and not self.species and not self.sex and not self.strain:
genus_species_strain_sex = self.genus.lower()
else:
genus_species_strain_sex = "{0}_{1}".format(self.genus.lower(), self.specieslower())
# # Method without Jinja templating (deprecated)
# with open(stack_template_path, 'r') as infile:
# organism_content = list()
# for line in infile:
# # Replace placeholders in the compose file and append line to output
# organism_content.append(
# line.replace("genus_species",
# str(self.genus.lower() + "_" + self.species)).replace("Genus species",
# str(self.genus_uppercase + " " + self.species)).replace("Genus/species",
# str(self.genus_uppercase + "/" + self.species)).replace("gspecies",
# str(self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex",
# genus_species_strain_sex))
# # Write/format the output compose file
# with open("./docker-compose.yml", 'w') as outfile:
# outfile.truncate(0) # Delete file content
# for line in organism_content: # Replace env variables by those in the config file
# for env_variable, value in self.config.items(): # env variables are stored in this dict
# # print("ENV VARIABLE: " + env_variable + "\t VALUE: " + value)
# if env_variable in line:
# line = line.replace(env_variable, value)
# break
# # Write the new line in the docker-compose
# outfile.write(line)
# # Create mounts for the current docker-compose
# self.create_mounts(working_dir=self.species_dir)
# jinja templating, handled using the python jinja module
# TODO: use ansible to handle the templating in production
# Jinja2 templating, handled using the python "jinja2" module
file_loader = FileSystemLoader(self.script_dir + "/templates")
env = Environment(loader=file_loader)
template = env.get_template("gspecies_compose_template.yml.j2")
# We need a dict holding all key (variables) - values that needs to be replaced in the template as our rendering dict
# To do so we need both input file vars and config vars
......@@ -196,41 +148,55 @@ class DeploySpeciesStack(speciesData.SpeciesData):
"strain": self.strain, "sex": self.sex, "Genus_species": self.genus_species[0].upper() + self.genus_species[1:]}
# Merge the two dicts
render_vars = {**self.config, **input_vars}
output = template.render(render_vars)
with open(os.path.join(self.species_dir, "docker-compose.yml", "w")) as gspecies_compose_file:
print("Writing gspecies compose yml file")
# Render the gspecies docker-compose file and write it
gspecies_compose_template = env.get_template("gspecies_compose_template.yml.j2")
gspecies_compose_output = gspecies_compose_template.render(render_vars)
with open(os.path.join(self.species_dir, "docker-compose.yml"), "w") as gspecies_compose_file:
logging.info("Writing %s docker-compose.yml" % self.genus_species)
gspecies_compose_file.truncate(0)
gspecies_compose_file.write(gspecies_compose_output)
# Create the volumes (directory) of the species docker-compose file
self.create_mounts(working_dir=".")
# Proceed to the traefik and authelia directories
os.chdir(self.main_dir)
self.make_dirs(["./traefik", "./traefik/authelia"])
# Render and try to write the traefik docker-compose file
# This new docker-compose file will not overwrite the one already present in the traefik dir
# unless the argument "--overwrite-all" is specified
if not os.path.isfile("./traefik/docker-compose.yml") or force:
traefik_compose_template = env.get_template("traefik_compose_template.yml.j2")
traefik_compose_output = traefik_compose_template.render(render_vars)
with open(os.path.join(self.main_dir, "docker-compose.yml"), 'w') as traefik_compose_file:
logging.info("Writing traefik docker-compose.yml")
traefik_compose_file.truncate(0)
traefik_compose_file.write(traefik_compose_output)
if self.config["authelia_config_path"]:
if not self.config["authelia_config_path"] == "" or not self.config["authelia_config_path"] == "/path/to/authelia/config":
if os.path.isfile(os.path.abspath(self.config["authelia_config_path"])):
try:
os.chdir(os.path.abspath(self.main_dir))
os.mkdir("./traefik")
os.mkdir("./traefik/authelia")
if self.config["custom_authelia_config_path"]:
if os.path.isfile(os.path.abspath(self.config["custom_authelia_config_path"])):
try:
shutil.copy(os.path.abspath(self.config["custom_authelia_config_path"]), "./traefik/authelia")
except FileNotFoundError:
logging.critical("Cannot copy custom Authelia config file (%s)" % self.config["custom_authelia_config_path"])
sys.exit()
shutil.copy(os.path.abspath(self.config["authelia_config_path"]), "./traefik/authelia")
except Exception as exc:
logging.critical("Cannot copy custom Authelia config file (%s)" % self.config["authelia_config_path"])
sys.exit(exc)
else:
logging.critical("Custom Authelia config file not found (%s)" % self.config["custom_authelia_config_path"])
if not os.path.isfile("./traefik/authelia/users.yml"):
logging.critical("Custom Authelia config file not found (%s)" % self.config["authelia_config_path"])
# Path to the authelia users in the repo
authelia_users_path = self.script_dir + "/templates/authelia_users_template.yml"
# Copy authelia "users" file
if not os.path.isfile("./traefik/authelia/users.yml") or force:
shutil.copy(authelia_users_path, "./traefik/authelia/users.yml")
except FileExistsError:
logging.debug("Traefik directory already exists: %s" % os.path.abspath("../traefik"))
try:
if not os.path.isfile("./traefik/docker-compose.yml"):
shutil.copy(traefik_template_path, "./traefik/docker-compose.yml")
else:
logging.debug("Traefik compose file already exists: %s" % os.path.abspath("./traefik/docker-compose.yml"))
except FileExistsError:
logging.debug("Traefik compose file already exists: %s" % os.path.abspath("./traefik/docker-compose.yml"))
# Create the mounts for the traefik and authelia services
traefik_dir = os.path.abspath(os.path.join(self.main_dir, "traefik"))
# Create the mounts for the traefik+authelia containers
if not os.path.isdir(os.path.join(traefik_dir, "docker_data")) or force:
self.create_mounts(working_dir=traefik_dir)
# Return to main directory
os.chdir(self.main_dir)
def create_mounts(self, working_dir):
......@@ -243,9 +209,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# Change directory to create mount points for the container
try:
os.chdir(os.path.abspath(working_dir))
except Exception:
except Exception as exc:
logging.critical("Cannot access %s, exiting" % working_dir)
sys.exit()
sys.exit(exc)
compose_yml = os.path.abspath("./docker-compose.yml")
if not os.path.isfile(compose_yml):
raise Exception("Could not find docker-compose.yml at %s" % compose_yml)
......@@ -279,11 +245,11 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# Go back to the "main" directory
try:
os.chdir(os.path.abspath(self.main_dir))
except Exception:
logging.critical("Cannot access main directory (%s), exiting" % self.main_dir)
sys.exit()
except OSError as exc:
logging.critical("Cannot access %s, exiting" % self.main_dir)
sys.exit(exc)
def deploy_stack(self):
def deploy_stack(self, input_list):
"""
Call the script "deploy.sh" used to initialize the swarm cluster if needed and
launch/update the current organism's stack
......@@ -294,6 +260,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
:return:
"""
to_deploy_species_li = []
# # Create our swarm cluster if it doesn't exist
# subprocess.Popen(["docker", "swarm", "init"],
# stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.main_dir)
......@@ -304,17 +273,17 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# Launch and update docker stacks
# noinspection PyArgumentList
deploy_stacks_popen = subprocess.Popen(["sh", self.script_dir + "/deploy.sh", self.genus_species,
self.main_dir + "/traefik"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
universal_newlines=True)
for stdout_line in iter(deploy_stacks_popen.stdout.readline, ""):
if "daemon" in stdout_line: # Ignore swarm init error output
pass
else:
logging.info("\t%s" % stdout_line.strip())
deploy_stacks_popen.stdout.close()
# deploy_stacks_popen = subprocess.Popen(["sh", self.script_dir + "/deploy.sh", self.genus_species,
# self.main_dir + "/traefik"],
# stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
# universal_newlines=True)
#
# for stdout_line in iter(deploy_stacks_popen.stdout.readline, ""):
# if "daemon" in stdout_line: # Ignore swarm init error output
# pass
# else:
# logging.info("\t%s" % stdout_line.strip())
# deploy_stacks_popen.stdout.close()
if __name__ == "__main__":
......@@ -339,6 +308,10 @@ if __name__ == "__main__":
type=str,
help="Where the stack containers will be located, defaults to current directory")
parser.add_argument("--overwrite-all",
help="Overwrite all docker-compose and conf files in the traefik and authelia directories (default=False)",
action="store_true")
args = parser.parse_args()
if args.verbose:
......@@ -359,6 +332,9 @@ if __name__ == "__main__":
main_dir = os.path.abspath(args.main_directory)
sp_dict_list = utilities.parse_input(os.path.abspath(args.input))
print(sp_dict_list)
utilities.get_species_to_deploy(sp_dict_list=sp_dict_list)
logging.info("Deploying stacks for organisms in input file %s" % args.input)
for sp_dict in sp_dict_list:
......@@ -377,7 +353,7 @@ if __name__ == "__main__":
# Set the instance url attribute
for env_variable, value in deploy_stack_for_current_organism.config.items():
if env_variable == "custom_host":
if env_variable == "hostname":
deploy_stack_for_current_organism.instance_url = value + \
deploy_stack_for_current_organism.genus_lowercase + \
"_" + deploy_stack_for_current_organism.species + \
......@@ -396,7 +372,7 @@ if __name__ == "__main__":
logging.info("Successfully generated the directory tree for %s" % deploy_stack_for_current_organism.full_name)
# Make compose files
deploy_stack_for_current_organism.make_compose_files()
deploy_stack_for_current_organism.make_compose_files(force=args.overwrite_all)
logging.info("Successfully generated the docker-compose files for %s" % deploy_stack_for_current_organism.full_name)
# Deploy the stack
......
......@@ -7,8 +7,6 @@ import os
import subprocess
import logging
import sys
import utilities
import speciesData
import fnmatch
import time
import json
......@@ -19,11 +17,13 @@ import shutil
from bioblend.galaxy.objects import GalaxyInstance
from bioblend import galaxy
import utilities
import speciesData
"""
gga_load_data.py
Usage: $ python3 gga_init.py -i input_example.yml [OPTIONS]
Usage: $ python3 gga_init.py -i input_example.yml --config config.yml [OPTIONS]
Do not call this script before the galaxy container is ready
"""
......@@ -36,7 +36,7 @@ class LoadData(speciesData.SpeciesData):
Contains methods and attributes to copy data into the src_data subfolders of an organism and then into the
galaxy instance's history of this given organism
Optional fasta headers reformat
Optional data file formatting
"""
......@@ -69,8 +69,9 @@ class LoadData(speciesData.SpeciesData):
"""
proteins_file = None
proteins_outfile = None
annotation_dir = None
organism_annotation_dir = os.path.abspath("./src_data/annotation/{0}/OGS{1}".format(self.species_folder_name, self.genome_version))
organism_annotation_dir = os.path.abspath("./src_data/annotation/{0}/OGS{1}".format(self.species_folder_name, self.ogs_version))
self.goto_species_dir()
......@@ -91,12 +92,12 @@ class LoadData(speciesData.SpeciesData):
subprocess.run(["mv", annotation_dir + "/outfile_proteins.fa", proteins_file],
stdout=subprocess.PIPE,
cwd=annotation_dir)
# subprocess.run(["rm", annotation_dir + "/outfile_proteins.fa"], stdout=subprocess.PIPE, cwd=annotation_dir)
subprocess.run(["rm", annotation_dir + "/outfile_proteins.fa"], stdout=subprocess.PIPE, cwd=annotation_dir)
else:
logging.warning("Skipping proteins fasta headers formatting (FileNotFound)")
def format_fasta_headers(self, infile, outfile, pattern, repl):
@staticmethod
def format_fasta_headers(infile, outfile, pattern, repl):
"""
Format the fasta headers of a given file, given a matching pattern and a replacement string
......@@ -124,6 +125,7 @@ class LoadData(speciesData.SpeciesData):
Find source data files in the parent_directory
Link data files
TODO: manage access to the "parent directory" subdirectories properly
TODO: implement search/tests for individual file paths
:return:
......@@ -139,38 +141,22 @@ class LoadData(speciesData.SpeciesData):
organism_genome_dir = os.path.abspath("./src_data/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version))
for dirpath, dirnames, files in os.walk(self.source_data_dir):
if "0" in str(dirpath): # TODO: Ensures to take the correct files (other dirs hold files with valid names), this is for Phaeoexplorer only!
if "0" in str(dirpath):
for f in files:
if "Contaminants" not in str(f):
try:
if fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + ".fa"):
logging.info("Genome assembly file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_genome_dir)
# organism_genome_dir = os.path.abspath("./src_data/genome/" +
# self.species_folder_name + "/v" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_genome_dir, f))
elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + ".gff"):
logging.info("GFF file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
# organism_annotation_dir = os.path.abspath("./src_data/annotation/" +
# self.species_folder_name + "/OGS" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_annotation_dir, f))
elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" +
self.sex.upper() + "_transcripts-gff.fa"):
logging.info("Transcripts file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
# organism_annotation_dir = os.path.abspath("./src_data/annotation/" +
# self.species_folder_name + "/OGS" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_annotation_dir, f))
elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + "_proteins.fa"):
logging.info("Proteins file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
# organism_annotation_dir = os.path.abspath("./src_data/annotation/" +
# self.species_folder_name + "/OGS" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_annotation_dir, f))
except FileExistsError as exc:
logging.warning("Error raised (FileExistsError)")
......@@ -184,7 +170,6 @@ class LoadData(speciesData.SpeciesData):
os.chdir(self.main_dir)
def set_get_history(self):
"""
Create or set the working history to the current species one
......@@ -206,7 +191,6 @@ class LoadData(speciesData.SpeciesData):
return self.history_id
def remove_homo_sapiens_from_db(self):
"""
Run the GMOD tool to remove the "Homo sapiens" default organism from the original database
......@@ -235,7 +219,6 @@ class LoadData(speciesData.SpeciesData):
logging.debug("Homo sapiens isn't in the instance's chado database")
pass
def purge_histories(self):
"""
Delete all histories in the instance
......@@ -252,7 +235,6 @@ class LoadData(speciesData.SpeciesData):
return histories
def setup_library(self):
"""
Create a "Project Data" library in galaxy, mirroring the "src_data" folder of the current organism
......@@ -376,7 +358,6 @@ class LoadData(speciesData.SpeciesData):
logging.info("Finished importing data")
def create_deep_folder(self, prj_lib, path, parent_folder=None, deep_name=""):
"""
Create a folder inside a folder in a galaxy library
......@@ -403,7 +384,6 @@ class LoadData(speciesData.SpeciesData):
return new_folder
def setup_data_libraries(self):
"""
Load data into the galaxy container with the galaxy_data_libs_SI.py script written by A. Bretaudeau
......@@ -425,7 +405,6 @@ class LoadData(speciesData.SpeciesData):
else:
logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
def generate_blast_banks(self):
"""
TODO: Automatically generate blast banks for a species and commit
......@@ -433,9 +412,6 @@ class LoadData(speciesData.SpeciesData):
:return:
"""
def connect_to_instance(self):
"""
Test the connection to the galaxy instance for the current organism
......@@ -522,8 +498,7 @@ if __name__ == "__main__":
load_data_for_current_species.config = utilities.parse_config(args.config)
# Set the instance url attribute
for env_variable, value in load_data_for_current_species.config.items():
if env_variable == "custom_host":
# TODO:
if env_variable == "hostname":
load_data_for_current_species.instance_url = "http://{0}:8888/sp/{1}_{2}/galaxy/".format(
value, load_data_for_current_species.genus_lowercase, load_data_for_current_species.species)
break
......@@ -567,7 +542,8 @@ if __name__ == "__main__":
# Set or get the history for the current organism
load_data_for_current_species.set_get_history()
# Remove H. sapiens from database if here TODO: set a dedicated history for removing H. sapiens (instead of doing it into a species history)
# Remove H. sapiens from database if here
# TODO: set a dedicated history for removing H. sapiens (instead of doing it into a species history)
load_data_for_current_species.remove_homo_sapiens_from_db()
# logging.info("Importing datasets into history for %s" % load_data_for_current_species.full_name)
......
......@@ -88,6 +88,7 @@ def check_galaxy_state(genus_lowercase, species, script_dir):
:param genus_lowercase:
:param species:
:param script_dir:
:return:
"""
......@@ -118,6 +119,33 @@ def get_species_history_id(instance, full_name):
return [history_id, show_history]
def get_species_to_deploy(sp_dict_list):
"""
Find and return which species (i.e. genus species) are not duplicated in the input dictionary used by gga scripts
Returns a list of species (directories) for which to deploy the stack
This aims to reduce the number of deployments for a single species by removing this step from the gga_init
loop iterating over input species
:param sp_dict_list:
:return:
"""
to_deploy_li = []
for sp in sp_dict_list:
for k, v in sp.items():
sp_gspecies = ""
for k2, v2 in v.items():
if k2 == "genus":
sp_gspecies = sp_gspecies + v2
elif k2 == "species":
sp_gspecies = sp_gspecies + "_" + v2
if sp_gspecies not in to_deploy_li and sp_gspecies != "":
to_deploy_li.append(sp_gspecies)
print(to_deploy_li)
return to_deploy_li
def write_metadata(metadata_file, metadata_dict):
"""
......@@ -129,4 +157,3 @@ def write_metadata(metadata_file, metadata_dict):
ret = 0
return metadata_file, metadata_dict, ret
......@@ -2,7 +2,7 @@
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "Chado load Tripal synchronize",
"name": "Chado load Tripal synchronize (imported from uploaded file)",
"steps": {
"0": {
"annotation": "",
......@@ -16,7 +16,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 662.7333374023438
"top": 227
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -27,7 +27,7 @@
{
"label": null,
"output_name": "output",
"uuid": "df303ec8-a34f-47ff-8e23-f6fd4f7a4a25"
"uuid": "1ea45e76-73ee-4de4-9d60-7be3f3bd47f5"
}
]
},
......@@ -43,7 +43,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 707.7333374023438
"top": 317
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -54,7 +54,7 @@
{
"label": null,
"output_name": "output",
"uuid": "cae20d90-587c-4b3b-b3a4-5aedb8dde3bf"
"uuid": "8e968289-d670-401d-8700-82dcecc6e79e"
}
]
},
......@@ -70,7 +70,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 752.7333374023438
"top": 407
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -81,7 +81,7 @@
{
"label": null,
"output_name": "output",
"uuid": "79cc48a0-de9b-45d1-8372-7dabded47796"
"uuid": "06493160-17c4-4fcd-85fa-c9fd3c1ff4b5"
}
]
},
......@@ -97,7 +97,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 797.7333374023438
"top": 497
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -108,7 +108,7 @@
{
"label": null,
"output_name": "output",
"uuid": "00f82694-ec6e-471c-90ab-66311651c023"
"uuid": "2ea3e81b-3048-4968-90c8-03df89b01caf"
}
]
},
......@@ -146,8 +146,8 @@
}
],
"position": {
"left": 343,
"top": 662.7333374023438
"left": 486,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.3",
......@@ -170,63 +170,10 @@
]
},
"5": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.10+galaxy0",
"errors": null,
"id": 5,
"input_connections": {
"reference_genome|genome": {
"id": 0,
"output_name": "output"
},
"track_groups_0|data_tracks_0|data_format|annotation": {
"id": 1,
"output_name": "output"
}
},
"inputs": [
{
"description": "runtime parameter for tool JBrowse",
"name": "reference_genome"
}
],
"label": null,
"name": "JBrowse",
"outputs": [
{
"name": "output",
"type": "html"
}
],
"position": {
"left": 343,
"top": 748.7333374023438
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.10+galaxy0",
"tool_shed_repository": {
"changeset_revision": "8774b28235bb",
"name": "jbrowse",
"owner": "iuc",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"RuntimeValue\"}}, \"standalone\": \"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Default\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"RuntimeValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"feature\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": {\"__class__\": \"RuntimeValue\"}, \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "1.16.10+galaxy0",
"type": "tool",
"uuid": "00657cb2-12f9-4f93-98da-04feac9e1388",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "6cbfa232-911e-49b9-96ad-fa9ed236f806"
}
]
},
"6": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.3",
"errors": null,
"id": 6,
"id": 5,
"input_connections": {
"fasta": {
"id": 2,
......@@ -246,9 +193,21 @@
"description": "runtime parameter for tool Chado load gff",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "fasta"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "gff"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "organism"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "wait_for"
}
],
"label": null,
......@@ -260,8 +219,8 @@
}
],
"position": {
"left": 486,
"top": 743.2333374023438
"left": 772,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.3",
......@@ -271,7 +230,7 @@
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"gff\": {\"__class__\": \"ConnectedValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(\\\\..+)$\", \"re_protein\": \"protein\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"gff\": {\"__class__\": \"RuntimeValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(\\\\..+)$\", \"re_protein\": \"protein\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.3",
"type": "tool",
"uuid": "b100a055-0dab-4f2f-8c46-573713ed3fff",
......@@ -283,58 +242,14 @@
}
]
},
"7": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1",
"errors": null,
"id": 7,
"input_connections": {
"organisms_0|jbrowse": {
"id": 5,
"output_name": "output"
}
},
"inputs": [],
"label": null,
"name": "Add organisms to JBrowse container",
"outputs": [
{
"name": "output",
"type": "html"
}
],
"position": {
"left": 486,
"top": 662.7333374023438
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1",
"tool_shed_repository": {
"changeset_revision": "11033bdad2ca",
"name": "jbrowse_to_container",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"organisms\": [{\"__index__\": 0, \"jbrowse\": {\"__class__\": \"RuntimeValue\"}, \"name\": {\"__class__\": \"RuntimeValue\"}, \"advanced\": {\"unique_id\": {\"__class__\": \"RuntimeValue\"}}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "0.5.1",
"type": "tool",
"uuid": "7b7cca87-4000-45de-93a5-bd22cd661d0a",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "8c23d473-4ffa-4a66-b071-aeecc105a529"
}
]
},
"8": {
"6": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
"errors": null,
"id": 8,
"id": 6,
"input_connections": {
"wait_for": {
"id": 6,
"id": 5,
"output_name": "results"
}
},
......@@ -353,8 +268,8 @@
}
],
"position": {
"left": 629,
"top": 662.7333374023438
"left": 1058,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
......@@ -376,14 +291,14 @@
}
]
},
"9": {
"7": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
"errors": null,
"id": 9,
"id": 7,
"input_connections": {
"wait_for": {
"id": 8,
"id": 6,
"output_name": "results"
}
},
......@@ -402,8 +317,8 @@
}
],
"position": {
"left": 772,
"top": 662.7333374023438
"left": 1344,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
......@@ -425,14 +340,14 @@
}
]
},
"10": {
"8": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
"errors": null,
"id": 10,
"id": 8,
"input_connections": {
"wait_for": {
"id": 9,
"id": 7,
"output_name": "results"
}
},
......@@ -451,8 +366,8 @@
}
],
"position": {
"left": 915,
"top": 662.7333374023438
"left": 1630,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
......@@ -474,14 +389,14 @@
}
]
},
"11": {
"9": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0",
"errors": null,
"id": 11,
"id": 9,
"input_connections": {
"wait_for": {
"id": 10,
"id": 8,
"output_name": "results"
}
},
......@@ -500,8 +415,8 @@
}
],
"position": {
"left": 1058,
"top": 662.7333374023438
"left": 1916,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0",
......@@ -525,6 +440,6 @@
}
},
"tags": [],
"uuid": "69699b44-94c8-4cc7-977e-74f266e58fdf",
"version": 3
"uuid": "13fe9125-bf94-4955-9305-908076475b70",
"version": 1
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment