diff --git a/.gitignore b/.gitignore
index 07e2dd14ac2543f1386b88911f859a1ded6a9954..17e708b3152da298ad47c73921399accf781c0b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,107 @@
-__pycache__
-.idea
-phaeoexplorer_test.json
-example.json
-example.xlsx
-*.bak
-undaria_pinnatifida
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# IDE stuff
+.idea
\ No newline at end of file
diff --git a/__pycache__/docker_compose_generator.cpython-36.pyc b/__pycache__/docker_compose_generator.cpython-36.pyc
deleted file mode 100644
index f0bfefaa1f33103cb7eb2bf92aff23cbcba9ed3b..0000000000000000000000000000000000000000
Binary files a/__pycache__/docker_compose_generator.cpython-36.pyc and /dev/null differ
diff --git a/__pycache__/docker_compose_generator.cpython-38.pyc b/__pycache__/docker_compose_generator.cpython-38.pyc
deleted file mode 100644
index 1201041cefa429a4343e2513ccd1c57b142f50b8..0000000000000000000000000000000000000000
Binary files a/__pycache__/docker_compose_generator.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/metadata_generator.cpython-36.pyc b/__pycache__/metadata_generator.cpython-36.pyc
deleted file mode 100644
index 19eb173b57afdd0bdc0683e4ed77949e298cd055..0000000000000000000000000000000000000000
Binary files a/__pycache__/metadata_generator.cpython-36.pyc and /dev/null differ
diff --git a/__pycache__/metadata_generator.cpython-38.pyc b/__pycache__/metadata_generator.cpython-38.pyc
deleted file mode 100644
index eed2e9474897c4805dc636277bafade56ab8a337..0000000000000000000000000000000000000000
Binary files a/__pycache__/metadata_generator.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/table_parser.cpython-36.pyc b/__pycache__/table_parser.cpython-36.pyc
deleted file mode 100644
index 7272a5afeb889befe0494f81c89160a22c94c6fb..0000000000000000000000000000000000000000
Binary files a/__pycache__/table_parser.cpython-36.pyc and /dev/null differ
diff --git a/__pycache__/table_parser.cpython-38.pyc b/__pycache__/table_parser.cpython-38.pyc
deleted file mode 100644
index 57f034e254bb97b21195f7f449cc828c52038074..0000000000000000000000000000000000000000
Binary files a/__pycache__/table_parser.cpython-38.pyc and /dev/null differ
diff --git a/create_input_instance.py b/create_input_instance.py
index 9ef50aaff9db6a5111f59dceaae63bf24616f220..d01dd88084e6cdaa3fdeaeb3080e43b6d353f128 100644
--- a/create_input_instance.py
+++ b/create_input_instance.py
@@ -13,9 +13,8 @@ from datetime import datetime
 """
 create_input_instance.py
 
-Create an object containing the data input from the yml file as attributes
-This object is then fed to the other scripts
-It is to avoid having several times the same code in several files
+Create an object containing the data from the input yml file as attributes; this object is then fed to the other scripts
+to avoid duplicating the same parsing code in several files
 
 """
 
@@ -23,7 +22,7 @@ It is to avoid having several times the same code in several files
 def parse_input(input_file):
     """
     Parse the yml input file to extract data to create the SpeciesData objects
-    Return a list of dictionaries. Each dictionary contains all the data
+    Return a list of dictionaries. Each dictionary contains data tied to a species
 
     :param input_file:
     :return:
@@ -40,24 +39,58 @@ def parse_input(input_file):
         try:
             yaml_dict = yaml.safe_load(stream)
             for k, v in yaml_dict.items():
+                if k == "config":
+                    continue  # The "config" block is not a species entry, skip it
                 parsed_sp_dict_list.append(v)
-        except yaml.YAMLError as exc:
-            logging.debug(exc)
+        except yaml.YAMLError as exc:
+            logging.critical(str(exc) + " (YAML input file might be incorrect)")
+            sys.exit()
     return parsed_sp_dict_list
 
 
+def parse_args():
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+
+    parser.add_argument("-i", "--input",
+                        help="Input file (yml)")
+
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_false")
+
+    parser.add_argument("--deploy-stacks",
+                        help="Create and deploy the stacks of services",
+                        action="store_true")
+
+    parser.add_argument("--load-data",
+                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets "
+                             "into the instance, DEV",
+                        action="store_true")
+
+    parser.add_argument("--run-workflow",
+                        help="Run main workflow (load data into chado, sync all with tripal, "
+                             "index tripal data, populate materialized view, "
+                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
+
+    args = parser.parse_args()
+
+    return args
+
 
 
 class SpeciesData:
     """
     This class contains attributes and functions to interact with the galaxy container of the GGA environment
-
+    Parent class of LoadData, DeploySpeciesStack and RunWorkflow
 
     """
 
-    def __init__(self, parameters_dictionary, args):
+    def __init__(self, parameters_dictionary):
         self.parameters_dictionary = parameters_dictionary
-        self.args = args
+        self.args = parse_args()  # Not a good design
         self.species = parameters_dictionary["description"]["species"]
         self.genus = parameters_dictionary["description"]["genus"]
         self.strain = parameters_dictionary["description"]["strain"]
@@ -105,57 +138,8 @@ class SpeciesData:
         self.do_update = False  # Update the instance (in histories corresponding to the input) instead of creating a new one // TODO: move this variable inside methods
         self.api_key = "dev"  # API key used to communicate with the galaxy instance. Set to "dev" for the moment. Cannot be used to do user-tied actions
         self.args = args
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction with galaxy instances for GGA"
-                                                 ", following the protocol @ "
-                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
-
-    # Dev arguments, TODO: remove in production branch!
-    parser.add_argument("--full",
-                        help="Run everything, from src_data dir tree creation, moving data files (abims) into src_data,"
-                             "modify headers (abims), generate blast banks (doesn't commit them: TODO), initialize GGA instance, load the data and run,"
-                             " the main workflow. To update/add data to container, use --update in conjunction to --full (TODO)")
-
-    parser.add_argument("--init-instance",
-                        help="Initialization of galaxy instance. Run first in an empty instance, DEV",
-                        action="store_true")
-
-    parser.add_argument("--deploy-stacks",
-                        help="Create and deploy the stacks of services",
-                        action="store_true")
-
-    parser.add_argument("--load-data",
-                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets into the instance, DEV",
-                        action="store_true")
-
-    parser.add_argument("--run-workflow",
-                        help="Run main workflow (load data into chado, sync all with tripal, "
-                             "index tripal data, populate materialized view, "
-                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
-
-
-    # Production arguments
-    parser.add_argument("input", type=str, help="Input file (yml)")
-
-    parser.add_argument("-v", "--verbose",
-                        help="Increase output verbosity",
-                        action="store_false")
-
-    parser.add_argument("--update",
-                        help="Update an already integrated organisms with new data from input file, docker-compose.yml will not be re-generated"
-                             ", assuming the instances for the organisms are already generated and initialized",
-                        action="store_false")
-
-    parser.add_argument("--dir",
-                        help="Path of the main directory, either absolute or relative, defaults to current directory",
-                        default=os.getcwd())
-
-    args = parser.parse_args()
-
-    if args.verbose:
-        logging.basicConfig(level=logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-
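+        # NB: "--verbose" is declared with action="store_false", so DEBUG logging is the default unless -v is passed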
+        if self.args.verbose:
+            logging.basicConfig(level=logging.DEBUG)
+        else:
+            logging.basicConfig(level=logging.INFO)
 
diff --git a/deploy_stacks.py b/deploy_stacks.py
index 73b766e502cd646361b7bacd1b1d6d281e0fccba..45a89d013f02a2a7d800ea96c02bd4b96eb3779a 100755
--- a/deploy_stacks.py
+++ b/deploy_stacks.py
@@ -1,64 +1,31 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 
-import bioblend
-import bioblend.galaxy.objects
-from bioblend import galaxy
+
 import argparse
 import os
 import subprocess
 import logging
 import sys
-import json
 import yaml
 import re
-import metadata_generator
-import docker_compose_generator
-import table_parser
+from gga_autoload.gga_load_data import table_parser
 import fnmatch
 import shutil
 from datetime import datetime
-import create_input_instance
+
 
 """ 
 deploy_stacks.py
 
-
-
-TODO:
-- add config file (inside repo or outside with argument
-- update existing history
-- clean/delete instance?
-- delete stack?
-- commit the files for blast banks.
-
-TODO EOSC/Cloudification:
-- divide into 2 general-use scripts
-    - create docker swarm, stacks, etc... (docker side)
-    - load data into libraries (method to load it at init, and a method/script to load it separately (galaxy side) (alb: galaxy_data_libs_SI does this already?)
-
-STEPS:
-- read input (yml, maybe xlsx later)
-- create dir_tree -- DONE
-- find and copy data -- DONE
-- change file headers, etc.. (ext scripts for data manipulation) -- IN PROGRESS
-- generate blast banks and links -- NOT DONE
-- generate and edit nginx confs -- DONE
-- generate dc and start the containers -- IN PROGRESS
-- connect to instance and launch tools>workflows -- IN PROGRESS
-- generate and update metadata -- IN PROGRESS
-
-
-NOTES:
-- A master API key cannot be used, as some functions are tied to a user (like creating an history), so the access to the
-  galaxy instance must be done using email and password (definable in yml_example_input.yml)
-
+Usage: $ python3 deploy_stacks.py example.yml [OPTIONS]
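+An example input file is provided in examples/yml_example_input.yml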
 """
 
 
 def parse_input(input_file):
     """
-    Parse the yml, json or tabulated input in order to set attributes for the Autoload class
+    Parse the yml input file to extract data to create the SpeciesData objects
+    Return a list of dictionaries. Each dictionary contains data tied to a species
 
     :param input_file:
     :return:
@@ -75,24 +42,24 @@ def parse_input(input_file):
         try:
             yaml_dict = yaml.safe_load(stream)
             for k, v in yaml_dict.items():
+                if k == "config":
+                    continue  # The "config" block is not a species entry, skip it
                 parsed_sp_dict_list.append(v)
-        except yaml.YAMLError as exc:
-            logging.debug(exc)
+        except yaml.YAMLError as exc:
+            logging.critical(str(exc) + " (YAML input file might be incorrect)")
+            sys.exit()
     return parsed_sp_dict_list
 
 
-
-
-class DeploySpeciesStacks:
+class DeploySpeciesStack:
     """
-    The class DeploySpeciesStacks
+    Deploy a stack of services for a given species
 
 
     """
 
-    def __init__(self, parameters_dictionary, args):
+    def __init__(self, parameters_dictionary):
         self.parameters_dictionary = parameters_dictionary
-        self.args = args
         self.species = parameters_dictionary["description"]["species"]
         self.genus = parameters_dictionary["description"]["genus"]
         self.strain = parameters_dictionary["description"]["strain"]
@@ -115,7 +82,8 @@ class DeploySpeciesStacks:
         self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
         self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
         self.genus_species = self.genus_lowercase + "_" + self.species
-        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"  # Testing with localhost/scratchgmodv1
+        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        # Testing with localhost/scratchgmodv1
         self.instance = None
         self.history_id = None
         self.library_id = None
@@ -129,59 +97,24 @@ class DeploySpeciesStacks:
         self.datasets = dict()
         self.source_files = dict()
         self.workflow_name = None
-        self.docker_compose_generator = None
         self.metadata = dict()
-        self.api_key = "dev"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
+        self.api_key = "master"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
         if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
             self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
         else:
             self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
-        # Directory/subdirectories where data files are located (fasta, gff, ...), point to a directory as close as possible to the source files
+        # Directory/subdirectories where data files are located (fasta, gff, ...)
         self.do_update = False
-        # Update the instance (in histories corresponding to the input) instead of creating a new one // TODO: move this variable inside methods
-        self.api_key = "dev"
-        # API key used to communicate with the galaxy instance. Set to "dev" for the moment // TODO: find a way to create, store then use the api key safely
-
-
-    # def get_source_data(self, max_depth):
-    #     """
-    #     TODO: saved for later just in case
-    #
-    #     Find and copy source data files to src_data directory tree
-    #     - recursively search for the correct files (within a fixed max depth)
-    #     - requires the organism src_data directory tree to already be properly created for the organism (run generate_dir_tree)
-    #     - the source files must have "transcripts", "proteins"/"pep", "genome" in their name, and a gff extension
-    #
-    #     """
-    #     src_data_dir = os.path.join(self.species_dir, "/src_data")
-    #     sp_regex = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # example with VARIABLE
-    #
-    #     # The regex works using the species attribute (unique) --> regex is probably not necessary
-    #     sp_regex = ""
-    #     for i in self.species:
-    #         sp_regex = sp_regex + "?=\w*" + i + ")"
-    #     sp_regex = sp_regex + ")\w+"
-    #     re_dict = dict()
-    #     re_dict["gff"] = None
-    #     re_dict["transcripts"] = None
-    #     re_dict["proteins"] = None
-    #     re_dict["genome"] = None
-    #     reg = None
-    #
-    #     for dirpath, dirnames, files in os.walk(self.source_data_dir):
-    #         for f in files:
-    #             if self.species and self.sex in f:
-    #                 logging.info("File found")
-
-
-
-
-    def generate_dir_tree(self):
+        # Update the instance (in histories corresponding to the input) instead of creating a new one
+        self.api_key = "master"
+        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
+        self.species_name_regex_literal = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder regex (lookaheads spelling out "VARIABLE")
+
+
+    def make_directory_tree(self):
         """
         Generate the directory tree for an organism and move datasets into src_data
 
-        TODO: DOCKER -- this is the one the "docker" parts of the script
-
         :return:
         """
 
@@ -213,14 +146,37 @@ class DeploySpeciesStacks:
         # self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
         organism_annotation_dir, organism_genome_dir = None, None
 
-        # Create src_data dir tree
+        # Creation (or updating) of the src_data directory tree
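+        # Expected tree layout (illustrative):
+        #   ./src_data/annotation/<species_folder_name>/OGS<ogs_version>/
+        #   ./src_data/genome/<species_folder_name>/v<genome_version>/
+        #   ./src_data/tracks/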
+        # Depth 0-1
         try:
             os.mkdir("./src_data")
             os.mkdir("./src_data/annotation")
             os.mkdir("./src_data/genome")
             os.mkdir("./src_data/tracks")
+        except FileExistsError:
+            if self.do_update:
+                logging.info("Updating src_data directory tree")
+            else:
+                logging.debug("The src_data directory tree already exists")
+        except PermissionError:
+            logging.critical("Insufficient permission to create src_data directory tree")
+            sys.exit()
+
+        # Depth 2
+        try:
             os.mkdir("./src_data/annotation/" + self.species_folder_name)
             os.mkdir("./src_data/genome/" + self.species_folder_name)
+        except FileExistsError:
+            if self.do_update:
+                logging.info("Updating src_data directory tree")
+            else:
+                logging.debug("The src_data directory tree already exists")
+        except PermissionError:
+            logging.critical("Insufficient permission to create src_data directory tree")
+            sys.exit()
+
+        # Depth 3
+        try:
             os.mkdir("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version)
             os.mkdir("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
             organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
@@ -234,6 +190,12 @@ class DeploySpeciesStacks:
             logging.critical("Insufficient permission to create src_data directory tree")
             sys.exit()
 
+
+    def make_compose_files(self):
+        """
+
+        :return:
+        """
         # Path to the templates used to generate the custom docker-compose files for an input species
         stack_template_path = self.script_dir + "/templates/stack-organism.yml"
         traefik_template_path = self.script_dir + "/templates/traefik.yml"
@@ -248,20 +210,27 @@ class DeploySpeciesStacks:
         with open(stack_template_path, 'r') as infile:
             organism_content = list()
             for line in infile:
-                # One-liner to replace placeholders by the genus and species
+                # Replace placeholders in the compose file
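+                # e.g. (hypothetical values) for genus "Ectocarpus" and species "sp1": "genus_species" -> "ectocarpus_sp1",
+                # "Genus species" -> "Ectocarpus sp1", "Genus/species" -> "Ectocarpus/sp1", "gspecies" -> "esp1"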
                 organism_content.append(
-                    line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus_uppercase + " " + self.species)).replace("Genus/species", str(self.genus_uppercase + "/" + self.species)).replace("gspecies", str( self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex", genus_species_strain_sex))
+                    line.replace("genus_species", str(self.genus.lower() + "_" + self.species))
+                        .replace("Genus species", str(self.genus_uppercase + " " + self.species))
+                        .replace("Genus/species", str(self.genus_uppercase + "/" + self.species))
+                        .replace("gspecies", str(self.genus.lower()[0] + self.species))
+                        .replace("genus_species_strain_sex", genus_species_strain_sex))
             with open("./docker-compose.yml", 'w') as outfile:
                 for line in organism_content:
                     outfile.write(line)
-            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
+            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
+                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
 
         try:
             os.mkdir("../traefik")
             os.mkdir("../traefik/authelia")
             shutil.copy(authelia_config_path, "../traefik/authelia/configuration.yml")
             shutil.copy(authelia_users_path, "../traefik/authelia/users.yml")  # TODO: custom users (add a config file?)
-            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
+            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
+                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
         except FileExistsError:
             logging.debug("Traefik directory already exists")
         try:
@@ -271,11 +240,9 @@ class DeploySpeciesStacks:
         subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir)
 
 
-
     def get_source_data_files_from_path(self):
         """
-        Find all files in source_data directory, to link the matching files in the src_data dir tree
-
+        Find files in the source data directory and link the matching files into the src_data directory tree
         :return:
         """
 
@@ -290,7 +257,7 @@ class DeploySpeciesStacks:
         organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
 
         for dirpath, dirnames, files in os.walk(self.source_data_dir):
-            if "0" in str(dirpath):  # Ensures to take the correct files (other dirs hold files with the correct names, but I don't know if they are the same) #alb
+            if "0" in str(dirpath):  # Ensures to take the correct files (other dirs hold files with the correct names, but I don't know if they are the same), this is for Phaeoexplorer only
                 for f in files:
                     if "Contaminants" not in str(f):
                         try:
@@ -322,7 +289,6 @@ class DeploySpeciesStacks:
                             logging.warning("Error raised (NotADirectoryError)")
 
 
-
     def deploy_stack(self):
         """
         Call the script "deploy.sh" used to initiliaze the swarm cluster if needed and launch/update the stack
@@ -330,458 +296,10 @@ class DeploySpeciesStacks:
         :return:
         """
         # Launch and update docker stacks (cf docs)
-        # TODO: add a fail condition?
         subprocess.call(["sh", self.script_dir + "/deploy.sh", self.genus_species, self.main_dir + "/traefik"])
 
 
 
-
-
-    def modify_fasta_headers(self):
-        """
-        Change the fasta headers before integration.
-
-        :return:
-        """
-
-        try:
-            os.chdir(self.species_dir)
-            working_dir = os.getcwd()
-        except OSError:
-            logging.info("Cannot access " + self.species_dir + ", run with higher privileges")
-            logging.info("Fatal error: exit")
-            sys.exit()
-        self.source_files = dict()
-        annotation_dir, genome_dir = None, None
-        for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
-            if "annotation/" in d:
-                annotation_dir = d
-                for f in os.listdir(d):
-                    if f.endswith("proteins.fasta"):
-                        self.source_files["proteins_file"] = os.path.join(d, f)
-                    elif f.endswith("transcripts-gff.fa"):
-                        self.source_files["transcripts_file"] = os.path.join(d, f)
-                    elif f.endswith(".gff"):
-                        self.source_files["gff_file"] = os.path.join(d, f)
-            elif "genome/" in d:
-                genome_dir = d
-                for f in os.listdir(d):
-                    if f.endswith(".fa"):
-                        self.source_files["genome_file"] = os.path.join(d, f)
-                logging.debug("source files found:")
-        for k, v in self.source_files.items():
-            logging.debug("\t" + k + "\t" + v)
-
-        # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
-        # production version
-        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh",
-                              self.source_files["proteins_file"]]
-        # test version
-        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
-                              # self.source_files["proteins_file"]]
-        logging.info("Changing fasta headers: " + self.source_files["proteins_file"])
-        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
-        # production version
-        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh",
-                              self.source_files["proteins_file"]]
-        # test version
-        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
-        #                       self.source_files["proteins_file"]]
-        logging.info("Changing fasta headers: " + self.source_files["transcripts_file"])
-        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
-
-        # src_data cleaning
-        if os.path.exists(annotation_dir + "outfile"):
-            subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
-                           stdout=subprocess.PIPE,
-                           cwd=annotation_dir)
-        if os.path.exists(annotation_dir + "gmon.out"):
-            subprocess.run(["rm", annotation_dir + "/gmon.out"],
-                           stdout=subprocess.PIPE,
-                           cwd=annotation_dir)
-
-
-
-
-    def generate_blast_banks(self):
-        """
-        TODO
-        Automatically generate blast banks for a species
-        TODO: auto commit the files?
-
-        :return:
-        """
-
-
-    def connect_to_instance(self):
-        """
-        TODO: move in init/access
-        TODO: password
-        Test the connection to the galaxy instance for the current organism
-        Exit if it cannot connect to the instance
-        """
-        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password", verify=False)
-        logging.info("Connecting to the galaxy instance ...")
-        try:
-            self.instance.histories.get_histories()
-            self.tool_panel = self.instance.tools.get_tool_panel()
-        except bioblend.ConnectionError:
-            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
-            sys.exit()
-        else:
-            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
-        self.instance.histories.create_history(name="FOO")
-
-
-
-
-
-
-    def setup_data_libraries(self):
-        """
-        - generate blast banks and docker-compose
-        - load data into the galaxy container with the galaxy_data_libs_SI.py script
-
-        :return:
-        """
-
-        try:
-            logging.info("Loading data into the galaxy container")
-            subprocess.run("../serexec genus_species_galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py",
-                           shell=True)
-        except subprocess.CalledProcessError:
-            logging.info("Cannot load data into the galaxy container for " + self.full_name)
-            pass
-        else:
-            logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
-
-        self.get_species_history_id()
-        # self.get_instance_attributes()
-        #
-        # # import all datasets into current history
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
-
-
-
-
-
-    def get_species_history_id(self):
-        """
-        Set and return the current species history id in its galaxy instance
-
-        :return:
-        """
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        self.history_id = histories[0]["id"]
-        self.instance.histories.show_history(history_id=self.history_id)
-
-        return self.history_id
-
-
-
-
-    def create_species_history(self):
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        print("\n" + str(histories) + "\n" + self.full_name + "\n")
-        if not histories:
-            self.instance.histories.create_history(name="FOO")
-            print("Created history!")
-
-
-
-
-
-    def get_instance_attributes(self):
-        """
-        retrieves instance attributes:
-        - working history ID
-        - libraries ID (there should only be one library!)
-        - datasets IDs
-
-        :return:
-        """
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        self.history_id = histories[0]["id"]
-        logging.debug("history ID: " + self.history_id)
-        libraries = self.instance.libraries.get_libraries()  # normally only one library
-        self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
-        logging.debug("library ID: " + self.history_id)
-        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
-
-        folders_ids = {}
-        current_folder_name = ""
-        for i in instance_source_data_folders:
-            for k, v in i.items():
-                if k == "name":
-                    folders_ids[v] = 0
-                    current_folder_name = v
-                if k == "id":
-                    folders_ids[current_folder_name] = v
-        logging.info("Folders and datasets IDs: ")
-        self.datasets = dict()
-        for k, v in folders_ids.items():
-            logging.info("\t" + k + ": " + v)
-            if k == "/genome":
-                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
-                for k2, v2 in sub_folder_content.items():
-                    for e in v2:
-                        if type(e) == dict:
-                            if e["name"].endswith(".fa"):
-                                self.datasets["genome_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-            elif k == "/annotation/" + self.genus_species:
-                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
-                for k2, v2 in sub_folder_content.items():
-                    for e in v2:
-                        if type(e) == dict:
-                            # TODO: manage several files of the same type and manage versions
-                            if e["name"].endswith("transcripts-gff.fa"):
-                                self.datasets["transcripts_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith("proteins.fasta"):
-                                self.datasets["proteins_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith(".gff"):
-                                self.datasets["gff_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith("MALE"):
-                                self.datasets["gff_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-
-
-
-
-
-
-    def init_instance(self):
-        """
-        Galaxy instance startup in preparation for running workflows
-        - remove Homo sapiens from the chado database.
-        - add organism and analyses into the chado database --> separate
-        - get any other existing organisms IDs before updating the galaxy instance --> separate
-
-        TODO: move the library and analysis/data stuff to a separate function
-        :return:
-        """
-
-        self.connect_to_instance()
-        self.get_species_history_id()
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        # Create the first history
-        if not histories:
-            self.instance.histories.create_history(name=str(self.full_name))
-        self.history_id = histories[0]["id"]
-        logging.debug("history ID: " + self.history_id)
-        # libraries = self.instance.libraries.get_libraries()  # routine check: one library
-        # self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
-        logging.debug("library ID: " + self.history_id)
-        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
-
-        # Delete Homo sapiens from Chado database
-        logging.debug("Getting 'Homo sapiens' ID in instance's chado database")
-        get_sapiens_id_job = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"genus": "Homo", "species": "sapiens"})
-        get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
-        get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output)
-        try:
-            logging.debug("Deleting Homo 'sapiens' in the instance's chado database")
-            get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
-            sapiens_id = str(
-                get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
-            self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
-                history_id=self.history_id,
-                tool_inputs={"organism": str(sapiens_id)})
-        except bioblend.ConnectionError:
-            logging.debug("Homo sapiens isn't in the instance's chado database")
-        except IndexError:
-            logging.debug("Homo sapiens isn't in the instance's chado database")
-            pass
-
-        # TODO: the following actions should be done in a separate function (in case if the user wants to do everything him/herself -- for EOSC)
-        # Add organism (species) to chado
-        logging.info("Adding organism to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"abbr": self.abbreviation,
-                         "genus": self.genus,
-                         "species": self.species,
-                         "common": self.common})
-        # Add OGS analysis to chado
-        logging.info("Adding OGS analysis to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
-                         "program": "Performed by Genoscope",
-                         "programversion": str("OGS" + self.ogs_version),
-                         "sourcename": "Genoscope",
-                         "date_executed": self.date})
-
-        # Add genome analysis to chado
-        logging.info("Adding genome analysis to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
-                         "program": "Performed by Genoscope",
-                         "programversion": str("genome v" + self.genome_version),
-                         "sourcename": "Genoscope",
-                         "date_executed": self.date})
-        self.get_organism_and_analyses_ids()
-        logging.info("Finished initializing instance")
-
-
-
-
-
-
-
-    def run_workflow(self, workflow_name, workflow_parameters, datamap):
-        """
-        Run the "main" workflow in the galaxy instance
-        - import data to library
-        - load fasta and gff
-        - sync with tripal
-        - add jbrowse + organism
-        - fill in the tripal views
-
-        TODO: map tool name to step id
-        :param workflow_name:
-        :param workflow_parameters:
-        :param datamap:
-        :return:
-        """
-
-        logging.debug("running workflow: " + str(workflow_name))
-        workflow_ga_file = self.main_dir + "Galaxy-Workflow-" + workflow_name + ".ga"
-        if self.strain != "":
-            custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
-            custom_ga_file_path = os.path.abspath(custom_ga_file)
-        else:
-            custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
-            custom_ga_file_path = os.path.abspath(custom_ga_file)
-        with open(workflow_ga_file, 'r') as ga_in_file:
-            workflow = str(ga_in_file.readlines())
-            # ugly fix for the jbrowse parameters
-            workflow = workflow.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
-                                        str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
-            workflow = workflow.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
-                                        str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
-            workflow = workflow.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
-            # test
-            workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
-                                        "http://localhost/sp/" + self.genus_lowercase+ "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
-            # production
-            # workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
-            #                             "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
-            workflow = workflow[2:-2]  # if the line under doesn't output a correct json
-            # workflow = workflow[:-2]  # if the line above doesn't output a correct json
-
-            workflow_dict = json.loads(workflow)
-
-            self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-            self.workflow_name = workflow_name
-            workflow_attributes = self.instance.workflows.get_workflows(name=self.workflow_name)
-            workflow_id = workflow_attributes[0]["id"]
-            show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
-            logging.debug("Workflow ID: " + workflow_id)
-
-            logging.debug("Inputs:")
-            logging.debug(show_workflow["Inputs"])
-            self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
-                                                    history_id=self.history_id,
-                                                    params=workflow_parameters,
-                                                    inputs=datamap,
-                                                    inputs_by="")
-            self.instance.workflows.delete_workflow(workflow_id=workflow_id)
-
-
-
-
-
-
-    def load_data_in_galaxy(self):
-        """
-        Function to load the src_data folder in galaxy
-
-        :return:
-        """
-
-        logging.info("Loading data in galaxy")
-
-        return None
-
-
-
-
-
-    def get_organism_and_analyses_ids(self):
-        """
-        Retrieve current organism ID and OGS and genome chado analyses IDs (needed to run some tools as Tripal/Chado
-        doesn't accept organism/analyses names as valid inputs
-
-        :return:
-        """
-        # Get the ID for the current organism in chado
-        org = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"genus": self.genus, "species": self.species})
-        org_job_out = org["outputs"][0]["id"]
-        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
-        try:
-            org_output = json.loads(org_json_output)[0]
-            self.org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
-        except IndexError:
-            logging.debug("no organism matching " + self.full_name + " exists in the instance's chado database")
-
-        # Get the ID for the OGS analysis in chado
-        ogs_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
-        ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
-        ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
-        try:
-            ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
-            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])
-        except IndexError:
-            logging.debug("no matching OGS analysis exists in the instance's chado database")
-
-        # Get the ID for the genome analysis in chado
-        genome_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
-        genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
-        genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
-        try:
-            genome_analysis_output = json.loads(genome_analysis_json_output)[0]
-            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])
-        except IndexError:
-            logging.debug("no matching genome analysis exists in the instance's chado database")
-
-
-
-
-    def clean_instance(self):
-        """
-        TODO: method to purge the instance from analyses and organisms
-        :return:
-        """
-        return None
-
-
-
-
-
 def filter_empty_not_empty_items(li):
     ret = {"empty": [], "not_empty": []}
     for i in li:
@@ -793,42 +311,18 @@ def filter_empty_not_empty_items(li):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction with galaxy instances for GGA"
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
                                                  ", following the protocol @ "
                                                  "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
-    # Dev arguments, TODO: remove in production branch!
-    parser.add_argument("--full",
-                        help="Run everything, from src_data dir tree creation, moving data files (abims) into src_data,"
-                             "modify headers (abims), generate blast banks (doesn't commit them: TODO), initialize GGA instance, load the data and run,"
-                             " the main workflow. To update/add data to container, use --update in conjunction to --full (TODO)")
-    parser.add_argument("--init-instance",
-                        help="Initialization of galaxy instance. Run first in an empty instance, DEV",
-                        action="store_true")
-    parser.add_argument("--load-data",
-                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets into the instance, DEV",
-                        action="store_true")
-    parser.add_argument("--run-main",
-                        help="Run main workflow (load data into chado, sync all with tripal, "
-                             "index tripal data, populate materialized view, "
-                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
-    parser.add_argument("--generate-docker-compose",
-                        help="Generate docker-compose.yml for current species, DEV")
-    parser.add_argument("--link-source",
-                        help="Find source files in source data dir and copy them to src_data, DEV, OBSOLETE",
-                        action="store_true")
-
-    # Production arguments
-    parser.add_argument("input", type=str, help="Input file (yml)")
+
+    parser.add_argument("input",
+                        type=str,
+                        help="Input file (yml)")
+
     parser.add_argument("-v", "--verbose",
                         help="Increase output verbosity",
                         action="store_false")
-    parser.add_argument("--update",
-                        help="Update an already integrated organisms with new data from input file, docker-compose.yml will not be re-generated"
-                             ", assuming the instances for the organisms are already generated and initialized",
-                        action="store_false")
-    parser.add_argument("--dir",
-                        help="Path of the main directory, either absolute or relative, defaults to current directory",
-                        default=os.getcwd())
 
     args = parser.parse_args()
 
@@ -837,93 +331,23 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(level=logging.INFO)
 
-    logging.info("Start")
+    logging.info("Deploy stacks: start")
     sp_dict_list = parse_input(args.input)
     for sp_dict in sp_dict_list:
-        al = Autoload(parameters_dictionary=sp_dict, args=args)
-        al.main_dir = os.path.abspath(args.dir)
-        if args.load_data:
-            """
-            Full workflow 
-            TODO: change later (docker side / load data side / galaxy side)
-            """
-            # al.generate_dir_tree()
-            # logging.info("Successfully generated the directory tree for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-            #
-            # # al.get_source_data_files_from_path()
-            # logging.info("Successfully retrieved source data files for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-            #
-            # al.deploy_stack()
-            # logging.info("Successfully deployed containers stack for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-            #
-            al.connect_to_instance()
-            logging.info("Connected to instance")
-            #
-            # al.create_species_history()
-            # logging.info("Created a history")
-            #
-            # al.setup_data_libraries()
-            # logging.info("Setting up data libraries")
-
-            # al.init_instance()
-            # logging.info("Successfully initialized instance for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-
-            # al.setup_data_libraries()
-            # logging.info("Successfully set up data libraries in galaxy for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-
-
-            # if args.init_instance:
-            #     logging.info(" Initializing the galaxy instance")
-            #     al.init_instance()
-            #     al.get_instance_attributes()
-            #     # metadata[genus_species_strain_sex]["initialized"] = True
-            # if args.load_data:
-            #     logging.info("Loading data into galaxy")
-            #     # al.load_data()
-            #     # metadata[genus_species_strain_sex]["data_loaded_in_instance"] = True
-            # if args.run_main:
-            #     logging.info("Running main workflow")
-            #     al.get_organism_and_analyses_ids()
-            #     workflow_parameters = dict()
-            #     workflow_parameters["0"] = {}
-            #     workflow_parameters["1"] = {}
-            #     workflow_parameters["2"] = {}
-            #     workflow_parameters["3"] = {}
-            #     workflow_parameters["4"] = {"organism": al.org_id,
-            #                                 "analysis_id": al.genome_analysis_id,
-            #                                 "do_update": "true"}
-            #     workflow_parameters["5"] = {"organism": al.org_id,
-            #                                 "analysis_id": al.ogs_analysis_id}
-            #     workflow_parameters["6"] = {"organism_id": al.org_id}
-            #     workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id}
-            #     workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id}
-            #     workflow_parameters["9"] = {"organism_id": al.org_id}
-            #     workflow_parameters["10"] = {}
-            #     workflow_parameters["11"] = {}
-            #
-            #     al.datamap = dict()
-            #     al.datamap["0"] = {"src": "hda", "id": al.datasets["genome_file"]}
-            #     al.datamap["1"] = {"src": "hda", "id": al.datasets["gff_file"]}
-            #     al.datamap["2"] = {"src": "hda", "id": al.datasets["proteins_file"]}
-            #     al.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]}
-            #
-            #     al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap)
-            #     # metadata[genus_species_strain_sex]["workflows_run"] = metadata[genus_species_strain_sex]["workflows_run"].append("main")
-            #
-            # if args.link_source:
-            #     print('DEV')
-            #     al.generate_dir_tree()
-            #     print(al.main_dir)
-            #     print(al.species_dir)
-
-    logging.info("Exit")
-
-
-
-def main(species_data):
-    """
-    "Main" function
+        dss = DeploySpeciesStack(parameters_dictionary=sp_dict)
+        dss.main_dir = os.getcwd()  # The --dir argument was removed, default to the current directory
+
+        # dss.make_directory_tree()
+        # logging.info("Successfully generated the directory tree for " + dss.genus[0].upper() + ". " + dss.species + " " + dss.strain + " " + dss.sex)
+
+        dss.make_compose_files()
+        logging.info("Successfully generated the docker-compose files for " + dss.genus[0].upper() + ". " + dss.species + " " + dss.strain + " " + dss.sex)
+
+        # dss.get_source_data_files_from_path()
+        # logging.info("Successfully retrieved source data files for " + dss.genus[0].upper() + ". " + dss.species + " " + dss.strain + " " + dss.sex)
+
+        # dss.deploy_stack()
+        # logging.info("Successfully deployed containers stack for " + dss.genus[0].upper() + ". " + dss.species + " " + dss.strain + " " + dss.sex)
+
+    logging.info("Deploy stacks: done")
 
-    :return:
-    """
-    print("OK")
\ No newline at end of file
diff --git a/docker_compose_generator.py b/docker_compose_generator.py
index d5fe776c921dee8eda64c9c151a7ab478a26be01..81fdcc3495f97fc5f8e27bac53499e2ab91f6f6a 100755
--- a/docker_compose_generator.py
+++ b/docker_compose_generator.py
@@ -6,23 +6,9 @@ import logging
 # import json
 
 """
-docker-compose.yml generator
-The method "generate" works for both docker-compose architecture (old), or docker stack (new)
-This method will write a formatted docker-compose.yml for the specified organism (only requires genus and species)
-
-Made to work in the integration streamlined script "deploy_stacks.py" but can be used as a standalone (either with a CLI
-or in another python file as a module)
-
-Dockerfiles are specific to genus-species: a same organism can have several strains and sexes integrated, but only one 
-set of applications are used (see metadata files for details about what strains/sexes have been integrated for
-an organism)
+docker_compose_generator.py
 
-TODO: write the whole yml dict from scratch (would allow the script to be more reusable into the future and make it
-more customizable while being clearer (instead of the default yml string or input docker-compose template)
-
-TODO: read json
-
-API master key or galaxy: MASTER_API_KEY: XXXXXXX (alphanum, user prompt or git env variable)
+The "generate" method writes a formatted docker-compose.yml for the specified organism (only requires genus and species)
 """
 
 
diff --git a/examples/example_input.json b/examples/json_example_input.json
similarity index 100%
rename from examples/example_input.json
rename to examples/json_example_input.json
diff --git a/examples/yml_example_input.yml b/examples/yml_example_input.yml
index af0fe1213e1ac36cd0011d8bf74cbe6097f74c20..10395abcdb72178d522afc026caf829170ce7336 100644
--- a/examples/yml_example_input.yml
+++ b/examples/yml_example_input.yml
@@ -3,13 +3,13 @@
 # list of species for which the script will have to create these stacks/load data into galaxy/run workflows
 # Add new config option using a config scalar
 
-
-config:  # Simple config part, allowing the user to create his/her own admin account (default is gga)
-  # WARNING: not supported currently, as the default connection is using the gga account
-  admin:
+config:
+  admins:  # Admin account to create. WARNING: not currently supported, as the default connection through a web browser uses the gga account
     username: "nflantier"  # Desired admin username
     password: "blanquette"  # Desired admin password
-    email: "noel.flantier@galaxy.org"  # Desired admin email
+    email: "noel.flantier@mail.com"  # Desired admin email
+  master_api_key: "master"  # Master API key is useless at the moment
+  url_prefix: "http://localhost/  # URL prefix to forward
 
 ectocarpus_sp1:  # Dummy value the user gives to designate the species (isn't used by the script)
   # Species description, leave blank if unknown or you don't want it to be used
diff --git a/setup_data_libraries.py b/galaxy_data_libs_SI.py
similarity index 100%
rename from setup_data_libraries.py
rename to galaxy_data_libs_SI.py
diff --git a/load_data.py b/load_data.py
index 4f3fbbdad58fe245a2277b8c8a5e2c94d9e03972..dd5cb3da1417d773c75e40318891cfe810f89b94 100644
--- a/load_data.py
+++ b/load_data.py
@@ -5,25 +5,262 @@
 import bioblend
 import bioblend.galaxy.objects
 from bioblend import galaxy
+import argparse
+import os
+import subprocess
 import logging
 import sys
-import deploy_stacks
-import create_input_instance
+import yaml
+import re
+from datetime import datetime
 
 
-"""
+""" 
 load_data.py
 
-Find source data files using the information provided in the input file.
-Copy these source data files over into the src_data directory
+Usage: $ python3 load_data.py example.yml [OPTIONS]
+"""
+
 
-Load the data into Galaxy using the script provided by Anthony Bretaudeau (setup_data_libraries)
-Also create/update the species history (TODO: Updating history)
+def parse_input(input_file):
+    """
+    Parse the yml input file to extract data to create the SpeciesData objects
+    Return a list of dictionaries. Each dictionary contains data tied to a species
 
+    :param input_file:
+    :return:
+    """
 
-"""
+    parsed_sp_dict_list = []
+
+    if str(input_file).endswith("yml") or str(input_file).endswith("yaml"):
+        logging.debug("Input format used: YAML")
+    else:
+        logging.critical("Error, please input a YAML file")
+        sys.exit()
+    with open(input_file, 'r') as stream:
+        try:
+            yaml_dict = yaml.safe_load(stream)
+            for k, v in yaml_dict.items():
+                if k == "config":
+                    pass
+                parsed_sp_dict_list.append(v)
+        except yaml.YAMLError as exit_code:
+            logging.critical(str(exit_code) + " (YAML input file might be incorrect)")
+            sys.exit()
+    return parsed_sp_dict_list
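+# Usage sketch (assuming an input laid out like examples/yml_example_input.yml, where
+# every top-level key except "config" holds one species entry):
+#   sp_dict_list = parse_input("examples/yml_example_input.yml")
+#   for sp in sp_dict_list:
+#       print(sp["description"]["genus"], sp["description"]["species"])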
+
+
+class LoadData:
+    """
+    Load data from the src_data subfolders into the galaxy instance's history of a given species
+
+    """
+
+    def __init__(self, parameters_dictionary):
+        self.parameters_dictionary = parameters_dictionary
+        self.species = parameters_dictionary["description"]["species"]
+        self.genus = parameters_dictionary["description"]["genus"]
+        self.strain = parameters_dictionary["description"]["strain"]
+        self.sex = parameters_dictionary["description"]["sex"]
+        self.common = parameters_dictionary["description"]["common_name"]
+        self.date = datetime.today().strftime("%Y-%m-%d")
+        self.origin = parameters_dictionary["description"]["origin"]
+        self.performed = parameters_dictionary["data"]["performed_by"]
+        if parameters_dictionary["data"]["genome_version"] == "":
+            self.genome_version = "1.0"
+        else:
+            self.genome_version = parameters_dictionary["data"]["genome_version"]
+        if parameters_dictionary["data"]["ogs_version"] == "":
+            self.ogs_version = "1.0"
+        else:
+            self.ogs_version = parameters_dictionary["data"]["ogs_version"]
+        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
+        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
+        self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
+        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
+        self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
+        self.genus_species = self.genus_lowercase + "_" + self.species
+        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        # Testing with localhost/scratchgmodv1
+        self.instance = None
+        self.history_id = None
+        self.library_id = None
+        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+        self.main_dir = None
+        self.species_dir = None
+        self.org_id = None
+        self.genome_analysis_id = None
+        self.ogs_analysis_id = None
+        self.tool_panel = None
+        self.datasets = dict()
+        self.source_files = dict()
+        self.workflow_name = None
+        self.metadata = dict()
+        self.api_key = "master"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
+        if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
+            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
+        else:
+            self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
+        # Directory/subdirectories where data files are located (fasta, gff, ...)
+        self.do_update = False
+        # Update the instance (in histories corresponding to the input) instead of creating a new one
+        self.api_key = "master"
+        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
+        self.species_name_regex_litteral = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder re
+
+
+    def modify_fasta_headers(self):
+        """
+        Change the fasta headers before integration.
+
+        :return:
+        """
+
+        try:
+            os.chdir(self.species_dir)
+            working_dir = os.getcwd()
+        except OSError:
+            logging.info("Cannot access " + self.species_dir + ", run with higher privileges")
+            logging.info("Fatal error: exit")
+            sys.exit()
+        self.source_files = dict()
+        annotation_dir, genome_dir = None, None
+        for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
+            if "annotation/" in d:
+                annotation_dir = d
+                for f in os.listdir(d):
+                    if f.endswith("proteins.fasta"):
+                        self.source_files["proteins_file"] = os.path.join(d, f)
+                    elif f.endswith("transcripts-gff.fa"):
+                        self.source_files["transcripts_file"] = os.path.join(d, f)
+                    elif f.endswith(".gff"):
+                        self.source_files["gff_file"] = os.path.join(d, f)
+            elif "genome/" in d:
+                genome_dir = d
+                for f in os.listdir(d):
+                    if f.endswith(".fa"):
+                        self.source_files["genome_file"] = os.path.join(d, f)
+                logging.debug("source files found:")
+        for k, v in self.source_files.items():
+            logging.debug("\t" + k + "\t" + v)
+
+        # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
+        # production version
+        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/utils/phaeoexplorer-change_pep_fasta_header.sh",
+                              self.source_files["proteins_file"]]
+        # test version
+        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
+                              # self.source_files["proteins_file"]]
+        logging.info("Changing fasta headers: " + self.source_files["proteins_file"])
+        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
+        # production version
+        modify_transcript_headers = [str(self.main_dir) + "/gga_load_data/utils/phaeoexplorer-change_transcript_fasta_header.sh",
+                                     self.source_files["transcripts_file"]]
+        # test version
+        # modify_transcript_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
+        #                              self.source_files["transcripts_file"]]
+        logging.info("Changing fasta headers: " + self.source_files["transcripts_file"])
+        subprocess.run(modify_transcript_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
+
+        # src_data cleaning
+        if os.path.exists(annotation_dir + "outfile"):
+            subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
+                           stdout=subprocess.PIPE,
+                           cwd=annotation_dir)
+        if os.path.exists(annotation_dir + "gmon.out"):
+            subprocess.run(["rm", annotation_dir + "/gmon.out"],
+                           stdout=subprocess.PIPE,
+                           cwd=annotation_dir)
+
+
+    def setup_data_libraries(self):
+        """
+        - generate blast banks and docker-compose
+        - load data into the galaxy container with the galaxy_data_libs_SI.py script
+
+        :return:
+        """
+
+        try:
+            logging.info("Loading data into the galaxy container")
+            subprocess.run("../serexec genus_species_galaxy /tool_deps/_conda/bin/python /opt/galaxy_data_libs_SI.py", shell=True)
+        except subprocess.CalledProcessError:
+            logging.info("Cannot load data into the galaxy container for " + self.full_name)
+            pass
+        else:
+            logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
+
+        self.get_species_history_id()
+        # self.get_instance_attributes()
+        #
+        # # import all datasets into current history
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
+
+
+
+    def generate_blast_banks(self):
+        """
+        Automatically generate blast banks for a species and commit
+
+        :return:
+        """
+
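+        # Minimal sketch (assumptions: makeblastdb from the BLAST+ suite is on the PATH
+        # and modify_fasta_headers() has already populated self.source_files):
+        # for source_key, db_type in [("genome_file", "nucl"), ("proteins_file", "prot")]:
+        #     if source_key in self.source_files:
+        #         subprocess.run(["makeblastdb", "-in", self.source_files[source_key],
+        #                         "-dbtype", db_type, "-parse_seqids"],
+        #                        stdout=subprocess.PIPE)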
+
+    def connect_to_instance(self):
+        """
+        Test the connection to the galaxy instance for the current organism
+        Exit if it cannot connect to the instance
+        """
+        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password",
+                                              verify=False)
+        logging.info("Connecting to the galaxy instance ...")
+        try:
+            self.instance.histories.get_histories()
+            self.tool_panel = self.instance.tools.get_tool_panel()
+        except bioblend.ConnectionError:
+            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
+            sys.exit()
+        else:
+            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
+        self.instance.histories.create_history(name="FOO")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+
+    parser.add_argument("input",
+                        type=str,
+                        help="Input file (yml)")
+
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_false")
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
 
+    logging.info("Load data: start")
+    sp_dict_list = parse_input(args.input)
 
+    for sp_dict in sp_dict_list:
+        o = LoadData(parameters_dictionary=sp_dict)
+        o.main_dir = os.path.abspath(args.dir)
 
+        o.modify_fasta_headers()
+        logging.info("Successfully formatted files headers " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
 
+        # o.setup_data_libraries()
+        # logging.info("Successfully set up data libraries in galaxy for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
 
+    logging.info("Load data: done")
diff --git a/run_workflow.py b/run_workflow.py
index 836e3e88982589c26a669cd7248b9997bfa67f9a..00e0c8222e858faaae658a18db7cd3b18fd3c74b 100644
--- a/run_workflow.py
+++ b/run_workflow.py
@@ -1,2 +1,464 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
+
+
+import bioblend
+import bioblend.galaxy.objects
+from bioblend import galaxy
+import argparse
+import os
+import subprocess
+import logging
+import sys
+import yaml
+import re
+import json
+from datetime import datetime
+from gga_autoload.gga_load_data import metadata_generator
+
+""" 
+deploy_stacks.py
+
+Usage: $ python3 deploy_stacks.py -i example.yml [OPTIONS]
+"""
+
+
+def parse_input(input_file):
+    """
+    Parse the yml input file to extract data to create the SpeciesData objects
+    Return a list of dictionaries. Each dictionary contains data tied to a species
+
+    :param input_file:
+    :return:
+    """
+
+    parsed_sp_dict_list = []
+
+    if str(input_file).endswith("yml") or str(input_file).endswith("yaml"):
+        logging.debug("Input format used: YAML")
+    else:
+        logging.critical("Error, please input a YAML file")
+        sys.exit()
+    with open(input_file, 'r') as stream:
+        try:
+            yaml_dict = yaml.safe_load(stream)
+            for k, v in yaml_dict.items():
+                if k == "config":
+                    pass
+                parsed_sp_dict_list.append(v)
+        except yaml.YAMLError as exit_code:
+            logging.critical(str(exit_code) + " (YAML input file might be incorrect)")
+            sys.exit()
+    return parsed_sp_dict_list
+
+
+class RunWorkflow:
+    """
+    Run a workflow into the galaxy instance's history of a given species
+
+    """
+
+    def __init__(self, parameters_dictionary):
+        self.parameters_dictionary = parameters_dictionary
+        self.species = parameters_dictionary["description"]["species"]
+        self.genus = parameters_dictionary["description"]["genus"]
+        self.strain = parameters_dictionary["description"]["strain"]
+        self.sex = parameters_dictionary["description"]["sex"]
+        self.common = parameters_dictionary["description"]["common_name"]
+        self.date = datetime.today().strftime("%Y-%m-%d")
+        self.origin = parameters_dictionary["description"]["origin"]
+        self.performed = parameters_dictionary["data"]["performed_by"]
+        if parameters_dictionary["data"]["genome_version"] == "":
+            self.genome_version = "1.0"
+        else:
+            self.genome_version = parameters_dictionary["data"]["genome_version"]
+        if parameters_dictionary["data"]["ogs_version"] == "":
+            self.ogs_version = "1.0"
+        else:
+            self.ogs_version = parameters_dictionary["data"]["ogs_version"]
+        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
+        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
+        self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
+        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
+        self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
+        self.genus_species = self.genus_lowercase + "_" + self.species
+        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        # Testing with localhost/scratchgmodv1
+        self.instance = None
+        self.history_id = None
+        self.library_id = None
+        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+        self.main_dir = None
+        self.species_dir = None
+        self.org_id = None
+        self.genome_analysis_id = None
+        self.ogs_analysis_id = None
+        self.tool_panel = None
+        self.datasets = dict()
+        self.source_files = dict()
+        self.workflow_name = None
+        self.metadata = dict()
+        self.api_key = "master"
+        if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
+            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
+        else:
+            self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
+        # Directory/subdirectories where data files are located (fasta, gff, ...)
+        self.do_update = False
+        # Update the instance (in histories corresponding to the input) instead of creating a new one
+        self.api_key = "master"
+        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
+        self.species_name_regex_litteral = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder re
+
+
+    def get_species_history_id(self):
+        """
+        Set and return the current species history id in its galaxy instance
+
+        :return:
+        """
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        self.instance.histories.show_history(history_id=self.history_id)
+
+        return self.history_id
+
+
+    def create_species_history(self):
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        print("\n" + str(histories) + "\n" + self.full_name + "\n")
+        if not histories:
+            self.instance.histories.create_history(name="FOO")
+            print("Created history!")
+
+
+    def get_instance_attributes(self):
+        """
+        retrieves instance attributes:
+        - working history ID
+        - libraries ID (there should only be one library!)
+        - datasets IDs
+
+        :return:
+        """
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        logging.debug("history ID: " + self.history_id)
+        libraries = self.instance.libraries.get_libraries()  # normally only one library
+        self.library_id = libraries[0]["id"]  # project data folder/library
+        logging.debug("library ID: " + self.library_id)
+        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
+
+        folders_ids = {}
+        current_folder_name = ""
+        for i in instance_source_data_folders:
+            for k, v in i.items():
+                if k == "name":
+                    folders_ids[v] = 0
+                    current_folder_name = v
+                if k == "id":
+                    folders_ids[current_folder_name] = v
+        logging.info("Folders and datasets IDs: ")
+        self.datasets = dict()
+        for k, v in folders_ids.items():
+            logging.info("\t" + k + ": " + v)
+            if k == "/genome":
+                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
+                for k2, v2 in sub_folder_content.items():
+                    for e in v2:
+                        if type(e) == dict:
+                            if e["name"].endswith(".fa"):
+                                self.datasets["genome_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+            elif k == "/annotation/" + self.genus_species:
+                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
+                for k2, v2 in sub_folder_content.items():
+                    for e in v2:
+                        if type(e) == dict:
+                            # TODO: manage several files of the same type and manage versions
+                            if e["name"].endswith("transcripts-gff.fa"):
+                                self.datasets["transcripts_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            elif e["name"].endswith("proteins.fasta"):
+                                self.datasets["proteins_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            elif e["name"].endswith(".gff"):
+                                self.datasets["gff_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            elif e["name"].endswith("MALE"):
+                                self.datasets["gff_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+
+
+    def init_instance(self):
+        """
+        Galaxy instance startup in preparation for running workflows
+        - remove Homo sapiens from the chado database.
+        - add organism and analyses into the chado database --> separate
+        - get any other existing organisms IDs before updating the galaxy instance --> separate
+
+        TODO: move the library and analysis/data stuff to a separate function
+        :return:
+        """
+
+        self.connect_to_instance()
+        # self.get_species_history_id()  # would fail when the history doesn't exist yet; the history is resolved below instead
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        # Create the first history if it doesn't exist yet
+        if not histories:
+            self.instance.histories.create_history(name=str(self.full_name))
+            histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        logging.debug("history ID: " + self.history_id)
+        self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library (normally only one library)
+        logging.debug("library ID: " + self.library_id)
+        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
+
+        # Delete Homo sapiens from Chado database
+        logging.debug("Getting 'Homo sapiens' ID in instance's chado database")
+        get_sapiens_id_job = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"genus": "Homo", "species": "sapiens"})
+        get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
+        get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output)
+        try:
+            logging.debug("Deleting Homo 'sapiens' in the instance's chado database")
+            get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
+            sapiens_id = str(
+                get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
+            self.instance.tools.run_tool(
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
+                history_id=self.history_id,
+                tool_inputs={"organism": str(sapiens_id)})
+        except bioblend.ConnectionError:
+            logging.debug("Homo sapiens isn't in the instance's chado database")
+        except IndexError:
+            logging.debug("Homo sapiens isn't in the instance's chado database")
+            pass
+
+        # TODO: the following actions should be done in a separate function (in case the user wants to do everything manually -- for EOSC)
+        # Add organism (species) to chado
+        logging.info("Adding organism to the instance's chado database")
+        self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"abbr": self.abbreviation,
+                         "genus": self.genus,
+                         "species": self.species,
+                         "common": self.common})
+        # Add OGS analysis to chado
+        logging.info("Adding OGS analysis to the instance's chado database")
+        self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
+                         "program": "Performed by Genoscope",
+                         "programversion": str("OGS" + self.ogs_version),
+                         "sourcename": "Genoscope",
+                         "date_executed": self.date})
+
+        # Add genome analysis to chado
+        logging.info("Adding genome analysis to the instance's chado database")
+        self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
+                         "program": "Performed by Genoscope",
+                         "programversion": str("genome v" + self.genome_version),
+                         "sourcename": "Genoscope",
+                         "date_executed": self.date})
+        self.get_organism_and_analyses_ids()
+        logging.info("Finished initializing instance")
+
+
+    def run_workflow(self, workflow_name, workflow_parameters, datamap):
+        """
+        Run the "main" workflow in the galaxy instance
+        - import data to library
+        - load fasta and gff
+        - sync with tripal
+        - add jbrowse + organism
+        - fill in the tripal views
+
+        TODO: map tool name to step id
+        :param workflow_name:
+        :param workflow_parameters:
+        :param datamap:
+        :return:
+        """
+
+        logging.debug("running workflow: " + str(workflow_name))
+        workflow_ga_file = os.path.join(self.main_dir, "Galaxy-Workflow-" + workflow_name + ".ga")
+        if self.strain != "":
+            custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
+            custom_ga_file_path = os.path.abspath(custom_ga_file)
+        else:
+            custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
+            custom_ga_file_path = os.path.abspath(custom_ga_file)
+        with open(workflow_ga_file, 'r') as ga_in_file:
+            workflow = str(ga_in_file.readlines())
+            # ugly fix for the jbrowse parameters
+            workflow = workflow.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
+                                        str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
+            workflow = workflow.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
+                                        str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
+            workflow = workflow.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
+            # test
+            workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
+                                        "http://localhost/sp/" + self.genus_lowercase+ "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
+            # production
+            # workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
+            #                             "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
+            workflow = workflow[2:-2]  # if the line under doesn't output a correct json
+            # workflow = workflow[:-2]  # if the line above doesn't output a correct json
+
+            workflow_dict = json.loads(workflow)
+
+            self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+            self.workflow_name = workflow_name
+            workflow_attributes = self.instance.workflows.get_workflows(name=self.workflow_name)
+            workflow_id = workflow_attributes[0]["id"]
+            show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
+            logging.debug("Workflow ID: " + workflow_id)
+
+            logging.debug("Inputs:")
+            logging.debug(show_workflow["Inputs"])
+            self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
+                                                    history_id=self.history_id,
+                                                    params=workflow_parameters,
+                                                    inputs=datamap,
+                                                    inputs_by="")
+            self.instance.workflows.delete_workflow(workflow_id=workflow_id)
+
+
+    def get_organism_and_analyses_ids(self):
+        """
+        Retrieve current organism ID and OGS and genome chado analyses IDs (needed to run some tools as Tripal/Chado
+        doesn't accept organism/analyses names as valid inputs)
+
+        :return:
+        """
+        # Get the ID for the current organism in chado
+        org = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"genus": self.genus, "species": self.species})
+        org_job_out = org["outputs"][0]["id"]
+        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
+        try:
+            org_output = json.loads(org_json_output)[0]
+            self.org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
+        except IndexError:
+            logging.debug("no organism matching " + self.full_name + " exists in the instance's chado database")
+
+        # Get the ID for the OGS analysis in chado
+        ogs_analysis = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
+        ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
+        ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
+        try:
+            ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
+            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])
+        except IndexError:
+            logging.debug("no matching OGS analysis exists in the instance's chado database")
+
+        # Get the ID for the genome analysis in chado
+        genome_analysis = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
+        genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
+        genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
+        try:
+            genome_analysis_output = json.loads(genome_analysis_json_output)[0]
+            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])
+        except IndexError:
+            logging.debug("no matching genome analysis exists in the instance's chado database")
+
+
+    def connect_to_instance(self):
+        """
+        TODO: move in init/access
+        TODO: password
+        Test the connection to the galaxy instance for the current organism
+        Exit if it cannot connect to the instance
+        """
+        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password",
+                                              verify=False)
+        logging.info("Connecting to the galaxy instance ...")
+        try:
+            self.instance.histories.get_histories()
+            self.tool_panel = self.instance.tools.get_tool_panel()
+        except bioblend.ConnectionError:
+            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
+            sys.exit()
+        else:
+            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
+        self.instance.histories.create_history(name="FOO")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+
+    parser.add_argument("input",
+                        type=str,
+                        help="Input file (yml)")
+
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_false")
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
+    logging.info("Start")
+    sp_dict_list = parse_input(args.input)
+
+    for sp_dict in sp_dict_list:
+        o = RunWorkflow(parameters_dictionary=sp_dict)
+        o.main_dir = os.path.abspath(args.dir)
+        if args.init_instance:
+            logging.info(" Initializing the galaxy instance")
+            o.init_instance()
+            o.get_instance_attributes()
+            # metadata[genus_species_strain_sex]["initialized"] = True
+        if args.load_data:
+            logging.info("Loading data into galaxy")
+            # o.load_data()
+            # metadata[genus_species_strain_sex]["data_loaded_in_instance"] = True
+        if args.run_main:
+            logging.info("Running main workflow")
+            o.get_organism_and_analyses_ids()
+            workflow_parameters = dict()
+            workflow_parameters["0"] = {}
+            workflow_parameters["1"] = {}
+            workflow_parameters["2"] = {}
+            workflow_parameters["3"] = {}
+            workflow_parameters["4"] = {"organism": al.org_id,
+                                        "analysis_id": al.genome_analysis_id,
+                                        "do_update": "true"}
+            workflow_parameters["5"] = {"organism": al.org_id,
+                                        "analysis_id": al.ogs_analysis_id}
+            workflow_parameters["6"] = {"organism_id": al.org_id}
+            workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id}
+            workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id}
+            workflow_parameters["9"] = {"organism_id": al.org_id}
+            workflow_parameters["10"] = {}
+            workflow_parameters["11"] = {}
+
+            o.datamap = dict()
+            o.datamap["0"] = {"src": "hda", "id": al.datasets["genome_file"]}
+            o.datamap["1"] = {"src": "hda", "id": al.datasets["gff_file"]}
+            o.datamap["2"] = {"src": "hda", "id": al.datasets["proteins_file"]}
+            o.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]}
+
+            o.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap)
+            # metadata[genus_species_strain_sex]["workflows_run"] = metadata[genus_species_strain_sex]["workflows_run"].append("main")
diff --git a/table_parser.py b/table_parser.py
index 9e55ecd3eb51c9dd0d755d2a4532d798c576df52..9314b91cedb9e7f6809d5524822d282aad2d2532 100755
--- a/table_parser.py
+++ b/table_parser.py
@@ -1,80 +1,79 @@
-import os
-import sys
-import pandas  # xlrd required for excel files reading
-import numpy
-import json
-import argparse
-import logging
-from datetime import datetime
-
-"""
-OBSOLETE
-
-Input parser script. 
-Does not work for ods spreadsheets (save as xls or xlsx instead) --> need to handle with pandas_ods_reader (requires ezodf, lxml)
-Does not support multiple sheets (TODO: "integration" and "update" sheets (1 and 2))
-See example toy table (toy_table.xls)
-
-TODO: move this script inside autoload
-
-standalone usage: python3 table_parser.py <tabulated_file> -d <directory_to_write_json_to (default: cwd)>
-"""
-
-
-class TableParser:
-
-	def __init__(self, table_file, dir):
-		self.dir = os.path.abspath(args.dir)
-		self.table_file = table_file
-		self.method = None  # TODO: instant launch or just parse (standalone)
-		self.extension = None
-		self.meta = dict()
-		self.json_file = None	
-
-	def parse_table(self, extension):
-		if extension == "xls":
-			pandas_table = pandas.DataFrame(pandas.read_excel(self.table_file))
-		elif extension == "csv":
-			pandas_table = pandas.DataFrame(pandas.read_csv(self.table_file))
-		else:
-			logging.info("wrong format: input tabulated file cannot be read (supported formats: xls, xlsx, csv)")
-			sys.exit()
-		pandas_table = pandas_table.replace(numpy.nan, "", regex=True)
-		
-		for char in " ,.()-/":
-			pandas_table = pandas_table.replace("\\" + char, "_", regex=True)
-		pandas_table = pandas_table.replace("\\__", "_", regex=True)
-		pandas_table.loc[pandas_table["genome version"] == "", "genome version"] = "1.0"
-		pandas_table.loc[pandas_table["ogs version"] == "", "ogs version"] = "1.0"
-		pandas_table.loc[pandas_table["version"] == "", "version"] = "1.0"
-		pandas_table.loc[pandas_table["date"] == "", "date"] = datetime.today().strftime("%Y-%m-%d")
-		with open(os.path.join(self.dir, self.json_file), 'w') as json_file:
-			json_file.truncate(0)
-			json_content = list()
-			for organism in range(0, len(pandas_table.index)):
-				organism_dict = pandas_table.iloc[organism].to_dict()
-				for k, v in organism_dict.items():
-					v = str(v).split(" ")
-					v = "_".join(v)
-					v = v.replace("__", "_")
-					if v.endswith("_"):
-						v = v[:-1]
-				json_content.append(organism_dict)
-			json.dump(json_content, json_file, indent=4)
-
-	def write_json(self, data, filename):
-		with open(filename, 'w') as f:
-			json.dump(data, f, indent=4)
-
-
-if __name__ == "__main__":
-	parser = argparse.ArgumentParser(description="Table parser for phaeoexplorer data")
-	parser.add_argument("input", type=str, help="input table")
-	parser.add_argument("-d", "--dir", type=str, help="Where to write the output json file that is be used for integration", default = os.getcwd())
-	args = parser.parse_args()
-
-	if args.input.endswith("xlsx") or args.input.endswith("xls"):
-		tp = TableParser(table_file=args.input, dir=args.dir)
-		tp.extension = args.input.split(".")[1]
-		tp.json_file = tp.dir + "/dataloader_" + datetime.today().strftime("%Y%m%d") + ".json"
-		tp.parse_table(extension="xls")
+import os
+import sys
+import pandas  # xlrd required for excel files reading
+import numpy
+import json
+import argparse
+import logging
+from datetime import datetime
+
+"""
+!! OBSOLETE !!
+
+Input parser script. 
+Does not work for ods spreadsheets (save as xls or xlsx instead) --> need to handle with pandas_ods_reader (requires ezodf, lxml)
+Does not support multiple sheets (TODO: "integration" and "update" sheets (1 and 2))
+See example toy table (toy_table.xls)
+
+standalone usage: python3 table_parser.py <tabulated_file> -d <directory_to_write_json_to (default: cwd)>
+
+"""
+
+
+class TableParser:
+
+	def __init__(self, table_file, dir):
+		self.dir = os.path.abspath(args.dir)
+		self.table_file = table_file
+		self.method = None  # TODO: instant launch or just parse (standalone)
+		self.extension = None
+		self.meta = dict()
+		self.json_file = None	
+
+	def parse_table(self, extension):
+		if extension == "xls":
+			pandas_table = pandas.DataFrame(pandas.read_excel(self.table_file))
+		elif extension == "csv":
+			pandas_table = pandas.DataFrame(pandas.read_csv(self.table_file))
+		else:
+			logging.info("wrong format: input tabulated file cannot be read (supported formats: xls, xlsx, csv)")
+			sys.exit()
+		pandas_table = pandas_table.replace(numpy.nan, "", regex=True)
+		
+		for char in " ,.()-/":
+			pandas_table = pandas_table.replace("\\" + char, "_", regex=True)
+		pandas_table = pandas_table.replace("\\__", "_", regex=True)
+		pandas_table.loc[pandas_table["genome version"] == "", "genome version"] = "1.0"
+		pandas_table.loc[pandas_table["ogs version"] == "", "ogs version"] = "1.0"
+		pandas_table.loc[pandas_table["version"] == "", "version"] = "1.0"
+		pandas_table.loc[pandas_table["date"] == "", "date"] = datetime.today().strftime("%Y-%m-%d")
+		with open(os.path.join(self.dir, self.json_file), 'w') as json_file:
+			json_file.truncate(0)
+			json_content = list()
+			for organism in range(0, len(pandas_table.index)):
+				organism_dict = pandas_table.iloc[organism].to_dict()
+				for k, v in organism_dict.items():
+					v = str(v).split(" ")
+					v = "_".join(v)
+					v = v.replace("__", "_")
+					if v.endswith("_"):
+						v = v[:-1]
+				json_content.append(organism_dict)
+			json.dump(json_content, json_file, indent=4)
+
+	def write_json(self, data, filename):
+		with open(filename, 'w') as f:
+			json.dump(data, f, indent=4)
+
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser(description="Table parser for phaeoexplorer data")
+	parser.add_argument("input", type=str, help="input table")
+	parser.add_argument("-d", "--dir", type=str, help="Where to write the output json file that is be used for integration", default = os.getcwd())
+	args = parser.parse_args()
+
+	if args.input.endswith("xlsx") or args.input.endswith("xls"):
+		tp = TableParser(table_file=args.input, dir=args.dir)
+		tp.extension = args.input.split(".")[1]
+		tp.json_file = tp.dir + "/dataloader_" + datetime.today().strftime("%Y%m%d") + ".json"
+		tp.parse_table(extension="xls")
diff --git a/templates/compose-template.yml b/templates/compose-template.yml
index 590923cd3b27ff536dd15d51931524783b5c52a3..b3b85789b9cb74224cef40cffd643f35c6a9eb38 100755
--- a/templates/compose-template.yml
+++ b/templates/compose-template.yml
@@ -81,7 +81,7 @@ services:
     galaxy:
         image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
         volumes:
-            - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py
+            - ../galaxy_data_libs_SI.py:/opt/galaxy_data_libs_SI.py
             - ./docker_data/galaxy:/export
             - ./src_data/:/project_data:ro
             #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
diff --git a/templates/stack-organism.yml b/templates/stack-organism.yml
index 519b96f55d309b748c52b0b33d231727dce72870..103757f4df201e28cfde0b22c11e2105a3bc267c 100644
--- a/templates/stack-organism.yml
+++ b/templates/stack-organism.yml
@@ -112,7 +112,7 @@ services:
     galaxy:
         image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
         volumes:
-            - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py
+            - ../galaxy_data_libs_SI.py:/opt/galaxy_data_libs_SI.py
             - ./docker_data/galaxy/:/export/
             - ./src_data/:/project_data/:ro
             #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
@@ -130,7 +130,7 @@ services:
             GALAXY_DEFAULT_ADMIN_USER: "gga"
             GALAXY_DEFAULT_ADMIN_PASSWORD: "password"
             GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org, gga@sb-roscoff.fr, lgueguen@sb-roscoff.fr, alebars@sb-roscoff.fr"   # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect
-            GALAXY_CONFIG_MASTER_API_KEY: "dev"
+            GALAXY_CONFIG_MASTER_API_KEY: "master"
             ENABLE_FIX_PERMS: 0
             PROXY_PREFIX: /sp/genus_species/galaxy
             GALAXY_TRIPAL_URL: http://tripal.genus_species/tripal/
diff --git a/ext_scripts/__init__.py b/utils/__init__.py
similarity index 100%
rename from ext_scripts/__init__.py
rename to utils/__init__.py
diff --git a/ext_scripts/blastdb.py b/utils/blastdb.py
similarity index 100%
rename from ext_scripts/blastdb.py
rename to utils/blastdb.py
diff --git a/ext_scripts/common-stringSubsitute.py b/utils/common-stringSubsitute.py
similarity index 97%
rename from ext_scripts/common-stringSubsitute.py
rename to utils/common-stringSubsitute.py
index c32a177b83f45b3ee68c45151c3bb36147561d7a..c4d22a9fe017a03feb3b276047924353fd864406 100755
--- a/ext_scripts/common-stringSubsitute.py
+++ b/utils/common-stringSubsitute.py
@@ -1,37 +1,37 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import argparse
-import os
-import re
-import sys
-
-# Return the file obtained by replacing the occurrences of pattern by the replacement string.
-# Use of python method re.sub()
-# python common-stringSubsitute.py -f file -p pattern -r replacement_string
-# ex : python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'
-
-if __name__ == '__main__':
-
-    #Get arguments
-    parser = argparse.ArgumentParser(description="Return the file obtained by replacing the occurrences of pattern by the replacement string. Use of python method re.sub(). Example: python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'")
-    parser.add_argument('-i','--infile', help='Input file', required=True)
-    parser.add_argument('-o','--outfile', help='Output file', default='outfile')
-    parser.add_argument('-p','--pattern', help='Pattern string to be replaced', required=True)
-    parser.add_argument('-r','--repl', help='Replacement string', required=True)
-    args = parser.parse_args()
-
-    infilename=args.infile
-    outfilename=args.outfile
-    pattern=args.pattern
-    repl=args.repl
-
-    infile=open(infilename,'r')
-    outfile=open(outfilename,'w')
-
-    lines=infile.readlines()
-
-    for line in lines :
-        line_out=re.sub(pattern,repl,line)
-        outfile.write(line_out)
-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+import re
+import sys
+
+# Return the file obtained by replacing the occurrences of pattern by the replacement string.
+# Use of python method re.sub()
+# python common-stringSubsitute.py -f file -p pattern -r replacement_string
+# ex : python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'
+
+if __name__ == '__main__':
+
+    #Get arguments
+    parser = argparse.ArgumentParser(description="Return the file obtained by replacing the occurrences of pattern by the replacement string. Use of python method re.sub(). Example: python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'")
+    parser.add_argument('-i','--infile', help='Input file', required=True)
+    parser.add_argument('-o','--outfile', help='Output file', default='outfile')
+    parser.add_argument('-p','--pattern', help='Pattern string to be replaced', required=True)
+    parser.add_argument('-r','--repl', help='Replacement string', required=True)
+    args = parser.parse_args()
+
+    infilename=args.infile
+    outfilename=args.outfile
+    pattern=args.pattern
+    repl=args.repl
+
+    infile=open(infilename,'r')
+    outfile=open(outfilename,'w')
+
+    lines=infile.readlines()
+
+    for line in lines :
+        line_out=re.sub(pattern,repl,line)
+        outfile.write(line_out)
+
     outfile.close()
\ No newline at end of file
diff --git a/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh b/utils/phaeoexplorer-change_pep_fasta_header.sh
similarity index 96%
rename from ext_scripts/phaeoexplorer-change_pep_fasta_header.sh
rename to utils/phaeoexplorer-change_pep_fasta_header.sh
index 0de7b9b7bada4edb88dff1d6422c34c1bfbbd4e8..3cf614f745bfaef03725038f7bb9fac84a00011b 100755
--- a/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh
+++ b/utils/phaeoexplorer-change_pep_fasta_header.sh
@@ -1,17 +1,17 @@
-#!/usr/bin/env bash
-
-INFILE=$1
-OUTFILE=tmpfile
-
-FILE_HEADER_START=$(grep ">" $INFILE | cut -c 1-6 | sort | uniq)
-HEADER_START_STRING=">mRNA."
-
-if [[ "$FILE_HEADER_START" == "$HEADER_START_STRING" ]]
-then
-    /usr/local/genome2/mmo/scripts/common/common-stringSubstitute.py -i $INFILE -o $OUTFILE -p '^>mRNA' -r '>protein'
-    mv $OUTFILE $INFILE
-    echo "'>mRNA' replaced by '>protein'"
-else 
-    echo "Abort. Not all headers start with '>mRNA.':"
-    echo "$FILE_HEADER_START"
+#!/usr/bin/env bash
+
+INFILE=$1
+OUTFILE=tmpfile
+
+FILE_HEADER_START=$(grep ">" $INFILE | cut -c 1-6 | sort | uniq)
+HEADER_START_STRING=">mRNA."
+
+if [[ "$FILE_HEADER_START" == "$HEADER_START_STRING" ]]
+then
+    /usr/local/genome2/mmo/scripts/common/common-stringSubstitute.py -i $INFILE -o $OUTFILE -p '^>mRNA' -r '>protein'
+    mv $OUTFILE $INFILE
+    echo "'>mRNA' replaced by '>protein'"
+else 
+    echo "Abort. Not all headers start with '>mRNA.':"
+    echo "$FILE_HEADER_START"
 fi
\ No newline at end of file
diff --git a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh b/utils/phaeoexplorer-change_transcript_fasta_header.sh
similarity index 100%
rename from ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh
rename to utils/phaeoexplorer-change_transcript_fasta_header.sh
diff --git a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak b/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak
similarity index 97%
rename from ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak
rename to utils/phaeoexplorer-change_transcript_fasta_header.sh.bak
index 196675b503a42188dce58d3b930e1b804aab6868..12ce4e56544070af8daddcb3f981b7e0dc81f3fd 100755
--- a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak
+++ b/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak
@@ -1,7 +1,7 @@
-#!/usr/bin/env bash
-
-INFILE=$1
-OUTFILE=tmpfile
-/home/fr2424/sib/alebars/gga_load_data/ext_scripts/common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA'
-mv $OUTFILE $INFILE
-echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1"
+#!/usr/bin/env bash
+
+INFILE=$1
+OUTFILE=tmpfile
+/home/fr2424/sib/alebars/gga_load_data/ext_scripts/common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA'
+mv $OUTFILE $INFILE
+echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1"