diff --git a/README.md b/README.md
index 88a48e410076e2568393ddfa9707c7ef3e3e2d44..e3a69adb2b37f607c87460eb6e4dd34075019cff 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,93 @@
 # gga_load_data
 
-Automated integration of new organisms into GGA instances
+Automated integration of new organisms into GGA instances.
 
+## Description:
+This script automates the integration of new organisms into GGA instances as part of the phaeoexplorer project.
+As input, the script takes either a tabulated file (xls, xlsx or csv) or a json file describing the organisms for which it has to create/update instances.
+For each organism to be integrated, the script needs at least its genus and species (strain, sex, genome version and annotation version are optional; the two latter default to version 1.0 if missing, while the two former are left empty and are not considered during the integration process).
+See the toy datasets (input_toy.json and input_toy.xlsx) for an example of the information that can be described and of the correct formatting of these input files. The script should then take care of everything (for phaeoexplorer organisms), from generating the directory tree to running workflows and tools in the galaxy instance.
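+
+A minimal json input for a single organism could look like the sketch below (the keys mirror the fields read by autoload.py; input_toy.json remains the authoritative example and the values here are purely illustrative):
+```
+[
+  {
+    "genus": "Genus",
+    "species": "species",
+    "strain": "strain",
+    "sex": "male",
+    "common": "common name",
+    "date": "2020-01-01",
+    "origin": "institute",
+    "performed by": "someone",
+    "genome version": "1.0",
+    "ogs version": "1.0"
+  }
+]
+```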
 
-Requirements:
+## Metadata files (in progress):
+The script also generates a metadata file in the directory of the newly integrated species, summing up what actions were taken for this organism (see meta_toy.yaml for
+the kind of information it can contain). It also creates another metadata file in the main directory (the one holding all the integrated organisms), which aggregates the metadata files of all integrated organisms. These metadata files are also updated when updating an existing instance.
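+
+A sketch of the kind of content such a metadata file can hold (the keys below match what autoload.py records; meta_toy.yaml is the reference and this fragment is illustrative only):
+```
+genus_species_strain_sex:
+    initialized: true
+    data_loaded_in_instance: true
+    workflows_run:
+        - main
+```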
+
+## Directory tree:
+For every input organism, the script will create the following directory structure, or try to update it if it already exists.
+It will update the files in the main directory to account for the new organisms that are getting integrated.
+
+```
+/main_directory
+|
+|---/genus1_species1
+|   |
+|   |---/blast
+|   |   |---/links.yml
+|   |   |---/banks.yml
+|   |
+|   |---/nginx
+|   |   |---/conf
+|   |       |---/default.conf
+|   |
+|   |---/src_data
+|   |   |---/genome
+|   |   |   |---/genus1_species1_strain_sex
+|   |   |       |---/vX.X
+|   |   |           |---/genus_species_vX.X.fasta
+|   |   |
+|   |   |---/annotation
+|   |   |   |---/genus1_species1_strain_sex
+|   |   |       |---/OGSX.X
+|   |   |           |---/OGSX.X.gff
+|   |   |           |---/OGSX.X_pep.fasta
+|   |   |           |---/OGSX.X_cds.fasta
+|   |   |
+|   |   |---/tracks
+|   |       |---/genus1_species1_strain_sex
+|   |
+|   |---/apollo
+|   |   |---/annotation_groups.tsv
+|   |
+|   |---/docker-compose.yml
+|   |
+|   |---/metadata_genus1_species1.yml
+|
+|---/metadata.yml
+|
+|---/main_proxy
+    |---/conf
+        |---/default.conf
+
+```
+
+## Steps:
+For each input organism:
+1) create the json input file for the script
+2) create the docker-compose.yml for the organism (plus its default.conf, and edit the main_proxy nginx default.conf for the docker-compose configuration)
+3) create the directory tree structure (if it already exists, only create the required directories)
+4) gather files in the "source data" directory tree; the search can be recursive (by default, the source-data folder is fixed for phaeoexplorer data; this default can be changed in the attributes of the Autoload class in autoload.py, or set as a command line argument with ```--source-data-folder <folder>```)
+5) link the source files to the organism's src_data folders
+6) modify headers in the transcripts and protein fasta files
+7) generate blast banks (no commit)
+8) start the containers
+9) connect to the galaxy instance
+10) run data integration galaxy steps (see @ http://gitlab.sb-roscoff.fr/abims/e-infra/gga)
+11) generate and update metadata files
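+
+As an illustration of step 6, the headers of the *proteins.fasta file are renamed from >mRNA* to >protein* (done by phaeoexplorer-change_pep_fasta_header.sh; the identifier below is made up):
+```
+>mRNA.gene_0001  -->  >protein.gene_0001
+```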
+
+## Usage (production):
+For organisms you want to integrate into GGA (not already integrated, i.e. no containers exist yet for the input organisms):
+```
+python3 autoload.py input.xlsx --source-data <dir>
+```
+
+IN PROGRESS:
+For already integrated organisms you want to update with new data (the input shouldn't contain already integrated content):
+```
+python3 autoload.py input.xlsx --update
+```
+
+
+## Requirements:
 - bioblend (v0.13)
 - PyYaml
-- pandas
+- pandas (plus the xlrd package, to read xls/xlsx input)
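+
+One way to install them (assuming the pip package names match the list above):
+```
+pip3 install bioblend==0.13 pyyaml pandas xlrd
+```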
diff --git a/__init__.py b/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/autoload.py b/autoload.py
index ff1e7c26372819d427e0413106204bc7a4c2f864..be7ceac097ecb2e326d885bd1a989d1384136609 100644
--- a/autoload.py
+++ b/autoload.py
@@ -1,32 +1,55 @@
-from bioblend import galaxy
-import bioblend.galaxy.objects
 import bioblend
+import bioblend.galaxy.objects
+from bioblend import galaxy
 import argparse
 import os
 import subprocess
+import logging
 import sys
 import json
 import yaml
-import numpy
-import pandas
-import logging
 import re
-from . import table_parser, docker_compose_generator
+import table_parser
+import docker_compose_generator
+import metadata_generator
+
+"""
+gga_auto_load main script
+    
+Scripted integration of new data into GGA instances. The input is either a table-like (csv, xls, ...) or a json (TODO: yaml) file
+that describes what data is to be integrated (genus, species, sex, strain, data), see data_example.json for an example of the correct syntax.
+The script will parse the input and take care of everything, from source files directory tree creation to running the gmod tools
+inside the galaxy instances of organisms. 
+
+TODO: By default, the script will do everything needed to have a functional instance from scratch. If you want to bypass this behavior, 
+you have to specify --update as a parameter. The script can also be used to update an existing GGA instance with new data. For example, you have an instance "genus_species" 
+with data for the male sex and want to add the female sex to the same GGA instance. To do this, create your configuration input file as you would normally, and add the "--update"
+argument when invoking the script.
+
+
+STEPS:
+- init
+- create dir_tree
+- find and cp data
+- change headers, etc.
+- generate blast banks and links
+- generate and edit nginx confs
+- generate dc and start the containers
+- connect to instance and launch tools>workflows
+- generate and update metadata
+- exit
+"""
 
 
 class Autoload:
     """
-    Cleaner version for gga_auto_load (to use in production).
-
-    This class possesses most useful parameters to interact with GGA as attributes (as defined in __init__), so new
+    The Autoload class exposes the most useful parameters for interacting with GGA as attributes (as defined in __init__), so new
     methods can be more easily implemented by copying already existing ones (i.e add new analysis, run a workflow, ...)
 
     To run the workflows, place them in the same directory as this script, and add the method + the workflow
     parameters in the main invocation (at the end of the file)
-    TODO: store main workflow as string
+    TODO: store main workflow as string?
     """
 
-    def __init__(self, species_parameters_dictionary: dict, args):
+    def __init__(self, species_parameters_dictionary, args):
         self.species_parameters_dictionary = species_parameters_dictionary
         self.args = args
         self.species = species_parameters_dictionary["species"]
@@ -35,6 +58,7 @@ class Autoload:
         self.sex = species_parameters_dictionary["sex"]
         self.common = species_parameters_dictionary["common"]
         self.date = species_parameters_dictionary["date"]
+        self.origin = species_parameters_dictionary["origin"]
         self.performed = species_parameters_dictionary["performed by"]
         self.genome_version = species_parameters_dictionary["genome version"]
         self.ogs_version = species_parameters_dictionary["ogs version"]
@@ -43,7 +67,7 @@ class Autoload:
         self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
         self.genus_species = self.genus_lowercase + "_" + self.species
         self.instance_url = "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"  # testing
-        self.instance: galaxy = None
+        self.instance = None
         self.history_id = None
         self.library_id = None
         self.main_dir = None
@@ -56,16 +80,23 @@ class Autoload:
         self.source_files = dict()
         self.workflow_name = None
         self.docker_compose_generator = None
+        self.metadata = dict()
+        self.source_data_dir = "/projet/sbr/phaeoexplorer"  # directory/subdirectories where source data files are located
+        self.do_update = False
 
-        # Test the connection to the galaxy instance for the current species
-        # Additionally set some class attributes
+
+    def connect_to_instance(self):
+        """
+        test the connection to the galaxy instance for the current organism
+        the script will crash if it cannot connect to the instance (and it won't try to connect to any other instance either)
         # TODO: auth issues with nginx
+        """
         self.instance = galaxy.GalaxyInstance(url=self.instance_url,
-                                              key="3b36455cb16b4d0e4348e2c42f4bb934",
+                                              key="ec601ea5005766e1bc106e69ad8b9eaa",
                                               email="alebars@sb-roscoff.fr",
                                               password="pouet",
                                               verify=True)
-        logging.info("testing connection to the galaxy instance ...")
+        logging.info("connection to the galaxy instance ...")
         try:
             self.instance.histories.get_histories()
             self.tool_panel = self.instance.tools.get_tool_panel()
@@ -75,18 +106,39 @@ class Autoload:
         else:
             logging.info("successfully connected to galaxy instance @ " + self.instance_url)
 
-        self.main_dir = os.getcwd() + "/"
-        self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"
-
-    def load_data_in_galaxy(self):
+        
+    def get_source_data(self, max_depth):
         """
-        - create the src_data directory tree for the species
-        - change headers for pep file
-        - load data into the galaxy container with the galaxy_data_libs_SI.py script
+        find and copy source data files to src_data directory tree
+        - recursively search for the correct files (within a fixed max depth)
+        - requires the organism src_data directory tree to already be properly created for the organism (run generate_dir_tree)
+        - the source files must have "transcripts", "proteins"/"pep", "genome" in their name, and a gff extension for the ogs file
 
-        :return:
         """
+        src_data_dir = os.path.join(self.species_dir, "src_data")  # to limit rewriting of the src_data directory (no leading slash, or os.path.join discards species_dir)
+        # TODO: use the per-file-type regexes from regex_generator to match the source file names
+        for dirpath, dirnames, files in os.walk(self.source_data_dir):
+            depth = dirpath[len(self.source_data_dir):].count(os.sep)
+            if depth >= max_depth:
+                dirnames[:] = []  # prune the walk so it doesn't descend past max_depth
+            for name in files:
+                # TODO: match "transcripts", "proteins"/"pep", "genome" and *.gff files, then copy them into src_data_dir
+                pass
+    
+    def regex_generator(self, organism_name_pattern):
+        """
+        TODO: build one filename-matching regex per file type from the organism name pattern
+        """
+        re_dict = dict()
+        for file_type in ["gff", "transcripts", "proteins", "genome"]:
+            re_dict[file_type] = None  # TODO: actual pattern
+        return re_dict
+
+    def generate_dir_tree(self):
+        """
+        generate the directory tree for an organism, preparing the next steps
+        """
+        
         os.chdir(self.main_dir)
+        self.main_dir = os.getcwd() + "/"
+        self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"
         try:
             os.mkdir(self.species_dir)
         except FileExistsError:
@@ -102,14 +154,31 @@ class Autoload:
         species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
         try:
             os.mkdir("./src_data")
-            os.mkdir("./src_data/annotation /src_data/genome")
-            os.mkdir("./src_data/annotation/" + species_folder_name + " ./src_data/genome/" + species_folder_name)
+            os.mkdir("./src_data/annotation")
+            os.mkdir("./src_data/genome")
+            os.mkdir("./src_data/tracks")
+            os.mkdir("./src_data/annotation/" + species_folder_name)
+            os.mkdir("./src_data/genome/" + species_folder_name)
+            os.mkdir("./src_data/annotation/" + species_folder_name + "/OGS" + self.ogs_version)
+            os.mkdir("./src_data/genome/" + species_folder_name + "/v" + self.genome_version)
         except FileExistsError:
-            logging.debug("src_data directory tree already exists")
+            if self.do_update:
+                logging.info("updating src_data directory tree")
+            else:
+                logging.info("src_data directory tree already exists")
         except PermissionError:
-            logging.debug("insufficient permission to create src_data directory tree")
+            logging.info("insufficient permission to create src_data directory tree")
 
-        # Data import into galaxy
+    def modify_fasta_headers(self):
+        """
+        """
+
+        try:
+            os.chdir(self.species_dir)
+            working_dir = os.getcwd()
+        except OSError:
+            logging.info("cannot access " + self.species_dir + ", run with higher privileges")
+            sys.exit()
         self.source_files = dict()
         annotation_dir, genome_dir = None, None
         for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
@@ -133,21 +202,22 @@ class Autoload:
 
         # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
         # production version
-        modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh",
+        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/phaeoexplorer-change_pep_fasta_header.sh",
                               self.source_files["proteins_file"]]
         # test version
-        modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
-                              self.source_files["proteins_file"]]
+        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
+                              # self.source_files["proteins_file"]]
         logging.info("changing fasta headers in " + self.source_files["proteins_file"])
         subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
         # production version
-        modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_transcript_fasta_header.sh",
+        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/phaeoexplorer-change_transcript_fasta_header.sh",
+                              self.source_files["transcripts_file"]]
         # test version
-        modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
-                              self.source_files["proteins_file"]]
+        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
+        #                       self.source_files["proteins_file"]]
         logging.info("changing fasta headers in " + self.source_files["transcripts_file"])
         subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
+
         # src_data cleaning
         if os.path.exists(annotation_dir + "outfile"):
             subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
@@ -158,6 +228,21 @@ class Autoload:
                            stdout=subprocess.PIPE,
                            cwd=annotation_dir)
 
+    def generate_blast_banks(self):
+        return None
+
+    def goto_working_dir(self):
+        return None
+
+    def setup_data_libraries(self):
+        """
+        - generate blast banks and docker-compose (TODO: separate function)
+        - load data into the galaxy container with the galaxy_data_libs_SI.py script
+
+        :return:
+        """
+
         setup_data_libraries = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
         try:
             logging.info("loading data into the galaxy container")
@@ -181,10 +266,10 @@ class Autoload:
 
     def get_instance_attributes(self):
         """
-        retrieves a pre-existing instance attributes:
+        retrieves instance attributes:
         - working history ID
-        - libraries ID
-        -
+        - libraries ID (there should only be one library!)
+        - datasets IDs
 
         :return:
         """
@@ -379,7 +464,7 @@ class Autoload:
         org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
         try:
             org_output = json.loads(org_json_output)[0]
-            self.org_id = str(org_output["organism_id"])  # needs to be str to be recognized by chado tools
+            self.org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
         except IndexError:
             logging.debug("no organism matching " + self.full_name + " exists in the instance's chado database")
 
@@ -392,8 +477,7 @@ class Autoload:
         ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
         try:
             ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
-            self.ogs_analysis_id = str(
-                ogs_analysis_output["analysis_id"])  # needs to be str to be recognized by chado tools
+            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])
         except IndexError:
             logging.debug("no matching OGS analysis exists in the instance's chado database")
 
@@ -406,8 +490,7 @@ class Autoload:
         genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
         try:
             genome_analysis_output = json.loads(genome_analysis_json_output)[0]
-            self.genome_analysis_id = str(
-                genome_analysis_output["analysis_id"])  # needs to be str to be recognized by chado tools
+            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])
         except IndexError:
             logging.debug("no matching genome analysis exists in the instance's chado database")
 
@@ -420,29 +503,41 @@ class Autoload:
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic loading and interaction with galaxy instance (GGA)"
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction with galaxy instances for GGA"
                                                  ", following the protocol @ "
                                                  "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
-    parser.add_argument("input", type=str, help="Input table (tabulated file that describes all data)"
-                                                "or json file",
-                        action="store_true")
-    parser.add_argument("-v", "--verbose",
-                        help="Increase output verbosity",
-                        action="store_true")
+    # Dev arguments, TODO: remove in production branch!
+    parser.add_argument("--full",
+                        help="Run everything, from src_data dir tree creation, moving data files (abims) into src_data,"
+                        " modify headers (abims), generate blast banks (doesn't commit them: TODO), initialize GGA instance, load the data and run,"
+                        " the main workflow. To update/add data to container, use --update in conjunction to --full (TODO)")
     parser.add_argument("--init-instance",
-                        help="Initialization of galaxy instance. Run first in an empty instance",
-                        action="store_true")
+                        help="Initialization of galaxy instance. Run first in an empty instance")
     parser.add_argument("--load-data",
-                        help="Create src_data directory tree and load its data into the instance",
-                        action="store_true")
+                        help="Create src_data directory tree and load its data into the instance")
     parser.add_argument("--run-main",
                         help="Run main workflow (load data into chado, sync all with tripal, "
                              "index tripal data, populate materialized view, "
-                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse",
-                        action="store_true")
+                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
     parser.add_argument("--generate-docker-compose",
-                        help="Generate docker-compose.yml for current species",
+                        help="Generate docker-compose.yml for current species")
+    parser.add_argument("--link-source",
+                        help="Find source files in source data dir and copy them to src_data",
                         action="store_true")
+
+    # Production arguments
+    parser.add_argument("input", type=str, help="Input table (tabulated file that describes all data) or json file")
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_false")
+    parser.add_argument("--update",
+                        help="Update already integrated organisms with new data from the input file; docker-compose.yml will not be re-generated"
+                        ", assuming the instances for the organisms are already generated and initialized",
+                        action="store_true")
+    parser.add_argument("--dir",
+                        help="Path of the main directory, either absolute or relative",
+                        default=os.getcwd())
+
     args = parser.parse_args()
 
     if args.verbose:
@@ -466,13 +561,16 @@ if __name__ == "__main__":
 
     for sp_dict in sp_dict_list:
         al = Autoload(species_parameters_dictionary=sp_dict, args=args)
+        al.main_dir = os.path.abspath(args.dir)
+        al.do_update = args.update  # wire the --update flag to the attribute checked in generate_dir_tree
+        # record per-organism metadata in the existing al.metadata attribute ("metadata" alone was undefined here)
+        genus_species_strain_sex = "_".join([al.genus_lowercase, al.species, al.strain, al.sex])
+        al.metadata[genus_species_strain_sex] = {"workflows_run": []}
         if args.init_instance:
             logging.info("initializing the galaxy instance")
             al.init_instance()
             al.get_instance_attributes()
+            metadata[genus_species_strain_sex]["initialized"] = True
         if args.load_data:
             logging.info("loading data into galaxy")
             al.load_data_in_galaxy()
+            metadata[genus_species_strain_sex]["data_loaded_in_instance"] = True
         if args.run_main:
             logging.info("running main workflow")
             al.get_organism_and_analyses_ids()
@@ -500,3 +598,10 @@ if __name__ == "__main__":
             al.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]}
 
             al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap)
+            metadata[genus_species_strain_sex]["workflows_run"] = metadata[genus_species_strain_sex]["workflows_run"].append("main")
+
+        if args.link_source:
+            logging.info("creating the directory tree and linking source data")
+            al.generate_dir_tree()
+            logging.debug(al.main_dir)
+            logging.debug(al.species_dir)
diff --git a/blastdb.py b/blastdb.py
new file mode 100755
index 0000000000000000000000000000000000000000..2794ccbc3d7bee4a01a7d46138517481f2053267
--- /dev/null
+++ b/blastdb.py
@@ -0,0 +1,298 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import collections
+import json
+import logging as log
+import os
+import sys
+
+from shutil import copyfile
+from subprocess import call
+
+
+class BlastBank:
+
+    def __init__(self, raw_org, data_dir_root, rel_path, fasta_file_name, db_dir_root, seq_type, path, is_multi):
+        self.raw_org = raw_org
+        self.org = prettify(raw_org)
+        self.data_dir_root = data_dir_root
+        self.rel_path = rel_path
+        self.fasta_file_name = fasta_file_name
+        self.db_dir_root = db_dir_root
+        self.seq_type = seq_type
+        self.path = path  # http://bipaa.genouest.org/sp/xxx/ Can be the same as raw_org, or something else when having multiple genomes.
+        self.is_multi = is_multi
+
+        self.fasta = os.path.join(data_dir_root, rel_path, fasta_file_name)
+        self.dest_path = os.path.splitext(os.path.join(db_dir_root, self.path, rel_path, fasta_file_name))[0]
+        self.title = sanitize(rel_path + '_' + os.path.splitext(self.fasta_file_name)[0])
+        if self.is_multi:
+            fake_path = rel_path.split('/')
+            if len(fake_path) > 2:
+                fake_path = [fake_path[1]] + [fake_path[0]] + fake_path[2:]
+            fake_path = '/'.join(fake_path)
+            self.pretty_name = prettify(fake_path, True)
+        else:
+            self.pretty_name = self.org + ' ' + prettify(rel_path, False)
+
+        with open(self.fasta, 'r') as f:
+            self.first_id = f.readline()[1:].rstrip()
+
+        if self.seq_type == 'nucl':
+            if 'transcript' in self.fasta_file_name.lower() or 'cdna' in self.fasta_file_name.lower():
+                self.pretty_name += " transcripts"
+            elif 'cds' in self.fasta_file_name.lower():
+                self.pretty_name += " CDS"
+        else:
+            if 'protein' in self.fasta_file_name.lower() or 'pep' in self.fasta_file_name.lower() or 'proteome' in self.fasta_file_name.lower() or self.fasta_file_name.endswith('.faa'):
+                self.pretty_name += " proteins"
+
+        # Just a stupid/hacky string used for sorting bank list
+        self.sort_key = 'a_' if 'genome' in self.title else 'b_'
+        self.sort_key += self.pretty_name
+
+    def __str__(self):
+        return str({
+            'raw_org': self.raw_org,
+            'org': self.org,
+            'data_dir_root': self.data_dir_root,
+            'rel_path': self.rel_path,
+            'fasta_file_name': self.fasta_file_name,
+            'db_dir_root': self.db_dir_root,
+            'seq_type': self.seq_type,
+            'path': self.path,
+            'fasta': self.fasta,
+            'dest_path': self.dest_path,
+            'title': self.title,
+            'pretty_name': self.pretty_name,
+        })
+
+
+def main(args):
+
+    genome_path = os.path.basename(os.getcwd())
+    if not args.multi_org:
+        genome_name = genome_path
+    data_dir_root = os.path.abspath(os.path.join('src_data'))
+    if not os.path.isdir(data_dir_root):
+        raise Exception("Could not find data dir: %s" % data_dir_root)
+
+    db_dir_root = os.path.abspath(args.dest)
+
+    ignore_list = ['func_annot', "apollo_source"]
+    if args.ignore:
+        ignore_list += args.ignore
+
+    # Looking for files
+    log.info("Looking for fasta files in %s:" % data_dir_root)
+    banks = []
+    for root, dirs, files in os.walk(data_dir_root, followlinks=True):
+        file_list = [os.path.realpath(os.path.join(root, filename)) for filename in files]
+        rel_path = root[len(data_dir_root) + 1:]
+
+        skip_current = False
+        for ign in ignore_list:
+            if ign in rel_path:
+                skip_current = True
+
+        if not skip_current:  # skip useless data
+            for f in file_list:
+                f = os.path.basename(f)
+                if f.endswith('.fasta') or f.endswith('.fa') or f.endswith('.fna') or f.endswith('.faa'):
+                    if args.multi_org:
+                        genome_name = rel_path.split('/')[1]
+
+                    if 'protein' in f or 'pep.' in f or 'proteome' in f or f.endswith('.faa'):
+                        seq_type = 'prot'
+                    else:
+                        seq_type = 'nucl'
+                    new_bank = BlastBank(genome_name, data_dir_root, rel_path, f, db_dir_root, seq_type, genome_path, args.multi_org)
+                    log.info("Found '%s' of type: %s" % (new_bank.fasta, new_bank.seq_type))
+                    banks.append(new_bank)
+
+    if not banks:
+        log.info("No fasta file found.")
+    else:
+        for b in banks:
+            makeblastdb(b, args.dry_run, args.no_parse_seqids)
+
+    nuc_list = collections.OrderedDict()
+    prot_list = collections.OrderedDict()
+    banks.sort(key=lambda x: x.sort_key)
+    for b in banks:
+        if b.seq_type == 'nucl':
+            if b.pretty_name not in nuc_list:
+                nuc_list[b.dest_path] = b.pretty_name
+            else:
+                nuc_list[b.dest_path] = "%s (%s)" % (b.pretty_name, b.fasta_file_name)
+        else:
+            if b.pretty_name not in prot_list:
+                prot_list[b.dest_path] = b.pretty_name
+            else:
+                prot_list[b.dest_path] = "%s (%s)" % (b.pretty_name, b.fasta_file_name)
+
+    yml_dir = os.path.abspath('blast')
+    yml_file_path = os.path.abspath(os.path.join(yml_dir, 'banks.yml'))
+    links_file_path = os.path.abspath(os.path.join(yml_dir, 'links.yml'))
+    if not args.dry_run:
+
+        log.info("List of bank names (to use in links.yml):")
+        write_titles(banks)
+
+        log.info("Writing bank list in '%s'" % yml_file_path)
+        if not os.path.exists(yml_dir):
+            os.makedirs(yml_dir, mode=0o755)
+        with open(yml_file_path, 'w') as yml_file:
+            write_yml(yml_file, nuc_list, prot_list)
+
+        log.info("Writing automatic links to links.yml in '%s'" % links_file_path)
+        if os.path.exists(links_file_path):
+            log.info("Making backup of previous links.yml to '%s'" % (links_file_path + '.back'))
+            copyfile(links_file_path, links_file_path + '.back')
+        with open(links_file_path, 'w') as links_yml_file:
+            write_links_yml(links_yml_file, banks, args.apollo)
+
+    else:
+        log.info("List of bank names (to use in links.yml):")
+        write_titles(banks)
+        log.info("Would write bank list in '%s'" % yml_file_path)
+        write_yml(sys.stdout, nuc_list, prot_list)
+        log.info("Would write links.yml in '%s'" % links_file_path)
+        write_links_yml(sys.stdout, banks, args.apollo)
+
+
+def write_yml(yml_file, nuc_list, prot_list):
+
+    nuc = "~"
+    prot = "~"
+
+    if nuc_list:
+        nuc = "\n                ".join(['%s: %s' % (json.dumps(k), json.dumps(v)) for k, v in nuc_list.items()])
+    if prot_list:
+        prot = "\n                ".join(['%s: %s' % (json.dumps(k), json.dumps(v)) for k, v in prot_list.items()])
+
+    print("genouest_blast:", file=yml_file)
+    print("    db_provider:", file=yml_file)
+    print("        list:", file=yml_file)
+    print("            nucleic:", file=yml_file)
+    print("                %s" % nuc, file=yml_file)
+    print("            proteic:", file=yml_file)
+    print("                %s" % prot, file=yml_file)
+
+
+def write_links_yml(yml_file, banks, apollo):
+
+    for bank in banks:
+        print("", file=yml_file)
+        print("# %s" % (bank.pretty_name), file=yml_file)
+
+        link = ''
+        if bank.seq_type == 'prot':
+            spl = bank.org.split()
+            if len(spl) > 2:
+                sp_str = '/'.join(spl[:2])
+                sp_str += '-' + '-'.join(spl[2:])
+            else:
+                sp_str = '/'.join(spl)
+            link = 'http://abims-gga.sb-roscoff.fr/sp/%s/feature/%s/polypeptide/{id}' % (bank.path, sp_str)
+        elif 'genome' in bank.title:
+            dataset_id = bank.org.lower()
+            spl = dataset_id.split()
+            if len(spl) == 2:  # Genus species => gspecies
+                dataset_id = spl[0][:1] + spl[1]
+            elif len(spl) == 3:  # Genus species strain1 => gsstrain1
+                dataset_id = spl[0][:1] + spl[1][:1] + spl[2]
+            else:  # Genus species some garbage => genus_species_some_garbage
+                dataset_id = dataset_id.replace(' ', '_')
+            if apollo:
+                link = '<a href="http://abims-gga.sb-roscoff.fr/sp/' + bank.path + '/jbrowse/?data=data%2F' + dataset_id + '&loc={id}{jbrowse_track}">{id}</a> <a href="http://abims-gga.sb-roscoff.fr/sp/' + bank.path + '/apollo/annotator/loadLink?loc={id}:1{apollo_track}">Apollo</a>'
+            else:
+                link = '<a href="http://abims-gga.sb-roscoff.fr/sp/' + bank.path + '/jbrowse/?data=data%2F' + dataset_id + '&loc={id}{jbrowse_track}">{id}</a>'
+        else:
+            spl = bank.org.split()
+            if len(spl) > 2:
+                sp_str = '/'.join(spl[:2])
+                sp_str += '-' + '-'.join(spl[2:])
+            else:
+                sp_str = '/'.join(spl)
+            link = 'http://abims-gga.sb-roscoff.fr/sp/%s/feature/%s/mRNA/{id}' % (bank.path, sp_str)
+
+        if link:
+            print("%s:" % (bank.title), file=yml_file)
+            print("    db: '%s'" % (bank.title), file=yml_file)
+            print("    '*': '%s'" % (link), file=yml_file)
+        else:
+            print("# Skipped", file=yml_file)
+
+
+def write_titles(banks):
+
+    for bank in banks:
+        print("'%s' -> '%s'      [%s]" % (bank.pretty_name, bank.title, bank.first_id))
+
+
+def makeblastdb(bank, dry_run, no_parse_seqids):
+    log.info("Formatting bank: %s  --->  %s" % (bank.fasta, bank.dest_path))
+    dest_dir = os.path.realpath(os.path.join(bank.dest_path, '..'))
+    if not os.path.exists(dest_dir):
+        log.info("Creating folder: %s" % dest_dir)
+        if not dry_run:
+            os.makedirs(dest_dir, mode=0o755)
+    parse = "-parse_seqids"
+    if no_parse_seqids:
+        parse = ""
+    cmd = "makeblastdb -in '%s' -dbtype '%s' %s -out '%s' -title '%s'" % (bank.fasta, bank.seq_type, parse, bank.dest_path, bank.title)
+    log.info("Running: %s" % cmd)
+    if not dry_run:
+        try:
+            retcode = call(cmd, shell=True)
+            if retcode != 0:
+                raise RuntimeError("Child was terminated by signal " + str(retcode))
+        except OSError as e:
+            print("Execution failed:" + e, file=sys.stderr)
+            sys.exit(1)
+
+
+def prettify(name, capital=True):
+    name = name.replace('_', ' ')
+    name = name.replace('/', ' ')
+    if capital:
+        name = name[0].upper() + name[1:]
+
+    return name
+
+
+def sanitize(name):
+    name = name.lower()
+    name = name.replace(' ', '_')
+    name = name.replace('/', '_')
+
+    return name
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Generate blast databanks and update blast forms.'
+    )
+    parser.add_argument("-v", "--verbose", help="Increase output verbosity.",
+                        action="store_true")
+    parser.add_argument("-d", "--dry-run", help="Dry run: no modification will be done, for testing purpose.",
+                        action="store_true")
+    parser.add_argument("-m", "--multi-org", help="Add this flag if there are multiple organisms in src_data.",
+                        action="store_true")
+    parser.add_argument("-a", "--apollo", help="Add this flag to generate links to apollo.",
+                        action="store_true")
+    parser.add_argument("-p", "--no-parse-seqids", help="Don't use the makeblastdb -parse_seqids option (use this in case you have strange looking sequence ids that make html files unreadable)",
+                        action="store_true")
+    parser.add_argument("--ignore", help='Files or directories to ignore', nargs='*')
+    parser.add_argument("dest", help="Destination directory (not including the genome name, should be mounted on compute nodes)")
+
+    args = parser.parse_args()
+    log.basicConfig(level=log.DEBUG if args.verbose else log.INFO)  # basicConfig is a no-op on a second call
+
+    main(args)
diff --git a/common-stringSubsitute.py b/common-stringSubsitute.py
new file mode 100755
index 0000000000000000000000000000000000000000..c32a177b83f45b3ee68c45151c3bb36147561d7a
--- /dev/null
+++ b/common-stringSubsitute.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+import re
+import sys
+
+# Return the file obtained by replacing the occurrences of pattern with the replacement string.
+# Uses the python method re.sub()
+# python common-stringSubsitute.py -i infile -o outfile -p pattern -r replacement_string
+# ex: python common-stringSubsitute.py -i infile -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'
+
+if __name__ == '__main__':
+
+    # Get arguments
+    parser = argparse.ArgumentParser(description="Return the file obtained by replacing the occurrences of pattern with the replacement string. Uses the python method re.sub(). Example: python common-stringSubsitute.py -i infile -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'")
+    parser.add_argument('-i','--infile', help='Input file', required=True)
+    parser.add_argument('-o','--outfile', help='Output file', default='outfile')
+    parser.add_argument('-p','--pattern', help='Pattern string to be replaced', required=True)
+    parser.add_argument('-r','--repl', help='Replacement string', required=True)
+    args = parser.parse_args()
+
+    infilename = args.infile
+    outfilename = args.outfile
+    pattern = args.pattern
+    repl = args.repl
+
+    # read, substitute and write line by line; "with" also closes both files
+    with open(infilename, 'r') as infile, open(outfilename, 'w') as outfile:
+        for line in infile:
+            outfile.write(re.sub(pattern, repl, line))
\ No newline at end of file
diff --git a/compose-template.yml b/compose-template.yml
new file mode 100755
index 0000000000000000000000000000000000000000..ee3e04ebf89f21498de0f10ea2e46aa1ff5cc1bb
--- /dev/null
+++ b/compose-template.yml
@@ -0,0 +1,229 @@
+# ./docker_data is created and filled with persistent data that should be backed up
+
+version: '2'
+services:
+    proxy:
+        image: quay.io/abretaud/nginx-ldap:latest # Not using the default nginx image as we need the ldap module
+        ports:
+            - "9170:80"
+        links:
+            - galaxy
+            - jbrowse
+            - tripal
+            - apollo
+            - apollo-report
+            - blast
+            - wiki
+        volumes:
+            - ./src_data/:/project_data/
+            #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
+            - ./nginx/conf:/etc/nginx/conf.d
+
+    tripal:
+        image: quay.io/galaxy-genome-annotation/tripal@sha256:4451cc3a601d109c07c7aedcc76bd41a5da7c438c8fa0862488680bd462f125b
+        links:
+            - tripaldb:postgres
+            - elasticsearch:elasticsearch
+        volumes_from:
+            - "galaxy"
+        volumes:
+            - ./src_data:/data:ro
+        environment:
+            BASE_URL_PATH: /sp/genus_species
+            UPLOAD_LIMIT: 20M
+            MEMORY_LIMIT: 512M
+            TRIPAL_GIT_CLONE_MODULES: "https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git  https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]"
+            TRIPAL_DOWNLOAD_MODULES: ""
+            TRIPAL_ENABLE_MODULES: "tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch"
+            SITE_NAME: "Genus species"
+            ENABLE_JBROWSE: /jbrowse/?data=data/gspecies
+            ENABLE_APOLLO: 1
+            ENABLE_BLAST: 1
+            ENABLE_DOWNLOAD: 1
+            ENABLE_WIKI: 1
+            ENABLE_GO: /organism/Genus/species?pane=GO
+            ENABLE_ORTHOLOGY: 0
+            ENABLE_ORTHOLOGY_LINKS: http://localhost/sp/orthology/
+            #THEME: "bipaa"    # Use this to use another theme
+            #THEME_GIT_CLONE: "https://gitlab.inria.fr/abretaud/tripal_bipaa.git"    # Use this to install another theme
+            ADMIN_PASSWORD: XXXXXX  # You need to define it and update it in galaxy config below
+
+    tripaldb:
+        image: quay.io/galaxy-genome-annotation/chado:1.31-jenkins110.1-pg9.5
+        environment:
+            - POSTGRES_PASSWORD=postgres
+            # The default chado image would try to install the schema on first run,
+            # we just want the tools to be available.
+            - INSTALL_CHADO_SCHEMA=0
+        volumes:
+            - ./docker_data/tripal_db/:/var/lib/postgresql/data/
+
+    elasticsearch:
+        image: docker.elastic.co/elasticsearch/elasticsearch:6.6.1
+        #mem_limit: 500m   # This is to limit usage, but can make the docker crash when ram is exhausted, not recommended while indexing all data, ok once indexing is done
+        ulimits:
+            memlock:
+                soft: -1
+                hard: -1
+        volumes:
+            - ./docker_data/elastic_search_index/:/usr/share/elasticsearch/data
+        environment:
+            bootstrap.memory_lock: "true"
+            xpack.security.enabled: "false"
+            xpack.monitoring.enabled: "false"
+            xpack.ml.enabled: "false"
+            xpack.graph.enabled: "false"
+            xpack.watcher.enabled: "false"
+            cluster.routing.allocation.disk.threshold_enabled: "false"
+            ES_JAVA_OPTS: "-Xms200m -Xmx200m"
+            TAKE_FILE_OWNERSHIP: "true"
+
+    galaxy:
+        image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
+        volumes:
+            - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py
+            - ./docker_data/galaxy:/export
+            - ./src_data/:/project_data:ro
+            #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
+            - ./docker_data/jbrowse/:/jbrowse/data/
+            - ./docker_data/apollo/:/apollo-data/
+        links:
+            - "tripaldb:chado"
+        environment:
+            NONUSE: nodejs,proftp,reports
+            GALAXY_LOGGING: full
+            GALAXY_CONFIG_BRAND: "Genus species"
+            GALAXY_CONFIG_ALLOW_LIBRARY_PATH_PASTE: "True"
+            GALAXY_CONFIG_USE_REMOTE_USER: "True"
+            GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN: "bipaa"
+            GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org,gogepp@bipaa"   # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect
+            ENABLE_FIX_PERMS: 0
+            PROXY_PREFIX: /sp/genus_species/galaxy
+            GALAXY_CONFIG_COOKIE_PATH: /galaxy
+            GALAXY_TRIPAL_PASSWORD: XXXXXX  # See tripal config above
+            GALAXY_WEBAPOLLO_URL: http://apollo:8080
+            GALAXY_WEBAPOLLO_USER: "admin_apollo@bipaa"
+            GALAXY_WEBAPOLLO_PASSWORD: "XXXXXX"  # See apollo config below
+            GALAXY_WEBAPOLLO_EXT_URL: /sp/genus_species/apollo
+            GALAXY_CHADO_DBHOST: chado
+            GALAXY_CHADO_DBSCHEMA: chado
+            GALAXY_AUTO_UPDATE_DB: 1
+            GALAXY_AUTO_UPDATE_CONDA: 1
+            GALAXY_AUTO_UPDATE_TOOLS: "/galaxy-central/tools_1.yaml"
+            BLAT_ENABLED: 1
+
+    jbrowse:
+        image: quay.io/galaxy-genome-annotation/jbrowse:v1.16.5
+        volumes:
+            - /data
+        volumes_from:
+            - "galaxy:ro"
+        ports:
+            - "80"
+
+    apollo:
+        image: quay.io/abretaud/apollo:bipaa
+        links:
+            - "apollo_db:db"
+        environment:
+            APOLLO_ADMIN_EMAIL: "admin_apollo@bipaa"   # internal admin user, used by some scripts/api
+            APOLLO_ADMIN_PASSWORD: "XXXXXX"  # define it and adapt galaxy config above
+            APOLLO_BASE_HOST: "http://localhost"
+            APOLLO_PATH_PREFIX: "/sp/genus_species/apollo/"
+            APOLLO_REMOTE_ADMINS: "gogepp@bipaa,abretaud@bipaa,srobin@bipaa,flegeai@bipaa"   # all ldap users that we use to connect as admin
+            WEBAPOLLO_DB_USERNAME: postgres
+            WEBAPOLLO_DB_PASSWORD: password
+            WEBAPOLLO_DB_DRIVER: "org.postgresql.Driver"
+            WEBAPOLLO_DB_DIALECT: "org.hibernate.dialect.PostgresPlusDialect"
+            WEBAPOLLO_DB_URI: "jdbc:postgresql://db/postgres"
+            WEBAPOLLO_FEATURE_HAS_DBXREFS: "true"
+            WEBAPOLLO_FEATURE_HAS_ATTRS: "true"
+            WEBAPOLLO_FEATURE_HAS_PUBMED: "true"
+            WEBAPOLLO_FEATURE_HAS_GO: "true"
+            WEBAPOLLO_FEATURE_HAS_COMMENTS: "true"
+            WEBAPOLLO_FEATURE_HAS_STATUS: "true"
+            CATALINA_OPTS: "-Xms512m -Xmx1g -XX:+CMSClassUnloadingEnabled -XX:+CMSPermGenSweepingEnabled -XX:+UseConcMarkSweepGC"
+        volumes_from:
+            - "galaxy:ro"
+        volumes:
+            - ./apollo/annotation_groups.tsv:/bootstrap/canned_values.txt:ro # Other canned things are preloaded in the docker image https://github.com/abretaud/docker-apollo/blob/bipaa/bootstrap.sh
+            #- ../blat/:/opt/blat/:ro # Mount the blat binary if you want to use it (could not include it in the docker image due to license issue)
+
+    apollo_db:
+        image: postgres:9.5
+        environment:
+            POSTGRES_PASSWORD: password
+        volumes:
+            - ./docker_data/apollo_db/:/var/lib/postgresql/data/
+
+    apollo-report:  # A report app following guidelines from https://bipaa.genouest.org/is/how-to-annotate-a-genome/
+        links:
+            - apollo:apollo
+        image: quay.io/abretaud/apollo-report:latest
+        environment:
+            APOLLO_EXT_URL: http://localhost/sp/genus_species/apollo/
+            APOLLO_USER: admin_apollo@bipaa
+            APOLLO_PASS: XXXXX # See apollo conf above
+            ADMIN_USERS: login1,login2 # ldap users that should see an extended report
+            APOLLO_MOUNTPOINT: /apollo-data/
+        volumes:
+            - ./docker_data/apollo/:/apollo-data/:ro
+            - ./apollo/annotation_groups.tsv:/data/annotation_groups.tsv:ro
+            - ./docker_data/apollo_report/:/data/report/
+
+    blast:
+        image: quay.io/abretaud/sf-blast:latest
+        links:
+            - blast_db:postgres
+        #hostname: gogepp-blast.genouest.org    # Hostname declared as submit node in sge conf (for drmaa mode only)
+        environment:
+            UPLOAD_LIMIT: 20M
+            MEMORY_LIMIT: 128M
+            DB_NAME: 'postgres'
+            ADMIN_EMAIL: 'xxx@example.org'  # email sender
+            ADMIN_NAME: 'xxxxx'  # email sender name
+            JOBS_METHOD: 'local'   # Can be local (= no sge jobs, but run inside the container) or drmaa (= to submit to a cluster)
+            JOBS_WORK_DIR: '/xxxx/blast_jobs/'  # disk accessible both from compute nodes and mounted in this docker (at the same path)
+            CDD_DELTA_PATH: '/db/cdd_delta/current/flat/cdd_delta'
+            BLAST_TITLE: 'Genus species blast server'
+            JOBS_SCHED_NAME: 'blast_gspecies'    # job names
+            PRE_CMD: '. /local/env/envblast-2.6.0.sh; . /local/env/envpython-2.7.sh;'    # executed at the beginning of each job
+            APACHE_RUN_USER: 'bipaaweb'   # username known by sge
+            APACHE_RUN_GROUP: 'bipaa'   # group known by sge
+            BASE_URL_PATH: '/sp/genus_species/blast/'
+            UID: 55914  # username known by sge (for drmaa mode only)
+            GID: 40259  # group known by sge (for drmaa mode only)
+        volumes:
+            #- ../blast-themes/xxx/:/var/www/blast/app/Resources/:ro # You can theme the app
+            - /data1/sge/:/usr/local/sge/:ro # the sge install
+            #- /xxxx/blast_jobs/:/xxxx/blast_jobs/ # (for drmaa mode only)
+            - ./blast/banks.yml:/var/www/blast/app/config/banks.yml:ro
+            - ./blast/links.yml:/etc/blast_links/links.yml:ro
+
+    blast_db:
+        image: postgres:9.5
+        environment:
+            - POSTGRES_PASSWORD=postgres
+            - PGDATA=/var/lib/postgresql/data/
+        volumes:
+            - ./docker_data/blast_db/:/var/lib/postgresql/data/
+
+    wiki:
+        image: quay.io/abretaud/mediawiki
+        environment:
+            MEDIAWIKI_SERVER: http://localhost
+            MEDIAWIKI_PROXY_PREFIX: /sp/genus_species/wiki
+            MEDIAWIKI_SITENAME: Genus species
+            MEDIAWIKI_SECRET_KEY: XXXXXXXXXX
+            MEDIAWIKI_DB_PASSWORD: password
+            MEDIAWIKI_ADMIN_USER: abretaud   # ldap user
+        links:
+            - wiki_db:db
+        volumes:
+            - ./docker_data/wiki_uploads:/images
+            #- ../bipaa_wiki.png:/var/www/mediawiki/resources/assets/wiki.png:ro # To change the logo at the top left
+
+    wiki_db:
+        image: postgres:9.5
+        volumes:
+            - ./docker_data/wiki_db/:/var/lib/postgresql/data/
diff --git a/docker_compose_generator.py b/docker_compose_generator.py
index f748af1292e2d9459874a4e72695c1d82e47c852..c7070757a10db407c078e6a41e78b7a490622129 100644
--- a/docker_compose_generator.py
+++ b/docker_compose_generator.py
@@ -1,5 +1,6 @@
 import os
 import argparse
+import logging
 # import yaml
 # import ruamel.yaml
 # import json
@@ -14,6 +15,8 @@ or in another python file as a module)
 
 TODO: write the whole yml dict from scratch (would allow the script to be more reusable into the future and make it
 more customizable while being clearer (instead of the default yml string or input docker-compose template)
+
+TODO: read json
 """
 
 
@@ -29,7 +32,7 @@ class DockerComposeGenerator:
     def generate(self):
         if self.template is None:
             self.template = str(os.getcwd() + "/docker-compose.yml")
-            # default docker-compose if no input template was specified --> doesnt work, yaml doesnt support string replacement as is (https://stackoverflow.com/questions/5484016/how-can-i-do-string-concatenation-or-string-replacement-in-yaml)
+            # default docker-compose if no input template was specified --> doesn't work: yaml doesn't support direct string replacement, needs tags (maybe TODO) (https://stackoverflow.com/questions/5484016/how-can-i-do-string-concatenation-or-string-replacement-in-yaml)
             # self.template = "{'version': '3.7', 'services': {'proxy': {'image': 'quay.io/abretaud/nginx-ldap:latest', 'volumes': ['./src_data/:/project_data/', './nginx/conf:/etc/nginx/conf.d'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-nginx.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/download`))', 'traefik.http.routers.genus_species-nginx.tls=true', 'traefik.http.routers.genus_species-nginx.entryPoints=webs', 'traefik.http.routers.genus_species-nginx.middlewares=sp-auth,sp-app-trailslash,sp-prefix', 'traefik.http.services.genus_species-nginx.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'tripal': {'image': 'quay.io/galaxy-genome-annotation/tripal:v2.x', 'depends_on': ['tripal-db', 'elasticsearch'], 'volumes': ['./docker_data/galaxy/:/export/:ro', './src_data/:/project_data/:ro', './src_data:/data:ro'], 'environment': {'DB_HOST': 'tripal-db.genus_species', 'BASE_URL_PATH': '/sp/genus_species', 'UPLOAD_LIMIT': '20M', 'MEMORY_LIMIT': '512M', 'TRIPAL_GIT_CLONE_MODULES': 'https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git  https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]', 'TRIPAL_DOWNLOAD_MODULES': '', 'TRIPAL_ENABLE_MODULES': 'tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch', 'SITE_NAME': 'Genus species', 'ELASTICSEARCH_HOST': 'elasticsearch.genus_species', 'ENABLE_JBROWSE': '/jbrowse/?data=data/gspecies', 'ENABLE_APOLLO': 'https://localhost/apollo/', 'ENABLE_BLAST': 1, 'ENABLE_DOWNLOAD': 1, 'ENABLE_WIKI': 1, 'ENABLE_GO': '/organism/Genus/species?pane=GO', 'ENABLE_ORTHOLOGY': 0, 'ENABLE_ORTHOLOGY_LINKS': 'http://localhost/sp/orthology/', 'ADMIN_PASSWORD': 'XXXXXX'}, 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-tripal.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species`))', 'traefik.http.routers.genus_species-tripal.tls=true', 'traefik.http.routers.genus_species-tripal.entryPoints=webs', 'traefik.http.routers.genus_species-tripal.middlewares=sp-auth,sp-trailslash,sp-prefix,tripal-addprefix', 'traefik.http.services.genus_species-tripal.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'tripal-db': {'image': 'quay.io/galaxy-genome-annotation/chado:1.31-jenkins26-pg9.5', 'environment': ['POSTGRES_PASSWORD=postgres', 'INSTALL_CHADO_SCHEMA=0'], 'volumes': ['./docker_data/tripal_db/:/var/lib/postgresql/data/'], 'networks': ['genus_species']}, 'elasticsearch': {'image': 'docker.elastic.co/elasticsearch/elasticsearch:6.6.1', 'volumes': ['./docker_data/elastic_search_index/:/usr/share/elasticsearch/data/'], 'environment': {'bootstrap.memory_lock': 'true', 'xpack.security.enabled': 'false', 'xpack.monitoring.enabled': 'false', 'xpack.ml.enabled': 'false', 'xpack.graph.enabled': 'false', 'xpack.watcher.enabled': 'false', 'cluster.routing.allocation.disk.threshold_enabled': 'false', 'ES_JAVA_OPTS': '-Xms500m -Xmx500m', 'TAKE_FILE_OWNERSHIP': 'true'}, 'networks': ['genus_species']}, 'galaxy': {'image': 'quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod', 'volumes': ['../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py', './docker_data/galaxy/:/export/', './src_data/:/project_data/:ro', './docker_data/jbrowse/:/jbrowse/data/', './docker_data/apollo/:/apollo-data/', '../galaxy_nginx.conf:/etc/nginx/uwsgi_params'], 'environment': {'NONUSE': 'nodejs,proftp,reports,condor', 'GALAXY_LOGGING': 'full', 'GALAXY_CONFIG_BRAND': 'Genus species', 'GALAXY_CONFIG_ALLOW_LIBRARY_PATH_PASTE': 'True', 'GALAXY_CONFIG_USE_REMOTE_USER': 'True', 'GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN': 'bipaa', 'GALAXY_CONFIG_ADMIN_USERS': 'admin@galaxy.org,gogepp@bipaa', 'ENABLE_FIX_PERMS': 0, 'PROXY_PREFIX': '/sp/genus_species/galaxy', 'GALAXY_TRIPAL_URL': 'http://tripal.genus_species/tripal/', 'GALAXY_TRIPAL_PASSWORD': 'XXXXXX', 'GALAXY_WEBAPOLLO_URL': 'http://one-of-the-swarm-node:8888/apollo/', 'GALAXY_WEBAPOLLO_USER': 'admin_apollo@bipaa', 'GALAXY_WEBAPOLLO_PASSWORD': 'XXXXXX', 'GALAXY_WEBAPOLLO_EXT_URL': '/apollo/', 'GALAXY_CHADO_DBHOST': 'tripal-db.genus_species', 'GALAXY_CHADO_DBSCHEMA': 'chado', 'GALAXY_AUTO_UPDATE_DB': 1, 'GALAXY_AUTO_UPDATE_CONDA': 1, 'GALAXY_AUTO_UPDATE_TOOLS': '/galaxy-central/tools_1.yaml', 'GALAXY_SHARED_DIR': '', 'BLAT_ENABLED': 1}, 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-galaxy.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/galaxy`))', 'traefik.http.routers.genus_species-galaxy.tls=true', 'traefik.http.routers.genus_species-galaxy.entryPoints=webs', 'traefik.http.routers.genus_species-galaxy.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-galaxy.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'jbrowse': {'image': 'quay.io/galaxy-genome-annotation/jbrowse:v1.16.8', 'volumes': ['./docker_data/galaxy/:/export/:ro', './src_data/:/project_data/:ro', './docker_data/jbrowse/:/jbrowse/data/:ro'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-jbrowse.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/jbrowse`))', 'traefik.http.routers.genus_species-jbrowse.tls=true', 'traefik.http.routers.genus_species-jbrowse.entryPoints=webs', 'traefik.http.routers.genus_species-jbrowse.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-jbrowse.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'blast': {'image': 'quay.io/abretaud/sf-blast:latest', 'depends_on': ['blast-db'], 'environment': {'DB_HOST': 'blast-db.genus_species', 'UPLOAD_LIMIT': '20M', 'MEMORY_LIMIT': '128M', 'DB_NAME': 'postgres', 'ADMIN_EMAIL': 'xxx@example.org', 'ADMIN_NAME': 'xxxxx', 'JOBS_METHOD': 'local', 'JOBS_WORK_DIR': '/xxxx/blast_jobs/', 'CDD_DELTA_PATH': '/db/cdd_delta/current/flat/cdd_delta', 'BLAST_TITLE': 'Genus species blast server', 'JOBS_SCHED_NAME': 'blast_gspecies', 'PRE_CMD': '. /local/env/envblast-2.6.0.sh; . /local/env/envpython-3.7.1.sh;', 'APACHE_RUN_USER': 'bipaaweb', 'APACHE_RUN_GROUP': 'bipaa', 'BASE_URL_PATH': '/sp/genus_species/blast/', 'UID': 55914, 'GID': 40259}, 'volumes': ['./blast/banks.yml:/var/www/blast/app/config/banks.yml:ro', './blast/links.yml:/etc/blast_links/links.yml:ro'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))', 'traefik.http.routers.genus_species-blast.tls=true', 'traefik.http.routers.genus_species-blast.entryPoints=webs', 'traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-blast.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'blast-db': {'image': 'postgres:9.6-alpine', 'environment': ['POSTGRES_PASSWORD=postgres', 'PGDATA=/var/lib/postgresql/data/'], 'volumes': ['./docker_data/blast_db/:/var/lib/postgresql/data/'], 'networks': ['genus_species']}, 'wiki': {'image': 'quay.io/abretaud/mediawiki', 'environment': {'MEDIAWIKI_SERVER': 'http://localhost', 'MEDIAWIKI_PROXY_PREFIX': '/sp/genus_species/wiki', 'MEDIAWIKI_SITENAME': 'Genus species', 'MEDIAWIKI_SECRET_KEY': 'XXXXXXXXXX', 'MEDIAWIKI_DB_HOST': 'wiki-db.genus_species', 'MEDIAWIKI_DB_PASSWORD': 'password', 'MEDIAWIKI_ADMIN_USER': 'abretaud'}, 'depends_on': ['wiki-db'], 'volumes': ['./docker_data/wiki_uploads:/images'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))', 'traefik.http.routers.genus_species-blast.tls=true', 'traefik.http.routers.genus_species-blast.entryPoints=webs', 'traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-blast.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'wiki-db': {'image': 'postgres:9.6-alpine', 'volumes': ['./docker_data/wiki_db/:/var/lib/postgresql/data/'], 'networks': ['genus_species']}}, 'networks': {'traefik': {'external': True}, 'genus_species': {'driver': 'overlay', 'name': 'genus_species'}}}"
             #
         else:
@@ -49,18 +52,16 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Generator of docker-compose.yml for GGA automated integration "
                                                  "following the templates available @ "
                                                  "https://gitlab.inria.fr/abretaud/genodock_demo/")
-
     parser.add_argument("-g", "--genus", type=str, help="input genus")
     parser.add_argument("-s", "--species", type=str, help="input species")
     parser.add_argument("-t", "--template", type=str, help="input template docker-compose.yml (compose or stack), optional")
     parser.add_argument("-o", "--outdir", type=str, help="where to write the output docker-compose")
-
     args = parser.parse_args()
 
-    generator = DockerComposeGenerator()
-    generator.genus = args.genus
-    generator.species = args.species
+    dc_generator = DockerComposeGenerator()
+    dc_generator.genus = args.genus
+    dc_generator.species = args.species
     if args.template:
-        generator.template = args.template
-    generator.outdir = args.outdir
-    generator.generate()
+        dc_generator.template = args.template
+    dc_generator.outdir = args.outdir
+    dc_generator.generate()
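
A hedged usage sketch for the entry point above (the script file name, template path and output directory are assumptions based on this diff, not confirmed names):

    python3 docker_compose_generator.py -g genus1 -s species1 -t stack_template.yml -o ./genus1_species1/
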
diff --git a/metadata_generator.py b/metadata_generator.py
index 20655f769190da1316250366edeba9878caad10c..2dd40ea62e2e802108e550112c32ae6b5860757f 100644
--- a/metadata_generator.py
+++ b/metadata_generator.py
@@ -1,11 +1,13 @@
 import os
+import logging
+import yaml
 
 """
 Metadata generator for gga_auto_load
 
-Creates a file that summarizes the organisms added by the script and what was done in the dedicated galaxy instance
+Creates a file that summarizes the actions taken by the autoload script (e.g. what was done in the dedicated galaxy instance)
 This organism metadata file is located in the metadata directory of the organism directory (i.e. /genus_species/metadata)
-By default, will also update a general metadata file (located in the parent directory i.e where all the organisms
+By default, will also create/update a general metadata file (located in the parent directory, i.e. where all the organisms
 directories are located)
 
 Metadata format: .yml
@@ -14,10 +16,17 @@ Metadata format: .yml
 
 class MetadataGenerator:
 
-    def __init__(self):
-        self.maindir = None
-        self.meta = None
-        self.mainfile = None
+    def __init__(self, maindir):
+        self.maindir = maindir
+        self.genus = None
+        self.species = None
+        self.metadata = None
+        self.do_update = False
 
 
+    def read_metadata(self):
+        # Load the general metadata file into the instance so it can be displayed/updated.
+        # The "metadata.yml" file name is an assumption; adjust it to the file the script actually writes.
+        with open(os.path.join(self.maindir, "metadata.yml")) as metadata_file:
+            self.metadata = yaml.safe_load(metadata_file)
+        for label, content in self.metadata.items():
+            logging.debug("%s: %s", label, content)
+
 
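
A minimal standalone sketch of the reworked class above, assuming the constructor shown in this diff; the directory path and attribute values are illustrative only:

    from metadata_generator import MetadataGenerator

    metadata_generator = MetadataGenerator(maindir="/path/to/main_directory")
    metadata_generator.genus = "genus1"
    metadata_generator.species = "species1"
    metadata_generator.read_metadata()
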
diff --git a/phaeoexplorer-change_pep_fasta_header.sh b/phaeoexplorer-change_pep_fasta_header.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0de7b9b7bada4edb88dff1d6422c34c1bfbbd4e8
--- /dev/null
+++ b/phaeoexplorer-change_pep_fasta_header.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+INFILE=$1
+OUTFILE=tmpfile
+
+FILE_HEADER_START=$(grep ">" "$INFILE" | cut -c 1-6 | sort | uniq)
+HEADER_START_STRING=">mRNA."
+
+if [[ "$FILE_HEADER_START" == "$HEADER_START_STRING" ]]
+then
+    /usr/local/genome2/mmo/scripts/common/common-stringSubstitute.py -i "$INFILE" -o "$OUTFILE" -p '^>mRNA' -r '>protein'
+    mv "$OUTFILE" "$INFILE"
+    echo "'>mRNA' replaced by '>protein' in $INFILE"
+else
+    echo "Abort. Not all headers start with '>mRNA.':"
+    echo "$FILE_HEADER_START"
+fi
\ No newline at end of file
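
Usage sketch for the script above, with a hypothetical peptide fasta file name; headers are only rewritten when every header in the file starts with '>mRNA.':

    ./phaeoexplorer-change_pep_fasta_header.sh genus1_species1_OGS1.0_pep.fasta
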
diff --git a/phaeoexplorer-change_transcript_fasta_header.sh b/phaeoexplorer-change_transcript_fasta_header.sh
new file mode 100755
index 0000000000000000000000000000000000000000..957190f7ae265f2b7b6226f217ae003bab8b021d
--- /dev/null
+++ b/phaeoexplorer-change_transcript_fasta_header.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+INFILE=$1
+OUTFILE=tmpfile
+
+./common-stringSubstitute.py -i "$INFILE" -o "$OUTFILE" -p '^>\d+ mRNA' -r '>mRNA'
+mv "$OUTFILE" "$INFILE"
+echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1"
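
For example (hypothetical input), a header line '>12345 mRNA gene1' becomes '>mRNA gene1':

    ./phaeoexplorer-change_transcript_fasta_header.sh genus1_species1_OGS1.0_transcript.fasta
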
diff --git a/stack_template.yml b/stack_template.yml
new file mode 100755
index 0000000000000000000000000000000000000000..68adc74b0148d4a44c653e1e0b1f12b538a6064b
--- /dev/null
+++ b/stack_template.yml
@@ -0,0 +1,286 @@
+# ./docker_data is created and filled with persistent data that should be backed up
+
+version: '3.7'
+services:
+    proxy:
+        image: quay.io/abretaud/nginx-ldap:latest
+        volumes:
+            - ./src_data/:/project_data/
+            #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
+            - ./nginx/conf:/etc/nginx/conf.d
+        networks:
+            - traefik
+            - genus_species
+        deploy:
+          labels:
+            # Download page
+            - "traefik.http.routers.genus_species-nginx.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/download`))"
+            - "traefik.http.routers.genus_species-nginx.tls=true"
+            - "traefik.http.routers.genus_species-nginx.entryPoints=webs"
+            - "traefik.http.routers.genus_species-nginx.middlewares=sp-auth,sp-app-trailslash,sp-prefix"
+            - "traefik.http.services.genus_species-nginx.loadbalancer.server.port=80"
+          restart_policy:
+            condition: on-failure
+            delay: 5s
+            max_attempts: 3
+            window: 120s
+
+    tripal:
+        image: quay.io/galaxy-genome-annotation/tripal:v2.x
+        depends_on:
+            - tripal-db
+            - elasticsearch
+        volumes:
+            - ./docker_data/galaxy/:/export/:ro
+            - ./src_data/:/project_data/:ro
+            - ./src_data:/data:ro
+            #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
+        environment:
+            DB_HOST: tripal-db.genus_species
+            BASE_URL_PATH: /sp/genus_species
+            UPLOAD_LIMIT: 20M
+            MEMORY_LIMIT: 512M
+            TRIPAL_GIT_CLONE_MODULES: "https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git  https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]"
+            TRIPAL_DOWNLOAD_MODULES: ""
+            TRIPAL_ENABLE_MODULES: "tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch"
+            SITE_NAME: "Genus species"
+            ELASTICSEARCH_HOST: elasticsearch.genus_species
+            ENABLE_JBROWSE: /jbrowse/?data=data/gspecies
+            ENABLE_APOLLO: https://localhost/apollo/
+            ENABLE_BLAST: 1
+            ENABLE_DOWNLOAD: 1
+            ENABLE_WIKI: 1
+            ENABLE_GO: /organism/Genus/species?pane=GO
+            ENABLE_ORTHOLOGY: 0
+            ENABLE_ORTHOLOGY_LINKS: http://localhost/sp/orthology/
+            #THEME: "bipaa"    # Use this to use another theme
+            #THEME_GIT_CLONE: "https://gitlab.inria.fr/abretaud/tripal_bipaa.git"    # Use this to install another theme
+            ADMIN_PASSWORD: XXXXXX  # You need to define it and update it in galaxy config below
+        networks:
+            - traefik
+            - genus_species
+        deploy:
+          labels:
+            - "traefik.http.routers.genus_species-tripal.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species`))"
+            - "traefik.http.routers.genus_species-tripal.tls=true"
+            - "traefik.http.routers.genus_species-tripal.entryPoints=webs"
+            - "traefik.http.routers.genus_species-tripal.middlewares=sp-auth,sp-trailslash,sp-prefix,tripal-addprefix"
+            - "traefik.http.services.genus_species-tripal.loadbalancer.server.port=80"
+          restart_policy:
+            condition: on-failure
+            delay: 5s
+            max_attempts: 3
+            window: 120s
+
+    tripal-db:
+        image: quay.io/galaxy-genome-annotation/chado:1.31-jenkins26-pg9.5
+        environment:
+            - POSTGRES_PASSWORD=postgres
+            # The default chado image would try to install the schema on first run,
+            # we just want the tools to be available.
+            - INSTALL_CHADO_SCHEMA=0
+        volumes:
+            - ./docker_data/tripal_db/:/var/lib/postgresql/data/
+        networks:
+            - genus_species
+
+    elasticsearch:
+        image: docker.elastic.co/elasticsearch/elasticsearch:6.6.1
+        #deploy:
+          #resources:
+            #limits:
+              #memory: 500M
+        volumes:
+            - ./docker_data/elastic_search_index/:/usr/share/elasticsearch/data/
+        environment:
+            bootstrap.memory_lock: "true"
+            xpack.security.enabled: "false"
+            xpack.monitoring.enabled: "false"
+            xpack.ml.enabled: "false"
+            xpack.graph.enabled: "false"
+            xpack.watcher.enabled: "false"
+            cluster.routing.allocation.disk.threshold_enabled: "false"
+            ES_JAVA_OPTS: "-Xms500m -Xmx500m"
+            TAKE_FILE_OWNERSHIP: "true"
+        networks:
+            - genus_species
+
+    galaxy:
+        image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
+        volumes:
+            - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py
+            - ./docker_data/galaxy/:/export/
+            - ./src_data/:/project_data/:ro
+            #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
+            - ./docker_data/jbrowse/:/jbrowse/data/
+            - ./docker_data/apollo/:/apollo-data/
+            - ../galaxy_nginx.conf:/etc/nginx/uwsgi_params
+        environment:
+            NONUSE: nodejs,proftp,reports,condor
+            GALAXY_LOGGING: full
+            GALAXY_CONFIG_BRAND: "Genus species"
+            GALAXY_CONFIG_ALLOW_LIBRARY_PATH_PASTE: "True"
+            GALAXY_CONFIG_USE_REMOTE_USER: "True"
+            GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN: "bipaa"
+            GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org,gogepp@bipaa"   # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect
+            ENABLE_FIX_PERMS: 0
+            PROXY_PREFIX: /sp/genus_species/galaxy
+            GALAXY_TRIPAL_URL: http://tripal.genus_species/tripal/
+            GALAXY_TRIPAL_PASSWORD: XXXXXX  # See tripal config above
+            GALAXY_WEBAPOLLO_URL: http://one-of-the-swarm-node:8888/apollo/
+            GALAXY_WEBAPOLLO_USER: "admin_apollo@bipaa"
+            GALAXY_WEBAPOLLO_PASSWORD: "XXXXXX"  # See tripal config above
+            GALAXY_WEBAPOLLO_EXT_URL: /apollo/
+            GALAXY_CHADO_DBHOST: tripal-db.genus_species
+            GALAXY_CHADO_DBSCHEMA: chado
+            GALAXY_AUTO_UPDATE_DB: 1
+            GALAXY_AUTO_UPDATE_CONDA: 1
+            GALAXY_AUTO_UPDATE_TOOLS: "/galaxy-central/tools_1.yaml"
+            GALAXY_SHARED_DIR: ""
+            BLAT_ENABLED: 1
+            master_api_key: MASTERLOCK
+        networks:
+            - traefik
+            - genus_species
+        deploy:
+          labels:
+            - "traefik.http.routers.genus_species-galaxy.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/galaxy`))"
+            - "traefik.http.routers.genus_species-galaxy.tls=true"
+            - "traefik.http.routers.genus_species-galaxy.entryPoints=webs"
+            - "traefik.http.routers.genus_species-galaxy.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix"
+            - "traefik.http.services.genus_species-galaxy.loadbalancer.server.port=80"
+          restart_policy:
+            condition: on-failure
+            delay: 5s
+            max_attempts: 3
+            window: 120s
+
+    jbrowse:
+        image: quay.io/galaxy-genome-annotation/jbrowse:v1.16.8
+        volumes:
+            - ./docker_data/galaxy/:/export/:ro
+            - ./src_data/:/project_data/:ro
+            #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
+            - ./docker_data/jbrowse/:/jbrowse/data/:ro
+        networks:
+            - traefik
+            - genus_species
+        deploy:
+          labels:
+            - "traefik.http.routers.genus_species-jbrowse.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/jbrowse`))"
+            - "traefik.http.routers.genus_species-jbrowse.tls=true"
+            - "traefik.http.routers.genus_species-jbrowse.entryPoints=webs"
+            - "traefik.http.routers.genus_species-jbrowse.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix"
+            - "traefik.http.services.genus_species-jbrowse.loadbalancer.server.port=80"
+          restart_policy:
+            condition: on-failure
+            delay: 5s
+            max_attempts: 3
+            window: 120s
+
+    blast:
+        image: quay.io/abretaud/sf-blast:latest
+        depends_on:
+            - blast-db
+        environment:
+            DB_HOST: blast-db.genus_species
+            UPLOAD_LIMIT: 20M
+            MEMORY_LIMIT: 128M
+            DB_NAME: 'postgres'
+            ADMIN_EMAIL: 'xxx@example.org'  # email sender
+            ADMIN_NAME: 'xxxxx'  # email sender name
+            JOBS_METHOD: 'local'   # Can be local (= no sge jobs, but run inside the container) or drmaa (= to submit to a cluster)
+            JOBS_WORK_DIR: '/xxxx/blast_jobs/'  # disk accessible both from compute nodes and mounted in this docker (at the same path)
+            CDD_DELTA_PATH: '/db/cdd_delta/current/flat/cdd_delta'
+            BLAST_TITLE: 'Genus species blast server'
+            JOBS_SCHED_NAME: 'blast_gspecies'    # job names
+            PRE_CMD: '. /local/env/envblast-2.6.0.sh; . /local/env/envpython-3.7.1.sh;'    # executed at the beginning of each job
+            APACHE_RUN_USER: 'bipaaweb'   # username known by sge
+            APACHE_RUN_GROUP: 'bipaa'   # group known by sge
+            BASE_URL_PATH: '/sp/genus_species/blast/'
+            UID: 55914  # username known by sge (for drmaa mode only)
+            GID: 40259  # group known by sge (for drmaa mode only)
+            #JOBS_DRMAA_NATIVE: '-p web' # This line and following for slurm
+            #DRMAA_METHOD: 'slurm' # This line and following for slurm
+        volumes:
+            #- ../blast-themes/xxx/:/var/www/blast/app/Resources/:ro # You can theme the app
+            #- /data1/sge/:/usr/local/sge/:ro # an sge install
+            #- /xxxx/blast_jobs/:/xxxx/blast_jobs/ # (for drmaa mode only)
+            - ./blast/banks.yml:/var/www/blast/app/config/banks.yml:ro
+            - ./blast/links.yml:/etc/blast_links/links.yml:ro
+            #- /data1/slurm/slurm.conf:/etc/slurm-llnl/slurm.conf:ro # This line and following for slurm
+            #- /data1/slurm/gres.conf:/etc/slurm-llnl/gres.conf:ro
+            #- /data1/slurm/cgroup.conf:/etc/slurm-llnl/cgroup.conf:ro
+            #- /data1/slurm/slurmdbd.conf:/etc/slurm-llnl/slurmdbd.conf:ro
+            #- /data1/slurm/drmaa/:/etc/slurm-llnl/drmaa/:ro
+            #- /etc/munge/:/etc/munge/:ro
+        networks:
+            - traefik
+            - genus_species
+        deploy:
+          labels:
+            - "traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))"
+            - "traefik.http.routers.genus_species-blast.tls=true"
+            - "traefik.http.routers.genus_species-blast.entryPoints=webs"
+            - "traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix"
+            - "traefik.http.services.genus_species-blast.loadbalancer.server.port=80"
+          restart_policy:
+            condition: on-failure
+            delay: 5s
+            max_attempts: 3
+            window: 120s
+
+    blast-db:
+        image: postgres:9.6-alpine
+        environment:
+            - POSTGRES_PASSWORD=postgres
+            - PGDATA=/var/lib/postgresql/data/
+        volumes:
+            - ./docker_data/blast_db/:/var/lib/postgresql/data/
+        networks:
+            - genus_species
+
+    wiki:
+        image: quay.io/abretaud/mediawiki
+        environment:
+            MEDIAWIKI_SERVER: http://localhost
+            MEDIAWIKI_PROXY_PREFIX: /sp/genus_species/wiki
+            MEDIAWIKI_SITENAME: Genus species
+            MEDIAWIKI_SECRET_KEY: XXXXXXXXXX
+            MEDIAWIKI_DB_HOST: wiki-db.genus_species
+            MEDIAWIKI_DB_PASSWORD: password
+            MEDIAWIKI_ADMIN_USER: abretaud   # ldap user
+        depends_on:
+            - wiki-db
+        volumes:
+            - ./docker_data/wiki_uploads:/images
+            #- ../bipaa_wiki.png:/var/www/mediawiki/resources/assets/wiki.png:ro # To change the logo at the top left
+        networks:
+            - traefik
+            - genus_species
+        deploy:
+          labels:
+            - "traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))"
+            - "traefik.http.routers.genus_species-blast.tls=true"
+            - "traefik.http.routers.genus_species-blast.entryPoints=webs"
+            - "traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix"
+            - "traefik.http.services.genus_species-blast.loadbalancer.server.port=80"
+          restart_policy:
+            condition: on-failure
+            delay: 5s
+            max_attempts: 3
+            window: 120s
+
+    wiki-db:
+        image: postgres:9.6-alpine
+        volumes:
+            - ./docker_data/wiki_db/:/var/lib/postgresql/data/
+        networks:
+            - genus_species
+
+networks:
+    traefik:
+        external: true
+    genus_species:
+        driver: overlay
+        name: genus_species
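
Because this template relies on deploy/restart_policy keys and the external traefik overlay network, it targets Docker Swarm rather than plain docker-compose; a deployment sketch (the stack name is an assumption):

    docker stack deploy --compose-file docker-compose.yml genus1_species1
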
diff --git a/table_parser.py b/table_parser.py
new file mode 100755
index 0000000000000000000000000000000000000000..0058184a51098beda5e9b320e3b6b416f169a116
--- /dev/null
+++ b/table_parser.py
@@ -0,0 +1,96 @@
+import os
+import pandas  # xlrd required for excel files reading
+import numpy
+import json
+import argparse
+import logging
+from datetime import datetime
+
+"""
+Table parser for phaeoexplorer data. Tested with xls and xlsx input formats; should work with csv format as well
+Does not work for ods spreadsheets (save as xls or xlsx instead) --> need to handle with pandas_ods_reader (requires ezodf, lxml)
+Does not support multiple sheets (TODO: "integration" and "update" sheets (1 and 2))
+See example toy table (toy_table.xls)
+
+standalone usage: python3 table_parser.py <tabulated_file> -d <directory_to_write_json_to (default: cwd)>
+"""
+
+class TableParser:
+
+    def __init__(self, table_file, dir):
+        self.dir = os.path.abspath(dir)
+        self.table_file = table_file
+        self.method = None  # TODO: instant launch or just parse (standalone)
+        self.extension = None
+        self.meta = dict()
+        self.json_file = None
+
+    def parse_table(self, extension):
+        # Load the table with the reader matching its extension
+        if extension in ("xls", "xlsx"):
+            pandas_table = pandas.DataFrame(pandas.read_excel(self.table_file))
+        elif extension == "csv":
+            pandas_table = pandas.DataFrame(pandas.read_csv(self.table_file))
+        else:
+            logging.info("input tabulated file doesn't have a supported extension (xls, xlsx or csv)")
+            return
+
+        # Replace characters that would be problematic in file/directory names with underscores
+        pandas_table = pandas_table.replace(numpy.nan, "", regex=True)
+        for char in " ,.()-/":
+            pandas_table = pandas_table.replace("\\" + char, "_", regex=True)
+        pandas_table = pandas_table.replace("\\__", "_", regex=True)
+
+        # Fill in default values for empty version and date cells
+        pandas_table.loc[pandas_table["genome version"] == "", "genome version"] = "1.0"
+        pandas_table.loc[pandas_table["ogs version"] == "", "ogs version"] = "1.0"
+        pandas_table.loc[pandas_table["version"] == "", "version"] = "1.0"
+        pandas_table.loc[pandas_table["date"] == "", "date"] = datetime.today().strftime("%Y-%m-%d")
+
+        # Write one dict per organism (= per table row) to the output json file
+        with open(os.path.join(self.dir, self.json_file), 'w') as json_file:
+            json_content = list()
+            for organism in range(0, len(pandas_table.index)):
+                organism_dict = pandas_table.iloc[organism].to_dict()
+                for k, v in organism_dict.items():
+                    v = "_".join(str(v).split(" ")).replace("__", "_")
+                    organism_dict[k] = v.rstrip("_")  # store the cleaned value back into the dict
+                json_content.append(organism_dict)
+            json.dump(json_content, json_file, indent=4)
+
+    def write_json(self, data, filename):
+        with open(filename, 'w') as f:
+            json.dump(data, f, indent=4)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Table parser for phaeoexplorer data")
+    parser.add_argument("input", type=str, help="input table")
+    parser.add_argument("-d", "--dir", type=str, help="where to write the output json file that will be used for integration", default=os.getcwd())
+    args = parser.parse_args()
+
+    tp = TableParser(table_file=args.input, dir=args.dir)
+    tp.extension = args.input.split(".")[-1]  # use the last suffix so dotted file names are parsed correctly
+    tp.json_file = "dataloader_" + datetime.today().strftime("%Y%m%d") + ".json"
+    tp.parse_table(extension=tp.extension)
\ No newline at end of file
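
Standalone usage sketch matching the docstring above, using the toy table it mentions; this writes a dataloader_<YYYYMMDD>.json file into the chosen directory:

    python3 table_parser.py toy_table.xls -d .
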
diff --git a/toy.json b/toy.json
new file mode 100755
index 0000000000000000000000000000000000000000..60e8f59f83d3eca57d43bac99107abb72bc5357d
--- /dev/null
+++ b/toy.json
@@ -0,0 +1,27 @@
+[
+    {
+        "genus" : "genus1",
+        "species" : "Species1",
+        "common" : "Common1",
+        "strain" : "strain1",
+        "sex" : "male",
+        "version" : "1.0",
+        "performed by" : "Institute John Doe",
+        "genome version" : "1.0",
+        "ogs version" : "1.0",
+        "date" : "2020-01-01"
+    },
+    {
+        "genus" : "genus2",
+        "species" : "Species2",
+        "common" : "Common2",
+        "strain" : "strain2",
+        "sex" : "female",
+        "version" : "1.0",
+        "performed by" : "Institute Jane Doe",
+        "genome version" : "1.0",
+        "ogs version" : "1.0",
+        "date" : "2020-01-01"
+    }
+]
+