diff --git a/README.md b/README.md index 17a381d2499f8e9baf24f1ee3840526ecca4a55b..88a48e410076e2568393ddfa9707c7ef3e3e2d44 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # gga_load_data -Bioblend-based script to load data into GGA +Automated integration of new organisms into GGA instances -TODO: Metadata in /projet/sbr/phaeoexplorer/ \ No newline at end of file + +Requirements: +- bioblend (v0.13) +- PyYaml +- pandas diff --git a/autoload.py b/autoload.py index 089a4d592abd688cf9429efee39bca511b561ca7..ff1e7c26372819d427e0413106204bc7a4c2f864 100644 --- a/autoload.py +++ b/autoload.py @@ -11,7 +11,7 @@ import numpy import pandas import logging import re -from . import table_parser +from . import table_parser, docker_compose_generator class Autoload: @@ -23,7 +23,7 @@ class Autoload: To run the workflows, place them in the same directory as this script, and add the method + the workflow parameters in the main invocation (at the end of the file) - TODO: store workflow as string in + TODO: store main workflow as string """ def __init__(self, species_parameters_dictionary: dict, args): @@ -55,6 +55,7 @@ class Autoload: self.datasets = dict() self.source_files = dict() self.workflow_name = None + self.docker_compose_generator = None # Test the connection to the galaxy instance for the current species # Additionally set some class attributes @@ -418,13 +419,12 @@ class Autoload: return None -if __name__ == "main": +if __name__ == "__main__": parser = argparse.ArgumentParser(description="Automatic loading and interaction with galaxy instance (GGA)" ", following the protocol @ " "http://gitlab.sb-roscoff.fr/abims/e-infra/gga") - parser.add_argument("-j", "--json", type=str, help="Input JSON file, alternative" - " to input csv file", action="store_true") - parser.add_argument("table", type=str, help="Input table (tabulated file that describes all data)", + parser.add_argument("input", type=str, help="Input table (tabulated file that describes all data)" + "or json 
file", action="store_true") parser.add_argument("-v", "--verbose", help="Increase output verbosity", @@ -440,6 +440,9 @@ if __name__ == "main": "index tripal data, populate materialized view, " "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse", action="store_true") + parser.add_argument("--generate-docker-compose", + help="Generate docker-compose.yml for current species", + action="store_true") args = parser.parse_args() if args.verbose: @@ -447,8 +450,15 @@ if __name__ == "main": else: logging.basicConfig(level=logging.INFO) + if str(args.input).endswith(".json"): + input_json = args.input + else: + tp = table_parser.TableParser() + logging.info("parsing input table") + tp.table = args.input + input_json = tp.parse_table(mode="simple", method="table_to_json") sp_dict_list = list() - with open(args.json, 'r') as infile: + with open(input_json, 'r') as infile: json_sp_dict = json.load(infile) json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True) for json_sp in json_sp_dict: diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..a67bd0d6cb9d9d593660e5131e3489e430381291 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,285 @@ +# ./docker_data is created and filled with persistent data that should be backuped + +version: '3.7' +services: + proxy: + image: quay.io/abretaud/nginx-ldap:latest + volumes: + - ./src_data/:/project_data/ + #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... 
+ - ./nginx/conf:/etc/nginx/conf.d + networks: + - traefik + - genus_species + deploy: + labels: + # Download page + - "traefik.http.routers.genus_species-nginx.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/download`))" + - "traefik.http.routers.genus_species-nginx.tls=true" + - "traefik.http.routers.genus_species-nginx.entryPoints=webs" + - "traefik.http.routers.genus_species-nginx.middlewares=sp-auth,sp-app-trailslash,sp-prefix" + - "traefik.http.services.genus_species-nginx.loadbalancer.server.port=80" + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + + tripal: + image: quay.io/galaxy-genome-annotation/tripal:v2.x + depends_on: + - tripal-db + - elasticsearch + volumes: + - ./docker_data/galaxy/:/export/:ro + - ./src_data/:/project_data/:ro + - ./src_data:/data:ro + #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... + environment: + DB_HOST: tripal-db.genus_species + BASE_URL_PATH: /sp/genus_species + UPLOAD_LIMIT: 20M + MEMORY_LIMIT: 512M + TRIPAL_GIT_CLONE_MODULES: "https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]" + TRIPAL_DOWNLOAD_MODULES: "" + TRIPAL_ENABLE_MODULES: "tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch" + SITE_NAME: "Genus species" + ELASTICSEARCH_HOST: elasticsearch.genus_species + ENABLE_JBROWSE: /jbrowse/?data=data/gspecies + ENABLE_APOLLO: https://localhost/apollo/ + ENABLE_BLAST: 1 + ENABLE_DOWNLOAD: 1 + ENABLE_WIKI: 1 + ENABLE_GO: /organism/Genus/species?pane=GO + ENABLE_ORTHOLOGY: 0 + 
ENABLE_ORTHOLOGY_LINKS: http://localhost/sp/orthology/ + #THEME: "bipaa" # Use this to use another theme + #THEME_GIT_CLONE: "https://gitlab.inria.fr/abretaud/tripal_bipaa.git" # Use this to install another theme + ADMIN_PASSWORD: XXXXXX # You need to define it and update it in galaxy config below + networks: + - traefik + - genus_species + deploy: + labels: + - "traefik.http.routers.genus_species-tripal.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species`))" + - "traefik.http.routers.genus_species-tripal.tls=true" + - "traefik.http.routers.genus_species-tripal.entryPoints=webs" + - "traefik.http.routers.genus_species-tripal.middlewares=sp-auth,sp-trailslash,sp-prefix,tripal-addprefix" + - "traefik.http.services.genus_species-tripal.loadbalancer.server.port=80" + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + + tripal-db: + image: quay.io/galaxy-genome-annotation/chado:1.31-jenkins26-pg9.5 + environment: + - POSTGRES_PASSWORD=postgres + # The default chado image would try to install the schema on first run, + # we just want the tools to be available. 
+ - INSTALL_CHADO_SCHEMA=0 + volumes: + - ./docker_data/tripal_db/:/var/lib/postgresql/data/ + networks: + - genus_species + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:6.6.1 + #deploy: + #resources: + #limits: + #memory: 500M + volumes: + - ./docker_data/elastic_search_index/:/usr/share/elasticsearch/data/ + environment: + bootstrap.memory_lock: "true" + xpack.security.enabled: "false" + xpack.monitoring.enabled: "false" + xpack.ml.enabled: "false" + xpack.graph.enabled: "false" + xpack.watcher.enabled: "false" + cluster.routing.allocation.disk.threshold_enabled: "false" + ES_JAVA_OPTS: "-Xms500m -Xmx500m" + TAKE_FILE_OWNERSHIP: "true" + networks: + - genus_species + + galaxy: + image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod + volumes: + - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py + - ./docker_data/galaxy/:/export/ + - ./src_data/:/project_data/:ro + #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... 
+ - ./docker_data/jbrowse/:/jbrowse/data/ + - ./docker_data/apollo/:/apollo-data/ + - ../galaxy_nginx.conf:/etc/nginx/uwsgi_params + environment: + NONUSE: nodejs,proftp,reports,condor + GALAXY_LOGGING: full + GALAXY_CONFIG_BRAND: "Genus species" + GALAXY_CONFIG_ALLOW_LIBRARY_PATH_PASTE: "True" + GALAXY_CONFIG_USE_REMOTE_USER: "True" + GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN: "bipaa" + GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org,gogepp@bipaa" # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect + ENABLE_FIX_PERMS: 0 + PROXY_PREFIX: /sp/genus_species/galaxy + GALAXY_TRIPAL_URL: http://tripal.genus_species/tripal/ + GALAXY_TRIPAL_PASSWORD: XXXXXX # See tripal config above + GALAXY_WEBAPOLLO_URL: http://one-of-the-swarm-node:8888/apollo/ + GALAXY_WEBAPOLLO_USER: "admin_apollo@bipaa" + GALAXY_WEBAPOLLO_PASSWORD: "XXXXXX" # See tripal config below + GALAXY_WEBAPOLLO_EXT_URL: /apollo/ + GALAXY_CHADO_DBHOST: tripal-db.genus_species + GALAXY_CHADO_DBSCHEMA: chado + GALAXY_AUTO_UPDATE_DB: 1 + GALAXY_AUTO_UPDATE_CONDA: 1 + GALAXY_AUTO_UPDATE_TOOLS: "/galaxy-central/tools_1.yaml" + GALAXY_SHARED_DIR: "" + BLAT_ENABLED: 1 + networks: + - traefik + - genus_species + deploy: + labels: + - "traefik.http.routers.genus_species-galaxy.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/galaxy`))" + - "traefik.http.routers.genus_species-galaxy.tls=true" + - "traefik.http.routers.genus_species-galaxy.entryPoints=webs" + - "traefik.http.routers.genus_species-galaxy.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix" + - "traefik.http.services.genus_species-galaxy.loadbalancer.server.port=80" + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + + jbrowse: + image: quay.io/galaxy-genome-annotation/jbrowse:v1.16.8 + volumes: + - ./docker_data/galaxy/:/export/:ro + - ./src_data/:/project_data/:ro + #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... 
+ - ./docker_data/jbrowse/:/jbrowse/data/:ro + networks: + - traefik + - genus_species + deploy: + labels: + - "traefik.http.routers.genus_species-jbrowse.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/jbrowse`))" + - "traefik.http.routers.genus_species-jbrowse.tls=true" + - "traefik.http.routers.genus_species-jbrowse.entryPoints=webs" + - "traefik.http.routers.genus_species-jbrowse.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix" + - "traefik.http.services.genus_species-jbrowse.loadbalancer.server.port=80" + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + + blast: + image: quay.io/abretaud/sf-blast:latest + depends_on: + - blast-db + environment: + DB_HOST: blast-db.genus_species + UPLOAD_LIMIT: 20M + MEMORY_LIMIT: 128M + DB_NAME: 'postgres' + ADMIN_EMAIL: 'xxx@example.org' # email sender + ADMIN_NAME: 'xxxxx' # email sender name + JOBS_METHOD: 'local' # Can be local (= no sge jobs, but run inside the container) or drmaa (= to submit to a cluster) + JOBS_WORK_DIR: '/xxxx/blast_jobs/' # disk accessible both from compute nodes and mounted in this docker (at the same path) + CDD_DELTA_PATH: '/db/cdd_delta/current/flat/cdd_delta' + BLAST_TITLE: 'Genus species blast server' + JOBS_SCHED_NAME: 'blast_gspecies' # job names + PRE_CMD: '. /local/env/envblast-2.6.0.sh; . 
/local/env/envpython-3.7.1.sh;' # executed at the beginning of each job + APACHE_RUN_USER: 'bipaaweb' # username known by sge + APACHE_RUN_GROUP: 'bipaa' # group known by sge + BASE_URL_PATH: '/sp/genus_species/blast/' + UID: 55914 # username known by sge (for drmaa mode only) + GID: 40259 # group known by sge (for drmaa mode only) + #JOBS_DRMAA_NATIVE: '-p web' # This line and following for slurm + #DRMAA_METHOD: 'slurm' # This line and following for slurm + volumes: + #- ../blast-themes/xxx/:/var/www/blast/app/Resources/:ro # You can theme the app + #- /data1/sge/:/usr/local/sge/:ro # an sge install + #- /xxxx/blast_jobs/:/xxxx/blast_jobs/ # (for drmaa mode only) + - ./blast/banks.yml:/var/www/blast/app/config/banks.yml:ro + - ./blast/links.yml:/etc/blast_links/links.yml:ro + #- /data1/slurm/slurm.conf:/etc/slurm-llnl/slurm.conf:ro # This line and following for slurm + #- /data1/slurm/gres.conf:/etc/slurm-llnl/gres.conf:ro + #- /data1/slurm/cgroup.conf:/etc/slurm-llnl/cgroup.conf:ro + #- /data1/slurm/slurmdbd.conf:/etc/slurm-llnl/slurmdbd.conf:ro + #- /data1/slurm/drmaa/:/etc/slurm-llnl/drmaa/:ro + #- /etc/munge/:/etc/munge/:ro + networks: + - traefik + - genus_species + deploy: + labels: + - "traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))" + - "traefik.http.routers.genus_species-blast.tls=true" + - "traefik.http.routers.genus_species-blast.entryPoints=webs" + - "traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix" + - "traefik.http.services.genus_species-blast.loadbalancer.server.port=80" + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + + blast-db: + image: postgres:9.6-alpine + environment: + - POSTGRES_PASSWORD=postgres + - PGDATA=/var/lib/postgresql/data/ + volumes: + - ./docker_data/blast_db/:/var/lib/postgresql/data/ + networks: + - genus_species + + wiki: + image: quay.io/abretaud/mediawiki + environment: + 
MEDIAWIKI_SERVER: http://localhost + MEDIAWIKI_PROXY_PREFIX: /sp/genus_species/wiki + MEDIAWIKI_SITENAME: Genus species + MEDIAWIKI_SECRET_KEY: XXXXXXXXXX + MEDIAWIKI_DB_HOST: wiki-db.genus_species + MEDIAWIKI_DB_PASSWORD: password + MEDIAWIKI_ADMIN_USER: abretaud # ldap user + depends_on: + - wiki-db + volumes: + - ./docker_data/wiki_uploads:/images + #- ../bipaa_wiki.png:/var/www/mediawiki/resources/assets/wiki.png:ro # To change the logo at the top left + networks: + - traefik + - genus_species + deploy: + labels: + - "traefik.http.routers.genus_species-wiki.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/wiki`))" + - "traefik.http.routers.genus_species-wiki.tls=true" + - "traefik.http.routers.genus_species-wiki.entryPoints=webs" + - "traefik.http.routers.genus_species-wiki.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix" + - "traefik.http.services.genus_species-wiki.loadbalancer.server.port=80" + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + window: 120s + + wiki-db: + image: postgres:9.6-alpine + volumes: + - ./docker_data/wiki_db/:/var/lib/postgresql/data/ + networks: + - genus_species + +networks: + traefik: + external: true + genus_species: + driver: overlay + name: genus_species diff --git a/docker_compose_generator.py b/docker_compose_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..f748af1292e2d9459874a4e72695c1d82e47c852 --- /dev/null +++ b/docker_compose_generator.py @@ -0,0 +1,66 @@ +import os +import argparse +# import yaml +# import ruamel.yaml +# import json + +""" +docker-compose.yml generator +The method "generate" works for both docker-compose architecture (old), or docker stacks (new) +This method will write a formatted docker-compose.yml for the specified organism (only requires genus and species) + +Made to work in the integration streamlined script "autoload.py" but can be used as a standalone (either with a CLI +or in another python file as a module) + 
# TODO: write the whole yml dict from scratch (would allow the script to be more reusable into the future and make it
# more customizable while being clearer (instead of the default yml string or input docker-compose template)


class DockerComposeGenerator:
    """Write a species-specific docker-compose.yml from a template file.

    The template is copied line by line with four placeholders substituted
    (see ``_substitute``): ``genus_species``, ``Genus species``,
    ``Genus/species`` and ``gspecies``.

    The caller sets the attributes after construction:
        mode: not read by any method of this class.
        genus: genus name, e.g. "Ectocarpus".
        species: species name, e.g. "siliculosus".
        template: path of the template to read; when None, ``generate``
            falls back to ``docker-compose.yml`` in the current directory.
        outdir: directory that receives the generated ``docker-compose.yml``.
    """

    def __init__(self):
        self.mode = None
        self.genus = None
        self.species = None
        self.template = None
        self.outdir = None

    def generate(self):
        """Read the template, substitute the placeholders and write the result.

        Raises:
            ValueError: if genus, species or outdir is unset or empty.
            OSError: if the template cannot be read or the output cannot be
                written.
        """
        missing = [name for name in ("genus", "species", "outdir") if not getattr(self, name)]
        if missing:
            raise ValueError("DockerComposeGenerator is missing: " + ", ".join(missing))

        if self.template is None:
            # Fall back to the template shipped next to the scripts. An inline
            # default (the template embedded as a Python string) was tried and
            # abandoned because YAML offers no string replacement of its own:
            # https://stackoverflow.com/questions/5484016/how-can-i-do-string-concatenation-or-string-replacement-in-yaml
            self.template = os.path.join(os.getcwd(), "docker-compose.yml")

        # The substitution must run for the default template as well as for a
        # user-supplied one, so it is deliberately NOT in an ``else`` branch.
        with open(self.template, 'r') as infile:
            content = [self._substitute(line) for line in infile]
        self.write_yml(content=content)

    def _substitute(self, line):
        """Return ``line`` with the template placeholders replaced.

        The replacement order is significant and kept as in the first
        version: ``genus_species`` first, ``gspecies`` last.
        """
        genus_lower = self.genus.lower()
        return (line
                .replace("genus_species", genus_lower + "_" + self.species)
                .replace("Genus species", self.genus + " " + self.species)
                .replace("Genus/species", self.genus + "/" + self.species)
                .replace("gspecies", genus_lower[0] + self.species))

    def write_yml(self, content):
        """Write the iterable of lines ``content`` to <outdir>/docker-compose.yml."""
        with open(os.path.join(self.outdir, "docker-compose.yml"), 'w') as outfile:
            outfile.writelines(content)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generator of docker-compose.yml for GGA automated integration "
                                                 "following the templates available @ "
                                                 "https://gitlab.inria.fr/abretaud/genodock_demo/")

    # genus, species and outdir are mandatory: generate() cannot work without them.
    parser.add_argument("-g", "--genus", type=str, required=True, help="input genus")
    parser.add_argument("-s", "--species", type=str, required=True, help="input species")
    parser.add_argument("-t", "--template", type=str,
                        help="input template docker-compose.yml (compose or stack), optional")
    parser.add_argument("-o", "--outdir", type=str, required=True, help="where to write the output docker-compose")

    args = parser.parse_args()

    generator = DockerComposeGenerator()
    generator.genus = args.genus
    generator.species = args.species
    if args.template:
        generator.template = args.template
    generator.outdir = args.outdir
    generator.generate()

# diff --git a/metadata_generator.py b/metadata_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..20655f769190da1316250366edeba9878caad10c --- /dev/null +++ b/metadata_generator.py @@ -0,0 +1,23 @@
import os

"""
Metadata generator for gga_auto_load

Creates a file that summarizes the organisms added by the script and what was done in the dedicated galaxy instance
This organism metadata file is located in the metadata directory of the organism directory (i.e /genus_species/metadata)
By default, will also update a general metadata file (located in the parent directory i.e where all the organisms
directories are located)

Metadata format: .yml
"""


class MetadataGenerator:
    """State holder for the metadata generator; no behaviour is implemented yet.

    All three attributes start as None and are expected to be filled in by
    the caller. Their exact meaning is not established by this file --
    presumably ``maindir`` is the directory holding the organism directories,
    ``mainfile`` the general metadata file and ``meta`` the metadata content
    (TODO confirm once the generator is implemented).
    """

    def __init__(self):
        # Same three attributes, assigned in the same order, all unset.
        self.maindir = self.meta = self.mainfile = None