Skip to content
Snippets Groups Projects
Commit 8fa6a46e authored by Arthur Le Bars's avatar Arthur Le Bars
Browse files

Swarm functional version (wait for galaxy service to finish scanning /export...

Swarm functional version (wait for galaxy service to finish scanning /export before running the data loading + workflow script)
parent af06a5c7
No related branches found
No related tags found
1 merge request!1Release 1.0
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="R Skeletons" level="application" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/gga_load_data.iml" filepath="$PROJECT_DIR$/.idea/gga_load_data.iml" />
</modules>
</component>
</project>
\ No newline at end of file
No preview for this file type
No preview for this file type
No preview for this file type
......@@ -30,7 +30,7 @@ TODO EOSC/Cloudification:
- keep in mind
- divide into 2 general-use scripts
- create docker stack via swarm
- load data into libraries
- load data into libraries (a method to load it at init, and a method/script to load it separately; galaxy_data_libs_SI does this already?)
STEPS:
- read input (xls or json)
......@@ -49,7 +49,6 @@ STEPS:
class Autoload:
"""
Autoload class contains attributes and functions to interact with GGA
"""
def __init__(self, species_parameters_dictionary, args):
......@@ -85,19 +84,19 @@ class Autoload:
self.workflow_name = None
self.docker_compose_generator = None
self.metadata = dict()
self.source_data_dir = "/projet/sbr/phaeoexplorer" # directory/subdirectories where source data files are located
self.source_data_dir = "/projet/sbr/phaeoexplorer"
# Directory/subdirectories where the data files (fasta, gff, ...) are located; point to a directory as close as possible to the source files
self.do_update = False
# Update the existing instance (in the histories corresponding to the input) instead of creating a new one. TODO: move this variable inside methods
self.api_key = "dev"
# API key used to communicate with the Galaxy instance. Set to "dev" for the moment. TODO: find a way to create, store, then use the API key safely
def connect_to_instance(self):
"""
Test the connection to the galaxy instance for the current organism
Exit if we can't connect to the instance
"""
self.instance = galaxy.GalaxyInstance(url=self.instance_url,
key="ec601ea5005766e1bc106e69ad8b9eaa",
email="alebars@sb-roscoff.fr",
password="pouet",
verify=True)
self.instance = galaxy.GalaxyInstance(url=self.instance_url, key=self.api_key)
logging.info("connection to the galaxy instance ...")
try:
self.instance.histories.get_histories()
......@@ -108,7 +107,6 @@ class Autoload:
else:
logging.info("successfully connected to galaxy instance @ " + self.instance_url)
def get_source_data(self, max_depth):
"""
OBSOLETE
......@@ -194,42 +192,54 @@ class Autoload:
# Hard coded paths (find a way to get the files by adding an attribute "path_to_repo")
# Write with string?
stack_template_path = "/home/fr2424/sib/alebars/PycharmProjects/ggauto/gga_load_data/templates/stack-organism.yml"
traefik_template_path = "/home/fr2424/sib/alebars/PycharmProjects/ggauto/gga_load_data/templates/traefik.yml"
authelia_config_path = "/home/fr2424/sib/alebars/PycharmProjects/ggauto/gga_load_data/templates/authelia_config.yml"
authelia_users_path = "/home/fr2424/sib/alebars/PycharmProjects/ggauto/gga_load_data/templates/authelia_users.yml"
stack_template_path = self.script_dir + "/templates/stack-organism.yml"
traefik_template_path = self.script_dir + "/templates/traefik.yml"
authelia_config_path = self.script_dir + "/templates/authelia_config.yml"
authelia_users_path = self.script_dir + "/templates/authelia_users.yml"
if self.sex and self.strain:
genus_species_strain_sex = self.genus.lower() + "_" + self.species + "_" + self.strain + "_" + self.sex
else:
genus_species_strain_sex = self.genus.lower() + "_" + self.species
with open(stack_template_path, 'r') as infile:
organism_content = list()
for line in infile:
# Replace placeholders by the genus and species
organism_content.append(
line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus + " " + self.species)).replace("Genus/species", str(self.genus + "/" + self.species)).replace("gspecies", str(self.genus.lower()[0] + self.species)))
line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus + " " + self.species)).replace("Genus/species", str(self.genus + "/" + self.species)).replace("gspecies", str( self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex", genus_species_strain_sex))
with open("./docker-compose.yml", 'w') as outfile:
for line in organism_content:
outfile.write(line)
subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir)
try:
os.mkdir("../traefik")
os.mkdir("../traefik/authelia")
shutil.copy(authelia_config_path, "../traefik/authelia/configuration.yml")
shutil.copy(authelia_users_path, "../traefik/authelia/users.yml")
# with open(traefik_template_path, 'r') as infile:
# traefik_content = list()
# for line in infile:
# # Replace placeholders by the genus and species (there are none)
# traefik_content.append(
# line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus + " " + self.species)).replace("Genus/species", str(self.genus + "/" + self.species)).replace("gspecies", str(self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex", genus_species_strain_sex))
# with open("../traefik/docker-compose.yml", 'w') as outfile:
# for line in traefik_content:
# outfile.write(line)
subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir)
except FileExistsError:
logging.debug("traefik directory already exists")
logging.debug("SKIP: Traefik directory already exists")
with open(traefik_template_path, 'r') as infile:
traefik_content = list()
for line in infile:
# Replace placeholders by the genus and species
traefik_content.append(
line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus + " " + self.species)).replace("Genus/species", str(self.genus + "/" + self.species)).replace("gspecies", str(self.genus.lower()[0] + self.species)))
with open("../traefik/docker-compose.yml", 'w') as outfile:
for line in traefik_content:
outfile.write(line)
shutil.copy(authelia_config_path, "../traefik/authelia/configuration.yml")
shutil.copy(authelia_users_path, "../traefik/authelia/users.yml")
# Create volumes for the containers (script written by A. Bretaudeau)
subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir)
# Find all files in source_data directory, to link the matching files in the src_data dir tree
# Can be turned into a generator for performance
# TODO: cp data files method in a separate function (for EOSC)
for dirpath, dirnames, files in os.walk(self.source_data_dir):
if "0" in str(dirpath): # ensures to take the correct files (other dirs hold files with the correct names, but I don't know if they are the same
for f in files:
......@@ -257,14 +267,13 @@ class Autoload:
except TypeError:
pass
# Launch and update docker stacks (cf docs)
deploy_script_path = self.script_dir + "/deploy.sh"
subprocess.call(["sh", deploy_script_path, self.genus_species])
# Launch and update docker stacks (cf docs) TODO: deploy method in a separate function (for EOSC)
# deploy_script_path = self.script_dir + "/deploy.sh"
# subprocess.call(["sh", deploy_script_path, self.genus_species])
def write_nginx_conf(self):
"""
OBSOLETE: compose method
Generate (and update nginx) conf files to add new organisms from the proxy
:return:
......@@ -273,7 +282,6 @@ class Autoload:
nginx_organism_path = "" # nginx conf template for the current organism (used once)
docker_proxy_template_path = "" # dockerfile for the main proxy (used once)
def modify_fasta_headers(self):
"""
Change the fasta headers before integration.
......
[
{
"genus" : "Ectocarpus",
"species" : "sp1",
"common" : "",
"strain" : "",
"sex" : "male",
"origin" : "Unknown",
"version" : "1.0",
"performed by" : "Genoscope",
"genome version" : "1.0",
"ogs version" : "1.0",
"date" : "2020-08-03"
},
{
"genus" : "Ectocarpus",
"species" : "sp2",
"common" : "",
"strain" : "",
"sex" : "male",
"origin" : "Unknown",
"version" : "1.0",
"performed by" : "Genoscope",
"genome version" : "1.0",
"ogs version" : "1.0",
"date" : "2020-08-03"
}
]
......@@ -10,17 +10,17 @@ services:
- ./nginx/conf:/etc/nginx/conf.d
networks:
- traefikbig
- genus_species_1a
- genus_species
deploy:
labels:
# Download page
- "traefik.http.routers.genus_species_1a-nginx.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species_1a/download`))"
# - "traefik.http.routers.genus_species_1a-nginx.tls=true"
# - "traefik.http.routers.genus_species_1a-nginx.entryPoints=webs"
- "traefik.http.routers.genus_species_1a-nginx.entryPoints=web" # lg
# - "traefik.http.routers.genus_species_1a-nginx.middlewares=sp-auth,sp-app-trailslash,sp-prefix"
- "traefik.http.routers.genus_species_1a-nginx.middlewares=sp-app-trailslash,sp-prefix" # lg
- "traefik.http.services.genus_species_1a-nginx.loadbalancer.server.port=80"
- "traefik.http.routers.genus_species-nginx.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species/download`))"
# - "traefik.http.routers.genus_species-nginx.tls=true"
# - "traefik.http.routers.genus_species-nginx.entryPoints=webs"
- "traefik.http.routers.genus_species-nginx.entryPoints=web" # lg
# - "traefik.http.routers.genus_species-nginx.middlewares=sp-auth,sp-app-trailslash,sp-prefix"
- "traefik.http.routers.genus_species-nginx.middlewares=sp-app-trailslash,sp-prefix" # lg
- "traefik.http.services.genus_species-nginx.loadbalancer.server.port=80"
restart_policy:
condition: on-failure
delay: 5s
......@@ -38,16 +38,16 @@ services:
- ./src_data:/data:ro
#- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/...
environment:
DB_HOST: tripal-db.genus_species_1a
BASE_URL_PATH: /sp/genus_species_1a
DB_HOST: tripal-db.genus_species
BASE_URL_PATH: /sp/genus_species
UPLOAD_LIMIT: 20M
MEMORY_LIMIT: 512M
TRIPAL_GIT_CLONE_MODULES: "https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]"
TRIPAL_DOWNLOAD_MODULES: ""
TRIPAL_ENABLE_MODULES: "tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch"
SITE_NAME: "Genus species"
ELASTICSEARCH_HOST: elasticsearch.genus_species_1a
ENABLE_JBROWSE: /jbrowse/?data=data/genus_species_1a_ec863_female
ELASTICSEARCH_HOST: elasticsearch.genus_species
ENABLE_JBROWSE: /jbrowse/?data=data/genus_species_strain_sex
ENABLE_APOLLO: 0
ENABLE_BLAST: 1
ENABLE_DOWNLOAD: 1
......@@ -57,19 +57,19 @@ services:
ENABLE_ORTHOLOGY_LINKS: 0
THEME: "abims" # Use this to use another theme
THEME_GIT_CLONE: "http://gga:BuH1_aG5@gitlab.sb-roscoff.fr/abims/e-infra/tripal_abims" # Use this to install another theme
ADMIN_PASSWORD: 23fN,Ajt # You need to define it and update it in galaxy config below
ADMIN_PASSWORD: 23fN,Ajt # You need to define it and update it in galaxy config below --> change in prod (set a password in script init?)
networks:
- traefikbig
- genus_species_1a
- genus_species
deploy:
labels:
- "traefik.http.routers.genus_species_1a-tripal.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species_1a`))"
# - "traefik.http.routers.genus_species_1a-tripal.tls=true"
# - "traefik.http.routers.genus_species_1a-tripal.entryPoints=webs"
- "traefik.http.routers.genus_species_1a-tripal.entryPoints=web" # lg
# - "traefik.http.routers.genus_species_1a-tripal.middlewares=sp-auth,sp-trailslash,sp-prefix,tripal-addprefix"
- "traefik.http.routers.genus_species_1a-tripal.middlewares=sp-trailslash,sp-prefix,tripal-addprefix" # lg
- "traefik.http.services.genus_species_1a-tripal.loadbalancer.server.port=80"
- "traefik.http.routers.genus_species-tripal.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species`))"
# - "traefik.http.routers.genus_species-tripal.tls=true"
# - "traefik.http.routers.genus_species-tripal.entryPoints=webs"
- "traefik.http.routers.genus_species-tripal.entryPoints=web" # lg
# - "traefik.http.routers.genus_species-tripal.middlewares=sp-auth,sp-trailslash,sp-prefix,tripal-addprefix"
- "traefik.http.routers.genus_species-tripal.middlewares=sp-trailslash,sp-prefix,tripal-addprefix" # lg
- "traefik.http.services.genus_species-tripal.loadbalancer.server.port=80"
restart_policy:
condition: on-failure
delay: 5s
......@@ -86,7 +86,7 @@ services:
volumes:
- ./docker_data/tripal_db/:/var/lib/postgresql/data/
networks:
- genus_species_1a
- genus_species
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:6.6.1
......@@ -107,7 +107,7 @@ services:
ES_JAVA_OPTS: "-Xms500m -Xmx500m"
TAKE_FILE_OWNERSHIP: "true"
networks:
- genus_species_1a
- genus_species
galaxy:
image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
......@@ -127,34 +127,34 @@ services:
GALAXY_CONFIG_USE_REMOTE_USER: "True"
GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN: "sb-roscoff.fr"
GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org,gga@sb-roscoff.fr,lgueguen@sb-roscoff.fr, alebars@sb-roscoff.fr" # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect
GALAXY_CONFIG_MASTER_API_KEY: "dev"
ENABLE_FIX_PERMS: 0
PROXY_PREFIX: /sp/genus_species_1a/galaxy
GALAXY_TRIPAL_URL: http://tripal.genus_species_1a/tripal/
PROXY_PREFIX: /sp/genus_species/galaxy
GALAXY_TRIPAL_URL: http://tripal.genus_species/tripal/
GALAXY_TRIPAL_PASSWORD: 23fN,Ajt # See tripal config above
GALAXY_WEBAPOLLO_URL: http://one-of-the-swarm-node:8888/apollo/
GALAXY_WEBAPOLLO_USER: "admin_apollo@sb-roscoff.fr"
GALAXY_WEBAPOLLO_PASSWORD: "Q65:dA,t" # See tripal config below
GALAXY_WEBAPOLLO_EXT_URL: /apollo/
GALAXY_CHADO_DBHOST: tripal-db.genus_species_1a
GALAXY_CHADO_DBHOST: tripal-db.genus_species
GALAXY_CHADO_DBSCHEMA: chado
GALAXY_AUTO_UPDATE_DB: 1
GALAXY_AUTO_UPDATE_CONDA: 1
GALAXY_AUTO_UPDATE_TOOLS: "/galaxy-central/tools_1.yaml"
GALAXY_SHARED_DIR: ""
GALAXY_CONFIG_MASTER_API_KEY: "dev"
BLAT_ENABLED: 1
networks:
- traefikbig
- genus_species_1a
- genus_species
deploy:
labels:
- "traefik.http.routers.genus_species_1a-galaxy.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species_1a/galaxy`))"
# - "traefik.http.routers.genus_species_1a-galaxy.tls=true"
# - "traefik.http.routers.genus_species_1a-galaxy.entryPoints=webs"
- "traefik.http.routers.genus_species_1a-galaxy.entryPoints=web" #lg
# - "traefik.http.routers.genus_species_1a-galaxy.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix"
- "traefik.http.routers.genus_species_1a-galaxy.middlewares=sp-app-trailslash,sp-app-prefix" #lg
- "traefik.http.services.genus_species_1a-galaxy.loadbalancer.server.port=80"
- "traefik.http.routers.genus_species-galaxy.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species/galaxy`))"
# - "traefik.http.routers.genus_species-galaxy.tls=true"
# - "traefik.http.routers.genus_species-galaxy.entryPoints=webs"
- "traefik.http.routers.genus_species-galaxy.entryPoints=web" #lg
# - "traefik.http.routers.genus_species-galaxy.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix"
- "traefik.http.routers.genus_species-galaxy.middlewares=sp-app-trailslash,sp-app-prefix" #lg
- "traefik.http.services.genus_species-galaxy.loadbalancer.server.port=80"
restart_policy:
condition: on-failure
delay: 5s
......@@ -170,16 +170,16 @@ services:
- ./docker_data/jbrowse/:/jbrowse/data/:ro
networks:
- traefikbig
- genus_species_1a
- genus_species
deploy:
labels:
- "traefik.http.routers.genus_species_1a-jbrowse.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species_1a/jbrowse`))"
# - "traefik.http.routers.genus_species_1a-jbrowse.tls=true"
# - "traefik.http.routers.genus_species_1a-jbrowse.entryPoints=webs"
- "traefik.http.routers.genus_species_1a-jbrowse.entryPoints=web" # lg
# - "traefik.http.routers.genus_species_1a-jbrowse.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix"
- "traefik.http.routers.genus_species_1a-jbrowse.middlewares=sp-app-trailslash,sp-app-prefix" #lg
- "traefik.http.services.genus_species_1a-jbrowse.loadbalancer.server.port=80"
- "traefik.http.routers.genus_species-jbrowse.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species/jbrowse`))"
# - "traefik.http.routers.genus_species-jbrowse.tls=true"
# - "traefik.http.routers.genus_species-jbrowse.entryPoints=webs"
- "traefik.http.routers.genus_species-jbrowse.entryPoints=web" # lg
# - "traefik.http.routers.genus_species-jbrowse.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix"
- "traefik.http.routers.genus_species-jbrowse.middlewares=sp-app-trailslash,sp-app-prefix" #lg
- "traefik.http.services.genus_species-jbrowse.loadbalancer.server.port=80"
restart_policy:
condition: on-failure
delay: 5s
......@@ -191,7 +191,7 @@ services:
depends_on:
- blast-db
environment:
DB_HOST: blast-db.genus_species_1a
DB_HOST: blast-db.genus_species
UPLOAD_LIMIT: 20M
MEMORY_LIMIT: 128M
DB_NAME: 'postgres'
......@@ -205,7 +205,7 @@ services:
#PRE_CMD: '. /local/env/envblast-2.6.0.sh; . /local/env/envpython-3.7.1.sh;' # executed at the beginning of each job
#APACHE_RUN_USER: 'bipaaweb' # username known by sge
#APACHE_RUN_GROUP: 'bipaa' # group known by sge
BASE_URL_PATH: '/sp/genus_species_1a/blast/'
BASE_URL_PATH: '/sp/genus_species/blast/'
UID: 55914 # username known by sge (for drmaa mode only)
GID: 40259 # group known by sge (for drmaa mode only)
#JOBS_DRMAA_NATIVE: '-p web' # This line and following for slurm
......@@ -226,16 +226,16 @@ services:
#- /etc/munge/:/etc/munge/:ro
networks:
- traefikbig
- genus_species_1a
- genus_species
deploy:
labels:
- "traefik.http.routers.genus_species_1a-blast.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species_1a/blast`))"
# - "traefik.http.routers.genus_species_1a-blast.tls=true"
# - "traefik.http.routers.genus_species_1a-blast.entryPoints=webs"
- "traefik.http.routers.genus_species_1a-blast.entryPoints=web" # lg
# - "traefik.http.routers.genus_species_1a-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix"
- "traefik.http.routers.genus_species_1a-blast.middlewares=sp-big-req,sp-app-trailslash,sp-app-prefix" # lg
- "traefik.http.services.genus_species_1a-blast.loadbalancer.server.port=80"
- "traefik.http.routers.genus_species-blast.rule=(Host(`scratchgmodv1`) && PathPrefix(`/sp/genus_species/blast`))"
# - "traefik.http.routers.genus_species-blast.tls=true"
# - "traefik.http.routers.genus_species-blast.entryPoints=webs"
- "traefik.http.routers.genus_species-blast.entryPoints=web" # lg
# - "traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix"
- "traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-app-trailslash,sp-app-prefix" # lg
- "traefik.http.services.genus_species-blast.loadbalancer.server.port=80"
restart_policy:
condition: on-failure
delay: 5s
......@@ -251,7 +251,7 @@ services:
volumes:
- ./docker_data/blast_db/:/var/lib/postgresql/data/
networks:
- genus_species_1a
- genus_species
# wiki:
# image: quay.io/abretaud/mediawiki
......@@ -294,6 +294,6 @@ services:
networks:
traefikbig:
external: true
genus_species_1a:
genus_species:
driver: overlay
name: genus_species_1a
name: genus_species
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment