diff --git a/deprecated/__init__.py b/deprecated/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/deprecated/create_mounts.py b/deprecated/create_mounts.py deleted file mode 100644 index 91e2f81aff06c7829adfb09a07f18701f3bd10a9..0000000000000000000000000000000000000000 --- a/deprecated/create_mounts.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import logging as log -import os -import re -import yaml - -from pathlib import Path - - -def main(args): - - compose_yml = os.path.abspath('./docker-compose.yml') - if not os.path.isfile(compose_yml): - raise Exception("Could not find docker-compose.yml at %s" % compose_yml) - - with open(compose_yml) as f: - compose = yaml.safe_load(f) - - if 'services' not in compose: - raise Exception("Could not find services tag in docker-compose.yml") - - for service in compose['services']: - log.info("Service '%s'" % service) - if 'volumes' in compose['services'][service]: - for volume in compose['services'][service]['volumes']: - log.info(" Volume '%s'" % (volume)) - reg = re.match(r"^(\./[^:]+/):[^:]+(:\w+)?$", volume) - if reg: - vol_dir = os.path.abspath('./' + reg.group(1)) - log.info(" mkdir '%s' (from %s)" % (vol_dir, volume)) - if not args.dry_run and not os.path.exists(vol_dir): - os.makedirs(vol_dir, exist_ok=True) - else: - reg = re.match(r"^(\./[^:]+):[^:]+(:\w+)?$", volume) - if reg: - vol_file = os.path.abspath('./' + reg.group(1)) - vol_dir = os.path.dirname(vol_file) - log.info(" mkdir '%s' (from %s)" % (vol_dir, volume)) - if not args.dry_run and not os.path.exists(vol_dir): - os.makedirs(vol_dir, exist_ok=True) - log.info(" touch '%s' (from %s)" % (vol_file, volume)) - if not args.dry_run and not os.path.exists(vol_file): - Path(vol_file).touch() - else: - log.info(" skip") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Create local volume directories.' 
- ) - parser.add_argument("-v", "--verbose", help="Increase output verbosity.", - action="store_true") - parser.add_argument("-d", "--dry-run", help="Dry run: no modification will be done, for testing purpose.", - action="store_true") - - args = parser.parse_args() - log.basicConfig(level=log.INFO) - if args.verbose: - log.basicConfig(level=log.DEBUG) - - main(args) diff --git a/deprecated/galaxy_data_libs_SI.py b/deprecated/galaxy_data_libs_SI.py deleted file mode 100644 index 1fa378e8dfab31ca5915c76d252e4166e3736515..0000000000000000000000000000000000000000 --- a/deprecated/galaxy_data_libs_SI.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import logging as log -import os -import subprocess -import time -from subprocess import CalledProcessError - -from bioblend.galaxy.objects import GalaxyInstance - -import yaml - - -class DataLibLoader: - - def __init__(self): - self.existing_folders_cache = {} - self.bam_metadata_cache = {} - - def create_deep_folder(self, prj_lib, path, parent_folder=None, deep_name=""): - - segments = path.split(os.sep) - - deeper_name = os.sep.join([deep_name, segments[0]]) - if deeper_name in self.existing_folders_cache: - new_folder = self.existing_folders_cache[deeper_name] - else: - new_folder = prj_lib.create_folder(segments[0], base_folder=parent_folder) - self.existing_folders_cache[deeper_name] = new_folder - - if len(segments) > 1: - new_folder = self.create_deep_folder(prj_lib, os.sep.join(segments[1:]), new_folder, deeper_name) - - return new_folder - - def get_bam_label(self, dirname, bam_file): - - bam_id = bam_file - if bam_id.endswith('.bam'): - bam_id = bam_id[:-4] - - if dirname in self.bam_metadata_cache: - if bam_id in self.bam_metadata_cache[dirname] and 'label' in self.bam_metadata_cache[dirname][bam_id] and self.bam_metadata_cache[dirname][bam_id]['label']: - return self.bam_metadata_cache[dirname][bam_id]['label'] - else: - return None - else: - meta_file = os.path.join(dirname, 'metadata.yml') - if os.path.exists(meta_file): - with open(meta_file) as f: - self.bam_metadata_cache[dirname] = yaml.safe_load(f) - log.info("Found metadata in %s " % meta_file) - else: - self.bam_metadata_cache[dirname] = {} - log.info("Did not find metadata in %s " % meta_file) - return self.get_bam_label(dirname, bam_file) - - def main(self, args, data_dir_root='/project_data'): - """ - Load files into a Galaxy data library. - """ - - log.info("Importing data libraries.") - - url = "http://localhost" - # The environment variables are set by the parent container - admin_email = os.environ.get('GALAXY_DEFAULT_ADMIN_USER', 'admin@galaxy.org') - admin_pass = os.environ.get('GALAXY_DEFAULT_ADMIN_PASSWORD', 'password') - - # Establish connection to galaxy instance - gio = GalaxyInstance(url=url, email=admin_email, password=admin_pass) - - log.info("Looking for project data in %s" % data_dir_root) - folders = dict() - for root, dirs, files in os.walk(data_dir_root, followlinks=True): - file_list = [os.path.join(root, filename) for filename in files] - folders[root] = file_list - - post_renaming = {} - if folders: - # Delete pre-existing lib (probably created by a previous call) - existing = gio.libraries.get_previews(name='Project Data') - for lib in existing: - if not lib.deleted: - log.info('Pre-existing "Project Data" library %s found, removing it.' 
% lib.id) - gio.libraries.delete(lib.id) - - log.info("Creating new 'Project Data' library.") - prj_lib = gio.libraries.create('Project Data', 'Data for current genome annotation project') - - for fname, files in folders.items(): - if fname and files: - folder_name = fname[len(data_dir_root) + 1:] - log.info("Creating folder: %s" % folder_name) - folder = None - if not args.dry_run: - folder = self.create_deep_folder(prj_lib, folder_name) - - for single_file in files: - ftype = 'auto' - - clean_name = os.path.basename(single_file) - clean_name = clean_name.replace('_', ' ') - - if single_file.endswith('.bam'): - ftype = 'bam' - bam_label = self.get_bam_label(fname, os.path.basename(single_file)) - if bam_label: - clean_name = bam_label - else: - clean_name = os.path.splitext(clean_name)[0] - if clean_name.endswith("Aligned.sortedByCoord.out"): # Stupid thing for many local bam files - clean_name = clean_name[:-25] - elif single_file.endswith('.fasta') or single_file.endswith('.fa') or single_file.endswith('.faa') or single_file.endswith('.fna'): - ftype = 'fasta' - elif single_file.endswith('.gff') or single_file.endswith('.gff3'): - ftype = 'gff3' - clean_name = os.path.splitext(clean_name)[0] - elif single_file.endswith('.xml'): - ftype = 'xml' - elif single_file.endswith('.bw'): - ftype = 'bigwig' - elif single_file.endswith('.gaf'): - ftype = 'tabular' - elif single_file.endswith('_tree.txt'): - # We don't want to pollute the logs with 20000 useless lines - log.debug("Skipping useless file '%s'." % single_file) - continue - elif single_file.endswith('.tar.gz') and 'newick' in fname: - ftype = 'tar' - elif single_file.endswith('.bai') or single_file.endswith('.tar.gz') or single_file.endswith('.tar.bz2') or single_file.endswith('.raw') or single_file.endswith('.pdf'): - log.info("Skipping useless file '%s'." % single_file) - continue - - log.info("Adding file '%s' with type '%s' and name '%s'." % (single_file, ftype, clean_name)) - if not args.dry_run: - datasets = prj_lib.upload_from_galaxy_fs( - single_file, - folder=folder, - link_data_only='link_to_files', - file_type=ftype, - tag_using_filenames=False - ) - - # Rename dataset - # Need to do it AFTER the datasets import is finished, otherwise the new names are not kept by galaxy - # (erased by metadata generation I guess) - post_renaming[datasets[0]] = clean_name - - time.sleep(1) - - if args.dry_run: - log.info("Finished in DRY RUN mode") - return - - # Wait for uploads to complete - log.info("Waiting for import jobs to finish... please wait") - while True: - try: - # "C" state means the job is completed, no need to wait for it - ret = subprocess.check_output("squeue | grep -v \"C debug\" | grep -v \"JOBID\" || true", shell=True) - if not len(ret): - break - time.sleep(3) - except CalledProcessError as inst: - if inst.returncode == 153: # queue is empty - break - else: - raise - - time.sleep(10) - - log.info("Import finished, now renaming datasets with pretty names") - for dataset in post_renaming: - dataset.update(name=post_renaming[dataset]) - - log.info("Finished importing data.") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Populate the Galaxy data library with files.' 
- ) - parser.add_argument("-v", "--verbose", help="Increase output verbosity.", - action="store_true") - parser.add_argument("-d", "--dry-run", help="Don't update the data library, just show what it would do.", - action="store_true") - - args = parser.parse_args() - if args.verbose: - log.basicConfig(level=log.DEBUG) - else: - log.basicConfig(level=log.INFO) - - dll = DataLibLoader() - dll.main(args) diff --git a/deprecated/json_example_input.json b/deprecated/json_example_input.json deleted file mode 100644 index 669bc8846ae237e6a2d21af8df7f1190ff38a5c4..0000000000000000000000000000000000000000 --- a/deprecated/json_example_input.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "genus" : "genus1", - "species" : "Species1", - "common" : "Common1", - "strain" : "strain1", - "sex" : "male", - "origin" : "Unknown", - "version" : "1.0", - "performed by" : "Institute John Doe", - "genome version" : "1.0", - "ogs version" : "1.0", - "date" : "2020-01-01" - }, - { - "genus" : "genus2", - "species" : "Species2", - "common" : "Common2", - "strain" : "strain2", - "sex" : "female", - "origin" : "Unknown", - "version" : "1.0", - "performed by" : "Institute Jane Doe", - "genome version" : "1.0", - "ogs version" : "1.0", - "date" : "2020-01-01" - } -] diff --git a/deprecated/table_parser.py b/deprecated/table_parser.py deleted file mode 100644 index 9314b91cedb9e7f6809d5524822d282aad2d2532..0000000000000000000000000000000000000000 --- a/deprecated/table_parser.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -import sys -import pandas # xlrd required for excel files reading -import numpy -import json -import argparse -import logging -from datetime import datetime - -""" -!! OBSOLETE !! - -Input parser script. -Does not work for ods spreadsheets (save as xls or xlsx instead) --> need to handle with pandas_ods_reader (requires ezodf, lxml) -Does not support multiple sheets (TODO: "integration" and "update" sheets (1 and 2)) -See example toy table (toy_table.xls) - -standalone usage: python3 table_parser.py <tabulated_file> -d <directory_to_write_json_to (default: cwd)> - -""" - - -class TableParser: - - def __init__(self, table_file, dir): - self.dir = os.path.abspath(args.dir) - self.table_file = table_file - self.method = None # TODO: instant launch or just parse (standalone) - self.extension = None - self.meta = dict() - self.json_file = None - - def parse_table(self, extension): - if extension == "xls": - pandas_table = pandas.DataFrame(pandas.read_excel(self.table_file)) - elif extension == "csv": - pandas_table = pandas.DataFrame(pandas.read_csv(self.table_file)) - else: - logging.info("wrong format: input tabulated file cannot be read (supported formats: xls, xlsx, csv)") - sys.exit() - pandas_table = pandas_table.replace(numpy.nan, "", regex=True) - - for char in " ,.()-/": - pandas_table = pandas_table.replace("\\" + char, "_", regex=True) - pandas_table = pandas_table.replace("\\__", "_", regex=True) - pandas_table.loc[pandas_table["genome version"] == "", "genome version"] = "1.0" - pandas_table.loc[pandas_table["ogs version"] == "", "ogs version"] = "1.0" - pandas_table.loc[pandas_table["version"] == "", "version"] = "1.0" - pandas_table.loc[pandas_table["date"] == "", "date"] = datetime.today().strftime("%Y-%m-%d") - with open(os.path.join(self.dir, self.json_file), 'w') as json_file: - json_file.truncate(0) - json_content = list() - for organism in range(0, len(pandas_table.index)): - organism_dict = pandas_table.iloc[organism].to_dict() - for k, v in organism_dict.items(): - v = str(v).split(" ") - 
v = "_".join(v) - v = v.replace("__", "_") - if v.endswith("_"): - v = v[:-1] - json_content.append(organism_dict) - json.dump(json_content, json_file, indent=4) - - def write_json(self, data, filename): - with open(filename, 'w') as f: - json.dump(data, f, indent=4) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Table parser for phaeoexplorer data") - parser.add_argument("input", type=str, help="input table") - parser.add_argument("-d", "--dir", type=str, help="Where to write the output json file that is be used for integration", default = os.getcwd()) - args = parser.parse_args() - - if args.input.endswith("xlsx") or args.input.endswith("xls"): - tp = TableParser(table_file=args.input, dir=args.dir) - tp.extension = args.input.split(".")[1] - tp.json_file = tp.dir + "/dataloader_" + datetime.today().strftime("%Y%m%d") + ".json" - tp.parse_table(extension="xls") diff --git a/deprecated/templates/compose-template.yml b/deprecated/templates/compose-template.yml deleted file mode 100644 index f15a9f74daad8c8e089c7d114ee5aaca7c552436..0000000000000000000000000000000000000000 --- a/deprecated/templates/compose-template.yml +++ /dev/null @@ -1,230 +0,0 @@ -# ./docker_data is created and filled with persistent data that should be backuped - -version: '2' -services: - proxy: - image: quay.io/abretaud/nginx-ldap:latest # Not using the default nginx image as we need the ldap module - ports: - - "9170:80" - links: - - galaxy - - jbrowse - - tripal - - apollo - - apollo-report - - blast - - wiki - volumes: - - ./src_data/:/project_data/ - #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... - - ./nginx/conf:/etc/nginx/conf.d - - tripal: - image: quay.io/galaxy-genome-annotation/tripal@sha256:4451cc3a601d109c07c7aedcc76bd41a5da7c438c8fa0862488680bd462f125b - links: - - tripaldb:postgres - - elasticsearch:elasticsearch - volumes_from: - - "galaxy" - volumes: - - ./src_data:/data:ro - environment: - BASE_URL_PATH: /sp/genus_species - UPLOAD_LIMIT: 20M - MEMORY_LIMIT: 512M - TRIPAL_GIT_CLONE_MODULES: "https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]" - TRIPAL_DOWNLOAD_MODULES: "" - TRIPAL_ENABLE_MODULES: "tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch" - SITE_NAME: "Genus species" - ENABLE_JBROWSE: /jbrowse/?data=data/gspecies - ENABLE_APOLLO: 1 - ENABLE_BLAST: 1 - ENABLE_DOWNLOAD: 1 - ENABLE_WIKI: 1 - ENABLE_GO: /organism/Genus/species?pane=GO - ENABLE_ORTHOLOGY: 0 - ENABLE_ORTHOLOGY_LINKS: http://localhost/sp/orthology/ - #THEME: "bipaa" # Use this to use another theme - #THEME_GIT_CLONE: "https://gitlab.inria.fr/abretaud/tripal_bipaa.git" # Use this to install another theme - ADMIN_PASSWORD: XXXXXX # You need to define it and update it in galaxy config below - - tripaldb: - image: quay.io/galaxy-genome-annotation/chado:1.31-jenkins110.1-pg9.5 - environment: - - POSTGRES_PASSWORD=postgres - # The default chado image would try to install the schema on first run, - # we just want the tools to be available. 
- - INSTALL_CHADO_SCHEMA=0 - volumes: - - ./docker_data/tripal_db/:/var/lib/postgresql/data/ - - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:6.6.1 - #mem_limit: 500m # This is to limit usage, but can make the docker crash when ram is exhausted, not recommended while indexing all data, ok once indexing is done - ulimits: - memlock: - soft: -1 - hard: -1 - volumes: - - ./docker_data/elastic_search_index/:/usr/share/elasticsearch/data - environment: - bootstrap.memory_lock: "true" - xpack.security.enabled: "false" - xpack.monitoring.enabled: "false" - xpack.ml.enabled: "false" - xpack.graph.enabled: "false" - xpack.watcher.enabled: "false" - cluster.routing.allocation.disk.threshold_enabled: "false" - ES_JAVA_OPTS: "-Xms200m -Xmx200m" - TAKE_FILE_OWNERSHIP: "true" - - galaxy: - image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod - volumes: - - ../galaxy_data_libs_SI.py:/opt/galaxy_data_libs_SI.py - - ./docker_data/galaxy:/export - - ./src_data/:/project_data:ro - #- /groups/XXX/:/groups/XXX/:ro # We do this when we have symlinks in src_data pointing to /groups/XXX/... - - ./docker_data/jbrowse/:/jbrowse/data/ - - ./docker_data/apollo/:/apollo-data/ - links: - - "tripaldb:chado" - environment: - NONUSE: nodejs,proftp,reports - GALAXY_LOGGING: full - GALAXY_CONFIG_BRAND: "Genus species" - GALAXY_CONFIG_ALLOW_LIBRARY_PATH_PASTE: "True" - GALAXY_CONFIG_USE_REMOTE_USER: "True" - GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN: "bipaa" - GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org,gogepp@bipaa" # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect - ENABLE_FIX_PERMS: 0 - PROXY_PREFIX: /sp/genus_species/galaxy - GALAXY_CONFIG_COOKIE_PATH: /galaxy - GALAXY_TRIPAL_PASSWORD: XXXXXX # See tripal config above - GALAXY_WEBAPOLLO_URL: http://apollo:8080 - GALAXY_WEBAPOLLO_USER: "admin_apollo@bipaa" - GALAXY_WEBAPOLLO_PASSWORD: "XXXXXX" # See tripal config below - GALAXY_WEBAPOLLO_EXT_URL: /sp/genus_species/apollo - GALAXY_CHADO_DBHOST: chado - GALAXY_CHADO_DBSCHEMA: chado - GALAXY_AUTO_UPDATE_DB: 1 - GALAXY_AUTO_UPDATE_CONDA: 1 - GALAXY_AUTO_UPDATE_TOOLS: "/galaxy-central/tools_1.yaml" - MASTER_API_KEY: dev - BLAT_ENABLED: 1 - - jbrowse: - image: quay.io/galaxy-genome-annotation/jbrowse:v1.16.5 - volumes: - - /data - volumes_from: - - "galaxy:ro" - ports: - - "80" - - apollo: - image: quay.io/abretaud/apollo:bipaa - links: - - "apollo_db:db" - environment: - APOLLO_ADMIN_EMAIL: "admin_apollo@bipaa" # internal admin user, used by some scripts/api - APOLLO_ADMIN_PASSWORD: "XXXXXX" # define it and adapt galaxy config above - APOLLO_BASE_HOST: "http://localhost" - APOLLO_PATH_PREFIX: "/sp/genus_species/apollo/" - APOLLO_REMOTE_ADMINS: "gogepp@bipaa,abretaud@bipaa,srobin@bipaa,flegeai@bipaa" # all ldap users that we use to connect as admin - WEBAPOLLO_DB_USERNAME: postgres - WEBAPOLLO_DB_PASSWORD: password - WEBAPOLLO_DB_DRIVER: "org.postgresql.Driver" - WEBAPOLLO_DB_DIALECT: "org.hibernate.dialect.PostgresPlusDialect" - WEBAPOLLO_DB_URI: "jdbc:postgresql://db/postgres" - WEBAPOLLO_FEATURE_HAS_DBXREFS: "true" - WEBAPOLLO_FEATURE_HAS_ATTRS: "true" - WEBAPOLLO_FEATURE_HAS_PUBMED: "true" - WEBAPOLLO_FEATURE_HAS_GO: "true" - WEBAPOLLO_FEATURE_HAS_COMMENTS: "true" - WEBAPOLLO_FEATURE_HAS_STATUS: "true" - CATALINA_OPTS: "-Xms512m -Xmx1g -XX:+CMSClassUnloadingEnabled -XX:+CMSPermGenSweepingEnabled -XX:+UseConcMarkSweepGC" - volumes_from: - - "galaxy:ro" - volumes: - - ./apollo/annotation_groups.tsv:/bootstrap/canned_values.txt:ro # Other 
canned things are preloaded in the docker image https://github.com/abretaud/docker-apollo/blob/bipaa/bootstrap.sh - #- ../blat/:/opt/blat/:ro # Mount the blat binary if you want to use it (could not include it in the docker image due to license issue) - - apollo_db: - image: postgres:9.5 - environment: - POSTGRES_PASSWORD: password - volumes: - - ./docker_data/apollo_db/:/var/lib/postgresql/data/ - - apollo-report: # A report app following guidelines from https://bipaa.genouest.org/is/how-to-annotate-a-genome/ - links: - - apollo:apollo - image: quay.io/abretaud/apollo-report:latest - environment: - APOLLO_EXT_URL: http://localhost/sp/genus_species/apollo/ - APOLLO_USER: admin_apollo@bipaa - APOLLO_PASS: XXXXX # See apollo conf above - ADMIN_USERS: login1,login2 # ldap users that should see an extended report - APOLLO_MOUNTPOINT: /apollo-data/ - volumes: - - ./docker_data/apollo/:/apollo-data/:ro - - ./apollo/annotation_groups.tsv:/data/annotation_groups.tsv:ro - - ./docker_data/apollo_report/:/data/report/ - - blast: - image: quay.io/abretaud/sf-blast:latest - links: - - blast_db:postgres - #hostname: gogepp-blast.genouest.org # Hostname declare as submit node in sge conf (for drmaa mode only) - environment: - UPLOAD_LIMIT: 20M - MEMORY_LIMIT: 128M - DB_NAME: 'postgres' - ADMIN_EMAIL: 'xxx@example.org' # email sender - ADMIN_NAME: 'xxxxx' # email sender name - JOBS_METHOD: 'local' # Can be local (= no sge jobs, but run inside the container) or drmaa (= to submit to a cluster) - JOBS_WORK_DIR: '/xxxx/blast_jobs/' # disk accessible both from compute nodes and mounted in this docker (at the same path) - CDD_DELTA_PATH: '/db/cdd_delta/current/flat/cdd_delta' - BLAST_TITLE: 'Genus species blast server' - JOBS_SCHED_NAME: 'blast_gspecies' # job names - PRE_CMD: '. /local/env/envblast-2.6.0.sh; . 
/local/env/envpython-2.7.sh;' # executed at the beginning of each job - APACHE_RUN_USER: 'bipaaweb' # username known by sge - APACHE_RUN_GROUP: 'bipaa' # group known by sge - BASE_URL_PATH: '/sp/genus_species/blast/' - UID: 55914 # username known by sge (for drmaa mode only) - GID: 40259 # group known by sge (for drmaa mode only) - volumes: - #- ../blast-themes/xxx/:/var/www/blast/app/Resources/:ro # You can theme the app - - /data1/sge/:/usr/local/sge/:ro # the sge install - #- /xxxx/blast_jobs/:/xxxx/blast_jobs/ # (for drmaa mode only) - - ./blast/banks.yml:/var/www/blast/app/config/banks.yml:ro - - ./blast/links.yml:/etc/blast_links/links.yml:ro - - blast_db: - image: postgres:9.5 - environment: - - POSTGRES_PASSWORD=postgres - - PGDATA=/var/lib/postgresql/data/ - volumes: - - ./docker_data/blast_db/:/var/lib/postgresql/data/ - - wiki: - image: quay.io/abretaud/mediawiki - environment: - MEDIAWIKI_SERVER: http://localhost - MEDIAWIKI_PROXY_PREFIX: /sp/genus_species/wiki - MEDIAWIKI_SITENAME: Genus species - MEDIAWIKI_SECRET_KEY: XXXXXXXXXX - MEDIAWIKI_DB_PASSWORD: password - MEDIAWIKI_ADMIN_USER: abretaud # ldap user - links: - - wiki_db:db - volumes: - - ./docker_data/wiki_uploads:/images - #- ../bipaa_wiki.png:/var/www/mediawiki/resources/assets/wiki.png:ro # To cange the logo at the top left - - wiki_db: - image: postgres:9.5 - volumes: - - ./docker_data/wiki_db/:/var/lib/postgresql/data/ diff --git a/deprecated/templates/nginx_organism_template.conf b/deprecated/templates/nginx_organism_template.conf deleted file mode 100644 index c2ae6c22dcc7208038389754c79486cf07977ccd..0000000000000000000000000000000000000000 --- a/deprecated/templates/nginx_organism_template.conf +++ /dev/null @@ -1,124 +0,0 @@ -# This is to restrict access to galaxy server to users in an admin ldap group -#ldap_server adminldap { -# url ldap://ldap.sb-roscoff.fr/o=sb-roscoff,c=fr?uid?sub?(&(objectClass=person)(uid=gga)); -# require valid_user; -# satisfy all; -#} - -# This is to authenticate users allowed to access the genome (required by apollo, even if the rest is public) -#ldap_server gspeciesldap { -# url ldap://ldap.sb-roscoff.fr/o=sb-roscoff,c=fr?uid?sub?(&(objectClass=person)(businessCategory=phaeoexplorer)); -# require valid_user; -# satisfy all; -#} - -# WebSocket stuff for Apollo -map $http_upgrade $connection_upgrade { - default upgrade; - '' close; -} - -server { - listen 80; - server_name ~.; - - # When ldap is configured - # auth_ldap "Restricted access"; - # auth_ldap_servers gspeciesldap; - # auth_basic "Restricted Content"; - # auth_basic_user_file /etc/nginx/conf.d/htpasswd_all_users; - proxy_set_header REMOTE_USER $remote_user; - - location / { - proxy_redirect http://tripal/tripal/ http://$http_host/tripal/; - proxy_pass http://tripal/tripal/; - } - - location /download/ { - alias /project_data/; - autoindex on; - } - - location /galaxy { - # When ldap is configured - # auth_ldap "Restricted access"; - # auth_ldap_servers adminldap; - # auth_basic "Restricted Content"; - # auth_basic_user_file /etc/nginx/conf.d/htpasswd_admin_only; - proxy_set_header REMOTE_USER $remote_user; - - client_max_body_size 50g; - proxy_redirect http://galaxy http://$host; - proxy_pass http://galaxy/; - proxy_cookie_path ~*^/([^/]*) /galaxy/$1; - } - - location /api/ { - proxy_pass http://galaxy/; - satisfy any; - allow all; - } - - rewrite ^/jbrowse$ http://localhost/sp/undaria_pinnatifida/jbrowse/ permanent; - rewrite ^/blast$ http://localhost/sp/undaria_pinnatifida/blast/ permanent; - rewrite ^/download$ 
http://localhost/sp/undaria_pinnatifida/download/ permanent; - rewrite ^/apollo$ http://localhost/sp/undaria_pinnatifida/apollo/ permanent; - rewrite ^/blast$ http://localhost/sp/undaria_pinnatifida/blast/ permanent; - rewrite ^/apollo_report$ localhost/sp/undaria_pinnatifida/apollo_report/ permanent; - rewrite ^/wiki$ http://localhost/sp/undaria_pinnatifida/wiki/ permanent; - - location /jbrowse { - proxy_redirect http://jbrowse http://$host; - proxy_pass http://jbrowse/; - - add_header Last-Modified $date_gmt; - } - -# location /apollo/ { -# proxy_set_header REMOTE_USER $remote_user@sb-roscoff.fr; -# -# # WebSocket stuff -# proxy_http_version 1.1; -# proxy_set_header Upgrade $http_upgrade; -# proxy_set_header Connection $connection_upgrade; -# -# if ($args ~* (.*)addStores=(.*)) { -# set_unescape_uri $addStores $2; -# set_escape_uri $addStores_fixed $addStores; -# set $args $1addStores=$addStores_fixed; -# } -# -# if ($args ~* (.*)addTracks=(.*)) { -# set_unescape_uri $addTracks $2; -# set_escape_uri $addTracks_fixed $addTracks; -# set $args $1addTracks=$addTracks_fixed; -# } -# -# proxy_redirect / http://$http_host/apollo/; -# proxy_redirect http://apollo:8080/ http://$http_host/apollo/; -# proxy_pass http://apollo:8080/; -# proxy_cookie_path ~*^/([^/]*) /apollo/$1; -# subs_filter (href|action)=('|")/(?!sp/) $1=$2/sp/ectocarpus_siliculosus_1a/apollo/ gir; -# } - - #location /blast/ { - # client_max_body_size 20m; -# -# proxy_pass http://blast/; -# proxy_redirect / http://$http_host/blast/; -# } - -# location /apollo_report/ { -# proxy_pass http://apollo-report/; -# proxy_redirect / http://$http_host/apollo_report/; -# } - -# location /wiki/ { -# proxy_set_header X-Forwarded-Host $host; -# proxy_set_header X-Forwarded-Server $host; -# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - -# proxy_redirect http://wiki/ http://$host/wiki/; -# proxy_pass http://wiki/; -# } -} diff --git a/deprecated/templates/proxy_template.yml b/deprecated/templates/proxy_template.yml deleted file mode 100644 index c9909bece1e7dc6c3d30a8d80a508b832e6cd1ba..0000000000000000000000000000000000000000 --- a/deprecated/templates/proxy_template.yml +++ /dev/null @@ -1,9 +0,0 @@ -version: '2' -services: - proxy: - image: quay.io/abretaud/nginx-ldap:latest # Not using the default nginx image as we need the ldap module - # ports: - # - "9178:80" # This is not used when using network_mode=host - volumes: - - ./nginx/conf:/etc/nginx/conf.d - network_mode: "host" # This is to be able to use localhost in nginx conf, you can prefer to remove it here and replace localhost by the docker host address in nginx config diff --git a/deprecated/xlsx_example_input.xlsx b/deprecated/xlsx_example_input.xlsx deleted file mode 100644 index 4d75e1ae55b74c096ce229daf452e911b426a165..0000000000000000000000000000000000000000 Binary files a/deprecated/xlsx_example_input.xlsx and /dev/null differ diff --git a/examples/authelia_config_example.yml b/examples/authelia_config_example.yml deleted file mode 100644 index c0bd77a94007278043d06646991bff9ad5525624..0000000000000000000000000000000000000000 --- a/examples/authelia_config_example.yml +++ /dev/null @@ -1,355 +0,0 @@ -############################################################### -# Authelia configuration # -############################################################### - -# The host and port to listen on -host: 0.0.0.0 -port: 9091 -# tls_key: /var/lib/authelia/ssl/key.pem -# tls_cert: /var/lib/authelia/ssl/cert.pem - -# Level of verbosity for logs: info, debug, trace 
-log_level: info -## File path where the logs will be written. If not set logs are written to stdout. -# log_file_path: /var/log/authelia - -# The secret used to generate JWT tokens when validating user identity by -# email confirmation. -# This secret can also be set using the env variables AUTHELIA_JWT_SECRET -jwt_secret: XXXXXXXXXXXXXXXXX - -# Default redirection URL -# -# If user tries to authenticate without any referer, Authelia -# does not know where to redirect the user to at the end of the -# authentication process. -# This parameter allows you to specify the default redirection -# URL Authelia will use in such a case. -# -# Note: this parameter is optional. If not provided, user won't -# be redirected upon successful authentication. -default_redirection_url: https://localhost/ - -# Google Analytics Tracking ID to track the usage of the portal -# using a Google Analytics dashboard. -# -## google_analytics: UA-00000-01 - -# TOTP Settings -# -# Parameters used for TOTP generation -#totp: - # The issuer name displayed in the Authenticator application of your choice - # See: https://github.com/google/google-authenticator/wiki/Key-Uri-Format for more info on issuer names - #issuer: authelia.com - # The period in seconds a one-time password is current for. Changing this will require all users to register - # their TOTP applications again. - # Warning: before changing period read the docs link below. - #period: 30 - # The skew controls number of one-time passwords either side of the current one that are valid. - # Warning: before changing skew read the docs link below. - #skew: 1 - # See: https://docs.authelia.com/configuration/one-time-password.html#period-and-skew to read the documentation. - -# Duo Push API -# -# Parameters used to contact the Duo API. Those are generated when you protect an application -# of type "Partner Auth API" in the management panel. -#duo_api: - #hostname: api-123456789.example.com - #integration_key: ABCDEF - # This secret can also be set using the env variables AUTHELIA_DUO_API_SECRET_KEY - #secret_key: 1234567890abcdefghifjkl - -# The authentication backend to use for verifying user passwords -# and retrieve information such as email address and groups -# users belong to. -# -# There are two supported backends: 'ldap' and 'file'. -authentication_backend: - # Disable both the HTML element and the API for reset password functionality - disable_reset_password: true - - # LDAP backend configuration. - # - # This backend allows Authelia to be scaled to more - # than one instance and therefore is recommended for - # production. -# ldap: - # The url to the ldap server. Scheme can be ldap:// or ldaps:// -# url: ldap://ldap-host-name - # Skip verifying the server certificate (to allow self-signed certificate). -# skip_verify: false - - # The base dn for every entries -# base_dn: dc=genouest,dc=org - - # The attribute holding the username of the user. This attribute is used to populate - # the username in the session information. It was introduced due to #561 to handle case - # insensitive search queries. - # For you information, Microsoft Active Directory usually uses 'sAMAccountName' and OpenLDAP - # usually uses 'uid' - username_attribute: uid - - # An additional dn to define the scope to all users -# additional_users_dn: ou=users - - # The users filter used in search queries to find the user profile based on input filled in login form. 
- # Various placeholders are available to represent the user input and back reference other options of the configuration: - # - {input} is a placeholder replaced by what the user inputs in the login form. - # - {username_attribute} is a placeholder replaced by what is configured in `username_attribute`. - # - {mail_attribute} is a placeholder replaced by what is configured in `mail_attribute`. - # - DON'T USE - {0} is an alias for {input} supported for backward compatibility but it will be deprecated in later versions, so please don't use it. - # - # Recommended settings are as follows: - # - Microsoft Active Directory: (&({username_attribute}={input})(objectCategory=person)(objectClass=user)) - # - OpenLDAP: (&({username_attribute}={input})(objectClass=person))' or '(&({username_attribute}={input})(objectClass=inetOrgPerson)) - # - # To allow sign in both with username and email, one can use a filter like - # (&(|({username_attribute}={input})({mail_attribute}={input}))(objectClass=person)) - users_filter: (&({username_attribute}={input})(objectClass=bipaaPerson)(isActive=TRUE)) - - # An additional dn to define the scope of groups -# additional_groups_dn: ou=groups - - # The groups filter used in search queries to find the groups of the user. - # - {input} is a placeholder replaced by what the user inputs in the login form. - # - {username} is a placeholder replace by the username stored in LDAP (based on `username_attribute`). - # - {dn} is a matcher replaced by the user distinguished name, aka, user DN. - # - {username_attribute} is a placeholder replaced by what is configured in `username_attribute`. - # - {mail_attribute} is a placeholder replaced by what is configured in `mail_attribute`. - # - DON'T USE - {0} is an alias for {input} supported for backward compatibility but it will be deprecated in later versions, so please don't use it. - # - DON'T USE - {1} is an alias for {username} supported for backward compatibility but it will be deprecated in later version, so please don't use it. -# groups_filter: (&(member={dn})(objectclass=bipaaGroup)) - - # The attribute holding the name of the group -# group_name_attribute: cn - - # The attribute holding the mail address of the user -# mail_attribute: mail - - # The username and password of the admin user. -# user: cn=admin,dc=genouest,dc=org - # This secret can also be set using the env variables AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD -# password: XXXXXXXXXXXXXX - - # File backend configuration. - # - # With this backend, the users database is stored in a file - # which is updated when users reset their passwords. - # Therefore, this backend is meant to be used in a dev environment - # and not in production since it prevents Authelia to be scaled to - # more than one instance. The options under password_options have sane - # defaults, and as it has security implications it is highly recommended - # you leave the default values. Before considering changing these settings - # please read the docs page below: - # https://docs.authelia.com/configuration/authentication/file.html#password-hash-algorithm-tuning - # - ## file: - ## path: ./users_database.yml - file: - path: /etc/authelia/users.yml - password_options: - algorithm: argon2id - iterations: 1 - key_length: 32 - salt_length: 16 - memory: 1024 - parallelism: 8 - - -# Access Control -# -# Access control is a list of rules defining the authorizations applied for one -# resource to users or group of users. 
-# -# If 'access_control' is not defined, ACL rules are disabled and the 'bypass' -# rule is applied, i.e., access is allowed to anyone. Otherwise restrictions follow -# the rules defined. -# -# Note: One can use the wildcard * to match any subdomain. -# It must stand at the beginning of the pattern. (example: *.mydomain.com) -# -# Note: You must put patterns containing wildcards between simple quotes for the YAML -# to be syntactically correct. -# -# Definition: A 'rule' is an object with the following keys: 'domain', 'subject', -# 'policy' and 'resources'. -# -# - 'domain' defines which domain or set of domains the rule applies to. -# -# - 'subject' defines the subject to apply authorizations to. This parameter is -# optional and matching any user if not provided. If provided, the parameter -# represents either a user or a group. It should be of the form 'user:<username>' -# or 'group:<groupname>'. -# -# - 'policy' is the policy to apply to resources. It must be either 'bypass', -# 'one_factor', 'two_factor' or 'deny'. -# -# - 'resources' is a list of regular expressions that matches a set of resources to -# apply the policy to. This parameter is optional and matches any resource if not -# provided. -# -# Note: the order of the rules is important. The first policy matching -# (domain, resource, subject) applies. -access_control: - # Default policy can either be 'bypass', 'one_factor', 'two_factor' or 'deny'. - # It is the policy applied to any resource if there is no policy to be applied - # to the user. - default_policy: bypass - - rules: - # The login portal is freely accessible (redirectino loop otherwise) - - domain: auth.example.org - policy: bypass - - # Apollo needs to be authenticated - - domain: localhost - resources: - - "^/apollo/.*$" - policy: one_factor - - # traefik dashboard is restricted to a group from ldap - - domain: localhost - resources: - - "^/traefik/.*$" - policy: one_factor - subject: "group:ldap_admin" - - domain: localhost - resources: - - "^/traefik/.*$" - policy: deny - - # All galaxies are restricted to a group from ldap - - domain: localhost - resources: - - "^/sp/.+/galaxy/.*$" - policy: one_factor - subject: "group:ldap_admin" - - domain: localhost - resources: - - "^/sp/.+/galaxy$" - policy: deny - - # A genome restricted to an ldap group - - domain: localhost - resources: - - "^/sp/genus_species/.*$" - policy: one_factor - subject: "group:gspecies" - - domain: localhost - resources: - - "^/sp/genus_species/.*$" - policy: deny - - -# Configuration of session cookies -# -# The session cookies identify the user once logged in. -session: - # The name of the session cookie. (default: authelia_session). - name: authelia_bipaa_session - - # The secret to encrypt the session data. This is only used with Redis. - # This secret can also be set using the env variables AUTHELIA_SESSION_SECRET - secret: WXXXXXXXXXXXXXXXXXXXcXXXXXXXXXXXXXX - - # The time in seconds before the cookie expires and session is reset. - expiration: 3600000 # 1000 hour - - # The inactivity time in seconds before the session is reset. - # abretaud: We get an Unauthorized message when reaching this threshold => disabling by making > cookie lifetime - inactivity: 3700000 # 5 minutes - - # The remember me duration. - # Value of 0 disables remember me. - # Value is in seconds, or duration notation. 
See: https://docs.authelia.com/configuration/index.html#duration-notation-format - # Longer periods are considered less secure because a stolen cookie will last longer giving attackers more time to spy - # or attack. Currently the default is 1M or 1 month. - remember_me_duration: 1M - - # The domain to protect. - # Note: the authenticator must also be in that domain. If empty, the cookie - # is restricted to the subdomain of the issuer. - domain: genouest.org - - # The redis connection details - redis: - host: authelia-redis - port: 6379 - # This secret can also be set using the env variables AUTHELIA_SESSION_REDIS_PASSWORD - #password: authelia - # This is the Redis DB Index https://redis.io/commands/select (sometimes referred to as database number, DB, etc). - #database_index: 0 - -# Configuration of the authentication regulation mechanism. -# -# This mechanism prevents attackers from brute forcing the first factor. -# It bans the user if too many attempts are done in a short period of -# time. -regulation: - # The number of failed login attempts before user is banned. - # Set it to 0 to disable regulation. - max_retries: 3 - - # The time range during which the user can attempt login before being banned. - # The user is banned if the authentication failed 'max_retries' times in a 'find_time' seconds window. - # Find Time accepts duration notation. See: https://docs.authelia.com/configuration/index.html#duration-notation-format - find_time: 2m - - # The length of time before a banned user can login again. - # Ban Time accepts duration notation. See: https://docs.authelia.com/configuration/index.html#duration-notation-format - ban_time: 5m - -# Configuration of the storage backend used to store data and secrets. -# -# You must use only an available configuration: local, mysql, postgres -storage: - postgres: - host: authelia-db - port: 5432 - database: postgres - username: postgres - # # This secret can also be set using the env variables AUTHELIA_STORAGE_POSTGRES_PASSWORD - password: XXXXXXXX - -# Configuration of the notification system. -# -# Notifications are sent to users when they require a password reset, a u2f -# registration or a TOTP registration. -# Use only an available configuration: filesystem, gmail -notifier: - # For testing purpose, notifications can be sent in a file - ## filesystem: - ## filename: /tmp/authelia/notification.txt - - # Use a SMTP server for sending notifications. Authelia uses PLAIN or LOGIN method to authenticate. 
- # [Security] By default Authelia will: - # - force all SMTP connections over TLS including unauthenticated connections - # - use the disable_require_tls boolean value to disable this requirement (only works for unauthenticated connections) - # - validate the SMTP server x509 certificate during the TLS handshake against the hosts trusted certificates - # - trusted_cert option: - # - this is a string value, that may specify the path of a PEM format cert, it is completely optional - # - if it is not set, a blank string, or an invalid path; will still trust the host machine/containers cert store - # - defaults to the host machine (or docker container's) trusted certificate chain for validation - # - use the trusted_cert string value to specify the path of a PEM format public cert to trust in addition to the hosts trusted certificates - # - use the disable_verify_cert boolean value to disable the validation (prefer the trusted_cert option as it's more secure) - smtp: - #username: test - # This secret can also be set using the env variables AUTHELIA_NOTIFIER_SMTP_PASSWORD - #password: password - #secure: false - host: smtp-server-hostname - port: 25 - disable_require_tls: true - sender: bipaa@inrae.fr - - # Sending an email using a Gmail account is as simple as the next section. - # You need to create an app password by following: https://support.google.com/accounts/answer/185833?hl=en - ## smtp: - ## username: myaccount@gmail.com - ## # This secret can also be set using the env variables AUTHELIA_NOTIFIER_SMTP_PASSWORD - ## password: yourapppassword - ## sender: admin@example.com - ## host: smtp.gmail.com - ## port: 587 diff --git a/examples/config b/examples/config deleted file mode 100644 index 527f310895e3c9ef34b13d92bf74d67ec96e7b06..0000000000000000000000000000000000000000 --- a/examples/config +++ /dev/null @@ -1,21 +0,0 @@ -# This is the configuration file used by the gga_init.py, gga_load_data.py and run_workflow.py scripts -# It contains (sensitive) variables to set up different docker services - -# "all" section contains variables used by several services at once or the paths to import sensitive files that cannot be procedurally generated/formatted using the scripts -all: - custom_host: localhost # The hosting machine name - custom_authelia_config_path: /path/to/your/authelia/config # The path to the authelia config yml to use, an example is available in the "examples" folder at the repo root -# "galaxy" section contains variables used to set up the galaxy service -galaxy: - custom_galaxy_default_admin_email: admin@galaxy.org # The default admin email (used to connect to the instance) - custom_galaxy_defaut_admin_user: admin # The default admin user - custom_galaxy_default_admin_password: password # The default admin password (used to connect to the instance) - custom_galaxy_config_master_api_key: master # The master API key (not in use at the moment so you can skip this) - custom_galaxy_tripal_password: galaxy_tripal_password # Same as custom_tripal_admin_password (connection to db relies on this) - custom_galaxy_web_apollo_user: admin_apollo@galaxy.org # Apollo user - custom_galaxy_web_apollo_password: galaxy_apollo_password # Apollo password tied to the user above -tripal: - custom_tripal_admin_password: galaxy_tripal_password # Same as custom_galaxy_tripal_password (connection to db relies on this) - custom_banner: /path/to/banner # Custom banner, TODO: defaults to a generic banner - custom_theme: "abims_gga" # Use this to use another theme - custom_theme_git_clone:
"http://gitlab.sb-roscoff.fr/abims/e-infra/tripal_gga.git" # Use this to install another theme than the default one \ No newline at end of file diff --git a/examples/config_demo.yaml b/examples/config_demo.yaml deleted file mode 100644 index 3023e868a7bc7e146281621e8fee612951595e47..0000000000000000000000000000000000000000 --- a/examples/config_demo.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# This is the configuration file used by the gga_init.py, gga_load_data.py and run_workflow.py scripts -# It contains (sensible) variables to set up different docker services - -# "all" section contains variables used by several services at once or the paths to import sensible files that cannot be procedurally generated/formatted using the scripts -all: - custom_host: localhost # The hosting machine name - custom_authelia_config_path: /path/to/your/authelia/config # The path to the authelia config yml to use, an example is available in the "examples" folder at the repo root -# "galaxy" section contains variables used to set up the galaxy service -galaxy: - custom_galaxy_default_admin_email: admin@galaxy.org # The default admin email (used to connect to the instance) - custom_galaxy_defaut_admin_user: admin # The default admin user - custom_galaxy_default_admin_password: password # The default admin password (used to connect to the instance) - custom_galaxy_config_master_api_key: master # The master API key (not in use at the moment so you can skip this) - custom_galaxy_tripal_password: galaxy_tripal_password # Same as custom_tripal_admin_password (connection to db relies on this) - custom_galaxy_web_apollo_user: admin_apollo@galaxy.org # OPTIONAL - Apollo user - custom_galaxy_web_apollo_password: galaxy_apollo_password # MANDATORY - Apollo password tied to the user above -tripal: - custom_tripal_admin_password: galaxy_tripal_password # MANDATORY - Same as custom_galay_tripal_password (connection to db relies on this) - custom_banner: /path/to/banner # OPTIONAL - Custom banner - custom_theme: "abims_gga" # OPTIONAL - Use this to use another theme - custom_theme_git_clone: "http://gitlab.sb-roscoff.fr/abims/e-infra/tripal_gga.git" # OPTIONAL - Use this to install another theme than the default one \ No newline at end of file diff --git a/examples/example.yml b/examples/example.yml deleted file mode 100644 index e718673e9de3e25f2412ed777d309121013b0bdd..0000000000000000000000000000000000000000 --- a/examples/example.yml +++ /dev/null @@ -1,68 +0,0 @@ -# Input file for the automated creation GGA docker stacks -# The file consists in a "list" of species for which the script will have to create these stacks/load data into galaxy/run workflows -# This file is internally turned into a list of dictionaries by the scripts - -ectocarpus_sp2_male: # Dummy value the user gives to designate the species (isn't used by the script) - description: - # Species description, leave blank if unknown or you don't want it to be used - # These parameters are used to set up the various urls and adresses in different containers - # The script requires at least the genus to be specified - genus: "ectocarpus" # Mandatory! - species: "sp2" # # Mandatory! 
- sex: "male" - strain: "" - common_name: "" - origin: "" - # the sex and strain, the script will look for files containing the genus, species, sex and strain of the species) - # If no file corresponding to the description is found, this path will be considered empty and the script will - # proceed to the next step (create the directory tree for the GGA docker stack) - data: - # Sequence of paths to the different datasets to copy and import into galaxy - # Paths must be absolute paths - genome_path: "/path/to/fasta" # Mandatory! - transcripts_path: "/path/to/fasta" # Mandatory! - proteins_path: "/path/to/fasta" # Mandatory! - gff_path: "/path/to/gff" # Mandatory! - interpro_path: "/path/to/interpro" - orthofinder_path: "/path/to/orthofinder" - blastp_path: "/path/to/blastp" - blastx_path: "/path/to/blastx" - # If the user has several datasets of the same 'nature' (gff, genomes, ...) to upload to galaxy, the next scalar is used by the script to differentiate - # between these different versions and name directories according to it and not overwrite the existing data - # If left empty, the genome will be considered version "1.0" - genome_version: "1.0" - # Same as genome version, but for the OGS analysis - ogs_version: "" - performed_by: "" - services: - # Describe what optional services to deploy for the stack - # By default, only tripal, tripaldb and galaxy services will be deployed - blast: "False" - wiki: "False" - apollo: "False" - -# Second example without the explanation -ectocarpus_sp2_female: - description: - genus: "ectocarpus" - species: "sp2" - sex: "female" - strain: "" - common_name: "" - origin: "" - data: - genome_path: "/path/to/fasta" - transcripts_path: "/path/to/fasta" - proteins_path: "/path/to/fasta" - gff_path: "/path/to/gff" - interpro_path: "/path/to/interpro" - orthofinder_path: "/path/to/orthofinder" - blastp_path: "/path/to/blastp" - blastx_path: "/path/to/blastx" - genome_version: "1.0" - ogs_version: "1.0" - performed_by: "" - services: - blast: "False" - wiki: "False" - apollo: "False" \ No newline at end of file diff --git a/examples/input_demo.yaml b/examples/input_demo.yaml deleted file mode 100644 index 79aaaa467ed75781049149e13819a8132d05e7b1..0000000000000000000000000000000000000000 --- a/examples/input_demo.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Input file for the automated creation GGA docker stacks -# The file consists of a "list" of species for which the script will have to create these stacks/load data into galaxy/run workflows - -ectocarpus_sp2_male: # Dummy value the user gives to designate the species (isn't used by the script) - # Species description, leave blank if unknown or you don't want it to be used - # These parameters are used to set up the various urls and addresses in different containers - # The script requires at least the genus to be specified - description: - genus: "ectocarpus" # Mandatory! - species: "sp4" - sex: "male" - strain: "" - common_name: "" - origin: "" - # Data files scalars contain paths to the source files that have to be loaded into galaxy - # WARNING: The paths must be absolute paths!
- # If any path is left blank and the "parent_directory" scalar is specified, this directory and ALL its subdirectories will be - # scanned for files corresponding to the description provided for the species (i.e if the user specified - # the sex and strain, the script will look for files containing the genus, species, sex and strain of the species) - # If no file corresponding to the description is found, this path will be considered empty and the script will - # proceed to the next step (create the directory tree for the GGA docker stack) - # If a path is left blank and the "parent_directory" scalar is also blank, no file will be loaded for this "path" scalar - # If the files are not named using this nomenclature, please provide all the paths in the corresponding scalars below - data: - # "parent_directory": (optional) directory from where to search files if a "***_path" scalar is empty - # NOTE: Try to set a parent directory "close" to the data files so as not to increase runtime - # If empty (""), the script will not search for files and no dataset will be loaded for the corresponding scalar - parent_directory: "/path/to/closest/parent/dir" - # "***_path": path to the file (optional if parent_directory is set and species "description" scalars are precised) - # TODO Not implemented yet - genome_path: "/path/to/fasta" - transcripts_path: "/path/to/fasta" - proteins_path: "/path/to/fasta" - gff_path: "/path/to/gff" - # If the user has several genomes to upload to galaxy, the next scalar is used by the script to differentiate - # between these different versions and name directories according to it. - # If left empty, the genome will be considered version "1.0" - genome_version: "1.0" - # Same as genome version, but for the analysis - ogs_version: "" - performed_by: "" \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py deleted file mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/utils/blastdb.py b/utils/blastdb.py deleted file mode 100755 index 2794ccbc3d7bee4a01a7d46138517481f2053267..0000000000000000000000000000000000000000 --- a/utils/blastdb.py +++ /dev/null @@ -1,298 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import argparse -import collections -import json -import logging as log -import os -import sys - -from shutil import copyfile -from subprocess import call - - -class BlastBank: - - def __init__(self, raw_org, data_dir_root, rel_path, fasta_file_name, db_dir_root, seq_type, path, is_multi): - self.raw_org = raw_org - self.org = prettify(raw_org) - self.data_dir_root = data_dir_root - self.rel_path = rel_path - self.fasta_file_name = fasta_file_name - self.db_dir_root = db_dir_root - self.seq_type = seq_type - self.path = path # http://bipaa.genouest.org/sp/xxx/ Can be the same as raw_org, or something else when having multiple genomes. 
- self.is_multi = is_multi - - self.fasta = os.path.join(data_dir_root, rel_path, fasta_file_name) - self.dest_path = os.path.splitext(os.path.join(db_dir_root, self.path, rel_path, fasta_file_name))[0] - self.title = sanitize(rel_path + '_' + os.path.splitext(self.fasta_file_name)[0]) - if self.is_multi: - fake_path = rel_path.split('/') - if len(fake_path) > 2: - fake_path = [fake_path[1]] + [fake_path[0]] + fake_path[2:] - fake_path = '/'.join(fake_path) - self.pretty_name = prettify(fake_path, True) - else: - self.pretty_name = self.org + ' ' + prettify(rel_path, False) - - with open(self.fasta, 'r') as f: - self.first_id = f.readline()[1:].rstrip() - - if self.seq_type == 'nucl': - if 'transcript' in self.fasta_file_name.lower() or 'cdna' in self.fasta_file_name.lower(): - self.pretty_name += " transcripts" - elif 'cds' in self.fasta_file_name.lower(): - self.pretty_name += " CDS" - else: - if 'protein' in self.fasta_file_name.lower() or 'pep' in self.fasta_file_name.lower() or 'proteome' in self.fasta_file_name.lower() or self.fasta_file_name.endswith('.faa'): - self.pretty_name += " proteins" - - # Just a stupid/hacky string used for sorting bank list - self.sort_key = 'a_' if 'genome' in self.title else 'b_' - self.sort_key += self.pretty_name - - def __str__(self): - return str({ - 'raw_org': self.raw_org, - 'org': self.org, - 'data_dir_root': self.data_dir_root, - 'rel_path': self.rel_path, - 'fasta_file_name': self.fasta_file_name, - 'db_dir_root': self.db_dir_root, - 'seq_type': self.seq_type, - 'path': self.path, - 'fasta': self.fasta, - 'dest_path': self.dest_path, - 'title': self.title, - 'pretty_name': self.pretty_name, - }) - - -def main(args): - - genome_path = os.path.basename(os.getcwd()) - if not args.multi_org: - genome_name = genome_path - data_dir_root = os.path.abspath(os.path.join('src_data')) - if not os.path.isdir(data_dir_root): - raise Exception("Could not find data dir: %s" % data_dir_root) - - db_dir_root = os.path.abspath(args.dest) - - ignore_list = ['func_annot', "apollo_source"] - if args.ignore: - ignore_list += args.ignore - - # Looking for files - log.info("Looking for fasta files in %s:" % data_dir_root) - banks = [] - for root, dirs, files in os.walk(data_dir_root, followlinks=True): - file_list = [os.path.realpath(os.path.join(root, filename)) for filename in files] - rel_path = root[len(data_dir_root) + 1:] - - skip_current = False - for ign in ignore_list: - if ign in rel_path: - skip_current = True - - if not skip_current: # skip useless data - for f in file_list: - f = os.path.basename(f) - if f.endswith('.fasta') or f.endswith('.fa') or f.endswith('.fna') or f.endswith('.faa'): - if args.multi_org: - genome_name = rel_path.split('/')[1] - - if 'protein' in f or 'pep.' 
in f or 'proteome' in f or f.endswith('.faa'): - seq_type = 'prot' - else: - seq_type = 'nucl' - new_bank = BlastBank(genome_name, data_dir_root, rel_path, f, db_dir_root, seq_type, genome_path, args.multi_org) - log.info("Found '%s' of type: %s" % (new_bank.fasta, new_bank.seq_type)) - banks.append(new_bank) - - if not banks: - log.info("No fasta file found.") - else: - for b in banks: - makeblastdb(b, args.dry_run, args.no_parse_seqids) - - nuc_list = collections.OrderedDict() - prot_list = collections.OrderedDict() - banks.sort(key=lambda x: x.sort_key) - for b in banks: - if b.seq_type == 'nucl': - if b.pretty_name not in nuc_list: - nuc_list[b.dest_path] = b.pretty_name - else: - nuc_list[b.dest_path] = "%s (%s)" % (b.pretty_name, b.fasta_file_name) - else: - if b.pretty_name not in prot_list: - prot_list[b.dest_path] = b.pretty_name - else: - prot_list[b.dest_path] = "%s (%s)" % (b.pretty_name, b.fasta_file_name) - - yml_dir = os.path.abspath('blast') - yml_file_path = os.path.abspath(os.path.join(yml_dir, 'banks.yml')) - links_file_path = os.path.abspath(os.path.join(yml_dir, 'links.yml')) - if not args.dry_run: - - log.info("List of bank names (to use in links.yml):") - write_titles(banks) - - log.info("Writing bank list in '%s'" % yml_file_path) - if not os.path.exists(yml_dir): - os.makedirs(yml_dir, mode=0o755) - yml_file = open(yml_file_path, 'w') - write_yml(yml_file, nuc_list, prot_list) - - log.info("Writing automatic links to links.yml in '%s'" % links_file_path) - if os.path.exists(links_file_path): - log.info("Making backup of previous links.yml to '%s'" % (links_file_path + '.back')) - copyfile(links_file_path, links_file_path + '.back') - links_yml_file = open(links_file_path, 'w') - write_links_yml(links_yml_file, banks, args.apollo) - - else: - log.info("List of bank names (to use in links.yml):") - write_titles(banks) - log.info("Would write bank list in '%s'" % yml_file_path) - write_yml(sys.stdout, nuc_list, prot_list) - log.info("Would write links.yml in '%s'" % links_file_path) - write_links_yml(sys.stdout, banks, args.apollo) - - -def write_yml(yml_file, nuc_list, prot_list): - - nuc = "~" - prot = "~" - - if nuc_list: - nuc = "\n ".join(['%s: %s' % (json.dumps(k), json.dumps(v)) for k, v in nuc_list.items()]) - if prot_list: - prot = "\n ".join(['%s: %s' % (json.dumps(k), json.dumps(v)) for k, v in prot_list.items()]) - - print("genouest_blast:", file=yml_file) - print(" db_provider:", file=yml_file) - print(" list:", file=yml_file) - print(" nucleic:", file=yml_file) - print(" %s" % nuc, file=yml_file) - print(" proteic:", file=yml_file) - print(" %s" % prot, file=yml_file) - - -def write_links_yml(yml_file, banks, apollo): - - for bank in banks: - print("", file=yml_file) - print("# %s" % (bank.pretty_name), file=yml_file) - - link = '' - if bank.seq_type == 'prot': - spl = bank.org.split() - if len(spl) > 2: - sp_str = '/'.join(spl[:2]) - sp_str += '-' + '-'.join(spl[2:]) - else: - sp_str = '/'.join(spl) - link = 'http://abims-gga.sb-roscoff.fr/sp/%s/feature/%s/polypeptide/{id}' % (bank.path, sp_str) - elif 'genome' in bank.title: - dataset_id = bank.org.lower() - spl = dataset_id.split() - if len(spl) == 2: # Genus species => gspecies - dataset_id = spl[0][:1] + spl[1] - elif len(spl) == 3: # Genus species strain1 => gsstrain1 - dataset_id = spl[0][:1] + spl[1][:1] + spl[2] - else: # Genus species some garbage => genus_species_some_garbage - dataset_id = dataset_id.replace(' ', '_') - if apollo: - link = '<a href="http://abims-gga.sb-roscoff.fr/sp/' + 
bank.path + '/jbrowse/?data=data%2F' + dataset_id + '&loc={id}{jbrowse_track}">{id}</a> <a href="http://abims-gga.sb-roscoff.fr/sp/' + bank.path + '/apollo/annotator/loadLink?loc={id}:1{apollo_track}">Apollo</a>' - else: - link = '<a href="http://abims-gga.sb-roscoff.fr/sp/' + bank.path + '/jbrowse/?data=data%2F' + dataset_id + '&loc={id}{jbrowse_track}">{id}</a>' - else: - spl = bank.org.split() - if len(spl) > 2: - sp_str = '/'.join(spl[:2]) - sp_str += '-' + '-'.join(spl[2:]) - else: - sp_str = '/'.join(spl) - link = 'http://abims-gga.sb-roscoff.fr/sp/%s/feature/%s/mRNA/{id}' % (bank.path, sp_str) - - if link: - print("%s:" % (bank.title), file=yml_file) - print(" db: '%s'" % (bank.title), file=yml_file) - print(" '*': '%s'" % (link), file=yml_file) - else: - print("# Skipped", file=yml_file) - - -def write_titles(banks): - - for bank in banks: - print("'%s' -> '%s' [%s]" % (bank.pretty_name, bank.title, bank.first_id)) - - -def makeblastdb(bank, dry_run, no_parse_seqids): - log.info("Formatting bank: %s ---> %s" % (bank.fasta, bank.dest_path)) - dest_dir = os.path.realpath(os.path.join(bank.dest_path, '..')) - if not os.path.exists(dest_dir): - log.info("Creating folder: %s" % dest_dir) - if not dry_run: - os.makedirs(dest_dir, mode=0o755) - parse = "-parse_seqids" - if no_parse_seqids: - parse = "" - cmd = "makeblastdb -in '%s' -dbtype '%s' %s -out '%s' -title '%s'" % (bank.fasta, bank.seq_type, parse, bank.dest_path, bank.title) - log.info("Running: %s" % cmd) - if not dry_run: - try: - retcode = call(cmd, shell=True) - if retcode != 0: - raise RuntimeError("Child was terminated by signal " + str(retcode)) - except OSError as e: - print("Execution failed:" + e, file=sys.stderr) - sys.exit(1) - - -def prettify(name, capital=True): - name = name.replace('_', ' ') - name = name.replace('/', ' ') - if capital: - name = name[0].upper() + name[1:] - - return name - - -def sanitize(name): - name = name.lower() - name = name.replace(' ', '_') - name = name.replace('/', '_') - - return name - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Generate blast databanks and update blast forms.' 
- ) - parser.add_argument("-v", "--verbose", help="Increase output verbosity.", - action="store_true") - parser.add_argument("-d", "--dry-run", help="Dry run: no modification will be done, for testing purposes.", - action="store_true") - parser.add_argument("-m", "--multi-org", help="Add this flag if there are multiple organisms in src_data.", - action="store_true") - parser.add_argument("-a", "--apollo", help="Add this flag to generate links to apollo.", - action="store_true") - parser.add_argument("-p", "--no-parse-seqids", help="Don't use the makeblastdb -parse_seqids option (use this in case you have strange looking sequence ids that make html files unreadable)", - action="store_true") - parser.add_argument("--ignore", help='Files or directories to ignore', nargs='*') - parser.add_argument("dest", help="Destination directory (not including the genome name, should be mounted on compute nodes)") - - args = parser.parse_args() - log.basicConfig(level=log.INFO) - if args.verbose: - log.basicConfig(level=log.DEBUG) - - main(args) diff --git a/utils/common-stringSubsitute.py b/utils/common-stringSubsitute.py deleted file mode 100755 index c4d22a9fe017a03feb3b276047924353fd864406..0000000000000000000000000000000000000000 --- a/utils/common-stringSubsitute.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import argparse -import os -import re -import sys - -# Return the file obtained by replacing the occurrences of pattern by the replacement string. -# Use of python method re.sub() -# python common-stringSubsitute.py -f file -p pattern -r replacement_string -# ex : python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)' - -if __name__ == '__main__': - - # Get arguments - parser = argparse.ArgumentParser(description="Return the file obtained by replacing the occurrences of pattern by the replacement string. Use of python method re.sub(). 
Example: python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'") - parser.add_argument('-i','--infile', help='Input file', required=True) - parser.add_argument('-o','--outfile', help='Output file', default='outfile') - parser.add_argument('-p','--pattern', help='Pattern string to be replaced', required=True) - parser.add_argument('-r','--repl', help='Replacement string', required=True) - args = parser.parse_args() - - infilename=args.infile - outfilename=args.outfile - pattern=args.pattern - repl=args.repl - - infile=open(infilename,'r') - outfile=open(outfilename,'w') - - lines=infile.readlines() - - for line in lines : - line_out=re.sub(pattern,repl,line) - outfile.write(line_out) - - outfile.close() \ No newline at end of file diff --git a/utils/docker_compose_generator.py b/utils/docker_compose_generator.py deleted file mode 100755 index 30747b148352f5cb668f07c0fc59d82885e879bd..0000000000000000000000000000000000000000 --- a/utils/docker_compose_generator.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import os -import argparse -import logging -# import yaml -# import ruamel.yaml -# import json - -""" -docker-compose_generator.py - -This method will write a formatted docker-compose.yml for the specified organism (only requires genus and species) -""" - - -class DockerComposeGenerator: - - def __init__(self): - self.mode = None - self.genus = None - self.species = None - self.organism_template = None - self.traefik_template = None - self.outdir = None - - def generate(self): - if self.organism_template is None: - self.organism_template = str(os.getcwd() + "/templates/gspecies_compose_template.yml") - else: - with open(self.organism_template, 'r') as infile: - organism_content = list() - for line in infile: - # Replace placeholders by the genus and species - organism_content.append(line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus + " " + self.species)).replace("Genus/species", str(self.genus + "/" + self.species)).replace("gspecies", str(self.genus.lower()[0] + self.species))) - self.write_yml(content=organism_content) - - if self.traefik_template is None: - self.traefik_template = str(os.getcwd() + "/templates/gspecies_compose_template.yml") - else: - with open(self.traefik_template, 'r') as infile: - traefik_content = list() - for line in infile: - # Replace placeholders by the genus and species - traefik_content.append(line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus + " " + self.species)).replace("Genus/species", str(self.genus + "/" + self.species)).replace("gspecies", str(self.genus.lower()[0] + self.species))) - self.write_yml(content=traefik_content) - - def write_yml(self, content): - with open(self.outdir + "/docker-compose.yml", 'w') as outfile: - for line in content: - outfile.write(line) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generator of docker-compose.yml for GGA automated integration " - "following the templates available @ " - "https://gitlab.inria.fr/abretaud/genodock_demo/") - parser.add_argument("-g", "--genus", type=str, help="input genus") - parser.add_argument("-s", "--species", type=str, help="input species") - parser.add_argument("-o", "--organism-template", type=str, help="input organism template docker-compose.yml (compose or stack), optional") - parser.add_argument("-t", "--traefik-template", type=str, help="input 
traefik template docker-compose.yml (compose or stack), optional") - parser.add_argument("-m", "--main-dir", type=str, help="where to write the output traefik docker-compose.yml (defaults to cd, autoload places it in main directory)") - parser.add_argument("-d", "--organism-dir", type=str, help="where to write the output organism docker-compose.yml (defaults to cd, autoload places it in organism directory)") - args = parser.parse_args() - - dc_generator = DockerComposeGenerator() - dc_generator.genus = args.genus - dc_generator.species = args.species - if args.organism_template: - dc_generator.organism_template = args.organism_template - if args.traefik_template: - dc_generator.traefik_template = args.traefik_template - dc_generator.outdir = args.organism_dir if args.organism_dir else os.getcwd() - dc_generator.generate() diff --git a/utils/metadata_generator.py b/utils/metadata_generator.py deleted file mode 100755 index c03ff0ca8ea2834dfc2ef9989263c5ca7a9cd308..0000000000000000000000000000000000000000 --- a/utils/metadata_generator.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import logging -import yaml - -""" -Metadata generator for gga_auto_load - -Creates a file that summarizes actions taken by the autoload script (e.g. what was done in the dedicated galaxy instance) -This organism metadata file is located in the metadata directory of the organism directory (i.e. /genus_species/metadata) -By default, will also create/update a general metadata file (located in the parent directory, i.e. where all the organisms -directories are located) - -TODO: move inside autoload - -Metadata format: .yml -""" - - -class MetadataGenerator: - - def __init__(self, maindir): - self.maindir = maindir - self.genus = None - self.species = None - self.metadata = None - self.do_update = False - self.date = "01/01/2020" - - - # def read_metadata(self): - # for label, content in metadata.items(): - # print("FOO") - - def write_metadata(self): - with open(self.maindir + "/main_metadata.yml", "a") as metadata: - metadata.write("\n\nAdded " + self.genus + " " + self.species + "") \ No newline at end of file diff --git a/utils/phaeoexplorer-change_pep_fasta_header.sh b/utils/phaeoexplorer-change_pep_fasta_header.sh deleted file mode 100755 index 3cf614f745bfaef03725038f7bb9fac84a00011b..0000000000000000000000000000000000000000 --- a/utils/phaeoexplorer-change_pep_fasta_header.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -INFILE=$1 -OUTFILE=tmpfile - -FILE_HEADER_START=$(grep ">" $INFILE | cut -c 1-6 | sort | uniq) -HEADER_START_STRING=">mRNA." - -if [[ "$FILE_HEADER_START" == "$HEADER_START_STRING" ]] -then - /usr/local/genome2/mmo/scripts/common/common-stringSubstitute.py -i $INFILE -o $OUTFILE -p '^>mRNA' -r '>protein' - mv $OUTFILE $INFILE - echo "'>mRNA' replaced by '>protein'" -else - echo "Abort. 
Not all headers start with '>mRNA.':" - echo "$FILE_HEADER_START" -fi \ No newline at end of file diff --git a/utils/phaeoexplorer-change_transcript_fasta_header.sh b/utils/phaeoexplorer-change_transcript_fasta_header.sh deleted file mode 100755 index 99bd3ff2443e46a07ac3a960c0596872bcb37745..0000000000000000000000000000000000000000 --- a/utils/phaeoexplorer-change_transcript_fasta_header.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash -INFILE=$1 -OUTFILE=tmpfile -./common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA' || mv $OUTFILE $INFILE || echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1" \ No newline at end of file diff --git a/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak b/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak deleted file mode 100755 index 12ce4e56544070af8daddcb3f981b7e0dc81f3fd..0000000000000000000000000000000000000000 --- a/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -INFILE=$1 -OUTFILE=tmpfile -/home/fr2424/sib/alebars/gga_load_data/ext_scripts/common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA' -mv $OUTFILE $INFILE -echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1" diff --git a/utils/phaeoexplorer_ec32_orthologs_transfer.py b/utils/phaeoexplorer_ec32_orthologs_transfer.py deleted file mode 100644 index 20e6750c64fb27bfdfc1a27bcc3cd2985d4d1888..0000000000000000000000000000000000000000 --- a/utils/phaeoexplorer_ec32_orthologs_transfer.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import sys -import os -import argparse -import pickle -import logging - -def orthologs_transfer(manual_annotation_file, orthofinder_file, gff_file, outdir): - """ - Transfer of description between a manual annotation file and a gff file, using an orthofinder output file - to find the corresponding IDs between the two files - - :param manual_annotation_file: - :param orthofinder_file: - :param gff_file: - :param outdir: - :return: - """ - - manual_annotations_dict, orthofinder_dict, gff_dict = {}, {}, {} - species_filename = "" - - with open(orthofinder_file, 'r') as orthofinder: - next(orthofinder) # Skip header - # Mapping orthofinder's IDs to manual annotation's description - logging.info("Mapping Orthofinder's IDs to manual annotation's IDs...") - for orthofiner_line in orthofinder: - orthofiner_line_split = orthofiner_line.rstrip().split("\t") - orthofinder_dict[orthofiner_line_split[1].split(" ")[0]] = orthofiner_line_split[2].split(" ")[0] - - - with open(manual_annotation_file, 'r') as manual_annotation: - # Mapping orthofinder's IDs to manual annotation's description - logging.info("Mapping Orthofinder's IDs to descriptions...") - next(manual_annotation) # Skip header - for manual_annotation_line in manual_annotation: - manual_annotation_line_split = manual_annotation_line.rstrip().split("\t") - manual_annotations_dict[manual_annotation_line_split[0]] = ";".join([manual_annotation_line_split[0], manual_annotation_line_split[6]]) - - # Opening GFF, appending manual annotation's description to matching IDs - logging.info("Transferring manual descriptions to the GFF file...") - output_filename = str(os.path.join(os.path.abspath(outdir), os.path.basename(gff_file).split(".")[0]) + "_TRANSFERED.gff") - - with open(output_filename, 'a') as output_file: - output_file.truncate(0) # Erase previous file content - with open(gff_file, 'r') as gff: - for gff_line in gff: - if "ID=mRNA" in gff_line: # Look for mRNA items - 
gff_line_id = gff_line.split("\t")[8].split("=")[1].split(";")[0] # GFF file ID (matches manual annotation IDs) - if gff_line_id in orthofinder_dict.keys(): # The gff ID is supposed to match a unique ID (key) in the orthofinder file - try: - manual_annotation_value = manual_annotations_dict[orthofinder_dict[gff_line_id]] # Find the corresponding manual annotation to transfer - gff_line = "{0};ec32_ortholog={1};ec32_ortholog_description={2}\n".format(gff_line.strip(), orthofinder_dict[gff_line_id], manual_annotation_value) - except KeyError: # Just in case some values are missing in the manual annotation file (optional, can be removed, it will then exit instead) - continue - output_file.write(gff_line) - logging.info("Finished transferring descriptions for %s" % str(os.path.basename(gff_file))) - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser(description="Orthologs_transfer between a reference manual annotation and Orthofinder outputs. \ - \n\nRun example: python3 ec32_orthologs_transfer.py -d /path/to/desired/dir/ -o orthofinder_file.tsv \ - -g gff_file.gff -m manual_annotation_file.txt") - parser.add_argument("-o", "--orthofinder", - help="Orthofinder output (tabulated)") - parser.add_argument("-m", "--manual-annotation", - help="Manual annotation file (tabulated) or pickle file containing the dictionary (automatically generated when you input a new manual annotation file)") - parser.add_argument("-g", "--gff", - help="GFF input file (tabulated)") - parser.add_argument("-d", "--out-dir", - help="Output directory") - args = parser.parse_args() - - # WARNING for shell use case: - # The script doesn't check if the input files are valid. - # Files must be put in this order: $1=orthofinder file, $2=manual annotation file, $3=GFF file, $4=optional output directory - - if not args.manual_annotation: - logging.info("Please input a manual annotation file or a pickled manual annotation dictionary") - sys.exit() - if not args.orthofinder: - logging.info("Please input an orthofinder output file") - sys.exit() - if not args.gff: - logging.info("Please input a gff file") - sys.exit() - if not args.out_dir: - args.out_dir = "." 
- - orthologs_transfer(manual_annotation_file=args.manual_annotation, - orthofinder_file=args.orthofinder, - gff_file=args.gff, - outdir=args.out_dir) diff --git a/utils/phaeoexplorer_hectar_transfer.py b/utils/phaeoexplorer_hectar_transfer.py deleted file mode 100644 index ec9a62d90cf602420ceb1e0610229d7d97bf859f..0000000000000000000000000000000000000000 --- a/utils/phaeoexplorer_hectar_transfer.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import sys -import os -import argparse -import logging - -def hectar_transfer(hectar_file, orthofinder_file, gff_file, outdir): - """ - Transfer of description between a manual annotation file and a gff file, using an orthofinder output file - to find the corresponding IDs between the two files - - :param hectar_file: - :param orthofinder_file: - :param gff_file: - :param outdir: - :return: - """ - - manual_annotations_dict, orthofinder_dict, gff_dict = {}, {}, {} - species_filename = "" - - with open(orthofinder_file, 'r') as orthofinder: - next(orthofinder) # Skip header - # Mapping orthofinder's IDs to hectar IDs - logging.info("Mapping Orthofinder's IDs to hectar IDs...") - for orthofiner_line in orthofinder: - orthofiner_line_split = orthofiner_line.rstrip().split("\t") - orthofinder_dict[orthofiner_line_split[1].split(" ")[0]] = orthofiner_line_split[2].split(" ")[0] - - hectar_dict = {} - with open(hectar_file, 'r') as hectar: - # Mapping orthofinder's IDs to hectar description - logging.info("Mapping Orthofinder's IDs to descriptions...") - next(hectar) # Skip header - for hectar_line in hectar: - hectar_line_split = hectar_line.rstrip().split("\t") - hectar_dict[hectar_line_split[0].split(" ")[0]] = hectar_line_split[1] - - # Opening GFF, appending hectar description to matching IDs - logging.info("Transferring manual descriptions to the GFF file...") - output_filename = str(os.path.join(os.path.abspath(outdir), os.path.basename(gff_file).split(".")[0]) + "_TRANSFERED_HECTAR.gff") - - with open(output_filename, 'a') as output_file: - output_file.truncate(0) # Erase previous file content - with open(gff_file, 'r') as gff: - for gff_line in gff: - if "ID=mRNA" in gff_line: # Look for mRNA items - gff_line_id = gff_line.split("\t")[8].split("=")[1].split(";")[0] # GFF file ID (matches hectar IDs) - if gff_line_id in hectar_dict.keys(): # The gff ID is supposed to match a unique ID (key) in the orthofinder file - try: - hectar_value = hectar_dict[gff_line_id] # Find the corresponding manual annotation to transfer - gff_line = "{0};HECTAR_predicted_targeting_category={1}\n".format(gff_line.strip(), hectar_value) - except KeyError: # Just in case some values are missing in the hectar file (If removed it will then exit instead) - continue - output_file.write(gff_line) - logging.info("Finished transferring descriptions for %s" % str(os.path.basename(gff_file))) - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser(description="Orthologs_transfer between a reference manual annotation and Orthofinder outputs. 
\ - \n\nRun example: python3 ec32_orthologs_transfer.py -d /path/to/desired/dir/ -h hectar_file.tsv \ - -g gff_file.gff -m manual_annotation_file.txt") - parser.add_argument("-i", "--hectar", - help="Hectar output (tabulated)") - parser.add_argument("-o", "--orthofinder", - help="Manual annotation file (tabulated) or pickle file containing the dicionary (automatically generated when you input a new manual annotation file)") - parser.add_argument("-g", "--gff", - help="GFF input file (tabulated)") - parser.add_argument("-d", "--out-dir", - help="Output directory") - args = parser.parse_args() - - # WARNING for shell use case: - # The script doesn't check if the inout files are valid. - # Files must be put in this order: $1=orthofinder file, $2=manual annotation file, $3=GFF file, $4=optional output directory) - - if not args.hectar: - logging.info("Please input a hectar file") - sys.exit() - if not args.orthofinder: - logging.info("Please input a hectar output file") - sys.exit() - if not args.gff: - logging.info("Please input a gff file") - sys.exit() - if not args.out_dir: - args.out = "." - - hectar_transfer(hectar_file=args.hectar, - orthofinder_file=args.orthofinder, - gff_file=args.gff, - outdir=args.out_dir) diff --git a/workflows/Jbrowse.ga.bak b/workflows/Jbrowse.ga.bak deleted file mode 100644 index 091a955b5a94b34dd8f5a881182aa23c96319f30..0000000000000000000000000000000000000000 --- a/workflows/Jbrowse.ga.bak +++ /dev/null @@ -1,162 +0,0 @@ -{ - "a_galaxy_workflow": "true", - "annotation": "", - "format-version": "0.1", - "name": "Jbrowse", - "steps": { - "0": { - "annotation": "", - "content_id": null, - "errors": null, - "id": 0, - "input_connections": {}, - "inputs": [], - "label": null, - "name": "Input dataset", - "outputs": [], - "position": { - "left": 200, - "top": 200 - }, - "tool_id": null, - "tool_state": "{\"optional\": false}", - "tool_version": null, - "type": "data_input", - "uuid": "751caac1-d015-4d77-8a68-2c3debae0caf", - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "ac834ebd-236e-4539-86da-916da1ac8c5a" - } - ] - }, - "1": { - "annotation": "", - "content_id": null, - "errors": null, - "id": 1, - "input_connections": {}, - "inputs": [], - "label": null, - "name": "Input dataset", - "outputs": [], - "position": { - "left": 200, - "top": 290 - }, - "tool_id": null, - "tool_state": "{\"optional\": false}", - "tool_version": null, - "type": "data_input", - "uuid": "5cb81c38-64fe-4bdc-9043-bd862bdefc6d", - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "a9c468b7-935b-49b3-83d0-6eabafae8daf" - } - ] - }, - "2": { - "annotation": "", - "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.10+galaxy0", - "errors": null, - "id": 2, - "input_connections": { - "reference_genome|genome": { - "id": 0, - "output_name": "output" - }, - "track_groups_0|data_tracks_0|data_format|annotation": { - "id": 1, - "output_name": "output" - } - }, - "inputs": [ - { - "description": "runtime parameter for tool JBrowse", - "name": "reference_genome" - } - ], - "label": null, - "name": "JBrowse", - "outputs": [ - { - "name": "output", - "type": "html" - } - ], - "position": { - "left": 486, - "top": 200 - }, - "post_job_actions": {}, - "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.10+galaxy0", - "tool_shed_repository": { - "changeset_revision": "8774b28235bb", - "name": "jbrowse", - "owner": "iuc", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"action\": 
{\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"test\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"test\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"RuntimeValue\"}}, \"standalone\": \"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"RuntimeValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"mRNA\"}}, \"jbstyle\": {\"style_classname\": \"transcript\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": \"{{ MENU_URL }}\", \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "1.16.10+galaxy0", - "type": "tool", - "uuid": "ba7d15fd-8ffd-407d-9a45-47cd4be68bd2", - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "519355d7-82cc-47f0-a96c-3ee0e39aa7df" - } - ] - }, - "3": { - "annotation": "", - "content_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", - "errors": null, - "id": 3, - "input_connections": { - "organisms_0|jbrowse": { - "id": 2, - "output_name": "output" - } - }, - "inputs": [], - "label": null, - "name": "Add organisms to JBrowse container", - "outputs": [ - { - "name": "output", - "type": "html" - } - ], - "position": { - "left": 772, - "top": 200 - }, - "post_job_actions": {}, - "tool_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", - "tool_shed_repository": { - "changeset_revision": "11033bdad2ca", - "name": "jbrowse_to_container", - "owner": "gga", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"organisms\": [{\"__index__\": 0, \"jbrowse\": {\"__class__\": \"ConnectedValue\"}, \"name\": {\"__class__\": \"RuntimeValue\"}, \"advanced\": {\"unique_id\": {\"__class__\": \"RuntimeValue\"}}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "0.5.1", - "type": "tool", - "uuid": "1cf25ca3-2287-4b82-9e93-b8828eed70a2", - "workflow_outputs": [ - { - "label": null, - "output_name": "output", - "uuid": "f78c7496-18a9-4c47-ad7f-a3ac31456749" - } - ] - } - }, - "tags": [], - "uuid": "77f04b69-2dec-430b-891f-f4ddbf04d1db", - "version": 1 -} \ No newline at end of file