diff --git a/autoload.py b/autoload.py index 031fa04bde974586faa207a87a67065ce16d9265..c26495b92f655999e35c657c3ed60a7988c0d549 100644 --- a/autoload.py +++ b/autoload.py @@ -41,9 +41,8 @@ STEPS: class Autoload: """ - The Autoload class has attributs to interact with GGA as attributes + Autoload class contains attributes and functions to interact with GGA - TODO: store default main workflow as string? """ def __init__(self, species_parameters_dictionary, args): @@ -63,7 +62,7 @@ class Autoload: self.full_name = " ".join([self.genus_lowercase, self.species, self.strain, self.sex]) self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex]) self.genus_species = self.genus_lowercase + "_" + self.species - self.instance_url = "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/" # testing + self.instance_url = "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/" # testing! self.instance = None self.history_id = None self.library_id = None @@ -116,7 +115,6 @@ class Autoload: """ src_data_dir = os.path.join(self.species_dir, "/src_data") - sp_regex = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+" # TODO: improve regex for dirpath, dirnames, files in os.walk(self.source_data_dir): for name in files: @@ -133,7 +131,7 @@ class Autoload: re_dict["genome"] = None for char in organism_name_pattern: - + pass def generate_dir_tree(self): """ @@ -173,6 +171,7 @@ class Autoload: except PermissionError: logging.info("insufficient permission to create src_data directory tree") + def modify_fasta_headers(self): """ """ @@ -206,7 +205,7 @@ class Autoload: # Changing headers in the *proteins.fasta file from >mRNA* to >protein* # production version - modify_pep_headers = [str(self.main_dir) + "/gga_load_data/phaeoexplorer-change_pep_fasta_header.sh", + modify_pep_headers = [str(self.main_dir) + "/gga_load_data/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh", self.source_files["proteins_file"]] # test version # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh", @@ -214,7 +213,7 @@ class Autoload: logging.info("changing fasta headers in " + self.source_files["proteins_file"]) subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir) # production version - modify_pep_headers = [str(self.main_dir) + "/gga_load_data/phaeoexplorer-change_transcript_fasta_header.sh", + modify_pep_headers = [str(self.main_dir) + "/gga_load_data/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh", self.source_files["proteins_file"]] # test version # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh", @@ -351,6 +350,7 @@ class Autoload: custom_ga_file_path = os.path.abspath(custom_ga_file) with open(workflow_ga_file, 'r') as ga_in_file: workflow = str(ga_in_file.readlines()) + # ugly fix for the jbrowse parameters workflow = workflow.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}', str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"') workflow = workflow.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"', @@ -365,11 +365,11 @@ class Autoload: workflow = workflow[2:-2] # if the line under doesn't output a correct json # workflow = workflow[:-2] # if the line above doesn't output a correct json - workflow_dict = json.loads(workflow) # doesn't work with eval() + workflow_dict = json.loads(workflow) self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) self.workflow_name = workflow_name - workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name) + workflow_attributes = self.instance.workflows.get_workflows(name=self.workflow_name) workflow_id = workflow_attributes[0]["id"] show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id) logging.debug("workflow ID: " + workflow_id) @@ -397,10 +397,11 @@ class Autoload: histories = self.instance.histories.get_histories(name=str(self.full_name)) self.history_id = histories[0]["id"] logging.debug("history ID: " + self.history_id) - libraries = self.instance.libraries.get_libraries() # normally only one library + libraries = self.instance.libraries.get_libraries() # routine check: one library self.library_id = self.instance.libraries.get_libraries()[0]["id"] # project data folder/library logging.debug("library ID: " + self.history_id) instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id) + # Delete Homo sapiens from Chado database logging.info("getting sapiens ID in instance's chado database") get_sapiens_id_job = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2", diff --git a/docker_compose_generator.py b/docker_compose_generator.py index c7070757a10db407c078e6a41e78b7a490622129..e76198bae1a8835096a1f326394aa338c7a4c022 100644 --- a/docker_compose_generator.py +++ b/docker_compose_generator.py @@ -31,7 +31,7 @@ class DockerComposeGenerator: def generate(self): if self.template is None: - self.template = str(os.getcwd() + "/docker-compose.yml") + self.template = str(os.getcwd() + "/templates/docker-compose.yml") # default docker-compose if no input template was specified --> doesnt work, yaml doesnt support direct string replacement, needs tags (maybe TODO) (https://stackoverflow.com/questions/5484016/how-can-i-do-string-concatenation-or-string-replacement-in-yaml) # self.template = "{'version': '3.7', 'services': {'proxy': {'image': 'quay.io/abretaud/nginx-ldap:latest', 'volumes': ['./src_data/:/project_data/', './nginx/conf:/etc/nginx/conf.d'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-nginx.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/download`))', 'traefik.http.routers.genus_species-nginx.tls=true', 'traefik.http.routers.genus_species-nginx.entryPoints=webs', 'traefik.http.routers.genus_species-nginx.middlewares=sp-auth,sp-app-trailslash,sp-prefix', 'traefik.http.services.genus_species-nginx.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'tripal': {'image': 'quay.io/galaxy-genome-annotation/tripal:v2.x', 'depends_on': ['tripal-db', 'elasticsearch'], 'volumes': ['./docker_data/galaxy/:/export/:ro', './src_data/:/project_data/:ro', './src_data:/data:ro'], 'environment': {'DB_HOST': 'tripal-db.genus_species', 'BASE_URL_PATH': '/sp/genus_species', 'UPLOAD_LIMIT': '20M', 'MEMORY_LIMIT': '512M', 'TRIPAL_GIT_CLONE_MODULES': 'https://github.com/abretaud/tripal_rest_api.git[@c6f9021ea5d4c6d7c67c5bd363a7dd9359228bbc] https://github.com/tripal/tripal_elasticsearch.git[@dc7f276046e394a80a7dfc9404cf1a149006eb2a] https://github.com/tripal/tripal_analysis_interpro.git https://github.com/tripal/tripal_analysis_go.git https://github.com/tripal/tripal_analysis_blast.git https://github.com/tripal/tripal_analysis_expression.git[@7240039fdeb4579afd06bbcb989cb7795bd4c342]', 'TRIPAL_DOWNLOAD_MODULES': '', 'TRIPAL_ENABLE_MODULES': 'tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch', 'SITE_NAME': 'Genus species', 'ELASTICSEARCH_HOST': 'elasticsearch.genus_species', 'ENABLE_JBROWSE': '/jbrowse/?data=data/gspecies', 'ENABLE_APOLLO': 'https://localhost/apollo/', 'ENABLE_BLAST': 1, 'ENABLE_DOWNLOAD': 1, 'ENABLE_WIKI': 1, 'ENABLE_GO': '/organism/Genus/species?pane=GO', 'ENABLE_ORTHOLOGY': 0, 'ENABLE_ORTHOLOGY_LINKS': 'http://localhost/sp/orthology/', 'ADMIN_PASSWORD': 'XXXXXX'}, 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-tripal.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species`))', 'traefik.http.routers.genus_species-tripal.tls=true', 'traefik.http.routers.genus_species-tripal.entryPoints=webs', 'traefik.http.routers.genus_species-tripal.middlewares=sp-auth,sp-trailslash,sp-prefix,tripal-addprefix', 'traefik.http.services.genus_species-tripal.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'tripal-db': {'image': 'quay.io/galaxy-genome-annotation/chado:1.31-jenkins26-pg9.5', 'environment': ['POSTGRES_PASSWORD=postgres', 'INSTALL_CHADO_SCHEMA=0'], 'volumes': ['./docker_data/tripal_db/:/var/lib/postgresql/data/'], 'networks': ['genus_species']}, 'elasticsearch': {'image': 'docker.elastic.co/elasticsearch/elasticsearch:6.6.1', 'volumes': ['./docker_data/elastic_search_index/:/usr/share/elasticsearch/data/'], 'environment': {'bootstrap.memory_lock': 'true', 'xpack.security.enabled': 'false', 'xpack.monitoring.enabled': 'false', 'xpack.ml.enabled': 'false', 'xpack.graph.enabled': 'false', 'xpack.watcher.enabled': 'false', 'cluster.routing.allocation.disk.threshold_enabled': 'false', 'ES_JAVA_OPTS': '-Xms500m -Xmx500m', 'TAKE_FILE_OWNERSHIP': 'true'}, 'networks': ['genus_species']}, 'galaxy': {'image': 'quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod', 'volumes': ['../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py', './docker_data/galaxy/:/export/', './src_data/:/project_data/:ro', './docker_data/jbrowse/:/jbrowse/data/', './docker_data/apollo/:/apollo-data/', '../galaxy_nginx.conf:/etc/nginx/uwsgi_params'], 'environment': {'NONUSE': 'nodejs,proftp,reports,condor', 'GALAXY_LOGGING': 'full', 'GALAXY_CONFIG_BRAND': 'Genus species', 'GALAXY_CONFIG_ALLOW_LIBRARY_PATH_PASTE': 'True', 'GALAXY_CONFIG_USE_REMOTE_USER': 'True', 'GALAXY_CONFIG_REMOTE_USER_MAILDOMAIN': 'bipaa', 'GALAXY_CONFIG_ADMIN_USERS': 'admin@galaxy.org,gogepp@bipaa', 'ENABLE_FIX_PERMS': 0, 'PROXY_PREFIX': '/sp/genus_species/galaxy', 'GALAXY_TRIPAL_URL': 'http://tripal.genus_species/tripal/', 'GALAXY_TRIPAL_PASSWORD': 'XXXXXX', 'GALAXY_WEBAPOLLO_URL': 'http://one-of-the-swarm-node:8888/apollo/', 'GALAXY_WEBAPOLLO_USER': 'admin_apollo@bipaa', 'GALAXY_WEBAPOLLO_PASSWORD': 'XXXXXX', 'GALAXY_WEBAPOLLO_EXT_URL': '/apollo/', 'GALAXY_CHADO_DBHOST': 'tripal-db.genus_species', 'GALAXY_CHADO_DBSCHEMA': 'chado', 'GALAXY_AUTO_UPDATE_DB': 1, 'GALAXY_AUTO_UPDATE_CONDA': 1, 'GALAXY_AUTO_UPDATE_TOOLS': '/galaxy-central/tools_1.yaml', 'GALAXY_SHARED_DIR': '', 'BLAT_ENABLED': 1}, 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-galaxy.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/galaxy`))', 'traefik.http.routers.genus_species-galaxy.tls=true', 'traefik.http.routers.genus_species-galaxy.entryPoints=webs', 'traefik.http.routers.genus_species-galaxy.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-galaxy.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'jbrowse': {'image': 'quay.io/galaxy-genome-annotation/jbrowse:v1.16.8', 'volumes': ['./docker_data/galaxy/:/export/:ro', './src_data/:/project_data/:ro', './docker_data/jbrowse/:/jbrowse/data/:ro'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-jbrowse.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/jbrowse`))', 'traefik.http.routers.genus_species-jbrowse.tls=true', 'traefik.http.routers.genus_species-jbrowse.entryPoints=webs', 'traefik.http.routers.genus_species-jbrowse.middlewares=sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-jbrowse.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'blast': {'image': 'quay.io/abretaud/sf-blast:latest', 'depends_on': ['blast-db'], 'environment': {'DB_HOST': 'blast-db.genus_species', 'UPLOAD_LIMIT': '20M', 'MEMORY_LIMIT': '128M', 'DB_NAME': 'postgres', 'ADMIN_EMAIL': 'xxx@example.org', 'ADMIN_NAME': 'xxxxx', 'JOBS_METHOD': 'local', 'JOBS_WORK_DIR': '/xxxx/blast_jobs/', 'CDD_DELTA_PATH': '/db/cdd_delta/current/flat/cdd_delta', 'BLAST_TITLE': 'Genus species blast server', 'JOBS_SCHED_NAME': 'blast_gspecies', 'PRE_CMD': '. /local/env/envblast-2.6.0.sh; . /local/env/envpython-3.7.1.sh;', 'APACHE_RUN_USER': 'bipaaweb', 'APACHE_RUN_GROUP': 'bipaa', 'BASE_URL_PATH': '/sp/genus_species/blast/', 'UID': 55914, 'GID': 40259}, 'volumes': ['./blast/banks.yml:/var/www/blast/app/config/banks.yml:ro', './blast/links.yml:/etc/blast_links/links.yml:ro'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))', 'traefik.http.routers.genus_species-blast.tls=true', 'traefik.http.routers.genus_species-blast.entryPoints=webs', 'traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-blast.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'blast-db': {'image': 'postgres:9.6-alpine', 'environment': ['POSTGRES_PASSWORD=postgres', 'PGDATA=/var/lib/postgresql/data/'], 'volumes': ['./docker_data/blast_db/:/var/lib/postgresql/data/'], 'networks': ['genus_species']}, 'wiki': {'image': 'quay.io/abretaud/mediawiki', 'environment': {'MEDIAWIKI_SERVER': 'http://localhost', 'MEDIAWIKI_PROXY_PREFIX': '/sp/genus_species/wiki', 'MEDIAWIKI_SITENAME': 'Genus species', 'MEDIAWIKI_SECRET_KEY': 'XXXXXXXXXX', 'MEDIAWIKI_DB_HOST': 'wiki-db.genus_species', 'MEDIAWIKI_DB_PASSWORD': 'password', 'MEDIAWIKI_ADMIN_USER': 'abretaud'}, 'depends_on': ['wiki-db'], 'volumes': ['./docker_data/wiki_uploads:/images'], 'networks': ['traefik', 'genus_species'], 'deploy': {'labels': ['traefik.http.routers.genus_species-blast.rule=(Host(`localhost`) && PathPrefix(`/sp/genus_species/blast`))', 'traefik.http.routers.genus_species-blast.tls=true', 'traefik.http.routers.genus_species-blast.entryPoints=webs', 'traefik.http.routers.genus_species-blast.middlewares=sp-big-req,sp-auth,sp-app-trailslash,sp-app-prefix', 'traefik.http.services.genus_species-blast.loadbalancer.server.port=80'], 'restart_policy': {'condition': 'on-failure', 'delay': '5s', 'max_attempts': 3, 'window': '120s'}}}, 'wiki-db': {'image': 'postgres:9.6-alpine', 'volumes': ['./docker_data/wiki_db/:/var/lib/postgresql/data/'], 'networks': ['genus_species']}}, 'networks': {'traefik': {'external': True}, 'genus_species': {'driver': 'overlay', 'name': 'genus_species'}}}" # @@ -65,3 +65,4 @@ if __name__ == "__main__": dc_generator.template = args.template dc_generator.outdir = args.outdir dc_generator.generate() + print("foo") diff --git a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh b/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh index 957190f7ae265f2b7b6226f217ae003bab8b021d..99bd3ff2443e46a07ac3a960c0596872bcb37745 100755 --- a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh +++ b/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh @@ -1,8 +1,4 @@ -#!/usr/bin/env bash - -INFILE=$1 -OUTFILE=tmpfile - -./common-stringSubstitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA' -mv $OUTFILE $INFILE -echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1" +#!/usr/bin/env bash +INFILE=$1 +OUTFILE=tmpfile +./common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA' || mv $OUTFILE $INFILE || echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1" \ No newline at end of file diff --git a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak b/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak new file mode 100755 index 0000000000000000000000000000000000000000..196675b503a42188dce58d3b930e1b804aab6868 --- /dev/null +++ b/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +INFILE=$1 +OUTFILE=tmpfile +/home/fr2424/sib/alebars/gga_load_data/ext_scripts/common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA' +mv $OUTFILE $INFILE +echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1" diff --git a/table_parser.py b/table_parser.py index 0058184a51098beda5e9b320e3b6b416f169a116..0059fdbe72c3b08f63ea8f3b634f81b6e5545885 100755 --- a/table_parser.py +++ b/table_parser.py @@ -40,8 +40,7 @@ class TableParser: json_file.truncate(0) json_content = list() for organism in range(0, len(pandas_table.index)): - organism_data = pandas_table.iloc[organism] - organism_dict = organism_data.to_dict() + organism_dict = pandas_table.iloc[organism].to_dict() for k, v in organism_dict.items(): v = str(v).split(" ") v = "_".join(v) @@ -65,8 +64,7 @@ class TableParser: json_file.truncate(0) json_content = list() for organism in range(0, len(pandas_table.index)): - organism_data = pandas_table.iloc[organism] - organism_dict = organism_data.to_dict() + organism_dict = pandas_table.iloc[organism].to_dict() for k, v in organism_dict.items(): v = str(v).split(" ") v = "_".join(v) @@ -79,7 +77,7 @@ class TableParser: else: logging.info("input tabulated file doesn't have the correct extension (supported extensions: xls, xlsx, csv)") - def write_json(data, filename): + def write_json(self, data, filename): with open(filename, 'w') as f: json.dump(data, f, indent=4)