diff --git a/constants.py b/constants.py index 40d76b56aa8fdedae3af981bf147819f3d1cc1ed..1e3be48b57ecaee5b6e1d4834f6f8c2e6ce2317a 100644 --- a/constants.py +++ b/constants.py @@ -11,6 +11,7 @@ ORG_PARAM_DESC_PICTURE_PATH = "picture_path" ORG_PARAM_DESC_MAIN_SPECIES = "main_species" ORG_PARAM_DATA = "data" ORG_PARAM_DATA_GENOME_PATH = "genome_path" +ORG_PARAM_DATA_GENOME_CONTIG_PREFIX = "contig_prefix" ORG_PARAM_DATA_TRANSCRIPTS_PATH = "transcripts_path" ORG_PARAM_DATA_PROTEINS_PATH = "proteins_path" ORG_PARAM_DATA_GFF_PATH = "gff_path" diff --git a/examples/citrus_sinensis.yml b/examples/citrus_sinensis.yml index 38087bbe0cfe9b87dbd6519ddebd32730aab1dc7..af423e09ebe9e927c076f122594e896fc9c414f9 100644 --- a/examples/citrus_sinensis.yml +++ b/examples/citrus_sinensis.yml @@ -18,6 +18,10 @@ # Paths to the different datasets to copy and import into the galaxy container (as a shared library) # Must be absolute paths to the dataset genome_path: /path/to/repo/examples/src_data/genome/v1.0/Citrus_sinensis-scaffold00001.fasta # Mandatory! + # Required when there are multiple strains from the same species, to map the contig name prefix to the right JBrowse dataset ID in Tripal + # If there are multiple contig name prefixes, they must be separated by ";" + # example: "contig_prefix1;contig_prefix2" + contig_prefix: transcripts_path: /path/to/repo/examples/src_data/annotation/v1.0/Citrus_sinensis-orange1.1g015632m.g.fasta proteins_path: # Mandatory! gff_path: /path/to/repo/examples/src_data/annotation/v1.0/Citrus_sinensis-orange1.1g015632m.g.gff3 # Mandatory! 
diff --git a/gga_init.py b/gga_init.py index eee49686c1a7dc5afb51f7efe6d32a3e2460f84b..7416aab594712e352efcd7df75e40bbcf6f2d71e 100755 --- a/gga_init.py +++ b/gga_init.py @@ -127,7 +127,7 @@ class DeploySpeciesStack(speciesData.SpeciesData): # Create input file vars dict input_vars = {"genus": self.genus_lowercase, "Genus": self.genus_uppercase, "species": self.species, - "genus_species": self.genus_species, "genus_species_strain_sex": self.species_folder_name, + "genus_species": self.genus_species, "jbrowse_dataset_id": self.species_folder_name, "jbrowse_links": self.jbrowse_links, "genus_species_sex": "{0}_{1}_{2}".format(self.genus_lowercase, self.species_lowercase, self.sex), "strain": self.strain, "sex": self.sex, "Genus_species": "{0} {1}".format(self.genus_uppercase, self.species_lowercase), "blast": self.blast, "go": self.go, "picture_path": self.picture_path} @@ -397,12 +397,19 @@ if __name__ == "__main__": sp_dict_list = utilities.parse_input(os.path.abspath(args.input)) + #TODO: create SpeciesData objects in utilities.parse_input() + org_list = [] + for sp_dict in sp_dict_list: + org = DeploySpeciesStack(parameters_dictionary=sp_dict) + org_list.append(org) + # Create traefik directory and compose files if needed or specified if args.force_traefik or not os.path.isdir(os.path.join(os.path.abspath(main_dir), "traefik")): make_traefik_compose_files(config=config, main_dir=main_dir) unique_sp_dict_list = utilities.get_unique_species_dict_list(sp_dict_list=sp_dict_list) sp_picture_dict = utilities.get_sp_picture(sp_dict_list=sp_dict_list) + sp_jbrowse_links_dict = utilities.get_sp_jbrowse_links(org_list=org_list) logging.info("Deploying stacks for organisms in input file %s" % args.input) for sp_dict in unique_sp_dict_list: @@ -428,6 +435,7 @@ if __name__ == "__main__": logging.info("Successfully generated the directory tree for %s %s", deploy_stack_for_current_organism.genus, deploy_stack_for_current_organism.species) # Make compose files + 
deploy_stack_for_current_organism.jbrowse_links = sp_jbrowse_links_dict[deploy_stack_for_current_organism.genus_species] deploy_stack_for_current_organism.make_compose_files() logging.info("Successfully generated the docker-compose files for %s %s", deploy_stack_for_current_organism.genus, deploy_stack_for_current_organism.species) diff --git a/speciesData.py b/speciesData.py index 2c16840b475c6570906ab359fd07efe3ec3cb7b0..d8dd88c643d7216746ab3528c2ed228d9537cd3c 100755 --- a/speciesData.py +++ b/speciesData.py @@ -84,6 +84,7 @@ class SpeciesData: # TODO: catch blocks if key is absent in input self.genome_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_PATH] + self.contig_prefix = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_CONTIG_PREFIX] self.transcripts_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH] self.proteins_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_PROTEINS_PATH] self.gff_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GFF_PATH] diff --git a/templates/gspecies_compose.yml.j2 b/templates/gspecies_compose.yml.j2 index 4651cf460fdde2c44648ada68b31873ff9b57eb3..4f4300c2f7c351690160e2e3b52b06082eb88a85 100644 --- a/templates/gspecies_compose.yml.j2 +++ b/templates/gspecies_compose.yml.j2 @@ -60,7 +60,8 @@ services: TRIPAL_ENABLE_MODULES: "tripal_analysis_blast tripal_analysis_interpro tripal_analysis_go tripal_rest_api tripal_elasticsearch" SITE_NAME: "{{ Genus_species }}" ELASTICSEARCH_HOST: elasticsearch.{{ genus_species }} - ENABLE_JBROWSE: /jbrowse/?data=data/{{ genus_species_strain_sex }} + ENABLE_JBROWSE: /jbrowse/?data=data/{{ jbrowse_dataset_id }} + JBROWSE_LINKS: {{ jbrowse_links }} ENABLE_APOLLO: 0 ENABLE_BLAST: {{ blast }} ENABLE_DOWNLOAD: 1 diff --git a/templates/organisms.yml.j2 b/templates/organisms.yml.j2 index 019fce089740ba1634b15f2756f8d04e5fa2e5f3..fc932ed4b5d3d57ad739f75db031bc2e3d7c5944 100644 --- a/templates/organisms.yml.j2 +++ 
def get_sp_jbrowse_links(org_list):
    """
    Get the jbrowse links from all strains for each species

    :param org_list: iterable of organism objects exposing the attributes
        ``genus_species``, ``contig_prefix`` and ``species_folder_name``
    :return: dict mapping each ``genus_species`` to a string of
        ``"<contig_prefix>><species_folder_name>"`` entries joined by ";",
        in the order the organisms appear in ``org_list``. A species for which
        no organism defines a contig prefix has NO entry in the returned dict,
        so callers should look it up with ``.get()`` rather than ``[]``.
    """

    links_by_species = {}

    for org in org_list:
        # An empty "contig_prefix:" key in the YAML input is loaded as None,
        # not "". Skip every falsy value so that None does not reach the
        # string formatting below (the former `!= ""` test let None through
        # and crashed with a TypeError on concatenation).
        if not org.contig_prefix:
            continue
        # NOTE(review): a contig_prefix that itself contains ";" is emitted
        # verbatim ("p1;p2>dataset") - confirm the consumer of this string
        # handles that form.
        link = "{0}>{1}".format(org.contig_prefix, org.species_folder_name)
        links_by_species.setdefault(org.genus_species, []).append(link)

    return {gspecies: ";".join(links) for gspecies, links in links_by_species.items()}
org_param_dict["org_param_data_gff_path"] = constants.ORG_PARAM_DATA_GFF_PATH