Skip to content
Snippets Groups Projects
Commit 5f7757d4 authored by Loraine Gueguen's avatar Loraine Gueguen
Browse files

Set data file and dataset name as constants. Refactor run_wf (WIP)

parent d44121bf
No related branches found
No related tags found
2 merge requests: !24 (Run wf), !18 (Release v2.1.0)
This commit is part of merge request !18. Comments created here will be created in the context of that merge request.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Constants used in the input yaml
ORG_PARAM_NAME = "name"
ORG_PARAM_DESC = "description"
......@@ -24,7 +27,6 @@ ORG_PARAM_SERVICES = "services"
ORG_PARAM_SERVICES_BLAST = "blast"
ORG_PARAM_SERVICES_GO = "go"
# Constants used in the config yaml file
CONF_ALL_HOSTNAME = "hostname"
CONF_ALL_HTTP_PORT = "http_port"
......@@ -45,12 +47,23 @@ CONF_TRIPAL_THEME_NAME = "tripal_theme_name"
CONF_TRIPAL_THEME_GIT_CLONE = "tripal_theme_git_clone"
CONF_JBROWSE_MENU_URL = "jbrowse_menu_url"
# Data
# File-name suffixes of the annotation datasets. SpeciesData appends them to
# "<dataset_prefix>_OGS<ogs_version>_" to build the dataset file names.
FILENAME_SUFFIX_TRANSCRIPTS = "transcripts_gff.fasta"
FILENAME_SUFFIX_PROTEINS = "proteins.fasta"
FILENAME_SUFFIX_INTERPRO = "interproscan.xml"
FILENAME_SUFFIX_BLASTP = "diamond_blastp_vs_uniref90.xml" # Temporary constant: this value should be in the organism input file
FILENAME_SUFFIX_BLASTX = "diamond_blastx_vs_uniref90.xml" # Temporary constant: this value should be in the organism input file
FILENAME_SUFFIX_ORTHOFINDER = "orthologous_one2one_vs_Ec32.tsv" # Temporary constant: this value should be in the organism input file
# Date stamp shared by all datasets for now: it is embedded in the GFF file name
# and reused as the date parameter of the "add analysis" Galaxy tool.
DATA_DATE = "2021-02-24" # Temporary constant: this value should be in the organism input file, for each data
# default config file
DEFAULT_CONFIG = "examples/config"
# Galaxy tools
GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"
# Galaxy library
HOST_DATA_DIR='src_data'
CONTAINER_DATA_DIR_ROOT='/project_data'
GALAXY_LIBRARY_NAME = 'Project Data'
......
......@@ -101,7 +101,7 @@ class GetData(speciesData.SpeciesData):
for k, v in genome_datasets.items():
if v: # If dataset is not present in input file, skip copy
logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir))
genome_fname = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
genome_fname = self.genome_filename
try:
shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname))
except Exception as exc:
......@@ -111,19 +111,19 @@ class GetData(speciesData.SpeciesData):
if v: # If dataset is not present in input file, skip copy
dataset_fname = ""
if k == constants.ORG_PARAM_DATA_GFF_PATH:
dataset_fname = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, self.get_last_modified_time_string(os.path.abspath(v)))
dataset_fname = self.gff_filename
elif k == constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH:
dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version)
dataset_fname = self.transcripts_filename
elif k == constants.ORG_PARAM_DATA_PROTEINS_PATH:
dataset_fname = "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version)
dataset_fname = self.proteins_filename
elif k == constants.ORG_PARAM_DATA_ORTHOFINDER_PATH:
dataset_fname = "{0}_OGS{1}_orthofinder.tsv".format(self.dataset_prefix, self.ogs_version)
dataset_fname = self.orthofinder_filename
elif k == constants.ORG_PARAM_DATA_INTERPRO_PATH:
dataset_fname = "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version)
dataset_fname = self.interpro_filename
elif k == constants.ORG_PARAM_DATA_BLASTP_PATH:
dataset_fname = "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version)
dataset_fname = self.blastp_filename
elif k == constants.ORG_PARAM_DATA_BLASTX_PATH:
dataset_fname = "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version)
dataset_fname = self.blastx_filename
logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir))
try:
shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname))
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import constants
### Workflows
WORKFLOW_LOAD_FASTA_GFF_JBROWSE = "load_fasta_gff_jbrowse"
......@@ -17,7 +22,7 @@ ADD_ANALYSIS_TOOL_VERSION = "2.3.4+galaxy0"
ADD_ANALYSIS_TOOL_ID= ADD_ANALYSIS_TOOL_NAME + ADD_ANALYSIS_TOOL_VERSION
ADD_ANALYSIS_TOOL_CHANGESET_REVISION = "10b2b1c70e69"
ADD_ANALYSIS_TOOL_PARAM_PROGRAM = "Performed by Genoscope"
ADD_ANALYSIS_TOOL_PARAM_DATE = "2021-02-24"
ADD_ANALYSIS_TOOL_PARAM_DATE = constants.DATA_DATE
GET_ORGANISMS_TOOL_NAME = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/"
GET_ORGANISMS_TOOL_VERSION = "2.3.4+galaxy0"
......
This diff is collapsed.
......@@ -116,6 +116,15 @@ class SpeciesData:
else:
self.dataset_prefix = self.genus[0].lower() + "_" + self.species_lowercase
# Dataset file names, all derived from the dataset prefix.
# The genome file is versioned with the genome version; every annotation file
# is named "<dataset_prefix>_OGS<ogs_version>_<suffix>", where the suffix comes
# from the constants module (the GFF uses the shared data date instead).
self.genome_filename = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
self.gff_filename = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, constants.DATA_DATE)
# The constant is named FILENAME_SUFFIX_TRANSCRIPTS, like its FILENAME_SUFFIX_*
# siblings; the previous TRANSCRIPTS_FILENAME_SUFFIX spelling does not exist in
# the constants module and would raise AttributeError here.
self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_TRANSCRIPTS)
self.proteins_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_PROTEINS)
self.interpro_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_INTERPRO)
self.blastp_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTP)
self.blastx_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTX)
self.orthofinder_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_ORTHOFINDER)
# Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
self.org_id = None
self.genome_analysis_id = None
......@@ -131,8 +140,6 @@ class SpeciesData:
self.species_dir = None
self.tool_panel = None
self.datasets = dict()
self.datasets_name = dict()
self.source_files = dict()
self.workflow_name = None
self.metadata = dict()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment