Skip to content
Snippets Groups Projects
Commit 9b4f2a91 authored by Loraine Gueguen
Browse files

Merge branch 'workflow_v2' into 'dev'

Workflow v2

See merge request !5
parents 273fb7c9 29933ed3
No related branches found
No related tags found
2 merge requests: !9 Release 2.0 (merge dev to master), !5 Workflow v2
This commit is part of merge request !9. Comments created here will be created in the context of that merge request.
......@@ -122,14 +122,14 @@ class GetData(speciesData.SpeciesData):
# Where to store blast results?
# search_excluded_datasets = ["interpro_path", "orthofinder_path", "blastp_path", "blastx_path"]
# # These datasets will not be searched if missing in the input file
# These datasets will not be searched if missing in the input file
# Copy datasets in the organism src_data dir tree correct folder
for k, v in datasets_to_get.items():
if v: # If dataset is not present in input file, skip copy
if k in genome_datasets:
logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir))
genome_fname = "v%s.fasta" % self.genome_version
genome_fname = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
try:
shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname))
except Exception as exc:
......@@ -137,19 +137,19 @@ class GetData(speciesData.SpeciesData):
elif k in annotation_datasets:
dataset_fname = ""
if k == "gff_path":
dataset_fname = "OGS%s.gff" % self.ogs_version
dataset_fname = "{0}_OGS{1}.gff".format(self.dataset_prefix, self.ogs_version)
elif k == "transcripts_path":
dataset_fname = "OGS%s_transcripts.fasta" % self.ogs_version
dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version)
elif k == "proteins_path":
dataset_fname = "OGS%s_proteins.fasta" % self.ogs_version
dataset_fname = "{0}_OGS{1}_proteins.fasta".format(self.dataset_prefix, self.ogs_version)
elif k == "orthofinder_path":
dataset_fname = "OGS%s_orthofinder.tsv" % self.ogs_version
dataset_fname = "{0}_OGS{1}_orthofinder.tsv".format(self.dataset_prefix, self.ogs_version)
elif k == "interpro_path":
dataset_fname = "OGS%s_interproscan.xml" % self.ogs_version
dataset_fname = "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version)
elif k == "blastp_path":
dataset_fname = "OGS%s_blastp.xml" % self.ogs_version
dataset_fname = "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version)
elif k == "blastx_path":
dataset_fname = "OGS%s_blastx.xml" % self.ogs_version
dataset_fname = "{0}_OGS{1}_blastx.xml".format(self.dataset_prefix, self.ogs_version)
logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir))
try:
shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname))
......
......@@ -64,8 +64,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
if not config["banner_path"] == "" and os.path.isfile(os.path.abspath(config["banner_path"])):
banner_dest_path = os.path.abspath("./banner.png")
logging.info("Custom banner path: %s" % self.config["banner_path"])
if os.path.samefile(os.path.abspath(config["banner_path"]),banner_dest_path):
shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
# if os.path.samefile(os.path.abspath(config["banner_path"]), banner_dest_path):
# shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
else:
logging.debug("Using default banner for Tripal pages")
self.config.pop("banner_path", None)
......
......@@ -59,20 +59,18 @@ class LoadData(speciesData.SpeciesData):
"""
Create or set the working history to the current species one
TODO - move to utilities?
:return:
"""
try:
histories = self.instance.histories.get_histories(name=str(self.full_name))
histories = self.instance.histories.get_histories(name=str(self.genus_species))
self.history_id = histories[0]["id"]
logging.info("History for {0}: {1}".format(self.full_name, self.history_id))
logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
except IndexError:
logging.info("Creating history for %s" % self.full_name)
self.instance.histories.create_history(name=str(self.full_name))
histories = self.instance.histories.get_histories(name=str(self.full_name))
logging.info("Creating history for {0} {1}".format(self.genus, self.species))
self.instance.histories.create_history(name=str(self.genus_species))
histories = self.instance.histories.get_histories(name=str(self.genus_species))
self.history_id = histories[0]["id"]
logging.info("History for {0}: {1}".format(self.full_name, self.history_id))
logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
return self.history_id
......@@ -82,9 +80,16 @@ class LoadData(speciesData.SpeciesData):
Will do nothing if H. sapiens isn't in the database
"""
get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
delete_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0")
get_organism_tool_version = get_organism_tool["version"]
delete_organism_tool_version = delete_organism_tool["version"]
logging.debug("Getting 'Homo sapiens' ID in instance's chado database")
get_sapiens_id_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0",
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % get_organism_tool_version,
history_id=self.history_id,
tool_inputs={"genus": "Homo", "species": "sapiens"})
get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
......@@ -95,7 +100,7 @@ class LoadData(speciesData.SpeciesData):
sapiens_id = str(
get_sapiens_id_final_output["organism_id"]) # needs to be str to be recognized by the chado tool
self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0",
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/%s" % delete_organism_tool_version,
history_id=self.history_id,
tool_inputs={"organism": str(sapiens_id)})
except bioblend.ConnectionError:
......@@ -386,9 +391,9 @@ if __name__ == "__main__":
load_data_for_current_species.connect_to_instance()
# Load the datasets into a galaxy library
logging.info("Setting up library for %s" % load_data_for_current_species.full_name)
logging.info("Setting up library for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))
load_data_for_current_species.setup_library()
logging.info("Successfully set up library in galaxy for %s" % load_data_for_current_species.full_name)
logging.info("Successfully set up library in galaxy for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))
# Set or get the history for the current organism
load_data_for_current_species.set_get_history()
......@@ -402,8 +407,8 @@ if __name__ == "__main__":
# load_data_for_current_species.purge_histories() # Testing purposes
logging.info("Data successfully loaded and imported for %s" % load_data_for_current_species.full_name)
logging.info("Data successfully loaded and imported for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))
else:
logging.critical("The galaxy container for %s is not ready yet!" % load_data_for_current_species.full_name)
logging.critical("The galaxy container for {0} {1} is not ready yet".format(load_data_for_current_species.genus, load_data_for_current_species.species))
sys.exit()
This diff is collapsed.
......@@ -39,11 +39,11 @@ class SpeciesData:
if parameters_dictionary["data"]["genome_version"] == "":
self.genome_version = "1.0"
else:
self.genome_version = parameters_dictionary["data"]["genome_version"]
self.genome_version = str(parameters_dictionary["data"]["genome_version"])
if parameters_dictionary["data"]["ogs_version"] == "":
self.ogs_version = "1.0"
else:
self.ogs_version = parameters_dictionary["data"]["ogs_version"]
self.ogs_version = str(parameters_dictionary["data"]["ogs_version"])
# TODO: catch blocks if key is absent in input
self.genome_path = parameters_dictionary["data"]["genome_path"]
......@@ -66,18 +66,27 @@ class SpeciesData:
self.full_name_lowercase = self.full_name.lower()
self.abbreviation = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"])
self.genus_species = self.genus_lowercase + "_" + self.species
self.genus_species = "{0}_{1}".format(self.genus.lower(), self.species.lower())
self.dataset_prefix = None
if self.sex is not None or self.sex != "":
self.dataset_prefix = self.genus[0].lower() + "_" + self.species.lower() + "_" + self.sex[0].lower()
else:
self.dataset_prefix = self.genus[0].lower() + "_" + self.species.lower()
# Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
self.org_id = None
self.genome_analysis_id = None
self.ogs_analysis_id = None
self.instance_url = None
self.instance = None
self.history_id = None
self.library = None
self.library_id = None
self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
self.main_dir = None
self.species_dir = None
self.org_id = None
self.genome_analysis_id = None
self.ogs_analysis_id = None
self.tool_panel = None
self.datasets = dict()
self.datasets_name = dict()
......@@ -88,7 +97,9 @@ class SpeciesData:
self.datasets = dict()
self.config = None # Custom config used to set environment variables inside containers
self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(), self.sex.lower()])["not_empty"])
self.species_folder_name = self.species_folder_name .replace("-", "_")
self.species_folder_name = self.species_folder_name .replace("-", "_").replace('__', '_').replace("(", "_").replace(")", "_")
if self.species_folder_name.endswith("_"):
self.species_folder_name = self.species_folder_name[0:-2]
self.existing_folders_cache = {}
self.bam_metadata_cache = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment