Skip to content
Snippets Groups Projects
Commit 08074e9b authored by Arthur Le Bars
Browse files

reworked datasets and Chado IDs assignation

parent 6af39982
No related branches found
No related tags found
2 merge requests: !9 Release 2.0 (merge dev to master), !5 Workflow v2
...@@ -124,12 +124,18 @@ class GetData(speciesData.SpeciesData): ...@@ -124,12 +124,18 @@ class GetData(speciesData.SpeciesData):
# search_excluded_datasets = ["interpro_path", "orthofinder_path", "blastp_path", "blastx_path"] # search_excluded_datasets = ["interpro_path", "orthofinder_path", "blastp_path", "blastx_path"]
# # These datasets will not be searched if missing in the input file # # These datasets will not be searched if missing in the input file
dataset_shortname = ""
if self.sex is not None or self.sex != "":
dataset_shortname = self.genus[0].lower() + "_" + self.species.lower() + "_" + self.sex[0].lower()
else:
dataset_shortname = self.genus[0].lower() + "_" + self.species.lower()
# Copy datasets in the organism src_data dir tree correct folder # Copy datasets in the organism src_data dir tree correct folder
for k, v in datasets_to_get.items(): for k, v in datasets_to_get.items():
if v: # If dataset is not present in input file, skip copy if v: # If dataset is not present in input file, skip copy
if k in genome_datasets: if k in genome_datasets:
logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir)) logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_genome_dir))
genome_fname = "v%s.fasta" % self.genome_version genome_fname = "{0}_v{1}.fasta".format(dataset_shortname, self.genome_version)
try: try:
shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname)) shutil.copyfile(os.path.abspath(v), os.path.join(organism_genome_dir, genome_fname))
except Exception as exc: except Exception as exc:
...@@ -137,19 +143,19 @@ class GetData(speciesData.SpeciesData): ...@@ -137,19 +143,19 @@ class GetData(speciesData.SpeciesData):
elif k in annotation_datasets: elif k in annotation_datasets:
dataset_fname = "" dataset_fname = ""
if k == "gff_path": if k == "gff_path":
dataset_fname = "OGS%s.gff" % self.ogs_version dataset_fname = "{0}_OGS{1}.gff".format(dataset_shortname, self.genome_version)
elif k == "transcripts_path": elif k == "transcripts_path":
dataset_fname = "OGS%s_transcripts.fasta" % self.ogs_version dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(dataset_shortname, self.genome_version)
elif k == "proteins_path": elif k == "proteins_path":
dataset_fname = "OGS%s_proteins.fasta" % self.ogs_version dataset_fname = "{0}_OGS{1}_proteins.fasta".format(dataset_shortname, self.genome_version)
elif k == "orthofinder_path": elif k == "orthofinder_path":
dataset_fname = "OGS%s_orthofinder.tsv" % self.ogs_version dataset_fname = "{0}_OGS{1}_orthofinder.tsv".format(dataset_shortname, self.genome_version)
elif k == "interpro_path": elif k == "interpro_path":
dataset_fname = "OGS%s_interproscan.xml" % self.ogs_version dataset_fname = "{0}_OGS{1}_interproscan.xml".format(dataset_shortname, self.genome_version)
elif k == "blastp_path": elif k == "blastp_path":
dataset_fname = "OGS%s_blastp.xml" % self.ogs_version dataset_fname = "{0}_OGS{1}_blastp.xml".format(dataset_shortname, self.genome_version)
elif k == "blastx_path": elif k == "blastx_path":
dataset_fname = "OGS%s_blastx.xml" % self.ogs_version dataset_fname = "{0}_OGS{1}_blastx.xml".format(dataset_shortname, self.genome_version)
logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir)) logging.info("Copying {0} ({1}) into {2}".format(k, v, organism_annotation_dir))
try: try:
shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname)) shutil.copyfile(os.path.abspath(v), os.path.join(organism_annotation_dir, dataset_fname))
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment