#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
create_input_instance.py

Create an object containing the data input from the yml file as attributes, which is then fed to the other scripts
This object is created using the data in the input yml file
"""

import argparse
import os
import logging
import sys
import json
import re
from datetime import datetime

import yaml


def parse_input(input_file):
    """
    Parse the yml input file to extract data to create the SpeciesData objects
    Return a list of dictionaries. Each dictionary contains data tied to a species

    The top-level "config" entry of the file is not a species and is left out of the result.
    The process exits (SystemExit) if the file name does not end with "yml"/"yaml" or if
    the content is not valid YAML.

    :param input_file: path to the YAML input file
    :return: list with one dictionary per species entry (empty list for an empty file)
    """

    if not str(input_file).endswith(("yml", "yaml")):
        logging.critical("Error, please input a YAML file")
        sys.exit()
    logging.debug("Input format used: YAML")

    with open(input_file, 'r') as stream:
        try:
            yaml_dict = yaml.safe_load(stream)
        except yaml.YAMLError:
            logging.critical("YAMLError raised")
            sys.exit()

    # safe_load returns None for an empty document: nothing to parse
    if yaml_dict is None:
        return []

    # Skip the "config" entry: it does not describe a species
    return [value for key, value in yaml_dict.items() if key != "config"]


def parse_args():
    """
    Build the command line parser and parse the arguments of the current process (sys.argv)

    :return: argparse.Namespace with the attributes input, verbose, deploy_stacks, load_data and run_workflow
    """

    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
                                                 "with galaxy instances for GGA"
                                                 ", following the protocol @ "
                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
    parser.add_argument("-i", "--input",
                        help="Input file (yml)")
    # store_true: verbosity is off by default and switched on by the flag
    parser.add_argument("-v", "--verbose",
                        help="Increase output verbosity",
                        action="store_true")
    parser.add_argument("--deploy-stacks",
                        help="Create and deploy the stacks of services",
                        action="store_true")
    parser.add_argument("--load-data",
                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets "
                             "into the instance, DEV",
                        action="store_true")
    # NOTE(review): unlike the other switches, this option has no action and therefore expects a value.
    # Left unchanged to keep the command line interface compatible -- confirm whether a flag was intended.
    parser.add_argument("--run-workflow",
                        help="Run main workflow (load data into chado, sync all with tripal, "
                             "index tripal data, populate materialized view, "
                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
    return parser.parse_args()


class SpeciesData:
    """
    This class contains attributes and functions to interact with the galaxy container of the GGA environment
    Parent class of LoadData, DeploySpeciesStack and RunWorkflow
    """

    def __init__(self, parameters_dictionary, args=None):
        """
        Store the description of one species and initialise the attributes filled in later on

        :param parameters_dictionary: dictionary of one species (as returned by parse_input), it must
            contain the "description" keys (species, genus, strain, sex, common_name, origin) and the
            "data" keys (performed_by, genome_version, ogs_version, parent_directory)
        :param args: already parsed command line arguments (needs a "verbose" attribute), the command
            line is parsed with parse_args() when omitted
        """

        description = parameters_dictionary["description"]
        data = parameters_dictionary["data"]

        self.parameters_dictionary = parameters_dictionary
        # Parsing the command line from the constructor is not a good design: pass "args" when possible
        self.args = parse_args() if args is None else args

        self.species = description["species"]
        self.genus = description["genus"]
        self.strain = description["strain"]
        self.sex = description["sex"]
        self.common = description["common_name"]
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = description["origin"]
        self.performed = data["performed_by"]

        # Versions left empty in the input file default to "1.0"
        self.genome_version = "1.0" if data["genome_version"] == "" else data["genome_version"]
        self.ogs_version = "1.0" if data["ogs_version"] == "" else data["ogs_version"]

        # Only the first letter of the genus changes case
        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
        self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
        self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
        self.genus_species = self.genus_lowercase + "_" + self.species
        # Testing with localhost/scratchgmodv1
        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"

        # Attributes initialised empty here
        self.instance = None
        self.history_id = None
        self.library_id = None
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.org_id = None
        self.genome_analysis_id = None
        self.ogs_analysis_id = None
        self.tool_panel = None
        self.datasets = dict()
        self.source_files = dict()
        self.workflow_name = None
        self.docker_compose_generator = None
        self.metadata = dict()

        # API key used to communicate with the galaxy instance. Set to "dev" for the moment.
        # Cannot be used to do user-tied actions
        # TODO: set the key in config file --> saved for later (master api key access actions are limited)
        self.api_key = "dev"

        # Directory/subdirectories where data files are located (fasta, gff, ...), point to a directory
        # as close as possible to the source files
        if data["parent_directory"] in ("", "/path/to/closest/parent/dir"):
            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
        else:
            self.source_data_dir = data["parent_directory"]

        # Update the instance (in histories corresponding to the input) instead of creating a new one
        # TODO: move this variable inside methods
        self.do_update = False

        if self.args.verbose:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)