#!/usr/bin/python
# -*- coding: utf-8 -*-


import bioblend
import bioblend.galaxy.objects
from bioblend import galaxy
import argparse
import os
import subprocess
import logging
import sys
import utilities
import speciesData
from datetime import datetime



""" 
gga_load_data.py

Usage: $ python3 gga_init.py -i example.yml [OPTIONS]
"""



class LoadData(speciesData.SpeciesData):
    """
    Load data from the src_data subfolders into the galaxy instance's history of a given species

    """

    def modify_fasta_headers(self):
        """
        Change the fasta headers before integration.

        :return:
        """

        try:
            os.chdir(self.species_dir)
            working_dir = os.getcwd()
        except OSError:
            logging.info("Cannot access " + self.species_dir + ", run with higher privileges")
            logging.info("Fatal error: exit")
            sys.exit()
        self.source_files = dict()
        annotation_dir, genome_dir = None, None
        for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
            if "annotation/" in d:
                annotation_dir = d
                for f in os.listdir(d):
                    if f.endswith("proteins.fasta"):
                        self.source_files["proteins_file"] = os.path.join(d, f)
                    elif f.endswith("transcripts-gff.fa"):
                        self.source_files["transcripts_file"] = os.path.join(d, f)
                    elif f.endswith(".gff"):
                        self.source_files["gff_file"] = os.path.join(d, f)
            elif "genome/" in d:
                genome_dir = d
                for f in os.listdir(d):
                    if f.endswith(".fa"):
                        self.source_files["genome_file"] = os.path.join(d, f)
                logging.debug("source files found:")
        for k, v in self.source_files.items():
            logging.debug("\t" + k + "\t" + v)

        # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
        # production version
        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/utils/phaeoexplorer-change_pep_fasta_header.sh",
                              self.source_files["proteins_file"]]
        # test version
        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
                              # self.source_files["proteins_file"]]
        logging.info("Changing fasta headers: " + self.source_files["proteins_file"])
        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
        # production version
        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/utils/phaeoexplorer-change_transcript_fasta_header.sh",
                              self.source_files["proteins_file"]]
        # test version
        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
        #                       self.source_files["proteins_file"]]
        logging.info("Changing fasta headers: " + self.source_files["transcripts_file"])
        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)

        # src_data cleaning
        if os.path.exists(annotation_dir + "outfile"):
            subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
                           stdout=subprocess.PIPE,
                           cwd=annotation_dir)
        if os.path.exists(annotation_dir + "gmon.out"):
            subprocess.run(["rm", annotation_dir + "/gmon.out"],
                           stdout=subprocess.PIPE,
                           cwd=annotation_dir)


    def setup_data_libraries(self):
        """
        - generate blast banks and docker-compose
        - load data into the galaxy container with the galaxy_data_libs_SI.py script

        :return:
        """

        try:
            logging.info("Loading data into the galaxy container")
            subprocess.run("../serexec genus_species_galaxy /tool_deps/_conda/bin/python /opt/galaxy_data_libs_SI.py", shell=True)
        except subprocess.CalledProcessError:
            logging.info("Cannot load data into the galaxy container for " + self.full_name)
            pass
        else:
            logging.info("Data successfully loaded into the galaxy container for " + self.full_name)

        self.get_species_history_id()
        # self.get_instance_attributes()
        #
        # # import all datasets into current history
        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])



    def generate_blast_banks(self):
        """
        Automatically generate blast banks for a species and commit

        :return:
        """


    def connect_to_instance(self):
        """
        Test the connection to the galaxy instance for the current organism
        Exit if it cannot connect to the instance
        """
        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password",
                                              verify=False)
        logging.info("Connecting to the galaxy instance ...")
        try:
            self.instance.histories.get_histories()
            self.tool_panel = self.instance.tools.get_tool_panel()
        except bioblend.ConnectionError:
            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
            sys.exit()
        else:
            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
        self.instance.histories.create_history(name="FOO")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
                                                 "with galaxy instances for GGA"
                                                 ", following the protocol @ "
                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")

    parser.add_argument("input",
                        type=str,
                        help="Input file (yml)")

    parser.add_argument("-v", "--verbose",
                        help="Increase output verbosity",
                        action="store_false")

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    logging.info("Load data: start")
    sp_dict_list = utilities.parse_input(args.input)

    for sp_dict in sp_dict_list:
        o = LoadData(parameters_dictionary=sp_dict)
        o.main_dir = os.path.abspath(args.dir)

        o.modify_fasta_headers()
        logging.info("Successfully formatted files headers " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)

        # o.setup_data_libraries()
        # logging.info("Successfully set up data libraries in galaxy for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)

    logging.info("Load data: done")