from bioblend import galaxy
import bioblend.galaxy.objects as bbo
import bioblend as bb
import argparse
import os
import sys
import subprocess
import json
import urllib3 as ul
from chado import ChadoInstance
from workflow import Workflow
from filetransfer import FileTransfer
from toolrunner import ToolRunner
from webscrap import WebScrap

"""
TODO: script description
python3 ~/PycharmProjects/ggauto/gga_load_data/main.py ~/PycharmProjects/ggauto/gga_load_data/dataloader.json

"""


class Autoload:
    """
    TODO: turn main into an object
    """
    def __init__(self, json_in):
        self.json_in = json_in

    def main(self):
        return None


def main():

    parser = argparse.ArgumentParser(description="Input genus, species, strain, version")
    parser.add_argument("json", type=str, help="Input JSON file")
    parser.add_argument("--just-load", help="Only load data into galaxy, does not create nor run analyses in galaxy")

    # CLI stuff
    # parser.add_argument("--name", help="Sample species name, format: genus-species",type=str)
    # parser.add_argument("--strain", help="Strain of the sample species", type=str)
    # parser.add_argument("--version", help="Data version (e.g 1.0, 1.2, ...)", type=str)
    # parser.add_argument("--common-name", help="Vernacular/common name of the species",type=str)

    user_arguments = parser.parse_args()

    # List that will hold all dicts from the JSON input file, containing parameters for each species
    sp_dict_list = []

    # JSON parsing and loading
    with open(user_arguments.json, 'r') as infile:
        json_sp_dict = json.load(infile)
        json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
        for json_sp in json_sp_dict:
            sp_dict_list.append(json_sp)

    # Get variables from the current species dict
    for sp_dict in sp_dict_list:
        sp_params_list = []
        genus = sp_dict["genus"]
        genus_lower = genus[0].lower() + genus[1:]
        species = sp_dict["species"]
        genus_species = genus_lower + "_" + species
        common = sp_dict["common"]
        strain = sp_dict["strain"]
        if strain != "":
            genus_species_strain = genus_species + "_" + strain
        else:
            genus_species_strain = genus_species
        ogs_version = sp_dict["ogs version"]
        genome_version = sp_dict["genome version"]
        performed_by = sp_dict["performed by"]

        # CLI stuff
        # genus = user_arguments.name.split('-')[0]
        # genus_lower = genus[0].lower().genus[1:]
        # genus_upper = genus[0].upper() + genus[1:]
        # species = user_arguments.name.split('-')[1]
        # strain = user_arguments.strain
        # vernacular = user_arguments.common_name

        # TODO: prompt y/n asking for the validity of info
        # Test adress, change to abims-gga.sb-roscoff.fr/sp/ in production
        instance_url = "http://localhost/sp/" + genus_lower + "_" + species + "/galaxy/"

        print("Species: " + genus + " " + species + " (" + common + ")"
              "\nStrain: " + strain +
              "\nAccessing instance " + instance_url)

        # Connect to the galaxy instance of the current species TODO: API key connection issues
        gi = galaxy.GalaxyInstance(url=instance_url,
                                   key="3b36455cb16b4d0e4348e2c42f4bb934",
                                   email="alebars@sb-roscoff.fr",
                                   password="pouet",
                                   verify=True)

        """
        This part creates the current species directory and go to it
        If it already exists, just move to it
        To be expanded when docker-swarm is implemented (or all config files are ready), not useful for now
        """
        main_dir = os.getcwd() + "/"
        sp_dir = os.path.join(main_dir, genus_species) + "/"

        try:
            os.mkdir(sp_dir)
        except FileExistsError:
            print("Directory " + sp_dir + " already exists")
        try:
            os.chdir(sp_dir)
            wd = os.getcwd()
        except OSError:
            print("Cannot access " + sp_dir + ", run with higher privileges")
            break

        # Production instance example TODO: secure pswd and API key + manage API keys
        # gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
        #                            key="84dfbee3c0efa9155518f01fbeff57c8",
        #                            email="gga@sb-roscoff.fr",
        #                            password="****")

        # Check connection to the current instance
        try:
            hl = gi.histories.get_histories()
        except bb.ConnectionError:
            print("Cannot connect to GGA instance @ " + instance_url)

        else:
            print("Successfully connected to instance " + instance_url)

        # TODO: FTP stuff to retrieve the datasets (used in testing, not needed for production)
        # try:
        #     os.mkdir("./src_data")
        # except FileExistsError:
        #     print("src_data folder already exists for " + genus_species_strain)
        # else:
        #     print("src_data folder created for " + genus_species_strain)
        #
        # TODO: load the data into the current species directory and load it into galaxy instance
        # setup_data_libraries_cl = \
        #     "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
        #
        # try:
        #     setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
        #     # output message from the data loading script
        #     setup_data_libraries_output = setup_data_libraries.communicate()
        # except Exception:
        #     print("Cannot load data into container for " + genus_species_strain)
        #     break
        # else:
        #     print("Data successfully loaded into docker container for " + genus_species_strain)

        # generate workflow file and run it in the galaxy instance

        gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
        hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
        hi_id = hi[0]["id"]
        li = gi.libraries.get_libraries()  # only one library
        li_id = gi.libraries.get_libraries()[0]["id"]  # project data folder/library
        fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)

        fo_id = {}
        current_fo_name = ""
        # folders ids: access to data to run the first tools
        for i in fo_gi:
            for k, v in i.items():
                if k == "name":
                    fo_id[v] = 0
                    current_fo_name = v
                if k == "id":
                    fo_id[current_fo_name] = v

        # TODO: turn data id parsing into a function
        print("Folders and datasets IDs: ")
        datasets = dict()
        for k, v in fo_id.items():
            print("\t" + k + ": " + v)
            if k == "/genome":
                sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
                for k2, v2 in sub_folder_content.items():
                    for e in v2:
                        if type(e) == dict:
                            if e["name"].endswith(".fa"):
                                datasets["genome_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
            elif k == "/annotation/" + genus_species:
                sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
                for k2, v2 in sub_folder_content.items():
                    for e in v2:
                        if type(e) == dict:
                            # TODO: manage several files of the same type
                            if e["name"].endswith("transcripts-gff.fa"):
                                datasets["transcripts_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
                            elif e["name"].endswith("proteins.fasta"):
                                datasets["proteins_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
                            elif e["name"].endswith(".gff"):
                                datasets["gff_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])

        current_hi_id = gi.histories.get_current_history()["id"]
        print("History ID: " + current_hi_id)
        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["genome_file"])
        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["gff_file"])
        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])

        # Workflow generation
        workflow = Workflow(parameters_dict=sp_dict, instance=gi, history_id = current_hi_id)
        wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="preset_workflow")

        tools = gi.tools.get_tool_panel()  # tools panel -> alternative to wf
        # print(tools)

        wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()

        gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
        wf_name = workflow.get_workflow_name()
        wf_attr = gi.workflows.get_workflows(name=wf_name)
        wf_id = wf_attr[0]["id"]
        wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
        print("Workflow ID: " + wf_id)

        toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
        # toolrunner.purge_organisms()

        # wf_o = bbo.Workflow(wf_dict=wf_dict, gi=gi)

        wf_params = workflow.set_main_workflow_parameters(datasets=datasets)
        print("Inputs:")
        print(wf_show["inputs"])

        datamap = dict()
        datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
        datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
        datamap["2"] = {"src": "hda", "id": datasets["proteins_file"]}
        datamap["3"] = {"src": "hda", "id": datasets["transcripts_file"]}
        #
        # gi.workflows.invoke_workflow(workflow_id=wf_id,
        #                              history_id=current_hi_id,
        #                              params=wf_params,
        #                              inputs=datamap)
        #
        gi.workflows.delete_workflow(workflow_id=wf_id)

        datamap = dict()
        datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
        datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}

        wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
        wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()

        gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
        wf_attr = gi.workflows.get_workflows(name="jbrowse")
        wf_id = wf_attr[0]["id"]
        wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
        print("Jbrowse workflow ID: " + wf_id)
        wf_params = workflow.set_jbrowse_workflow_parameters()

        gi.workflows.invoke_workflow(workflow_id=wf_id,
                                     history_id=current_hi_id,
                                     params=wf_params,
                                     inputs=datamap)
        gi.workflows.delete_workflow(workflow_id=wf_id)

        # remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
        # gi.histories.delete_history(history_id=current_hi_id, purge=True)

        os.chdir(main_dir)
        print("\n")


if __name__ == "__main__":
    main()