From 48cdd5a77e1a40cc23c40f931f49743475338d18 Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Thu, 9 Apr 2020 16:41:08 +0200
Subject: [PATCH] Delete main.py

---
 main.py | 352 --------------------------------------------------------
 1 file changed, 352 deletions(-)
 delete mode 100644 main.py

diff --git a/main.py b/main.py
deleted file mode 100644
index faf1140..0000000
--- a/main.py
+++ /dev/null
@@ -1,352 +0,0 @@
-from bioblend import galaxy
-import bioblend.galaxy.objects as bbo
-import bioblend as bb
-import argparse
-import os
-import sys
-import subprocess
-import logging
-import re
-import json
-from workflow import Workflow
-from toolrunner import ToolRunner
-"""
-TODO: script description
-python3 ~/PycharmProjects/ggauto/gga_load_data/main.py ~/PycharmProjects/ggauto/gga_load_data/dataloader.json
-
-"""
-
-
-def main():
-
-    parser = argparse.ArgumentParser(description="Load data into GGA galaxy instances from a JSON file "
-                                                 "describing genus, species, strain and version")
-    parser.add_argument("json", type=str, help="Input JSON file")
-    parser.add_argument("--just-load", action="store_true",
-                        help="Only load data into galaxy, do not create or run analyses in galaxy")
-    parser.add_argument("-v", "--verbose", action="store_true", help="Increase output verbosity")
-
-    # CLI stuff
-    # parser.add_argument("--name", help="Sample species name, format: genus-species",type=str)
-    # parser.add_argument("--strain", help="Strain of the sample species", type=str)
-    # parser.add_argument("--version", help="Data version (e.g 1.0, 1.2, ...)", type=str)
-    # parser.add_argument("--common-name", help="Vernacular/common name of the species",type=str)
-
-    user_arguments = parser.parse_args()
-
-    logging.basicConfig(level=logging.INFO)
-
-    # List that will hold all dicts from the JSON input file, containing parameters for each species
-    sp_dict_list = []
-
-    # JSON parsing and loading
-    with open(user_arguments.json, 'r') as infile:
-        json_sp_dict = json.load(infile)
-        json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
-        for json_sp in json_sp_dict:
-            sp_dict_list.append(json_sp)
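-
-    # For reference, each JSON entry is expected to look roughly like the following
-    # (keys inferred from the accesses below; the values are made-up placeholders):
-    # {"genus": "Ectocarpus", "species": "species1", "common": "", "strain": "",
-    #  "sex": "male", "ogs version": "1.0", "genome version": "1.0", "performed by": ""}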
-
-    # Get variables from the current species dict
-    for sp_dict in sp_dict_list:
-        sp_params_list = []
-        genus = sp_dict["genus"]
-        genus_lower = genus[0].lower() + genus[1:]
-        species = sp_dict["species"]
-        genus_species = genus_lower + "_" + species
-        common = sp_dict["common"]
-        strain = sp_dict["strain"]
-        sex = sp_dict["sex"]
-        if strain != "":
-            genus_species_strain = genus_species + "_" + strain
-        else:
-            genus_species_strain = genus_species
-        ogs_version = sp_dict["ogs version"]
-        genome_version = sp_dict["genome version"]
-        performed_by = sp_dict["performed by"]
-
-        # CLI stuff
-        # genus = user_arguments.name.split('-')[0]
-        # genus_lower = genus[0].lower() + genus[1:]
-        # genus_upper = genus[0].upper() + genus[1:]
-        # species = user_arguments.name.split('-')[1]
-        # strain = user_arguments.strain
-        # vernacular = user_arguments.common_name
-
-        # TODO: prompt y/n to confirm the validity of the info
-        # Test address, change to abims-gga.sb-roscoff.fr/sp/ in production
-        instance_url = "http://localhost/sp/" + genus_lower + "_" + species + "/galaxy/"
-
-        print("Species: " + genus + " " + species + " (" + common + ")" +
-                     "\nStrain: " + strain +
-                     "\nAccessing instance " + instance_url)
-
-        # Connect to the galaxy instance of the current species TODO: connection issues (galaxy side)
-        gi = galaxy.GalaxyInstance(url=instance_url,
-                                   key="3b36455cb16b4d0e4348e2c42f4bb934",
-                                   email="alebars@sb-roscoff.fr",
-                                   password="pouet",
-                                   verify=True)
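-
-        # NOTE: the key/password above are local test credentials; in production they
-        # should come from the environment instead (cf. the commented-out lookups below)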
-
-        # admin_email = os.environ.get('GALAXY_DEFAULT_ADMIN_USER', 'admin@galaxy.org')
-        # admin_pass = os.environ.get('GALAXY_DEFAULT_ADMIN_PASSWORD', 'admin')
-
-        """
-        This part creates the current species directory and go to it
-        If it already exists, just move to it
-        To be expanded when docker-swarm is implemented (or all config files are ready), not useful for now
-        """
-        main_dir = os.getcwd() + "/"
-        sp_dir = os.path.join(main_dir, genus_species) + "/"
-
-        try:
-            os.mkdir(sp_dir)
-        except FileExistsError:
-            print("Directory " + sp_dir + " already exists")
-        try:
-            os.chdir(sp_dir)
-            wd = os.getcwd()
-        except OSError:
-            print("Cannot access " + sp_dir + ", run with higher privileges")
-            break
-
-        # Production instance example TODO: secure password and API key + manage API keys
-        # gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
-        #                            key="84dfbee3c0efa9155518f01fbeff57c8",
-        #                            email="gga@sb-roscoff.fr",
-        #                            password="****")
-
-        # Check connection to the current instance
-        print("Testing connection to the galaxy instance")
-        try:
-            hl = gi.histories.get_histories()
-        except bb.ConnectionError:
-            print("Cannot connect to galaxy instance @ " + instance_url)
-
-        else:
-            print("Successfully connected to galaxy instance @ " + instance_url)
-
-        # TODO: FTP/symlink stuff to retrieve the datasets + change headers in pep.fasta
-
-        # ---------------------------------------------------------------------
-        # src_data directory tree creation
-        # ---------------------------------------------------------------------
-
-        src_data_folders = ["annotation", "genome"]
-        species_folder_name = "_".join([genus_lower, species, strain, sex])
-        try:
-            os.mkdir("./src_data")
-            os.mkdir("./src_data/annotation")
-            os.mkdir("./src_data/genome")
-            os.mkdir("./src_data/annotation/" + species_folder_name)
-            os.mkdir("./src_data/genome/" + species_folder_name)
-        except FileExistsError:
-            print("src_data directory tree already exists")
-        except PermissionError:
-            print("Insufficient permission to create src_data directory tree")
-
-        # ---------------------------------------------------------------------
-        # Data import into galaxy
-        # ---------------------------------------------------------------------
-
-        source_files = dict()
-        annotation_dir, genome_dir = None, None
-        for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
-            if "annotation/" in d:
-                annotation_dir = d
-                for f in os.listdir(d):
-                    if f.endswith("proteins.fasta"):
-                        source_files["proteins_file"] = os.path.join(d, f)
-                    elif f.endswith("transcripts-gff.fa"):
-                        source_files["transcripts_file"] = os.path.join(d, f)
-                    elif f.endswith(".gff"):
-                        source_files["gff_file"] = os.path.join(d, f)
-                # annotation_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
-            elif "genome/" in d:
-                genome_dir = d
-                for f in os.listdir(d):
-                    if f.endswith(".fa"):
-                        source_files["genome_file"] = os.path.join(d, f)
-                # genome_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
-                print("Source files found:")
-        for k, v in source_files.items():
-            print("\t" + k + "\t" + v)
-
-        # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
-        # production version:
-        # modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh",
-        #                       source_files["proteins_file"]]
-        # test version:
-        modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
-                              source_files["proteins_file"]]
-        print("Changing fasta headers in " + source_files["proteins_file"])
-        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
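-        # The script is assumed to write the renamed headers to "outfile" (handled in the
-        # cleanup below); a rough pure-Python sketch of the ">mRNA" -> ">protein" renaming:
-        # with open(source_files["proteins_file"]) as pep_in, open("outfile", "w") as pep_out:
-        #     for line in pep_in:
-        #         pep_out.write(line.replace(">mRNA", ">protein", 1) if line.startswith(">") else line)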
-
-        # src_data cleaning
-        if os.path.exists(annotation_dir + "outfile"):
-            subprocess.run(["mv", annotation_dir + "/outfile", source_files["proteins_file"]],
-                           stdout=subprocess.PIPE,
-                           cwd=annotation_dir)
-        if os.path.exists(annotation_dir + "gmon.out"):
-            subprocess.run(["rm", annotation_dir + "/gmon.out"],
-                           stdout=subprocess.PIPE,
-                           cwd=annotation_dir)
-
-        # TODO: load the data into the current species directory and load it into galaxy instance
-        setup_data_libraries = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
-        try:
-            print("Loading data into the galaxy container")
-            subprocess.run(setup_data_libraries,
-                           stdout=subprocess.PIPE,
-                           shell=True,
-                           check=True)  # check=True so a failure actually raises CalledProcessError
-        except subprocess.CalledProcessError:
-            print("Cannot load data into container for " + genus_species_strain)
-            break
-        else:
-            print("Data successfully loaded into docker container for " + genus_species_strain)
-
-        # generate workflow file and run it in the galaxy instance
-
-        # gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
-        hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
-        hi_id = hi[0]["id"]
-        li = gi.libraries.get_libraries()  # only one library
-        li_id = li[0]["id"]  # project data folder/library
-        fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)
-
-        fo_id = {}
-        current_fo_name = ""
-        # folders ids: access to data to run the first tools
-        for i in fo_gi:
-            for k, v in i.items():
-                if k == "name":
-                    fo_id[v] = 0
-                    current_fo_name = v
-                if k == "id":
-                    fo_id[current_fo_name] = v
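-        # fo_id now maps folder names to their ids,
-        # e.g. {"/genome": "<folder_id>", "/annotation/" + genus_species: "<folder_id>"}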
-        print("Folders and datasets IDs: ")
-        datasets = dict()
-        for k, v in fo_id.items():
-            print("\t" + k + ": " + v)
-            if k == "/genome":
-                sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
-                for k2, v2 in sub_folder_content.items():
-                    for e in v2:
-                        if isinstance(e, dict):
-                            if e["name"].endswith(".fa"):
-                                datasets["genome_file"] = e["ldda_id"]
-                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
-            elif k == "/annotation/" + genus_species:
-                sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
-                for k2, v2 in sub_folder_content.items():
-                    for e in v2:
-                        if isinstance(e, dict):
-                            # TODO: manage several files of the same type and versions
-                            if e["name"].endswith("transcripts-gff.fa"):
-                                datasets["transcripts_file"] = e["ldda_id"]
-                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith("proteins.fasta"):
-                                datasets["proteins_file"] = e["ldda_id"]
-                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith(".gff"):
-                                datasets["gff_file"] = e["ldda_id"]
-                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith("MALE"):
-                                datasets["gff_file"] = e["ldda_id"]
-                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
-
-        current_hi_id = gi.histories.get_current_history()["id"]
-        print("History ID: " + current_hi_id)
-        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["genome_file"])
-        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["gff_file"])
-        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
-        gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
-
-        toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
-        # toolrunner.show_pannel()  # show tools panel (with tool_id and versions)
-
-        # ---------------------------------------------------------------------
-        # Galaxy instance interaction
-        # ---------------------------------------------------------------------
-
-        # # Delete Homo sapiens from Chado database
-        # toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
-        # sapiens = toolrunner.get_sapiens_id()
-        # sapiens_job_out = sapiens["outputs"][0]["id"]
-        # sapiens_json_output = gi.datasets.download_dataset(dataset_id=sapiens_job_out)
-        # try:
-        #     sapiens_output = json.loads(sapiens_json_output)[0]
-        #     sapiens_id = str(sapiens_output["organism_id"])  # needs to be str to be recognized by the chado tool
-        #     toolrunner.delete_sapiens(hs_id=sapiens_id)
-        # except bb.ConnectionError:
-        #     print("Homo sapiens isn't in the database")
-        # except IndexError:
-        #     pass
-        #
-        # # Workflow generation
-        workflow = Workflow(parameters_dict=sp_dict, instance=gi, history_id=current_hi_id)
-        # wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="preset_workflow")
-        #
-        # tools = gi.tools.get_tool_panel()  # tools panel -> alternative to wf
-        # # print(tools)
-        #
-        # wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
-        #
-        # gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
-        # wf_name = workflow.get_workflow_name()
-        # wf_attr = gi.workflows.get_workflows(name=wf_name)
-        # wf_id = wf_attr[0]["id"]
-        # wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
-        # print("Workflow ID: " + wf_id)
-        #
-        # toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
-        # # toolrunner.purge_organisms()
-        #
-        # # wf_o = bbo.Workflow(wf_dict=wf_dict, gi=gi)
-        #
-        # wf_params = workflow.set_main_workflow_parameters(datasets=datasets)
-        # # print("Inputs:")
-        # # print(wf_show["inputs"])
-        #
-        # datamap = dict()
-        # datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
-        # datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
-        # datamap["2"] = {"src": "hda", "id": datasets["proteins_file"]}
-        # datamap["3"] = {"src": "hda", "id": datasets["transcripts_file"]}
-        #
-        # gi.workflows.invoke_workflow(workflow_id=wf_id,
-        #                              history_id=current_hi_id,
-        #                              params=wf_params,
-        #                              inputs=datamap)
-        # gi.workflows.delete_workflow(workflow_id=wf_id)
-        #
-        # datamap = dict()
-        # datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
-        # datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
-        #
-        wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="main")
-        wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
-        #
-        # gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
-        # wf_attr = gi.workflows.get_workflows(name="jbrowse")
-        # wf_id = wf_attr[0]["id"]
-        # wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
-        # print("Jbrowse workflow ID: " + wf_id)
-        # wf_params = workflow.set_jbrowse_workflow_parameters()
-        #
-        # allow_tool_state_corrections makes galaxy fill in missing tool states,
-        # because the workflow was edited outside of galaxy with only some inputs set (precautionary parameter)
-        # gi.workflows.invoke_workflow(workflow_id=wf_id,
-        #                              history_id=current_hi_id,
-        #                              params=wf_params,
-        #                              inputs=datamap,
-        #                              allow_tool_state_corrections=True)
-        # gi.workflows.delete_workflow(workflow_id=wf_id)
-
-        # remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
-        # gi.histories.delete_history(history_id=current_hi_id, purge=True)
-
-        os.chdir(main_dir)
-        print("\n")
-
-
-if __name__ == "__main__":
-    main()
-- 
GitLab