From abdd1b5de9f167ce6fbe6147d5f3f0397e4b243f Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Fri, 13 Mar 2020 17:21:18 +0100
Subject: [PATCH] fix for class attributes

---
 autoload.py | 325 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 211 insertions(+), 114 deletions(-)

diff --git a/autoload.py b/autoload.py
index 346d51e..82779cd 100644
--- a/autoload.py
+++ b/autoload.py
@@ -1,4 +1,5 @@
 from bioblend import galaxy
+import bioblend.galaxy.objects
 import bioblend
 import argparse
 import os
@@ -21,10 +22,12 @@ class Autoload:
     To run the workflows, place them in the same directory as this script, and add the method + the workflow parameters
     in the main invocation (at the end of the file)
+    TODO: store workflow as string in
     """

-    def __init__(self, species_parameters_dictionary: dict):
+    def __init__(self, species_parameters_dictionary: dict, args):
         self.species_parameters_dictionary = species_parameters_dictionary
+        self.args = args
         self.species = species_parameters_dictionary["species"]
         self.genus = species_parameters_dictionary["genus"]
         self.strain = species_parameters_dictionary["strain"]
@@ -38,7 +41,7 @@ class Autoload:
         self.full_name = " ".join([self.genus_lowercase, self.species, self.strain, self.sex])
         self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
         self.genus_species = self.genus_lowercase + "_" + self.species
-        self.instance_url = "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        self.instance_url = "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"  # testing
         self.instance: galaxy = None
         self.history_id = None
         self.library_id = None
@@ -48,6 +51,9 @@ class Autoload:
         self.genome_analysis_id = None
         self.ogs_analysis_id = None
         self.tool_panel = None
+        self.datasets = dict()
+        self.source_files = dict()
+        self.workflow_name = None

         # Test the connection to the galaxy instance for the current species
         # Additionally set some class attributes
@@ -70,7 +76,7 @@ class Autoload:
         self.main_dir = os.getcwd() + "/"
         self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"

-    def load_data_in_galaxy(self, method):
+    def load_data_in_galaxy(self):
         """
         - create the src_data directory tree for the species
         - change headers for pep file
@@ -105,40 +111,40 @@ class Autoload:
             logging.debug("insufficient permission to create src_data directory tree")

         # Data import into galaxy
-        source_files = dict()
+        self.source_files = dict()
         annotation_dir, genome_dir = None, None
         for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
             if "annotation/" in d:
                 annotation_dir = d
                 for f in os.listdir(d):
                     if f.endswith("proteins.fasta"):
-                        source_files["proteins_file"] = os.path.join(d, f)
+                        self.source_files["proteins_file"] = os.path.join(d, f)
                     elif f.endswith("transcripts-gff.fa"):
-                        source_files["transcripts_file"] = os.path.join(d, f)
+                        self.source_files["transcripts_file"] = os.path.join(d, f)
                     elif f.endswith(".gff"):
-                        source_files["gff_file"] = os.path.join(d, f)
+                        self.source_files["gff_file"] = os.path.join(d, f)
             elif "genome/" in d:
                 genome_dir = d
                 for f in os.listdir(d):
                     if f.endswith(".fa"):
-                        source_files["genome_file"] = os.path.join(d, f)
+                        self.source_files["genome_file"] = os.path.join(d, f)
         logging.debug("source files found:")
-        for k, v in source_files.items():
+        for k, v in self.source_files.items():
             logging.debug("\t" + k + "\t" + v)

         # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
         # production version
         modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh",
-                              source_files["proteins_file"]]
+                              self.source_files["proteins_file"]]
         # test version
         modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
-                              source_files["proteins_file"]]
+                              self.source_files["proteins_file"]]
-        logging.info("changing fasta headers in " + source_files["proteins_file"])
+        logging.info("changing fasta headers in " + self.source_files["proteins_file"])
         subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)

         # src_data cleaning
         if os.path.exists(annotation_dir + "outfile"):
-            subprocess.run(["mv", annotation_dir + "/outfile", source_files["proteins_file"]],
+            subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
                            stdout=subprocess.PIPE, cwd=annotation_dir)
         if os.path.exists(annotation_dir + "gmon.out"):
@@ -158,25 +164,43 @@ class Autoload:
         else:
             logging.info("data successfully loaded into docker container for " + self.full_name)

-        # gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
-        histories = self.instance.histories.get_histories(name=str(self.full_name + "_" + self.genome_version))
+        self.get_instance_attributes()
+        # self.history_id = self.instance.histories.get_current_history()["id"]
+
+        # import all datasets into current history
+        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
+        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
+        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
+        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
+
+    def get_instance_attributes(self):
+        """
+        Retrieve the attributes of a pre-existing Galaxy instance:
+        - working history ID
+        - library ID
+
+        :return:
+        """
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
         self.history_id = histories[0]["id"]
+        logging.debug("history ID: " + self.history_id)
         libraries = self.instance.libraries.get_libraries()  # normally only one library
         self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
+        logging.debug("library ID: " + self.library_id)
         instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
         folders_ids = {}
-        current_fo_name = ""
-        # folders ids: access to data to run the first tools
+        current_folder_name = ""
         for i in instance_source_data_folders:
             for k, v in i.items():
                 if k == "name":
                     folders_ids[v] = 0
-                    current_fo_name = v
+                    current_folder_name = v
                 if k == "id":
-                    folders_ids[current_fo_name] = v
-        logging.info("folders and datasets IDs: ")
-        datasets = dict()
+                    folders_ids[current_folder_name] = v
+        logging.debug("folders and datasets IDs: ")
+        self.datasets = dict()
         for k, v in folders_ids.items():
             logging.info("\t" + k + ": " + v)
             if k == "/genome":
@@ -185,8 +209,8 @@ class Autoload:
                 for e in v2:
                     if type(e) == dict:
                         if e["name"].endswith(".fa"):
-                            datasets["genome_file"] = e["ldda_id"]
-                            logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            self.datasets["genome_file"] = e["ldda_id"]
+                            logging.debug("\t\t" + e["name"] + ": " + e["ldda_id"])
             elif k == "/annotation/" + self.genus_species:
                 sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
                 for k2, v2 in sub_folder_content.items():
@@ -194,31 +218,31 @@ class Autoload:
                     if type(e) == dict:
                         # TODO: manage several files of the same type and manage versions
                         if e["name"].endswith("transcripts-gff.fa"):
-                            datasets["transcripts_file"] = e["ldda_id"]
-                            logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            self.datasets["transcripts_file"] = e["ldda_id"]
+                            logging.debug("\t\t" + e["name"] + ": " + e["ldda_id"])
                         elif e["name"].endswith("proteins.fasta"):
-                            datasets["proteins_file"] = e["ldda_id"]
-                            logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            self.datasets["proteins_file"] = e["ldda_id"]
+                            logging.debug("\t\t" + e["name"] + ": " + e["ldda_id"])
                         elif e["name"].endswith(".gff"):
-                            datasets["gff_file"] = e["ldda_id"]
-                            logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            self.datasets["gff_file"] = e["ldda_id"]
+                            logging.debug("\t\t" + e["name"] + ": " + e["ldda_id"])
                         elif e["name"].endswith("MALE"):
-                            datasets["gff_file"] = e["ldda_id"]
-                            logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-
-        self.history_id = self.instance.histories.get_current_history()["id"]
-        logging.debug("history ID: " + self.history_id)
-        # import all datasets into current history
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["genome_file"])
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["gff_file"])
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["transcripts_file"])
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["proteins_file"])
+                            self.datasets["gff_file"] = e["ldda_id"]
+                            logging.debug("\t\t" + e["name"] + ": " + e["ldda_id"])

-    def run_workflow(self, workflow_name, workflow_parameters):
+    def run_workflow(self, workflow_name, workflow_parameters, datamap):
         """
-
-        :param workflow_ga_file:
+        Run the "main" workflow in the galaxy instance
+        - import data to library
+        - load fasta and gff
+        - sync with tripal
+        - add jbrowse + organism
+        - fill in the tripal views
+
+        TODO: map tool name to step id
+        :param workflow_name:
         :param workflow_parameters:
+        :param datamap:
         :return:
         """
@@ -231,20 +255,38 @@ class Autoload:
         custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
         custom_ga_file_path = os.path.abspath(custom_ga_file)
         with open(workflow_ga_file, 'r') as ga_in_file:
-            ga_in = str(ga_in_file.readlines())
-            ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
-                                  str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
-            ga_in = ga_in.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
-                                  str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
-            ga_in = ga_in.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
+            workflow = str(ga_in_file.readlines())
+            workflow = workflow.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
+                                        str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
+            workflow = workflow.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
+                                        str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
+            workflow = workflow.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
             # test
-            ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
-                                  "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
+            workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
+                                        "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
             # production
-            # ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
-            #                       "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
-            ga_in = ga_in[2:-2]  # if the line under doesn't outputs a correct json
-            # ga_in = ga_in[:-2]  # if the line above doesn't outputs a correct json
+            # workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
+            #                             "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
+            workflow = workflow[2:-2]  # if the line under doesn't output a correct json
+            # workflow = workflow[:-2]  # if the line above doesn't output a correct json
+
+            workflow_dict = json.loads(workflow)  # doesn't work with eval()
+
+        self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+        self.workflow_name = workflow_name
+        workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name)
+        workflow_id = workflow_attributes[0]["id"]
+        show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
+        logging.debug("workflow ID: " + workflow_id)
+
+        logging.debug("inputs:")
+        logging.debug(show_workflow["inputs"])
+        self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
+                                                history_id=self.history_id,
+                                                params=workflow_parameters,
+                                                inputs=datamap,
+                                                inputs_by="")
+        self.instance.workflows.delete_workflow(workflow_id=workflow_id)

     def init_instance(self):
         """
@@ -256,24 +298,36 @@ class Autoload:
         :return:
         """
+        self.instance.histories.create_history(name=str(self.full_name))
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        logging.debug("history ID: " + self.history_id)
+        libraries = self.instance.libraries.get_libraries()  # normally only one library
+        self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
+        logging.debug("library ID: " + self.library_id)
+        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)

         # Delete Homo sapiens from Chado database
+        logging.info("getting sapiens ID in instance's chado database")
         get_sapiens_id_job = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
-                                                          tool_inputs={"genus": "Homo", "species": "species"},
-                                                          history=self.history_id)
+                                                          history_id=self.history_id,
+                                                          tool_inputs={"genus": "Homo", "species": "sapiens"})
         get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
         get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output)
         try:
+            logging.info("deleting Homo sapiens in the instance's chado database")
             get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
             sapiens_id = str(get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
             self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
                                          history_id=self.history_id,
                                          tool_inputs={"organism": str(sapiens_id)})
         except bioblend.ConnectionError:
-            logging.debug("homo sapiens isn't in the database")
+            logging.debug("Homo sapiens isn't in the instance's chado database")
         except IndexError:
+            logging.debug("Homo sapiens isn't in the instance's chado database")
             pass

         # Add organism (species) to chado
+        logging.info("adding organism to the instance's chado database")
         self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
                                      history_id=self.history_id,
                                      tool_inputs={"abbr": self.abbreviation,
@@ -281,6 +335,7 @@ class Autoload:
                                                   "species": self.species,
                                                   "common": self.common})
         # Add OGS analysis to chado
+        logging.info("adding OGS analysis to the instance's chado database")
         self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
                                      history_id=self.history_id,
                                      tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
@@ -290,6 +345,7 @@ class Autoload:
                                                   "date_executed": self.date})
         # Add genome analysis to chado
+        logging.info("adding genome analysis to the instance's chado database")
         self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
                                      history_id=self.history_id,
                                      tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
@@ -298,41 +354,56 @@ class Autoload:
                                                   "sourcename": "Genoscope",
                                                   "date_executed": self.date})

-        # Get the ID from OGS analysis in chado
-        org = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
-                                           history_id=self.history_id,
-                                           tool_inputs={"genus": self.genus, "species": self.species})
+        self.get_organism_and_analyses_ids()
+        logging.info("finished initializing instance")
+
+    def get_organism_and_analyses_ids(self):
+        """
+        Retrieve the current organism ID and the OGS and genome chado analysis IDs (needed to run some tools,
+        as Tripal/Chado doesn't accept organism/analysis names as valid inputs)
+
+        :return:
+        """
+        # Get the ID for the current organism in chado
+        org = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"genus": self.genus, "species": self.species})
         org_job_out = org["outputs"][0]["id"]
         org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
         try:
             org_output = json.loads(org_json_output)[0]
             self.org_id = str(org_output["organism_id"])  # needs to be str to be recognized by chado tools
         except IndexError:
-            logging.debug("no organism matching " + self.full_name + " exists in the Chado database")
+            logging.debug("no organism matching " + self.full_name + " exists in the instance's chado database")

-        ogs_analysis = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
-                                                    history_id=self.history_id,
-                                                    tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
+        # Get the ID for the OGS analysis in chado
+        ogs_analysis = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
         ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
         ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
         try:
             ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
-            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])  # needs to be str to be recognized by chado tools
+            self.ogs_analysis_id = str(
+                ogs_analysis_output["analysis_id"])  # needs to be str to be recognized by chado tools
         except IndexError:
-            logging.debug("no matching OGS analysis exists in the Chado database")
+            logging.debug("no matching OGS analysis exists in the instance's chado database")

-        genome_analysis = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
-                                                       history_id=self.history_id,
-                                                       tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
+        # Get the ID for the genome analysis in chado
+        genome_analysis = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
         genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
         genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
         try:
             genome_analysis_output = json.loads(genome_analysis_json_output)[0]
-            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])  # needs to be str to be recognized by chado tools
+            self.genome_analysis_id = str(
+                genome_analysis_output["analysis_id"])  # needs to be str to be recognized by chado tools
         except IndexError:
-            logging.debug("no matching genome analysis exists in the Chado database")
-
-        logging.info("finished initializing instance")
+            logging.debug("no matching genome analysis exists in the instance's chado database")

     def clean_instance(self):
         """
@@ -342,43 +413,69 @@ class Autoload:
         return None


-if __name__ == "main":
-    parser = argparse.ArgumentParser(description="Input genus, species, strain, version")
-    parser.add_argument("json", type=str, help="Input JSON file")
-    parser.add_argument("-v", "--verbose", help="Increase output verbosity")
-    parser.add_argument("--load-data", help="Create src_data directory tree and load data into galaxy")
-    parser.add_argument("--main-workflow", help="Run main workflow (initialize galaxy instance, load data into chado,"
-                                                "sync with tripal, create jbrowse and add organism to jbrowse")
-    args = parser.parse_args()
-
-    if args.verbose:
-        logging.basicConfig(level=logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-
-    sp_dict_list = list()
-    with open(args.json, 'r') as infile:
-        json_sp_dict = json.load(infile)
-        json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
-        for json_sp in json_sp_dict:
-            sp_dict_list.append(json_sp)
-
-    for sp_dict in sp_dict_list:
-        al = Autoload(species_parameters_dictionary=sp_dict)
-        if args.main_workflow:
-            workflow_parameters = dict()
-            workflow_parameters["0"] = {}
-            workflow_parameters["1"] = {}
-            workflow_parameters["2"] = {}
-            workflow_parameters["3"] = {}
-            workflow_parameters["4"] = {"organism": al.org_id,
-                                        "analysis_id": al.genome_analysis_id,
-                                        "do_update": "true"}  # the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true"
workflow_parameters["5"] = {"organism": al.org_id, - "analysis_id": al.ogs_analysis_id} - workflow_parameters["6"] = {"organism_id": al.org_id} - workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id} - workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id} - workflow_parameters["9"] = {"organism_id": al.org_id} - al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters) - +parser = argparse.ArgumentParser(description="Input genus, species, strain, version") +parser.add_argument("json", type=str, help="Input JSON file") +parser.add_argument("-v", "--verbose", + help="Increase output verbosity", + action="store_true") +parser.add_argument("--init-instance", + help="Initialization of galaxy instance. Run first in an empty instance", + action="store_true") +parser.add_argument("--load-data", + help="Create src_data directory tree and load its data into the instance", + action="store_true") +parser.add_argument("--run-main", + help="Run main workflow (load data into chado, sync all with tripal, " + "index tripal data, populate materialized view, " + "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse", + action="store_true") +args = parser.parse_args() + +if args.verbose: + logging.basicConfig(level=logging.DEBUG) +else: + logging.basicConfig(level=logging.INFO) + +sp_dict_list = list() +with open(args.json, 'r') as infile: + json_sp_dict = json.load(infile) + json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True) + for json_sp in json_sp_dict: + sp_dict_list.append(json_sp) + +for sp_dict in sp_dict_list: + al = Autoload(species_parameters_dictionary=sp_dict, args=args) + if args.init_instance: + logging.info("initializing the galaxy instance") + al.init_instance() + al.get_instance_attributes() + if args.load_data: + logging.info("loading data into galaxy") + al.load_data_in_galaxy() + if args.run_main: + logging.info("running main workflow") + al.get_organism_and_analyses_ids() + workflow_parameters = dict() + workflow_parameters["0"] = {} + workflow_parameters["1"] = {} + workflow_parameters["2"] = {} + workflow_parameters["3"] = {} + workflow_parameters["4"] = {"organism": al.org_id, + "analysis_id": al.genome_analysis_id, + "do_update": "true"} + workflow_parameters["5"] = {"organism": al.org_id, + "analysis_id": al.ogs_analysis_id} + workflow_parameters["6"] = {"organism_id": al.org_id} + workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id} + workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id} + workflow_parameters["9"] = {"organism_id": al.org_id} + workflow_parameters["10"] = {} + workflow_parameters["11"] = {} + + al.datamap = dict() + al.datamap["0"] = {"src": "hda", "id": al.datasets["genome_file"]} + al.datamap["1"] = {"src": "hda", "id": al.datasets["gff_file"]} + al.datamap["2"] = {"src": "hda", "id": al.datasets["proteins_file"]} + al.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]} + + al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap) -- GitLab