From 0b4c80b710946479d339eaf5c88c07faab262688 Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Thu, 5 Mar 2020 16:06:12 +0100
Subject: [PATCH] Functional version 1.0: Chado loading + Tripal sync + jbrowse

---
 main.py                     | 122 ++++++++++++++-----------------
 toolrunner.py               | 123 ++++++++++++++++++++++++++++---
 workflow.py                 | 139 ++++++++++++++++++++++--------------
 workflows/tool_example.json |   0
 4 files changed, 252 insertions(+), 132 deletions(-)
 create mode 100644 workflows/tool_example.json

diff --git a/main.py b/main.py
index 3d0403f..7cd904d 100644
--- a/main.py
+++ b/main.py
@@ -1,22 +1,29 @@
 from bioblend import galaxy
+import bioblend.galaxy.objects as bbo
 import bioblend as bb
 import argparse
 import os
 import sys
 import subprocess
 import json
+import urllib3 as ul
+from chado import ChadoInstance
 from workflow import Workflow
 from filetransfer import FileTransfer
 from toolrunner import ToolRunner
-
-# TODO: script description
+from webscrap import WebScrap
 
 """
+TODO: script description
+python3 ~/PycharmProjects/ggauto/gga_load_data/main.py ~/PycharmProjects/ggauto/gga_load_data/dataloader.json
+
 """
 
 
 class Autoload:
-
+    """
+    TODO: turn main into an object
+    """
     def __init__(self, json_in):
         self.json_in = json_in
@@ -83,7 +90,7 @@ def main():
     # Connect to the galaxy instance of the current species TODO: API key connection issues
     gi = galaxy.GalaxyInstance(url=instance_url,
-                               key="0e993414b2f876515e74dd890f16ffc7",
+                               key="3b36455cb16b4d0e4348e2c42f4bb934",
                                email="alebars@sb-roscoff.fr",
                                password="pouet",
                                verify=True)
@@ -107,7 +114,7 @@ def main():
             print("Cannot access " + sp_dir + ", run with higher privileges")
             break
 
-    # Production instance example TODO: secure pswd and API key? + manage API keys (if needed)
+    # Production instance example TODO: secure pswd and API key + manage API keys
     # gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
     #                            key="84dfbee3c0efa9155518f01fbeff57c8",
     #                            email="gga@sb-roscoff.fr",
@@ -145,23 +152,13 @@ def main():
     # print("Data successfully loaded into docker container for " + genus_species_strain)
 
     # generate workflow file and run it in the galaxy instance
-    workflow = Workflow(parameters_dict=sp_dict, instance=gi)
-    print("Generating custom workflow for " + genus_species_strain)
-    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir)
     gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
     hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
     hi_id = hi[0]["id"]
-    # print(hi)
     li = gi.libraries.get_libraries()  # only one library
-    # print(li)
     li_id = gi.libraries.get_libraries()[0]["id"]  # project data folder/library
-    # print(li_id)
     fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)
-    # print(fo_gi)
     fo_id = {}
     current_fo_name = ""
@@ -174,16 +171,14 @@ def main():
             if k == "id":
                 fo_id[current_fo_name] = v
 
+    # TODO: turn data id parsing into a function
     print("Folders and datasets IDs: ")
     datasets = dict()
     for k, v in fo_id.items():
         print("\t" + k + ": " + v)
         if k == "/genome":
             sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
-            # print(sub_folder_content)
             for k2, v2 in sub_folder_content.items():
-                # print(k2)
-                # print(v2)
                 for e in v2:
                     if type(e) == dict:
                         if e["name"].endswith(".fa"):
                             datasets["genome_file"] = e["ldda_id"]
                             print("\t\t" + e["name"] + ": " + e["ldda_id"])
         elif k == "/annotation/" + genus_species:
             sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
-            # print(sub_folder_content)
             for k2, v2 in sub_folder_content.items():
-                # print(k2)
-                # print(v2)
                 for e in v2:
                     if type(e) == dict:
                         # TODO: manage several files of the same type
@@ -215,74 +207,66 @@ def main():
     gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
     gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
 
+    # Workflow generation
+    workflow = Workflow(parameters_dict=sp_dict, instance=gi, history_id=current_hi_id)
+    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="preset_workflow")
+
     tools = gi.tools.get_tool_panel()  # tools panel -> alternative to wf
     # print(tools)
 
     wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
-    # print(wf_dict)
-    # print(wf_dict_json.count("id"))
-    # TODO: fill in workflow inputs and attributes (doable?)
+    gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
-    print(gi.workflows.import_workflow_dict(workflow_dict=wf_dict))
     wf_name = workflow.get_workflow_name()
     wf_attr = gi.workflows.get_workflows(name=wf_name)
-    # print(wf_attr)
     wf_id = wf_attr[0]["id"]
     wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
     print("Workflow ID: " + wf_id)
-    print(wf_show["inputs"])
 
-    # toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
+    toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
     # toolrunner.purge_organisms()
 
-    wf_params = workflow.set_params(datasets=datasets)
+    # wf_o = bbo.Workflow(wf_dict=wf_dict, gi=gi)
+
+    wf_params = workflow.set_main_workflow_parameters(datasets=datasets)
+    print("Inputs:")
+    print(wf_show["inputs"])
 
     datamap = dict()
     datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
     datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
-    datamap["2"] = {"src": "hda", "id": datasets["transcripts_file"]}
-    datamap["4"] = {"src": "hda", "id": datasets["proteins_file"]}
+    datamap["2"] = {"src": "hda", "id": datasets["proteins_file"]}
+    datamap["3"] = {"src": "hda", "id": datasets["transcripts_file"]}
+
+    #
+    # gi.workflows.invoke_workflow(workflow_id=wf_id,
+    #                              history_id=current_hi_id,
+    #                              params=wf_params,
+    #                              inputs=datamap)
+    #
+    gi.workflows.delete_workflow(workflow_id=wf_id)
+
+    datamap = dict()
+    datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
+    datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
+
+    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
+    wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
+
+    gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
+    wf_attr = gi.workflows.get_workflows(name="jbrowse")
+    wf_id = wf_attr[0]["id"]
+    wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
+    print("Jbrowse workflow ID: " + wf_id)
+    wf_params = workflow.set_jbrowse_workflow_parameters()
 
-    # print(gi.workflows.show_workflow(wf_id))
-    gi.workflows.run_workflow(workflow_id=wf_id,
-                              history_id=current_hi_id,
-                              params=wf_params,
-                              dataset_map=datamap)
+    gi.workflows.invoke_workflow(workflow_id=wf_id,
+                                 history_id=current_hi_id,
+                                 params=wf_params,
+                                 inputs=datamap)
+    gi.workflows.delete_workflow(workflow_id=wf_id)
 
-    # for testing, purge configured @ ~/config/galaxy.yml.docker_sample
+    # remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
     # gi.histories.delete_history(history_id=current_hi_id, purge=True)
-    # gi.workflows.delete_workflow(workflow_id=wf_id)
-
-    # # PRE FILLED METHOD
-    # wf_id = workflow.store()
-    # hi_id = gi.histories.get_histories()[0]["id"]
-    # print("Workflow id: " + wf_id)
-    # print("History id: " + hi_id)
-    # wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
-    # # print(wf_show["inputs"])  # -> no inputs
-    # # workflow.port()
-    # li_id = gi.libraries.get_libraries()[0]["id"]
-    # # gi.folders.update_folder()  # TODO: add method to enable data updates
-    # tsi = gi.toolshed.get_repositories()  # tool shed repo access point
-    # # print(gi.users.get_users())  # TODO: users management
-    # # print(gi.libraries.show_library(li_id))
-    # fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location
-    # fo_id = {}
-    # current_fo_name = ""
-    # # print(fo_gi)
-    # # folders ids: access to data to run the first tools
-    # for i in fo_gi:
-    #     for k, v in i.items():
-    #         if k == "name":
-    #             fo_id[v] = 0
-    #             current_fo_name = v
-    #         if k == "id":
-    #             fo_id[current_fo_name] = v
-    # print("Folders id: ")
-    # for k, v in fo_id.items():
-    #     print("\t" + k + ": " + v)
-    # workflow.show()
-    # # gi.workflows.run_workflow(workflow_id=wf_id)  # pre filled workflow, use the set on runtime approach instead
 
     os.chdir(main_dir)
     print("\n")
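
Note on the main.py hunks above: the commit flags the folder/dataset ID parsing with "TODO: turn data id parsing into a function". A minimal sketch of such a helper, against the same bioblend GalaxyInstance calls the patch already uses (the helper name and the extension-to-key mapping for the annotation files are illustrative assumptions, not part of the commit):

    def parse_folder_datasets(gi, fo_id, genus_species):
        """Map a dataset role (e.g. "genome_file") to its library dataset (ldda) ID."""
        # Extension mapping is assumed for illustration; the commit only shows ".fa" -> genome_file
        ext_to_key = {".fa": "genome_file", ".gff": "gff_file"}
        datasets = {}
        for name, folder_id in fo_id.items():
            if name not in ("/genome", "/annotation/" + genus_species):
                continue
            contents = gi.folders.show_folder(folder_id=folder_id, contents=True)
            for entries in contents.values():
                if not isinstance(entries, list):
                    continue  # skip scalar folder metadata
                for entry in entries:
                    if isinstance(entry, dict):
                        for ext, key in ext_to_key.items():
                            if entry["name"].endswith(ext):
                                datasets[key] = entry["ldda_id"]
        return datasets
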
"species": self.species}) + + def get_ogs_analysis(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version}) + + def get_genome_analysis(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version}) + + def add_organism(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", + history_id=self.history, + tool_inputs={"abbr": self.abbr, + "genus": self.genus, + "species": self.species, + "common": self.common + }) + + def add_ogs(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version, + "program": "Performed by Genoscope", + "programversion": str("OGS" + self.genome_version), + "sourcename": "Genoscope", + "date_executed": self.date + }) + def add_genome(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version, + "program": "Performed by Genoscope", + "programversion": str("genome v" + self.genome_version), + "sourcename": "Genoscope", + "date_executed": self.date + }) + + def add_jbrowse(self, datasets): + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.5+galaxy7", + history_id=self.history, + tool_inputs={"genomes": datasets["genome_file"]}) + + def add_organism_jbrowse(self): + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + history_id=self.history, + tool_inputs={"multiple": "false", + "name": "foo", + "unique_id": "bar"}) + + # Homo sapiens deletion TODO: concat the 2 calls + def get_sapiens_id(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2", + history_id=self.history, + tool_inputs={"genus": "Homo", "species": "species"}) + + def delete_sapiens(self, hs_id): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", + 
history_id=self.history, + tool_inputs={"organism": hs_id}) + + # def load_fasta(self, inputs_dict): + # """ + # + # :return: + # """ + # return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", + # history_id=self.history, + # tool_inputs=) def show_pannel(self): print(self.instance.tools.get_tool_panel()) diff --git a/workflow.py b/workflow.py index 64be187..16fff71 100644 --- a/workflow.py +++ b/workflow.py @@ -1,6 +1,7 @@ import os from bioblend.galaxy import GalaxyInstance - +from toolrunner import ToolRunner +import json """ Workflow creation for generation and visualization of data and analyses output """ @@ -8,7 +9,8 @@ Workflow creation for generation and visualization of data and analyses output class Workflow: - def __init__(self, parameters_dict, instance): + def __init__(self, parameters_dict, instance, history_id): + self.history_id = history_id self.instance = instance self.parameters_dict = parameters_dict self.genus = parameters_dict["genus"] @@ -23,6 +25,7 @@ class Workflow: self.custom_ga_file = None self.custom_ga_file_path = None self.preset_ga_file = None + self.sp_url = self.genus[0].lower() + self.genus[1:] + "_" + self.species if self.strain != "": self.abbr = self.genus[0].lower() + "_" + self.species + "_" + self.strain self.full = "_".join([self.genus, self.species, self.strain, self.sex]) @@ -31,7 +34,7 @@ class Workflow: self.full = "_".join([self.genus, self.species, self.strain, self.sex]) self.workflow = None - def generate(self, working_directory, main_directory): + def generate(self, working_directory, main_directory, workflow_name): """ Generation of a galaxy workflow using the defined parameters in the .json input file Output format is a json dict @@ -42,10 +45,10 @@ class Workflow: """ # template workflow as a string # template_workflow_str = '"{"uuid": "ea9c3050-416f-4098-a7ff-b05c952bcd73", "tags": [], "format-version": "0.1", "name": "test", "version": 2, "steps": {"0": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "txt", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "d9b75b03-49e7-4a81-a67c-eaf6a9671905", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"organism\": \"\\\"2\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\"}", "id": 0, "tool_shed_repository": {"owner": "gga", "changeset_revision": "13da56fdaeb1", "name": "chado_organism_delete_organisms", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "f70569bc-9ac0-441a-a2d8-b547086a5bdf", "errors": null, "name": "Chado organism delete", "post_job_actions": {}, "label": "$ORGADELETE", "inputs": [], "position": {"top": 362, "left": 200}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "type": "tool"}, "1": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8ca0b891-0f01-4787-9b7f-57105dc303b0", "label": null}], "input_connections": {}, "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", 
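
The docstrings above stress that each run_tool() call must be returned: bioblend's run_tool() gives back a dict describing the created job, and the caller needs it to retrieve the tool's JSON result. A condensed sketch of the consuming side, mirroring what workflow.py does below (the helper name is illustrative, and the job wait/polling step the patch also omits is left out):

    import json

    def fetch_tool_json(gi, tool_output):
        # run_tool() returns a dict whose "outputs" list carries the dataset
        # id of each tool output; the Chado tools emit a single JSON dataset.
        dataset_id = tool_output["outputs"][0]["id"]
        raw = gi.datasets.download_dataset(dataset_id=dataset_id)
        return json.loads(raw)

    # e.g.: org_id = str(fetch_tool_json(gi, toolrunner.get_organism())[0]["organism_id"])
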
\"common\": \"\\\"$COMMON\\\"\", \"genus\": \"\\\"$GENUS\\\"\", \"species\": \"\\\"$SPECIES\\\"\", \"abbr\": \"\\\"$ABBR\\\"\"}", "id": 1, "tool_shed_repository": {"owner": "gga", "changeset_revision": "0f4956cec445", "name": "chado_organism_add_organism", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "24f0e175-f932-4e48-8b42-a53d9a432d5e", "errors": null, "name": "Chado organism add", "post_job_actions": {}, "label": "$ORGADD", "inputs": [], "position": {"top": 361, "left": 467.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "type": "tool"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8fa0e728-8803-4800-93b4-70f906f95f87", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$GENOME\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 2, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "76cbbd55-f1ac-4e48-be3c-c7bbda5add4c", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDGENOME", "inputs": [], "position": {"top": 307, "left": 690}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "5e7da027-0723-4077-8885-2dbe51cb5dda", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$OGS\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "4d1ffee4-00b2-445d-b630-b7b774c17873", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDOGS", "inputs": [], "position": {"top": 395, "left": 697}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "737dddc9-ae1b-463d-99fa-d9176053594d", "label": null}], "input_connections": {}, "tool_state": "{\"do_update\": \"\\\"false\\\"\", \"relationships\": 
\"{\\\"__current_case__\\\": 0, \\\"rel_type\\\": \\\"none\\\"}\", \"ext_db\": \"{\\\"db\\\": \\\"\\\", \\\"re_db_accession\\\": \\\"\\\"}\", \"analysis_id\": \"\\\"4\\\"\", \"re_uniquename\": \"\\\"\\\"\", \"match_on_name\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"re_name\": \"\\\"\\\"\", \"fasta\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"wait_for\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"organism\": \"\\\"2\\\"\", \"sequence_type\": \"\\\"contig\\\"\"}", "id": 4, "tool_shed_repository": {"owner": "gga", "changeset_revision": "1421dbc33a92", "name": "chado_feature_load_fasta", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "3d417ced-fc48-4c04-8a92-fdb7b9fecafc", "errors": null, "name": "Chado load fasta", "post_job_actions": {}, "label": "$LOADFASTA", "inputs": [{"name": "fasta", "description": "runtime parameter for tool Chado load fasta"}, {"name": "wait_for", "description": "runtime parameter for tool Chado load fasta"}], "position": {"top": 306, "left": 933.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}"' - # TODO: store workflow as a var instead of file + # TODO: store workflow as a var instead of file (once it runs smoothly) os.chdir(path=working_directory) - self.preset_ga_file = main_directory + "Galaxy-Workflow-preset_workflow.ga" + self.preset_ga_file = main_directory + "Galaxy-Workflow-" + workflow_name + ".ga" if self.strain != "": self.custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga" self.custom_ga_file_path = os.path.abspath(self.custom_ga_file) @@ -55,67 +58,99 @@ class Workflow: self.custom_ga_file_path = os.path.abspath(self.custom_ga_file) # print("Workflow file @ " + self.custom_ga_file_path) with open(self.preset_ga_file, 'r') as ga_in_file: - ga_in: str = str(ga_in_file.readlines()) - # Pre-filled workflow, useless for parameters and data set on runtime - # ga_in = ga_in.replace("$OGS", "OGS") - # ga_in = ga_in.replace("$VERSION", self.ogs_version) - # ga_in = ga_in.replace("$GENUS", self.genus) - # ga_in = ga_in.replace("$SPECIES", self.species) - # ga_in = ga_in.replace("$ABBR", self.abbr) - # ga_in = ga_in.replace("$STRAIN", self.strain) - # ga_in = ga_in.replace("$PERFORMEDBY", self.performed) - # ga_in = ga_in.replace("$COMMON", self.common) - # ga_in = ga_in.replace("$ORGA", self.full) - # ga_in = ga_in.replace("$ADDAN", "Add analysis") + ga_in = str(ga_in_file.readlines()) ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import - workflow_name = '"name": "' + self.full + '"' + ga_in = ga_in.replace('"name": "NAME"', str('"name": "' + self.genus.lower()[0] + self.species) + '"') + ga_in = ga_in.replace('{"unique_id": "UNIQUEID"}', str('{"unique_id": "' + self.genus + " " + self.species) + '"') + ga_in = ga_in.replace('http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"', + "http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + " " + self.species + "/feature/" + self.genus + "/mRNA/{id}") + # ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true') + # workflow_name = '"name": "' + self.full + '"' # ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"') # print(workflow_name) ga_in = ga_in[2:-2] - # ga_in = 
ga_in[:-2] + # ga_in = ga_in[:-2] # if the line above doesn't outputs a correct json self.workflow = ga_in - print(str(main_directory + "Galaxy-Workflow-preset_workflow.ga")) return ga_in - def set_params(self, datasets): + def set_main_workflow_parameters(self, datasets): """ - + Test function :return: """ + # TODO: move tool calls to main/autoload + + toolrunner = ToolRunner(parameters_dict=self.parameters_dict, instance=self.instance, history=self.history_id) + + toolrunner.add_organism() + toolrunner.add_ogs() + toolrunner.add_genome() + + org_id: str = None + genome_analysis_id: str = None + ogs_analysis_id: str = None + + org = toolrunner.get_organism() + org_job_out = org["outputs"][0]["id"] + org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out) + try: + org_output = json.loads(org_json_output)[0] + org_id = str(org_output["organism_id"]) # needs to be str to be recognized by the chado tool + global_org_id = org_id + except IndexError: + print("No organism matching " + self.full + " exists in the Chado database") + + ogs_analysis = toolrunner.get_ogs_analysis() + ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"] + ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out) + try: + ogs_analysis_output = json.loads(ogs_analysis_json_output)[0] + ogs_analysis_id = str(ogs_analysis_output["analysis_id"]) # needs to be str to be recognized by the chado tool + global_ogs_id = ogs_analysis_id + except IndexError: + print("No matching OGS analysis exists in the Chado database") + + genome_analysis = toolrunner.get_genome_analysis() + genome_analysis_job_out = genome_analysis["outputs"][0]["id"] + genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out) + try: + genome_analysis_output = json.loads(genome_analysis_json_output)[0] + genome_analysis_id = str(genome_analysis_output["analysis_id"]) # needs to be str to be recognized by the chado tool + global_genome_id = genome_analysis_id + except IndexError: + print("No matching genome analysis exists in the Chado database") + params = dict() - # params["0"] = {} - # params["1"] = {} - # params["2"] = {} - # params["3"] = {} - params["4"] = {"confirm": "True"} - params["5"] = {"abbr": self.abbr, "genus": self.genus, "species": self.species, "common": self.common} - # params["2"] = {"analysis": str(self.genus + " " + self.species + " genome v" + self.genome_version)} - # params["3"] = {"analysis": str(self.genus + " " + self.species + " OGS" + self.ogs_version)} - params["6"] = {"name": self.genus + " " + self.species + " genome v" + self.genome_version, - "program": "Performed by Genoscope", - "programversion": str("genome v" + self.genome_version), - "sourcename": "Genoscope", - "date_executed": self.date - } - params["7"] = {"name": self.genus + " " + self.species + " OGS" + self.ogs_version, - "program": "Performed by Genoscope", - "programversion": str("OGS" + self.genome_version), - "sourcename": "Genoscope", - "date_executed": self.date - } - params["8"] = {"genus": self.genus, - "species": self.species, - "common": self.common, - "abbr": self.abbr + params["0"] = {} + params["1"] = {} + params["2"] = {} + params["3"] = {} + params["4"] = {"organism": org_id, + "analysis_id": genome_analysis_id, + "do_update": "true"} # the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true" + params["5"] = {"organism": org_id, + "analysis_id": ogs_analysis_id, } - params["9"] = 
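
A remark on generate() above: str(ga_in_file.readlines()) builds the Python repr of a list of lines, which is why the code then strips the enclosing ['...'] with ga_in[2:-2] and repairs the doubled backslashes before json.loads() will accept the string. Assuming the .ga file is valid JSON (Galaxy workflow files are) and the name substitution is done on the parsed structure instead of on the raw string, the round-trip needs no slicing at all (a sketch, not the commit's code; the helper name is illustrative):

    import json

    def load_workflow(preset_ga_file, name):
        with open(preset_ga_file, "r") as ga_in_file:
            wf_dict = json.load(ga_in_file)  # parse the .ga JSON directly
        wf_dict["name"] = name               # e.g. "upinnatifida"
        return wf_dict                       # ready for import_workflow_dict()
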
{"name": self.genus + " " + self.species + " genome v" + self.genome_version, - "program": "Performed by Genoscope", - "programversion": str("genome v" + self.genome_version), - "sourcename": "Genoscope", - } - # params["10"] = {"organism": "13", "analysis": "2", "sequence": "contig"} + params["6"] = {"organism_id": org_id} + params["7"] = {"analysis_id": ogs_analysis_id} + params["8"] = {"analysis_id": genome_analysis_id} + params["9"] = {"organism_id": org_id} + return params + def set_jbrowse_workflow_parameters(self): + params = dict() + params["0"] = {} + params["1"] = {} + # jbrowse tools parameters are inside nested dictionaries, read tool inputs before adding or modifying anything + # problem is with Jbrowse add organism --> the parameters are nested + params["2"] = {} # {"jbmenu": {"menu_url": "http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"}} # {"menu_url": "/".join(["http://localhost/sp", self.sp_url, "feature", self.genus, self.species, "mRNA", "{id}"])}} + params["3"] = {} + # params["3"] = {"name": " ".join([self.genus, self.species + self.strain + "male"]), + # "unique_id": self.genus.lower()[0] + self.species} + # params["3"] = {"name": [{"name": str(self.genus + " " + self.species), + # "unique_id": str(self.genus.lower()[0] + self.species)}]} + def set_datamap(self): gi = self.instance diff --git a/workflows/tool_example.json b/workflows/tool_example.json new file mode 100644 index 0000000..e69de29 -- GitLab