From 179451dfc2ed154b2bef67ef4e9c962423e05775 Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Thu, 10 Sep 2020 15:15:03 +0200
Subject: [PATCH] Divided into 3 separate scripts: deploy_stacks.py,
 load_data.py, run_workflow.py

---
 .gitignore                                    | 114 ++-
 .../docker_compose_generator.cpython-36.pyc   | Bin 2107 -> 0 bytes
 .../docker_compose_generator.cpython-38.pyc   | Bin 2644 -> 0 bytes
 __pycache__/metadata_generator.cpython-36.pyc | Bin 585 -> 0 bytes
 __pycache__/metadata_generator.cpython-38.pyc | Bin 908 -> 0 bytes
 __pycache__/table_parser.cpython-36.pyc       | Bin 2676 -> 0 bytes
 __pycache__/table_parser.cpython-38.pyc       | Bin 2694 -> 0 bytes
 create_input_instance.py                      | 108 ++-
 deploy_stacks.py                              | 760 +++---------------
 docker_compose_generator.py                   |  18 +-
 ...ple_input.json => json_example_input.json} |   0
 examples/yml_example_input.yml                |  10 +-
 ...ata_libraries.py => galaxy_data_libs_SI.py |   0
 load_data.py                                  | 253 +++++-
 run_workflow.py                               | 462 +++++++++++
 table_parser.py                               | 159 ++--
 templates/compose-template.yml                |   2 +-
 templates/stack-organism.yml                  |   4 +-
 {ext_scripts => utils}/__init__.py            |   0
 {ext_scripts => utils}/blastdb.py             |   0
 .../common-stringSubsitute.py                 |  72 +-
 .../phaeoexplorer-change_pep_fasta_header.sh  |  32 +-
 ...explorer-change_transcript_fasta_header.sh |   0
 ...orer-change_transcript_fasta_header.sh.bak |  14 +-
 24 files changed, 1100 insertions(+), 908 deletions(-)
 delete mode 100644 __pycache__/docker_compose_generator.cpython-36.pyc
 delete mode 100644 __pycache__/docker_compose_generator.cpython-38.pyc
 delete mode 100644 __pycache__/metadata_generator.cpython-36.pyc
 delete mode 100644 __pycache__/metadata_generator.cpython-38.pyc
 delete mode 100644 __pycache__/table_parser.cpython-36.pyc
 delete mode 100644 __pycache__/table_parser.cpython-38.pyc
 rename examples/{example_input.json => json_example_input.json} (100%)
 rename setup_data_libraries.py => galaxy_data_libs_SI.py (100%)
 rename {ext_scripts => utils}/__init__.py (100%)
 rename {ext_scripts => utils}/blastdb.py (100%)
 rename {ext_scripts => utils}/common-stringSubsitute.py (97%)
 rename {ext_scripts => utils}/phaeoexplorer-change_pep_fasta_header.sh (96%)
 rename {ext_scripts => utils}/phaeoexplorer-change_transcript_fasta_header.sh (100%)
 rename {ext_scripts => utils}/phaeoexplorer-change_transcript_fasta_header.sh.bak (97%)

diff --git a/.gitignore b/.gitignore
index 07e2dd1..17e708b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,107 @@
-__pycache__
-.idea
-phaeoexplorer_test.json
-example.json
-example.xlsx
-*.bak
-undaria_pinnatifida
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# IDE stuff
+.idea
\ No newline at end of file
diff --git a/__pycache__/docker_compose_generator.cpython-36.pyc b/__pycache__/docker_compose_generator.cpython-36.pyc
deleted file mode 100644
index f0bfefaa1f33103cb7eb2bf92aff23cbcba9ed3b..0000000000000000000000000000000000000000
Binary files a/__pycache__/docker_compose_generator.cpython-36.pyc and /dev/null differ

diff --git a/__pycache__/docker_compose_generator.cpython-38.pyc b/__pycache__/docker_compose_generator.cpython-38.pyc
deleted file mode 100644
index 1201041cefa429a4343e2513ccd1c57b142f50b8..0000000000000000000000000000000000000000
Binary files a/__pycache__/docker_compose_generator.cpython-38.pyc and /dev/null differ

diff --git a/__pycache__/metadata_generator.cpython-36.pyc b/__pycache__/metadata_generator.cpython-36.pyc
deleted file mode 100644
index 19eb173b57afdd0bdc0683e4ed77949e298cd055..0000000000000000000000000000000000000000
Binary files a/__pycache__/metadata_generator.cpython-36.pyc and /dev/null differ

diff --git a/__pycache__/metadata_generator.cpython-38.pyc b/__pycache__/metadata_generator.cpython-38.pyc
deleted file mode 100644
index eed2e9474897c4805dc636277bafade56ab8a337..0000000000000000000000000000000000000000
Binary files a/__pycache__/metadata_generator.cpython-38.pyc and /dev/null differ

diff --git a/__pycache__/table_parser.cpython-36.pyc b/__pycache__/table_parser.cpython-36.pyc
deleted file mode 100644
index 7272a5afeb889befe0494f81c89160a22c94c6fb..0000000000000000000000000000000000000000
Binary files a/__pycache__/table_parser.cpython-36.pyc and /dev/null differ

diff --git a/__pycache__/table_parser.cpython-38.pyc b/__pycache__/table_parser.cpython-38.pyc
deleted file mode 100644
index 57f034e254bb97b21195f7f449cc828c52038074..0000000000000000000000000000000000000000
Binary files a/__pycache__/table_parser.cpython-38.pyc and /dev/null differ

diff --git a/create_input_instance.py b/create_input_instance.py
index 9ef50aa..d01dd88 100644
--- a/create_input_instance.py
+++ b/create_input_instance.py
@@ -13,9 +13,8 @@ from datetime import datetime
 """
 create_input_instance.py
 
-Create an object containing the data input from the yml file as attributes
-This object is then fed to the other scripts
-It is to avoid having several times the same code in several files
+Create an object containing the data from the input yml file as attributes; this object is then fed to the other
+scripts, so the same parsing code does not have to be duplicated in several files
 
 """
 
@@ -23,7 +22,7 @@ It is to avoid having several times the same code in several files
 def parse_input(input_file):
     """
     Parse the yml input file to extract data to create the SpeciesData objects
-    Return a list of dictionaries. Each dictionary contains all the data
+    Return a list of dictionaries. Each dictionary contains data tied to a species
 
     :param input_file:
     :return:
@@ -40,24 +39,58 @@ def parse_input(input_file):
         try:
             yaml_dict = yaml.safe_load(stream)
             for k, v in yaml_dict.items():
+                if k == "config":
+                    # Skip the "config" entry: it does not describe a species
+                    continue
                 parsed_sp_dict_list.append(v)
-        except yaml.YAMLError as exc:
-            logging.debug(exc)
+        except yaml.YAMLError as exc:
+            logging.critical("Error while parsing the YAML input file: " + str(exc))
+            sys.exit()
     return parsed_sp_dict_list
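+
+# Illustrative use of parse_input (a sketch; SpeciesData is defined below and the example path
+# comes from the examples/ directory of this repository):
+#   sp_dict_list = parse_input("examples/yml_example_input.yml")
+#   all_species = [SpeciesData(parameters_dictionary=d) for d in sp_dict_list]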
 
 
+def parse_args():
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+
+    parser.add_argument("-i", "--input",
+                        help="Input file (yml)")
+
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_true")
+
+    parser.add_argument("--deploy-stacks",
+                        help="Create and deploy the stacks of services",
+                        action="store_true")
+
+    parser.add_argument("--load-data",
+                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets "
+                             "into the instance, DEV",
+                        action="store_true")
+
+    parser.add_argument("--run-workflow",
+                        help="Run the main workflow (load data into chado, sync all with tripal, "
+                             "index tripal data, populate materialized view, "
+                             "create a jbrowse for the current genus_species_strain_sex and add the organism to jbrowse)",
+                        action="store_true")
+
+    args = parser.parse_args()
+
+    return args
+
 
 
 class SpeciesData:
     """
     This class contains attributes and functions to interact with the galaxy container of the GGA environment
-
+    Parent class of LoadData, DeploySpeciesStack and RunWorkflow
 
     """
 
-    def __init__(self, parameters_dictionary, args):
+    def __init__(self, parameters_dictionary):
         self.parameters_dictionary = parameters_dictionary
-        self.args = args
+        self.args = parse_args()  # Not a good design
         self.species = parameters_dictionary["description"]["species"]
         self.genus = parameters_dictionary["description"]["genus"]
         self.strain = parameters_dictionary["description"]["strain"]
@@ -105,57 +138,8 @@ class SpeciesData:
         self.do_update = False  # Update the instance (in histories corresponding to the input) instead of creating a new one // TODO: move this variable inside methods
         self.api_key = "dev"  # API key used to communicate with the galaxy instance. Set to "dev" for the moment. Cannot be used to do user-tied actions
-        self.args = args
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction with galaxy instances for GGA"
-                                                 ", following the protocol @ "
-                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
-
-    # Dev arguments, TODO: remove in production branch!
-    parser.add_argument("--full",
-                        help="Run everything, from src_data dir tree creation, moving data files (abims) into src_data,"
-                             "modify headers (abims), generate blast banks (doesn't commit them: TODO), initialize GGA instance, load the data and run,"
-                             " the main workflow. To update/add data to container, use --update in conjunction to --full (TODO)")
-
-    parser.add_argument("--init-instance",
-                        help="Initialization of galaxy instance. Run first in an empty instance, DEV",
-                        action="store_true")
-
-    parser.add_argument("--deploy-stacks",
-                        help="Create and deploy the stacks of services",
-                        action="store_true")
-
-    parser.add_argument("--load-data",
-                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets into the instance, DEV",
-                        action="store_true")
-
-    parser.add_argument("--run-workflow",
-                        help="Run main workflow (load data into chado, sync all with tripal, "
-                             "index tripal data, populate materialized view, "
-                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
-
-
-    # Production arguments
-    parser.add_argument("input", type=str, help="Input file (yml)")
-
-    parser.add_argument("-v", "--verbose",
-                        help="Increase output verbosity",
-                        action="store_false")
-
-    parser.add_argument("--update",
-                        help="Update an already integrated organisms with new data from input file, docker-compose.yml will not be re-generated"
-                             ", assuming the instances for the organisms are already generated and initialized",
-                        action="store_false")
-
-    parser.add_argument("--dir",
-                        help="Path of the main directory, either absolute or relative, defaults to current directory",
-                        default=os.getcwd())
-
-    args = parser.parse_args()
-
-    if args.verbose:
-        logging.basicConfig(level=logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
-
+        if self.args.verbose:
+            logging.basicConfig(level=logging.DEBUG)
+        else:
+            logging.basicConfig(level=logging.INFO)
 
diff --git a/deploy_stacks.py b/deploy_stacks.py
index 73b766e..45a89d0 100755
--- a/deploy_stacks.py
+++ b/deploy_stacks.py
@@ -1,64 +1,31 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 
-import bioblend
-import bioblend.galaxy.objects
-from bioblend import galaxy
+
 import argparse
 import os
 import subprocess
 import logging
 import sys
-import json
 import yaml
 import re
-import metadata_generator
-import docker_compose_generator
-import table_parser
+from gga_autoload.gga_load_data import table_parser
 import fnmatch
 import shutil
 from datetime import datetime
-import create_input_instance
+
 
 """ 
 deploy_stacks.py
 
-
-
-TODO:
-- add config file (inside repo or outside with argument
-- update existing history
-- clean/delete instance?
-- delete stack?
-- commit the files for blast banks.
-
-TODO EOSC/Cloudification:
-- divide into 2 general-use scripts
-    - create docker swarm, stacks, etc... (docker side)
-    - load data into libraries (method to load it at init, and a method/script to load it separately (galaxy side) (alb: galaxy_data_libs_SI does this already?)
-
-STEPS:
-- read input (yml, maybe xlsx later)
-- create dir_tree -- DONE
-- find and copy data -- DONE
-- change file headers, etc.. (ext scripts for data manipulation) -- IN PROGRESS
-- generate blast banks and links -- NOT DONE
-- generate and edit nginx confs -- DONE
-- generate dc and start the containers -- IN PROGRESS
-- connect to instance and launch tools>workflows -- IN PROGRESS
-- generate and update metadata -- IN PROGRESS
-
-
-NOTES:
-- A master API key cannot be used, as some functions are tied to a user (like creating an history), so the access to the
-  galaxy instance must be done using email and password (definable in yml_example_input.yml)
-
+Usage: $ python3 deploy_stacks.py example.yml [OPTIONS]
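+
+Minimal sketch of the expected yml input (keys inferred from the DeploySpeciesStack constructor below;
+a real input file, e.g. examples/yml_example_input.yml, may carry more fields):
+
+    my_species_1:
+      description:
+        genus: "Genus"
+        species: "species"
+        strain: "strain"
+        sex: "male"
+      data:
+        parent_directory: "/path/to/closest/parent/dir"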
 """
 
 
 def parse_input(input_file):
     """
-    Parse the yml, json or tabulated input in order to set attributes for the Autoload class
+    Parse the yml input file to extract data to create the DeploySpeciesStack objects
+    Return a list of dictionaries. Each dictionary contains data tied to a species
 
     :param input_file:
     :return:
@@ -75,24 +42,24 @@ def parse_input(input_file):
         try:
             yaml_dict = yaml.safe_load(stream)
             for k, v in yaml_dict.items():
+                if k == "config":
+                    # Skip the "config" entry: it does not describe a species
+                    continue
                 parsed_sp_dict_list.append(v)
-        except yaml.YAMLError as exc:
-            logging.debug(exc)
+        except yaml.YAMLError as exc:
+            logging.critical(str(exc) + " (the YAML input file might be malformed)")
+            sys.exit()
     return parsed_sp_dict_list
 
 
-
-
-class DeploySpeciesStacks:
+class DeploySpeciesStack:
     """
-    The class DeploySpeciesStacks
+    Deploy a stack of services for a given species
 
 
     """
 
-    def __init__(self, parameters_dictionary, args):
+    def __init__(self, parameters_dictionary):
         self.parameters_dictionary = parameters_dictionary
-        self.args = args
         self.species = parameters_dictionary["description"]["species"]
         self.genus = parameters_dictionary["description"]["genus"]
         self.strain = parameters_dictionary["description"]["strain"]
@@ -115,7 +82,8 @@ class DeploySpeciesStacks:
         self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
         self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
         self.genus_species = self.genus_lowercase + "_" + self.species
-        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"  # Testing with localhost/scratchgmodv1
+        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        # Testing with localhost/scratchgmodv1
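+        # i.e. the URL pattern is http://<host>:8888/sp/<genus>_<species>/galaxy/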
         self.instance = None
         self.history_id = None
         self.library_id = None
@@ -129,59 +97,24 @@ class DeploySpeciesStacks:
         self.datasets = dict()
         self.source_files = dict()
         self.workflow_name = None
-        self.docker_compose_generator = None
         self.metadata = dict()
-        self.api_key = "dev"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
+        self.api_key = "master"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
         if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
             self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
         else:
             self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
-        # Directory/subdirectories where data files are located (fasta, gff, ...), point to a directory as close as possible to the source files
+        # Directory/subdirectories where data files are located (fasta, gff, ...)
         self.do_update = False
-        # Update the instance (in histories corresponding to the input) instead of creating a new one // TODO: move this variable inside methods
-        self.api_key = "dev"
-        # API key used to communicate with the galaxy instance. Set to "dev" for the moment // TODO: find a way to create, store then use the api key safely
-
-
-    # def get_source_data(self, max_depth):
-    #     """
-    #     TODO: saved for later just in case
-    #
-    #     Find and copy source data files to src_data directory tree
-    #     - recursively search for the correct files (within a fixed max depth)
-    #     - requires the organism src_data directory tree to already be properly created for the organism (run generate_dir_tree)
-    #     - the source files must have "transcripts", "proteins"/"pep", "genome" in their name, and a gff extension
-    #
-    #     """
-    #     src_data_dir = os.path.join(self.species_dir, "/src_data")
-    #     sp_regex = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # example with VARIABLE
-    #
-    #     # The regex works using the species attribute (unique) --> regex is probably not necessary
-    #     sp_regex = ""
-    #     for i in self.species:
-    #         sp_regex = sp_regex + "?=\w*" + i + ")"
-    #     sp_regex = sp_regex + ")\w+"
-    #     re_dict = dict()
-    #     re_dict["gff"] = None
-    #     re_dict["transcripts"] = None
-    #     re_dict["proteins"] = None
-    #     re_dict["genome"] = None
-    #     reg = None
-    #
-    #     for dirpath, dirnames, files in os.walk(self.source_data_dir):
-    #         for f in files:
-    #             if self.species and self.sex in f:
-    #                 logging.info("File found")
-
-
-
-
-    def generate_dir_tree(self):
+        # Update the instance (in histories corresponding to the input) instead of creating a new one
+        self.api_key = "master"
+        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
+        self.species_name_regex_litteral = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder regex (example with the word "VARIABLE")
+
+
+    def make_directory_tree(self):
         """
         Generate the directory tree for an organism and move datasets into src_data
 
-        TODO: DOCKER -- this is the one the "docker" parts of the script
-
         :return:
         """
 
@@ -213,14 +146,37 @@ class DeploySpeciesStacks:
         # self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
         organism_annotation_dir, organism_genome_dir = None, None
 
-        # Create src_data dir tree
+        # Creation (or updating) of the src_data directory tree
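+        # Target layout (sketch of what the mkdir calls below produce, rooted in the species directory):
+        #   src_data/
+        #       annotation/<species_folder_name>/OGS<ogs_version>/
+        #       genome/<species_folder_name>/v<genome_version>/
+        #       tracks/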
+        # Depth 0-1
         try:
             os.mkdir("./src_data")
             os.mkdir("./src_data/annotation")
             os.mkdir("./src_data/genome")
             os.mkdir("./src_data/tracks")
+        except FileExistsError:
+            if self.do_update:
+                logging.info("Updating src_data directory tree")
+            else:
+                logging.debug("The src_data directory tree already exists")
+        except PermissionError:
+            logging.critical("Insufficient permission to create src_data directory tree")
+            sys.exit()
+
+        # Depth 2
+        try:
             os.mkdir("./src_data/annotation/" + self.species_folder_name)
             os.mkdir("./src_data/genome/" + self.species_folder_name)
+        except FileExistsError:
+            if self.do_update:
+                logging.info("Updating src_data directory tree")
+            else:
+                logging.debug("The src_data directory tree already exists")
+        except PermissionError:
+            logging.critical("Insufficient permission to create src_data directory tree")
+            sys.exit()
+
+        # Depth 3
+        try:
             os.mkdir("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version)
             os.mkdir("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
             organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
@@ -234,6 +190,12 @@ class DeploySpeciesStacks:
             logging.critical("Insufficient permission to create src_data directory tree")
             sys.exit()
 
+
+    def make_compose_files(self):
+        """
+        Generate the docker-compose file for the species from the stack template, and copy the traefik/authelia configuration files
+
+        :return:
+        """
         # Path to the templates used to generate the custom docker-compose files for an input species
         stack_template_path = self.script_dir + "/templates/stack-organism.yml"
         traefik_template_path = self.script_dir + "/templates/traefik.yml"
@@ -248,20 +210,27 @@ class DeploySpeciesStacks:
         with open(stack_template_path, 'r') as infile:
             organism_content = list()
             for line in infile:
-                # One-liner to replace placeholders by the genus and species
+                # Replace placeholders in the compose file
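+                # Sketch of the substitutions (the placeholders live in templates/stack-organism.yml):
+                #   "genus_species" -> "<genus>_<species>", "Genus species" -> "<Genus> <species>",
+                #   "Genus/species" -> "<Genus>/<species>", "gspecies" -> "<g><species>",
+                #   "genus_species_strain_sex" -> "<genus>_<species>_<strain>_<sex>"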
                 organism_content.append(
-                    line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species", str(self.genus_uppercase + " " + self.species)).replace("Genus/species", str(self.genus_uppercase + "/" + self.species)).replace("gspecies", str( self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex", genus_species_strain_sex))
+                    line.replace("genus_species", str(self.genus.lower() + "_" + self.species))
+                        .replace("Genus species", str(self.genus_uppercase + " " + self.species))
+                        .replace("Genus/species", str(self.genus_uppercase + "/" + self.species))
+                        .replace("gspecies", str(self.genus.lower()[0] + self.species))
+                        .replace("genus_species_strain_sex", genus_species_strain_sex))
             with open("./docker-compose.yml", 'w') as outfile:
                 for line in organism_content:
                     outfile.write(line)
-            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
+            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
+                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
 
         try:
             os.mkdir("../traefik")
             os.mkdir("../traefik/authelia")
             shutil.copy(authelia_config_path, "../traefik/authelia/configuration.yml")
             shutil.copy(authelia_users_path, "../traefik/authelia/users.yml")  # TODO: custom users (add a config file?)
-            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
+            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
+                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
         except FileExistsError:
             logging.debug("Traefik directory already exists")
         try:
@@ -271,11 +240,9 @@ class DeploySpeciesStacks:
         subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir)
 
 
-
     def get_source_data_files_from_path(self):
         """
-        Find all files in source_data directory, to link the matching files in the src_data dir tree
-
+        Find the source data files in the parent directory and link the matching files into the src_data directory tree
         :return:
         """
 
@@ -290,7 +257,7 @@ class DeploySpeciesStacks:
         organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
 
         for dirpath, dirnames, files in os.walk(self.source_data_dir):
-            if "0" in str(dirpath):  # Ensures to take the correct files (other dirs hold files with the correct names, but I don't know if they are the same) #alb
+            if "0" in str(dirpath):  # Ensures to take the correct files (other dirs hold files with the correct names, but I don't know if they are the same), this is for Phaeoexplorer only
                 for f in files:
                     if "Contaminants" not in str(f):
                         try:
@@ -322,7 +289,6 @@ class DeploySpeciesStacks:
                             logging.warning("Error raised (NotADirectoryError)")
 
 
-
     def deploy_stack(self):
         """
         Call the script "deploy.sh" used to initiliaze the swarm cluster if needed and launch/update the stack
@@ -330,458 +296,10 @@ class DeploySpeciesStacks:
         :return:
         """
         # Launch and update docker stacks (cf docs)
-        # TODO: add a fail condition?
         subprocess.call(["sh", self.script_dir + "/deploy.sh", self.genus_species, self.main_dir + "/traefik"])
 
 
 
-
-
-    def modify_fasta_headers(self):
-        """
-        Change the fasta headers before integration.
-
-        :return:
-        """
-
-        try:
-            os.chdir(self.species_dir)
-            working_dir = os.getcwd()
-        except OSError:
-            logging.info("Cannot access " + self.species_dir + ", run with higher privileges")
-            logging.info("Fatal error: exit")
-            sys.exit()
-        self.source_files = dict()
-        annotation_dir, genome_dir = None, None
-        for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
-            if "annotation/" in d:
-                annotation_dir = d
-                for f in os.listdir(d):
-                    if f.endswith("proteins.fasta"):
-                        self.source_files["proteins_file"] = os.path.join(d, f)
-                    elif f.endswith("transcripts-gff.fa"):
-                        self.source_files["transcripts_file"] = os.path.join(d, f)
-                    elif f.endswith(".gff"):
-                        self.source_files["gff_file"] = os.path.join(d, f)
-            elif "genome/" in d:
-                genome_dir = d
-                for f in os.listdir(d):
-                    if f.endswith(".fa"):
-                        self.source_files["genome_file"] = os.path.join(d, f)
-                logging.debug("source files found:")
-        for k, v in self.source_files.items():
-            logging.debug("\t" + k + "\t" + v)
-
-        # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
-        # production version
-        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh",
-                              self.source_files["proteins_file"]]
-        # test version
-        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
-                              # self.source_files["proteins_file"]]
-        logging.info("Changing fasta headers: " + self.source_files["proteins_file"])
-        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
-        # production version
-        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh",
-                              self.source_files["proteins_file"]]
-        # test version
-        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
-        #                       self.source_files["proteins_file"]]
-        logging.info("Changing fasta headers: " + self.source_files["transcripts_file"])
-        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
-
-        # src_data cleaning
-        if os.path.exists(annotation_dir + "outfile"):
-            subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
-                           stdout=subprocess.PIPE,
-                           cwd=annotation_dir)
-        if os.path.exists(annotation_dir + "gmon.out"):
-            subprocess.run(["rm", annotation_dir + "/gmon.out"],
-                           stdout=subprocess.PIPE,
-                           cwd=annotation_dir)
-
-
-
-
-    def generate_blast_banks(self):
-        """
-        TODO
-        Automatically generate blast banks for a species
-        TODO: auto commit the files?
-
-        :return:
-        """
-
-
-    def connect_to_instance(self):
-        """
-        TODO: move in init/access
-        TODO: password
-        Test the connection to the galaxy instance for the current organism
-        Exit if it cannot connect to the instance
-        """
-        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password", verify=False)
-        logging.info("Connecting to the galaxy instance ...")
-        try:
-            self.instance.histories.get_histories()
-            self.tool_panel = self.instance.tools.get_tool_panel()
-        except bioblend.ConnectionError:
-            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
-            sys.exit()
-        else:
-            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
-        self.instance.histories.create_history(name="FOO")
-
-
-
-
-
-
-    def setup_data_libraries(self):
-        """
-        - generate blast banks and docker-compose
-        - load data into the galaxy container with the galaxy_data_libs_SI.py script
-
-        :return:
-        """
-
-        try:
-            logging.info("Loading data into the galaxy container")
-            subprocess.run("../serexec genus_species_galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py",
-                           shell=True)
-        except subprocess.CalledProcessError:
-            logging.info("Cannot load data into the galaxy container for " + self.full_name)
-            pass
-        else:
-            logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
-
-        self.get_species_history_id()
-        # self.get_instance_attributes()
-        #
-        # # import all datasets into current history
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
-        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
-
-
-
-
-
-    def get_species_history_id(self):
-        """
-        Set and return the current species history id in its galaxy instance
-
-        :return:
-        """
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        self.history_id = histories[0]["id"]
-        self.instance.histories.show_history(history_id=self.history_id)
-
-        return self.history_id
-
-
-
-
-    def create_species_history(self):
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        print("\n" + str(histories) + "\n" + self.full_name + "\n")
-        if not histories:
-            self.instance.histories.create_history(name="FOO")
-            print("Created history!")
-
-
-
-
-
-    def get_instance_attributes(self):
-        """
-        retrieves instance attributes:
-        - working history ID
-        - libraries ID (there should only be one library!)
-        - datasets IDs
-
-        :return:
-        """
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        self.history_id = histories[0]["id"]
-        logging.debug("history ID: " + self.history_id)
-        libraries = self.instance.libraries.get_libraries()  # normally only one library
-        self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
-        logging.debug("library ID: " + self.history_id)
-        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
-
-        folders_ids = {}
-        current_folder_name = ""
-        for i in instance_source_data_folders:
-            for k, v in i.items():
-                if k == "name":
-                    folders_ids[v] = 0
-                    current_folder_name = v
-                if k == "id":
-                    folders_ids[current_folder_name] = v
-        logging.info("Folders and datasets IDs: ")
-        self.datasets = dict()
-        for k, v in folders_ids.items():
-            logging.info("\t" + k + ": " + v)
-            if k == "/genome":
-                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
-                for k2, v2 in sub_folder_content.items():
-                    for e in v2:
-                        if type(e) == dict:
-                            if e["name"].endswith(".fa"):
-                                self.datasets["genome_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-            elif k == "/annotation/" + self.genus_species:
-                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
-                for k2, v2 in sub_folder_content.items():
-                    for e in v2:
-                        if type(e) == dict:
-                            # TODO: manage several files of the same type and manage versions
-                            if e["name"].endswith("transcripts-gff.fa"):
-                                self.datasets["transcripts_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith("proteins.fasta"):
-                                self.datasets["proteins_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith(".gff"):
-                                self.datasets["gff_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-                            elif e["name"].endswith("MALE"):
-                                self.datasets["gff_file"] = e["ldda_id"]
-                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
-
-
-
-
-
-
-    def init_instance(self):
-        """
-        Galaxy instance startup in preparation for running workflows
-        - remove Homo sapiens from the chado database.
-        - add organism and analyses into the chado database --> separate
-        - get any other existing organisms IDs before updating the galaxy instance --> separate
-
-        TODO: move the library and analysis/data stuff to a separate function
-        :return:
-        """
-
-        self.connect_to_instance()
-        self.get_species_history_id()
-        histories = self.instance.histories.get_histories(name=str(self.full_name))
-        # Create the first history
-        if not histories:
-            self.instance.histories.create_history(name=str(self.full_name))
-        self.history_id = histories[0]["id"]
-        logging.debug("history ID: " + self.history_id)
-        # libraries = self.instance.libraries.get_libraries()  # routine check: one library
-        # self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
-        logging.debug("library ID: " + self.history_id)
-        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
-
-        # Delete Homo sapiens from Chado database
-        logging.debug("Getting 'Homo sapiens' ID in instance's chado database")
-        get_sapiens_id_job = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"genus": "Homo", "species": "sapiens"})
-        get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
-        get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output)
-        try:
-            logging.debug("Deleting Homo 'sapiens' in the instance's chado database")
-            get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
-            sapiens_id = str(
-                get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
-            self.instance.tools.run_tool(
-                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
-                history_id=self.history_id,
-                tool_inputs={"organism": str(sapiens_id)})
-        except bioblend.ConnectionError:
-            logging.debug("Homo sapiens isn't in the instance's chado database")
-        except IndexError:
-            logging.debug("Homo sapiens isn't in the instance's chado database")
-            pass
-
-        # TODO: the following actions should be done in a separate function (in case if the user wants to do everything him/herself -- for EOSC)
-        # Add organism (species) to chado
-        logging.info("Adding organism to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"abbr": self.abbreviation,
-                         "genus": self.genus,
-                         "species": self.species,
-                         "common": self.common})
-        # Add OGS analysis to chado
-        logging.info("Adding OGS analysis to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
-                         "program": "Performed by Genoscope",
-                         "programversion": str("OGS" + self.ogs_version),
-                         "sourcename": "Genoscope",
-                         "date_executed": self.date})
-
-        # Add genome analysis to chado
-        logging.info("Adding genome analysis to the instance's chado database")
-        self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
-                         "program": "Performed by Genoscope",
-                         "programversion": str("genome v" + self.genome_version),
-                         "sourcename": "Genoscope",
-                         "date_executed": self.date})
-        self.get_organism_and_analyses_ids()
-        logging.info("Finished initializing instance")
-
-
-
-
-
-
-
-    def run_workflow(self, workflow_name, workflow_parameters, datamap):
-        """
-        Run the "main" workflow in the galaxy instance
-        - import data to library
-        - load fasta and gff
-        - sync with tripal
-        - add jbrowse + organism
-        - fill in the tripal views
-
-        TODO: map tool name to step id
-        :param workflow_name:
-        :param workflow_parameters:
-        :param datamap:
-        :return:
-        """
-
-        logging.debug("running workflow: " + str(workflow_name))
-        workflow_ga_file = self.main_dir + "Galaxy-Workflow-" + workflow_name + ".ga"
-        if self.strain != "":
-            custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
-            custom_ga_file_path = os.path.abspath(custom_ga_file)
-        else:
-            custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
-            custom_ga_file_path = os.path.abspath(custom_ga_file)
-        with open(workflow_ga_file, 'r') as ga_in_file:
-            workflow = str(ga_in_file.readlines())
-            # ugly fix for the jbrowse parameters
-            workflow = workflow.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
-                                        str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
-            workflow = workflow.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
-                                        str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
-            workflow = workflow.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
-            # test
-            workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
-                                        "http://localhost/sp/" + self.genus_lowercase+ "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
-            # production
-            # workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
-            #                             "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
-            workflow = workflow[2:-2]  # if the line under doesn't output a correct json
-            # workflow = workflow[:-2]  # if the line above doesn't output a correct json
-
-            workflow_dict = json.loads(workflow)
-
-            self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-            self.workflow_name = workflow_name
-            workflow_attributes = self.instance.workflows.get_workflows(name=self.workflow_name)
-            workflow_id = workflow_attributes[0]["id"]
-            show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
-            logging.debug("Workflow ID: " + workflow_id)
-
-            logging.debug("Inputs:")
-            logging.debug(show_workflow["Inputs"])
-            self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
-                                                    history_id=self.history_id,
-                                                    params=workflow_parameters,
-                                                    inputs=datamap,
-                                                    inputs_by="")
-            self.instance.workflows.delete_workflow(workflow_id=workflow_id)
-
-
-
-
-
-
-    def load_data_in_galaxy(self):
-        """
-        Function to load the src_data folder in galaxy
-
-        :return:
-        """
-
-        logging.info("Loading data in galaxy")
-
-        return None
-
-
-
-
-
-    def get_organism_and_analyses_ids(self):
-        """
-        Retrieve current organism ID and OGS and genome chado analyses IDs (needed to run some tools as Tripal/Chado
-        doesn't accept organism/analyses names as valid inputs
-
-        :return:
-        """
-        # Get the ID for the current organism in chado
-        org = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"genus": self.genus, "species": self.species})
-        org_job_out = org["outputs"][0]["id"]
-        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
-        try:
-            org_output = json.loads(org_json_output)[0]
-            self.org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
-        except IndexError:
-            logging.debug("no organism matching " + self.full_name + " exists in the instance's chado database")
-
-        # Get the ID for the OGS analysis in chado
-        ogs_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
-        ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
-        ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
-        try:
-            ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
-            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])
-        except IndexError:
-            logging.debug("no matching OGS analysis exists in the instance's chado database")
-
-        # Get the ID for the genome analysis in chado
-        genome_analysis = self.instance.tools.run_tool(
-            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
-            history_id=self.history_id,
-            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
-        genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
-        genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
-        try:
-            genome_analysis_output = json.loads(genome_analysis_json_output)[0]
-            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])
-        except IndexError:
-            logging.debug("no matching genome analysis exists in the instance's chado database")
-
-
-
-
-    def clean_instance(self):
-        """
-        TODO: method to purge the instance from analyses and organisms
-        :return:
-        """
-        return None
-
-
-
-
-
 def filter_empty_not_empty_items(li):
     ret = {"empty": [], "not_empty": []}
     for i in li:
@@ -793,42 +311,18 @@ def filter_empty_not_empty_items(li):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction with galaxy instances for GGA"
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
                                                  ", following the protocol @ "
                                                  "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
-    # Dev arguments, TODO: remove in production branch!
-    parser.add_argument("--full",
-                        help="Run everything, from src_data dir tree creation, moving data files (abims) into src_data,"
-                             "modify headers (abims), generate blast banks (doesn't commit them: TODO), initialize GGA instance, load the data and run,"
-                             " the main workflow. To update/add data to container, use --update in conjunction to --full (TODO)")
-    parser.add_argument("--init-instance",
-                        help="Initialization of galaxy instance. Run first in an empty instance, DEV",
-                        action="store_true")
-    parser.add_argument("--load-data",
-                        help="Create src_data directory tree, copy datasets to src_data, and load these datasets into the instance, DEV",
-                        action="store_true")
-    parser.add_argument("--run-main",
-                        help="Run main workflow (load data into chado, sync all with tripal, "
-                             "index tripal data, populate materialized view, "
-                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse")
-    parser.add_argument("--generate-docker-compose",
-                        help="Generate docker-compose.yml for current species, DEV")
-    parser.add_argument("--link-source",
-                        help="Find source files in source data dir and copy them to src_data, DEV, OBSOLETE",
-                        action="store_true")
-
-    # Production arguments
-    parser.add_argument("input", type=str, help="Input file (yml)")
+
+    parser.add_argument("input",
+                        type=str,
+                        help="Input file (yml)")
+
     parser.add_argument("-v", "--verbose",
                         help="Increase output verbosity",
                         action="store_false")
-    parser.add_argument("--update",
-                        help="Update an already integrated organisms with new data from input file, docker-compose.yml will not be re-generated"
-                             ", assuming the instances for the organisms are already generated and initialized",
-                        action="store_false")
     parser.add_argument("--dir",
                         help="Path of the main directory, either absolute or relative, defaults to current directory",
                         default=os.getcwd())
 
     args = parser.parse_args()
 
@@ -837,93 +331,23 @@ if __name__ == "__main__":
     else:
         logging.basicConfig(level=logging.INFO)
 
-    logging.info("Start")
+    logging.info("Deploy stacks: start")
     sp_dict_list = parse_input(args.input)
     for sp_dict in sp_dict_list:
-        al = Autoload(parameters_dictionary=sp_dict, args=args)
-        al.main_dir = os.path.abspath(args.dir)
-        if args.load_data:
-            """
-            Full workflow 
-            TODO: change later (docker side / load data side / galaxy side)
-            """
-            # al.generate_dir_tree()
-            # logging.info("Successfully generated the directory tree for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-            #
-            # # al.get_source_data_files_from_path()
-            # logging.info("Successfully retrieved source data files for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-            #
-            # al.deploy_stack()
-            # logging.info("Successfully deployed containers stack for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-            #
-            al.connect_to_instance()
-            logging.info("Connected to instance")
-            #
-            # al.create_species_history()
-            # logging.info("Created a history")
-            #
-            # al.setup_data_libraries()
-            # logging.info("Setting up data libraries")
-
-            # al.init_instance()
-            # logging.info("Successfully initialized instance for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-
-            # al.setup_data_libraries()
-            # logging.info("Successfully set up data libraries in galaxy for " + al.genus[0].upper() + ". " + al.species + " " + al.strain + " " + al.sex)
-
-
-            # if args.init_instance:
-            #     logging.info(" Initializing the galaxy instance")
-            #     al.init_instance()
-            #     al.get_instance_attributes()
-            #     # metadata[genus_species_strain_sex]["initialized"] = True
-            # if args.load_data:
-            #     logging.info("Loading data into galaxy")
-            #     # al.load_data()
-            #     # metadata[genus_species_strain_sex]["data_loaded_in_instance"] = True
-            # if args.run_main:
-            #     logging.info("Running main workflow")
-            #     al.get_organism_and_analyses_ids()
-            #     workflow_parameters = dict()
-            #     workflow_parameters["0"] = {}
-            #     workflow_parameters["1"] = {}
-            #     workflow_parameters["2"] = {}
-            #     workflow_parameters["3"] = {}
-            #     workflow_parameters["4"] = {"organism": al.org_id,
-            #                                 "analysis_id": al.genome_analysis_id,
-            #                                 "do_update": "true"}
-            #     workflow_parameters["5"] = {"organism": al.org_id,
-            #                                 "analysis_id": al.ogs_analysis_id}
-            #     workflow_parameters["6"] = {"organism_id": al.org_id}
-            #     workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id}
-            #     workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id}
-            #     workflow_parameters["9"] = {"organism_id": al.org_id}
-            #     workflow_parameters["10"] = {}
-            #     workflow_parameters["11"] = {}
-            #
-            #     al.datamap = dict()
-            #     al.datamap["0"] = {"src": "hda", "id": al.datasets["genome_file"]}
-            #     al.datamap["1"] = {"src": "hda", "id": al.datasets["gff_file"]}
-            #     al.datamap["2"] = {"src": "hda", "id": al.datasets["proteins_file"]}
-            #     al.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]}
-            #
-            #     al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap)
-            #     # metadata[genus_species_strain_sex]["workflows_run"] = metadata[genus_species_strain_sex]["workflows_run"].append("main")
-            #
-            # if args.link_source:
-            #     print('DEV')
-            #     al.generate_dir_tree()
-            #     print(al.main_dir)
-            #     print(al.species_dir)
-
-    logging.info("Exit")
-
-
-
-def main(species_data):
-    """
-    "Main" function
+        o = DeploySpeciesStack(parameters_dictionary=sp_dict)
+        o.main_dir = os.path.abspath(args.dir)
+
+        # o.make_directory_tree()
+        # logging.info("Successfully generated the directory tree for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
+
+        o.make_compose_files()
+        logging.info("Successfully generated the compose files for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
+
+        # o.get_source_data_files_from_path()
+        # logging.info("Successfully retrieved source data files for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
+
+        # o.deploy_stack()
+        # logging.info("Successfully deployed containers stack for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
+
+    logging.info("Deploy stacks: done")
 
-    :return:
-    """
-    print("OK")
\ No newline at end of file
diff --git a/docker_compose_generator.py b/docker_compose_generator.py
index d5fe776..81fdcc3 100755
--- a/docker_compose_generator.py
+++ b/docker_compose_generator.py
@@ -6,23 +6,9 @@ import logging
 # import json
 
 """
-docker-compose.yml generator
-The method "generate" works for both docker-compose architecture (old), or docker stack (new)
-This method will write a formatted docker-compose.yml for the specified organism (only requires genus and species)
-
-Made to work in the integration streamlined script "deploy_stacks.py" but can be used as a standalone (either with a CLI
-or in another python file as a module)
-
-Dockerfiles are specific to genus-species: a same organism can have several strains and sexes integrated, but only one 
-set of applications are used (see metadata files for details about what strains/sexes have been integrated for
-an organism)
+docker_compose_generator.py
 
-TODO: write the whole yml dict from scratch (would allow the script to be more reusable into the future and make it
-more customizable while being clearer (instead of the default yml string or input docker-compose template)
-
-TODO: read json
-
-API master key or galaxy: MASTER_API_KEY: XXXXXXX (alphanum, user prompt or git env variable)
+Writes a formatted docker-compose.yml for the specified organism (only the genus and species are required)
 """
 
 
diff --git a/examples/example_input.json b/examples/json_example_input.json
similarity index 100%
rename from examples/example_input.json
rename to examples/json_example_input.json
diff --git a/examples/yml_example_input.yml b/examples/yml_example_input.yml
index af0fe12..10395ab 100644
--- a/examples/yml_example_input.yml
+++ b/examples/yml_example_input.yml
@@ -3,13 +3,13 @@
 # list of species for which the script will have to create these stacks/load data into galaxy/run workflows
 # Add new config option using a config scalar
 
-
-config:  # Simple config part, allowing the user to create his/her own admin account (default is gga)
-  # WARNING: not supported currently, as the default connection is using the gga account
-  admin:
+config:
+  admins:  # Admin account to create. WARNING: not currently supported, as the default connection through a web browser uses the gga account
     username: "nflantier"  # Desired admin username
     password: "blanquette"  # Desired admin password
-    email: "noel.flantier@galaxy.org"  # Desired admin email
+    email: "noel.flantier@mail.com"  # Desired admin email
+  master_api_key: "master"  # Master API key (not used by the scripts at the moment)
+  url_prefix: "http://localhost/"  # URL prefix to forward
 
 ectocarpus_sp1:  # Dummy value the user gives to designate the species (isn't used by the script)
   # Species description, leave blank if unknown or you don't want it to be used
diff --git a/setup_data_libraries.py b/galaxy_data_libs_SI.py
similarity index 100%
rename from setup_data_libraries.py
rename to galaxy_data_libs_SI.py
diff --git a/load_data.py b/load_data.py
index 4f3fbbd..dd5cb3d 100644
--- a/load_data.py
+++ b/load_data.py
@@ -5,25 +5,262 @@
 import bioblend
 import bioblend.galaxy.objects
 from bioblend import galaxy
+import argparse
+import os
+import subprocess
 import logging
 import sys
-import deploy_stacks
-import create_input_instance
+import yaml
+import re
+from datetime import datetime
 
 
-"""
+""" 
 load_data.py
 
-Find source data files using the information provided in the input file.
-Copy these source data files over into the src_data directory
+Usage: $ python3 load_data.py input.yml [OPTIONS]
+"""
+
 
-Load the data into Galaxy using the script provided by Anthony Bretaudeau (setup_data_libraries)
-Also create/update the species history (TODO: Updating history)
+def parse_input(input_file):
+    """
+    Parse the yml input file to extract data to create the SpeciesData objects
+    Return a list of dictionaries. Each dictionary contains data tied to a species
 
+    :param input_file:
+    :return:
+    """
 
-"""
+    parsed_sp_dict_list = []
+
+    if str(input_file).endswith("yml") or str(input_file).endswith("yaml"):
+        logging.debug("Input format used: YAML")
+    else:
+        logging.critical("Error, please input a YAML file")
+        sys.exit()
+    with open(input_file, 'r') as stream:
+        try:
+            yaml_dict = yaml.safe_load(stream)
+            for k, v in yaml_dict.items():
+                if k == "config":
+                    continue  # the config block is not a species entry
+                parsed_sp_dict_list.append(v)
+        except yaml.YAMLError as exit_code:
+            logging.critical(str(exit_code) + " (YAML input file might be incorrect)")
+            sys.exit()
+    return parsed_sp_dict_list
+
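+# A minimal sketch of the expected YAML layout (keys follow examples/yml_example_input.yml
+# and the attributes read in LoadData.__init__ below; the values are hypothetical):
+#
+#   config:
+#     admins: {...}
+#   ectocarpus_sp1:
+#     description:
+#       genus: "Ectocarpus"
+#       species: "sp1"
+#       strain: "Ec32"
+#       sex: "male"
+#       common_name: ""
+#       origin: ""
+#     data:
+#       genome_version: "1.0"
+#       ogs_version: "1.0"
+#       parent_directory: "/path/to/closest/parent/dir"
+#       performed_by: ""
+#
+# parse_input() skips the "config" entry and returns one dictionary per species,
+# e.g. [{"description": {...}, "data": {...}}].
+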
+
+class LoadData:
+    """
+    Load data from the src_data subfolders into the galaxy instance's history of a given species
+
+    """
+
+    def __init__(self, parameters_dictionary):
+        self.parameters_dictionary = parameters_dictionary
+        self.species = parameters_dictionary["description"]["species"]
+        self.genus = parameters_dictionary["description"]["genus"]
+        self.strain = parameters_dictionary["description"]["strain"]
+        self.sex = parameters_dictionary["description"]["sex"]
+        self.common = parameters_dictionary["description"]["common_name"]
+        self.date = datetime.today().strftime("%Y-%m-%d")
+        self.origin = parameters_dictionary["description"]["origin"]
+        self.performed = parameters_dictionary["data"]["performed_by"]
+        if parameters_dictionary["data"]["genome_version"] == "":
+            self.genome_version = "1.0"
+        else:
+            self.genome_version = parameters_dictionary["data"]["genome_version"]
+        if parameters_dictionary["data"]["ogs_version"] == "":
+            self.ogs_version = "1.0"
+        else:
+            self.ogs_version = parameters_dictionary["data"]["ogs_version"]
+        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
+        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
+        self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
+        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
+        self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
+        self.genus_species = self.genus_lowercase + "_" + self.species
+        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        # Testing with localhost/scratchgmodv1
+        self.instance = None
+        self.history_id = None
+        self.library_id = None
+        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+        self.main_dir = None
+        self.species_dir = None
+        self.org_id = None
+        self.genome_analysis_id = None
+        self.ogs_analysis_id = None
+        self.tool_panel = None
+        self.datasets = dict()
+        self.source_files = dict()
+        self.workflow_name = None
+        self.metadata = dict()
+        self.api_key = "master"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
+        if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
+            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
+        else:
+            self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
+        # Directory/subdirectories where data files are located (fasta, gff, ...)
+        self.do_update = False
+        # Update the instance (in histories corresponding to the input) instead of creating a new one
+        self.api_key = "master"
+        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
+        self.species_name_regex_litteral = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder re
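+        # Worked example of the derived naming attributes (hypothetical input values):
+        # with genus="Ectocarpus", species="sp1", strain="Ec32", sex="male" the above gives
+        #   genus_species       -> "ectocarpus_sp1"
+        #   species_folder_name -> "ectocarpus_sp1_Ec32_male"
+        #   full_name           -> "Ectocarpus sp1 Ec32 male"
+        #   instance_url        -> "http://scratchgmodv1:8888/sp/ectocarpus_sp1/galaxy/"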
+
+
+    def modify_fasta_headers(self):
+        """
+        Change the fasta headers before integration.
+
+        :return:
+        """
+
+        try:
+            os.chdir(self.species_dir)
+            working_dir = os.getcwd()
+        except OSError:
+            logging.info("Cannot access " + self.species_dir + ", run with higher privileges")
+            logging.info("Fatal error: exit")
+            sys.exit()
+        self.source_files = dict()
+        annotation_dir, genome_dir = None, None
+        for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
+            if "annotation/" in d:
+                annotation_dir = d
+                for f in os.listdir(d):
+                    if f.endswith("proteins.fasta"):
+                        self.source_files["proteins_file"] = os.path.join(d, f)
+                    elif f.endswith("transcripts-gff.fa"):
+                        self.source_files["transcripts_file"] = os.path.join(d, f)
+                    elif f.endswith(".gff"):
+                        self.source_files["gff_file"] = os.path.join(d, f)
+            elif "genome/" in d:
+                genome_dir = d
+                for f in os.listdir(d):
+                    if f.endswith(".fa"):
+                        self.source_files["genome_file"] = os.path.join(d, f)
+                logging.debug("source files found:")
+        for k, v in self.source_files.items():
+            logging.debug("\t" + k + "\t" + v)
+
+        # Changing headers in the *proteins.fasta file from >mRNA* to >protein*
+        # production version
+        modify_pep_headers = [str(self.main_dir) + "/gga_load_data/utils/phaeoexplorer-change_pep_fasta_header.sh",
+                              self.source_files["proteins_file"]]
+        # test version
+        # modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
+                              # self.source_files["proteins_file"]]
+        logging.info("Changing fasta headers: " + self.source_files["proteins_file"])
+        subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
+        # production version
+        modify_transcript_headers = [str(self.main_dir) + "/gga_load_data/utils/phaeoexplorer-change_transcript_fasta_header.sh",
+                                     self.source_files["transcripts_file"]]
+        # test version
+        # modify_transcript_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
+        #                              self.source_files["transcripts_file"]]
+        logging.info("Changing fasta headers: " + self.source_files["transcripts_file"])
+        subprocess.run(modify_transcript_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
+
+        # src_data cleaning
+        if os.path.exists(annotation_dir + "/outfile"):
+            subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
+                           stdout=subprocess.PIPE,
+                           cwd=annotation_dir)
+        if os.path.exists(annotation_dir + "/gmon.out"):
+            subprocess.run(["rm", annotation_dir + "/gmon.out"],
+                           stdout=subprocess.PIPE,
+                           cwd=annotation_dir)
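+
+
+    @staticmethod
+    def _rename_pep_headers_sketch(infile_path, outfile_path):
+        """
+        Minimal pure-python sketch of what utils/phaeoexplorer-change_pep_fasta_header.sh
+        is assumed to do (rewrite ">mRNA.*" headers as ">protein.*", see the comment above).
+        Illustration only, this method is not called by the script.
+        """
+        with open(infile_path, "r") as fasta_in, open(outfile_path, "w") as fasta_out:
+            for line in fasta_in:
+                if line.startswith(">mRNA."):
+                    line = ">protein." + line[len(">mRNA."):]
+                fasta_out.write(line)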
+
+
+    def setup_data_libraries(self):
+        """
+        - generate blast banks and docker-compose
+        - load data into the galaxy container with the galaxy_data_libs_SI.py script
+
+        :return:
+        """
+
+        try:
+            logging.info("Loading data into the galaxy container")
+            subprocess.run("../serexec genus_species_galaxy /tool_deps/_conda/bin/python /opt/galaxy_data_libs_SI.py", shell=True, check=True)
+        except subprocess.CalledProcessError:
+            logging.info("Cannot load data into the galaxy container for " + self.full_name)
+            pass
+        else:
+            logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
+
+        self.get_species_history_id()
+        # self.get_instance_attributes()
+        #
+        # # import all datasets into current history
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
+        # self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
+
+
+
+    def generate_blast_banks(self):
+        """
+        Automatically generate blast banks for a species and commit
+
+        :return:
+        """
+
+
+    def connect_to_instance(self):
+        """
+        Test the connection to the galaxy instance for the current organism
+        Exit if it cannot connect to the instance
+        """
+        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password",
+                                              verify=False)
+        logging.info("Connecting to the galaxy instance ...")
+        try:
+            self.instance.histories.get_histories()
+            self.tool_panel = self.instance.tools.get_tool_panel()
+        except bioblend.ConnectionError:
+            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
+            sys.exit()
+        else:
+            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
+        self.instance.histories.create_history(name="FOO")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+
+    parser.add_argument("input",
+                        type=str,
+                        help="Input file (yml)")
+
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_true")
+
+    parser.add_argument("--dir",
+                        help="Path of the main directory, either absolute or relative, defaults to current directory",
+                        default=os.getcwd())
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
 
+    logging.info("Load data: start")
+    sp_dict_list = parse_input(args.input)
 
+    for sp_dict in sp_dict_list:
+        o = LoadData(parameters_dictionary=sp_dict)
+        o.main_dir = os.path.abspath(args.dir)
 
+        o.modify_fasta_headers()
+        logging.info("Successfully formatted fasta headers for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
 
+        # o.setup_data_libraries()
+        # logging.info("Successfully set up data libraries in galaxy for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)
 
+    logging.info("Load data: done")
diff --git a/run_workflow.py b/run_workflow.py
index 836e3e8..00e0c82 100644
--- a/run_workflow.py
+++ b/run_workflow.py
@@ -1,2 +1,464 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
+
+
+import bioblend
+import bioblend.galaxy.objects
+from bioblend import galaxy
+import argparse
+import os
+import subprocess
+import logging
+import sys
+import yaml
+import re
+import json
+from datetime import datetime
+from gga_autoload.gga_load_data import metadata_generator
+
+"""
+run_workflow.py
+
+Usage: $ python3 run_workflow.py input.yml [OPTIONS]
+"""
+
+
+def parse_input(input_file):
+    """
+    Parse the yml input file to extract data to create the SpeciesData objects
+    Return a list of dictionaries. Each dictionary contains data tied to a species
+
+    :param input_file:
+    :return:
+    """
+
+    parsed_sp_dict_list = []
+
+    if str(input_file).endswith("yml") or str(input_file).endswith("yaml"):
+        logging.debug("Input format used: YAML")
+    else:
+        logging.critical("Error, please input a YAML file")
+        sys.exit()
+    with open(input_file, 'r') as stream:
+        try:
+            yaml_dict = yaml.safe_load(stream)
+            for k, v in yaml_dict.items():
+                if k == "config":
+                    continue  # the config block is not a species entry
+                parsed_sp_dict_list.append(v)
+        except yaml.YAMLError as exit_code:
+            logging.critical(str(exit_code) + " (YAML input file might be incorrect)")
+            sys.exit()
+    return parsed_sp_dict_list
+
+
+class RunWorkflow:
+    """
+    Run a workflow into the galaxy instance's history of a given species
+
+    """
+
+    def __init__(self, parameters_dictionary):
+        self.parameters_dictionary = parameters_dictionary
+        self.species = parameters_dictionary["description"]["species"]
+        self.genus = parameters_dictionary["description"]["genus"]
+        self.strain = parameters_dictionary["description"]["strain"]
+        self.sex = parameters_dictionary["description"]["sex"]
+        self.common = parameters_dictionary["description"]["common_name"]
+        self.date = datetime.today().strftime("%Y-%m-%d")
+        self.origin = parameters_dictionary["description"]["origin"]
+        self.performed = parameters_dictionary["data"]["performed_by"]
+        if parameters_dictionary["data"]["genome_version"] == "":
+            self.genome_version = "1.0"
+        else:
+            self.genome_version = parameters_dictionary["data"]["genome_version"]
+        if parameters_dictionary["data"]["ogs_version"] == "":
+            self.ogs_version = "1.0"
+        else:
+            self.ogs_version = parameters_dictionary["data"]["ogs_version"]
+        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
+        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
+        self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
+        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
+        self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
+        self.genus_species = self.genus_lowercase + "_" + self.species
+        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
+        # Testing with localhost/scratchgmodv1
+        self.instance = None
+        self.history_id = None
+        self.library_id = None
+        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+        self.main_dir = None
+        self.species_dir = None
+        self.org_id = None
+        self.genome_analysis_id = None
+        self.ogs_analysis_id = None
+        self.tool_panel = None
+        self.datasets = dict()
+        self.source_files = dict()
+        self.workflow_name = None
+        self.metadata = dict()
+        self.api_key = "master"
+        if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
+            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
+        else:
+            self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
+        # Directory/subdirectories where data files are located (fasta, gff, ...)
+        self.do_update = False
+        # Update the instance (in histories corresponding to the input) instead of creating a new one
+        self.api_key = "master"
+        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
+        self.species_name_regex_litteral = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder re
+
+
+    def get_species_history_id(self):
+        """
+        Set and return the current species history id in its galaxy instance
+
+        :return:
+        """
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        self.instance.histories.show_history(history_id=self.history_id)
+
+        return self.history_id
+
+
+    def create_species_history(self):
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        logging.debug("Histories matching " + self.full_name + ": " + str(histories))
+        if not histories:
+            self.instance.histories.create_history(name=str(self.full_name))
+            logging.info("Created history for " + self.full_name)
+
+
+    def get_instance_attributes(self):
+        """
+        retrieves instance attributes:
+        - working history ID
+        - libraries ID (there should only be one library!)
+        - datasets IDs
+
+        :return:
+        """
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        logging.debug("history ID: " + self.history_id)
+        libraries = self.instance.libraries.get_libraries()  # normally only one library
+        self.library_id = libraries[0]["id"]  # project data folder/library
+        logging.debug("library ID: " + self.library_id)
+        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
+
+        folders_ids = {}
+        current_folder_name = ""
+        for i in instance_source_data_folders:
+            for k, v in i.items():
+                if k == "name":
+                    folders_ids[v] = 0
+                    current_folder_name = v
+                if k == "id":
+                    folders_ids[current_folder_name] = v
+        logging.info("Folders and datasets IDs: ")
+        self.datasets = dict()
+        for k, v in folders_ids.items():
+            logging.info("\t" + k + ": " + v)
+            if k == "/genome":
+                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
+                for k2, v2 in sub_folder_content.items():
+                    for e in v2:
+                        if type(e) == dict:
+                            if e["name"].endswith(".fa"):
+                                self.datasets["genome_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+            elif k == "/annotation/" + self.genus_species:
+                sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
+                for k2, v2 in sub_folder_content.items():
+                    for e in v2:
+                        if type(e) == dict:
+                            # TODO: manage several files of the same type and manage versions
+                            if e["name"].endswith("transcripts-gff.fa"):
+                                self.datasets["transcripts_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            elif e["name"].endswith("proteins.fasta"):
+                                self.datasets["proteins_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            elif e["name"].endswith(".gff"):
+                                self.datasets["gff_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
+                            elif e["name"].endswith("MALE"):
+                                self.datasets["gff_file"] = e["ldda_id"]
+                                logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
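+
+        # Example of the mapping built above (the ldda IDs are hypothetical):
+        #   self.datasets = {"genome_file": "ldda_id_1", "gff_file": "ldda_id_2",
+        #                    "transcripts_file": "ldda_id_3", "proteins_file": "ldda_id_4"}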
+
+
+    def init_instance(self):
+        """
+        Galaxy instance startup in preparation for running workflows
+        - remove Homo sapiens from the chado database.
+        - add organism and analyses into the chado database --> separate
+        - get any other existing organisms IDs before updating the galaxy instance --> separate
+
+        TODO: move the library and analysis/data stuff to a separate function
+        :return:
+        """
+
+        self.connect_to_instance()
+        self.get_species_history_id()
+        histories = self.instance.histories.get_histories(name=str(self.full_name))
+        # Create the first history if none exists yet
+        if not histories:
+            self.instance.histories.create_history(name=str(self.full_name))
+            histories = self.instance.histories.get_histories(name=str(self.full_name))
+        self.history_id = histories[0]["id"]
+        logging.debug("history ID: " + self.history_id)
+        # libraries = self.instance.libraries.get_libraries()  # routine check: one library
+        # self.library_id = self.instance.libraries.get_libraries()[0]["id"]  # project data folder/library
+        logging.debug("library ID: " + str(self.library_id))
+        instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
+
+        # Delete Homo sapiens from Chado database
+        logging.debug("Getting 'Homo sapiens' ID in instance's chado database")
+        get_sapiens_id_job = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"genus": "Homo", "species": "sapiens"})
+        get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
+        get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output)
+        try:
+            logging.debug("Deleting 'Homo sapiens' in the instance's chado database")
+            get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
+            sapiens_id = str(
+                get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
+            self.instance.tools.run_tool(
+                tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
+                history_id=self.history_id,
+                tool_inputs={"organism": str(sapiens_id)})
+        except bioblend.ConnectionError:
+            logging.debug("Homo sapiens isn't in the instance's chado database")
+        except IndexError:
+            logging.debug("Homo sapiens isn't in the instance's chado database")
+            pass
+
+        # TODO: the following actions should be done in a separate function (in case if the user wants to do everything him/herself -- for EOSC)
+        # Add organism (species) to chado
+        logging.info("Adding organism to the instance's chado database")
+        self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"abbr": self.abbreviation,
+                         "genus": self.genus,
+                         "species": self.species,
+                         "common": self.common})
+        # Add OGS analysis to chado
+        logging.info("Adding OGS analysis to the instance's chado database")
+        self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
+                         "program": "Performed by Genoscope",
+                         "programversion": str("OGS" + self.ogs_version),
+                         "sourcename": "Genoscope",
+                         "date_executed": self.date})
+
+        # Add genome analysis to chado
+        logging.info("Adding genome analysis to the instance's chado database")
+        self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
+                         "program": "Performed by Genoscope",
+                         "programversion": str("genome v" + self.genome_version),
+                         "sourcename": "Genoscope",
+                         "date_executed": self.date})
+        self.get_organism_and_analyses_ids()
+        logging.info("Finished initializing instance")
+
+
+    def run_workflow(self, workflow_name, workflow_parameters, datamap):
+        """
+        Run the "main" workflow in the galaxy instance
+        - import data to library
+        - load fasta and gff
+        - sync with tripal
+        - add jbrowse + organism
+        - fill in the tripal views
+
+        TODO: map tool name to step id
+        :param workflow_name:
+        :param workflow_parameters:
+        :param datamap:
+        :return:
+        """
+
+        logging.debug("running workflow: " + str(workflow_name))
+        workflow_ga_file = os.path.join(self.main_dir, "Galaxy-Workflow-" + workflow_name + ".ga")
+        if self.strain != "":
+            custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
+            custom_ga_file_path = os.path.abspath(custom_ga_file)
+        else:
+            custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
+            custom_ga_file_path = os.path.abspath(custom_ga_file)
+        with open(workflow_ga_file, 'r') as ga_in_file:
+            workflow = str(ga_in_file.readlines())
+            # ugly fix for the jbrowse parameters
+            workflow = workflow.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
+                                        str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
+            workflow = workflow.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
+                                        str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
+            workflow = workflow.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
+            # test
+            workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
+                                        "http://localhost/sp/" + self.genus_lowercase+ "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
+            # production
+            # workflow = workflow.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
+            #                             "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
+            workflow = workflow[2:-2]  # if the line under doesn't output a correct json
+            # workflow = workflow[:-2]  # if the line above doesn't output a correct json
+
+            workflow_dict = json.loads(workflow)
+
+            self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+            self.workflow_name = workflow_name
+            workflow_attributes = self.instance.workflows.get_workflows(name=self.workflow_name)
+            workflow_id = workflow_attributes[0]["id"]
+            show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id)
+            logging.debug("Workflow ID: " + workflow_id)
+
+            logging.debug("Inputs:")
+            logging.debug(show_workflow["Inputs"])
+            self.instance.workflows.invoke_workflow(workflow_id=workflow_id,
+                                                    history_id=self.history_id,
+                                                    params=workflow_parameters,
+                                                    inputs=datamap,
+                                                    inputs_by="")
+            self.instance.workflows.delete_workflow(workflow_id=workflow_id)
+
+
+    def get_organism_and_analyses_ids(self):
+        """
+        Retrieve the current organism ID and the OGS and genome chado analyses IDs (needed to run some tools, as Tripal/Chado
+        doesn't accept organism/analyses names as valid inputs)
+
+        :return:
+        """
+        # Get the ID for the current organism in chado
+        org = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"genus": self.genus, "species": self.species})
+        org_job_out = org["outputs"][0]["id"]
+        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
+        try:
+            org_output = json.loads(org_json_output)[0]
+            self.org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
+        except IndexError:
+            logging.debug("no organism matching " + self.full_name + " exists in the instance's chado database")
+
+        # Get the ID for the OGS analysis in chado
+        ogs_analysis = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
+        ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
+        ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
+        try:
+            ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
+            self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"])
+        except IndexError:
+            logging.debug("no matching OGS analysis exists in the instance's chado database")
+
+        # Get the ID for the genome analysis in chado
+        genome_analysis = self.instance.tools.run_tool(
+            tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
+            history_id=self.history_id,
+            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
+        genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
+        genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
+        try:
+            genome_analysis_output = json.loads(genome_analysis_json_output)[0]
+            self.genome_analysis_id = str(genome_analysis_output["analysis_id"])
+        except IndexError:
+            logging.debug("no matching genome analysis exists in the instance's chado database")
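+
+
+    def _get_chado_id_sketch(self, tool_id, tool_inputs, id_key):
+        """
+        Minimal sketch (not called anywhere): the three lookups above share one pattern,
+        run a chado "get" tool, download its JSON output and read a single ID key from it.
+        """
+        run = self.instance.tools.run_tool(tool_id=tool_id,
+                                           history_id=self.history_id,
+                                           tool_inputs=tool_inputs)
+        json_output = self.instance.datasets.download_dataset(dataset_id=run["outputs"][0]["id"])
+        try:
+            return str(json.loads(json_output)[0][id_key])  # chado tools expect IDs as str
+        except IndexError:
+            logging.debug("no matching record in the instance's chado database")
+            return None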
+
+
+    def connect_to_instance(self):
+        """
+        TODO: move in init/access
+        TODO: password
+        Test the connection to the galaxy instance for the current organism
+        Exit if it cannot connect to the instance
+        """
+        self.instance = galaxy.GalaxyInstance(url=self.instance_url, email="gga@sb-roscoff.fr", password="password",
+                                              verify=False)
+        logging.info("Connecting to the galaxy instance ...")
+        try:
+            self.instance.histories.get_histories()
+            self.tool_panel = self.instance.tools.get_tool_panel()
+        except bioblend.ConnectionError:
+            logging.critical("Cannot connect to galaxy instance @ " + self.instance_url)
+            sys.exit()
+        else:
+            logging.info("Successfully connected to galaxy instance @ " + self.instance_url)
+        self.instance.histories.create_history(name="FOO")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
+                                                 "with galaxy instances for GGA"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+
+    parser.add_argument("input",
+                        type=str,
+                        help="Input file (yml)")
+
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_true")
+
+    parser.add_argument("--init-instance",
+                        help="Initialize the galaxy instance (add organism and analyses to the chado database). "
+                             "Run first in an empty instance",
+                        action="store_true")
+
+    parser.add_argument("--load-data",
+                        help="Load the src_data datasets into the galaxy instance history",
+                        action="store_true")
+
+    parser.add_argument("--run-main",
+                        help="Run the main workflow (load data into chado, sync all with tripal, index tripal data, "
+                             "populate materialized views, create a jbrowse for the organism and add it to jbrowse)",
+                        action="store_true")
+
+    parser.add_argument("--dir",
+                        help="Path of the main directory, either absolute or relative, defaults to current directory",
+                        default=os.getcwd())
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
+    logging.info("Run workflow: start")
+    sp_dict_list = parse_input(args.input)
+
+    for sp_dict in sp_dict_list:
+        o = RunWorkflow(parameters_dictionary=sp_dict)
+        o.main_dir = os.path.abspath(args.dir)
+        if args.init_instance:
+            logging.info(" Initializing the galaxy instance")
+            o.init_instance()
+            o.get_instance_attributes()
+            # metadata[genus_species_strain_sex]["initialized"] = True
+        if args.load_data:
+            logging.info("Loading data into galaxy")
+            # o.load_data()
+            # metadata[genus_species_strain_sex]["data_loaded_in_instance"] = True
+        if args.run_main:
+            logging.info("Running main workflow")
+            o.get_organism_and_analyses_ids()
+            workflow_parameters = dict()
+            workflow_parameters["0"] = {}
+            workflow_parameters["1"] = {}
+            workflow_parameters["2"] = {}
+            workflow_parameters["3"] = {}
+            workflow_parameters["4"] = {"organism": o.org_id,
+                                        "analysis_id": o.genome_analysis_id,
+                                        "do_update": "true"}
+            workflow_parameters["5"] = {"organism": o.org_id,
+                                        "analysis_id": o.ogs_analysis_id}
+            workflow_parameters["6"] = {"organism_id": o.org_id}
+            workflow_parameters["7"] = {"analysis_id": o.ogs_analysis_id}
+            workflow_parameters["8"] = {"analysis_id": o.genome_analysis_id}
+            workflow_parameters["9"] = {"organism_id": o.org_id}
+            workflow_parameters["10"] = {}
+            workflow_parameters["11"] = {}
+
+            o.datamap = dict()
+            o.datamap["0"] = {"src": "hda", "id": o.datasets["genome_file"]}
+            o.datamap["1"] = {"src": "hda", "id": o.datasets["gff_file"]}
+            o.datamap["2"] = {"src": "hda", "id": o.datasets["proteins_file"]}
+            o.datamap["3"] = {"src": "hda", "id": o.datasets["transcripts_file"]}
+
+            o.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=o.datamap)
+            # metadata[genus_species_strain_sex]["workflows_run"] = metadata[genus_species_strain_sex]["workflows_run"].append("main")
diff --git a/table_parser.py b/table_parser.py
index 9e55ecd..9314b91 100755
--- a/table_parser.py
+++ b/table_parser.py
@@ -1,80 +1,79 @@
-import os
-import sys
-import pandas  # xlrd required for excel files reading
-import numpy
-import json
-import argparse
-import logging
-from datetime import datetime
-
-"""
-OBSOLETE
-
-Input parser script. 
-Does not work for ods spreadsheets (save as xls or xlsx instead) --> need to handle with pandas_ods_reader (requires ezodf, lxml)
-Does not support multiple sheets (TODO: "integration" and "update" sheets (1 and 2))
-See example toy table (toy_table.xls)
-
-TODO: move this script inside autoload
-
-standalone usage: python3 table_parser.py <tabulated_file> -d <directory_to_write_json_to (default: cwd)>
-"""
-
-
-class TableParser:
-
-	def __init__(self, table_file, dir):
-		self.dir = os.path.abspath(args.dir)
-		self.table_file = table_file
-		self.method = None  # TODO: instant launch or just parse (standalone)
-		self.extension = None
-		self.meta = dict()
-		self.json_file = None	
-
-	def parse_table(self, extension):
-		if extension == "xls":
-			pandas_table = pandas.DataFrame(pandas.read_excel(self.table_file))
-		elif extension == "csv":
-			pandas_table = pandas.DataFrame(pandas.read_csv(self.table_file))
-		else:
-			logging.info("wrong format: input tabulated file cannot be read (supported formats: xls, xlsx, csv)")
-			sys.exit()
-		pandas_table = pandas_table.replace(numpy.nan, "", regex=True)
-		
-		for char in " ,.()-/":
-			pandas_table = pandas_table.replace("\\" + char, "_", regex=True)
-		pandas_table = pandas_table.replace("\\__", "_", regex=True)
-		pandas_table.loc[pandas_table["genome version"] == "", "genome version"] = "1.0"
-		pandas_table.loc[pandas_table["ogs version"] == "", "ogs version"] = "1.0"
-		pandas_table.loc[pandas_table["version"] == "", "version"] = "1.0"
-		pandas_table.loc[pandas_table["date"] == "", "date"] = datetime.today().strftime("%Y-%m-%d")
-		with open(os.path.join(self.dir, self.json_file), 'w') as json_file:
-			json_file.truncate(0)
-			json_content = list()
-			for organism in range(0, len(pandas_table.index)):
-				organism_dict = pandas_table.iloc[organism].to_dict()
-				for k, v in organism_dict.items():
-					v = str(v).split(" ")
-					v = "_".join(v)
-					v = v.replace("__", "_")
-					if v.endswith("_"):
-						v = v[:-1]
-				json_content.append(organism_dict)
-			json.dump(json_content, json_file, indent=4)
-
-	def write_json(self, data, filename):
-		with open(filename, 'w') as f:
-			json.dump(data, f, indent=4)
-
-
-if __name__ == "__main__":
-	parser = argparse.ArgumentParser(description="Table parser for phaeoexplorer data")
-	parser.add_argument("input", type=str, help="input table")
-	parser.add_argument("-d", "--dir", type=str, help="Where to write the output json file that is be used for integration", default = os.getcwd())
-	args = parser.parse_args()
-
-	if args.input.endswith("xlsx") or args.input.endswith("xls"):
-		tp = TableParser(table_file=args.input, dir=args.dir)
-		tp.extension = args.input.split(".")[1]
-		tp.json_file = tp.dir + "/dataloader_" + datetime.today().strftime("%Y%m%d") + ".json"
-		tp.parse_table(extension="xls")
+import os
+import sys
+import pandas  # xlrd required for excel files reading
+import numpy
+import json
+import argparse
+import logging
+from datetime import datetime
+
+"""
+!! OBSOLETE !!
+
+Input parser script. 
+Does not work for ods spreadsheets (save as xls or xlsx instead) --> need to handle with pandas_ods_reader (requires ezodf, lxml)
+Does not support multiple sheets (TODO: "integration" and "update" sheets (1 and 2))
+See example toy table (toy_table.xls)
+
+standalone usage: python3 table_parser.py <tabulated_file> -d <directory_to_write_json_to (default: cwd)>
+
+"""
+
+
+class TableParser:
+
+	def __init__(self, table_file, dir):
+		self.dir = os.path.abspath(dir)
+		self.table_file = table_file
+		self.method = None  # TODO: instant launch or just parse (standalone)
+		self.extension = None
+		self.meta = dict()
+		self.json_file = None	
+
+	def parse_table(self, extension):
+		if extension == "xls":
+			pandas_table = pandas.DataFrame(pandas.read_excel(self.table_file))
+		elif extension == "csv":
+			pandas_table = pandas.DataFrame(pandas.read_csv(self.table_file))
+		else:
+			logging.info("wrong format: input tabulated file cannot be read (supported formats: xls, xlsx, csv)")
+			sys.exit()
+		pandas_table = pandas_table.replace(numpy.nan, "", regex=True)
+		
+		for char in " ,.()-/":
+			pandas_table = pandas_table.replace("\\" + char, "_", regex=True)
+		pandas_table = pandas_table.replace("\\__", "_", regex=True)
+		pandas_table.loc[pandas_table["genome version"] == "", "genome version"] = "1.0"
+		pandas_table.loc[pandas_table["ogs version"] == "", "ogs version"] = "1.0"
+		pandas_table.loc[pandas_table["version"] == "", "version"] = "1.0"
+		pandas_table.loc[pandas_table["date"] == "", "date"] = datetime.today().strftime("%Y-%m-%d")
+		with open(os.path.join(self.dir, self.json_file), 'w') as json_file:
+			json_file.truncate(0)
+			json_content = list()
+			for organism in range(0, len(pandas_table.index)):
+				organism_dict = pandas_table.iloc[organism].to_dict()
+				for k, v in organism_dict.items():
+					v = str(v).split(" ")
+					v = "_".join(v)
+					v = v.replace("__", "_")
+					if v.endswith("_"):
+						v = v[:-1]
+				json_content.append(organism_dict)
+			json.dump(json_content, json_file, indent=4)
+
+	def write_json(self, data, filename):
+		with open(filename, 'w') as f:
+			json.dump(data, f, indent=4)
+
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser(description="Table parser for phaeoexplorer data")
+	parser.add_argument("input", type=str, help="input table")
+	parser.add_argument("-d", "--dir", type=str, help="Where to write the output json file that will be used for integration", default=os.getcwd())
+	args = parser.parse_args()
+
+	if args.input.endswith("xlsx") or args.input.endswith("xls"):
+		tp = TableParser(table_file=args.input, dir=args.dir)
+		tp.extension = args.input.split(".")[1]
+		tp.json_file = tp.dir + "/dataloader_" + datetime.today().strftime("%Y%m%d") + ".json"
+		tp.parse_table(extension="xls")
diff --git a/templates/compose-template.yml b/templates/compose-template.yml
index 590923c..b3b8578 100755
--- a/templates/compose-template.yml
+++ b/templates/compose-template.yml
@@ -81,7 +81,7 @@ services:
     galaxy:
         image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
         volumes:
-            - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py
+            - ../galaxy_data_libs_SI.py:/opt/galaxy_data_libs_SI.py
             - ./docker_data/galaxy:/export
             - ./src_data/:/project_data:ro
             #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
diff --git a/templates/stack-organism.yml b/templates/stack-organism.yml
index 519b96f..103757f 100644
--- a/templates/stack-organism.yml
+++ b/templates/stack-organism.yml
@@ -112,7 +112,7 @@ services:
     galaxy:
         image: quay.io/galaxy-genome-annotation/docker-galaxy-annotation:gmod
         volumes:
-            - ../galaxy_data_libs_SI.py:/opt/setup_data_libraries.py
+            - ../galaxy_data_libs_SI.py:/opt/galaxy_data_libs_SI.py
             - ./docker_data/galaxy/:/export/
             - ./src_data/:/project_data/:ro
             #- /groups/XXX/:/groups/XXX/:ro  # We do this when we have symlinks in src_data pointing to /groups/XXX/...
@@ -130,7 +130,7 @@ services:
             GALAXY_DEFAULT_ADMIN_USER: "gga"
             GALAXY_DEFAULT_ADMIN_PASSWORD: "password"
             GALAXY_CONFIG_ADMIN_USERS: "admin@galaxy.org, gga@sb-roscoff.fr, lgueguen@sb-roscoff.fr, alebars@sb-roscoff.fr"   # admin@galaxy.org is the default (leave it), gogepp@bipaa is a shared ldap user we use to connect
-            GALAXY_CONFIG_MASTER_API_KEY: "dev"
+            GALAXY_CONFIG_MASTER_API_KEY: "master"
             ENABLE_FIX_PERMS: 0
             PROXY_PREFIX: /sp/genus_species/galaxy
             GALAXY_TRIPAL_URL: http://tripal.genus_species/tripal/
diff --git a/ext_scripts/__init__.py b/utils/__init__.py
similarity index 100%
rename from ext_scripts/__init__.py
rename to utils/__init__.py
diff --git a/ext_scripts/blastdb.py b/utils/blastdb.py
similarity index 100%
rename from ext_scripts/blastdb.py
rename to utils/blastdb.py
diff --git a/ext_scripts/common-stringSubsitute.py b/utils/common-stringSubsitute.py
similarity index 97%
rename from ext_scripts/common-stringSubsitute.py
rename to utils/common-stringSubsitute.py
index c32a177..c4d22a9 100755
--- a/ext_scripts/common-stringSubsitute.py
+++ b/utils/common-stringSubsitute.py
@@ -1,37 +1,37 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import argparse
-import os
-import re
-import sys
-
-# Return the file obtained by replacing the occurrences of pattern by the replacement string.
-# Use of python method re.sub()
-# python common-stringSubsitute.py -f file -p pattern -r replacement_string
-# ex : python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'
-
-if __name__ == '__main__':
-
-    #Get arguments
-    parser = argparse.ArgumentParser(description="Return the file obtained by replacing the occurrences of pattern by the replacement string. Use of python method re.sub(). Example: python common-stringSubsitute.py -f file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'")
-    parser.add_argument('-i','--infile', help='Input file', required=True)
-    parser.add_argument('-o','--outfile', help='Output file', default='outfile')
-    parser.add_argument('-p','--pattern', help='Pattern string to be replaced', required=True)
-    parser.add_argument('-r','--repl', help='Replacement string', required=True)
-    args = parser.parse_args()
-
-    infilename=args.infile
-    outfilename=args.outfile
-    pattern=args.pattern
-    repl=args.repl
-
-    infile=open(infilename,'r')
-    outfile=open(outfilename,'w')
-
-    lines=infile.readlines()
-
-    for line in lines :
-        line_out=re.sub(pattern,repl,line)
-        outfile.write(line_out)
-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+import re
+import sys
+
+# Return the file obtained by replacing the occurrences of pattern by the replacement string.
+# Use of python method re.sub()
+# python common-stringSubsitute.py -i infile [-o outfile] -p pattern -r replacement_string
+# ex : python common-stringSubsitute.py -i file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'
+
+if __name__ == '__main__':
+
+    #Get arguments
+    parser = argparse.ArgumentParser(description="Return the file obtained by replacing the occurrences of pattern by the replacement string. Use of python method re.sub(). Example: python common-stringSubsitute.py -i file -p '(tRNA)(\w{3})(\w{3})' -r '\g<1>-\g<2>(\g<3>)'")
+    parser.add_argument('-i','--infile', help='Input file', required=True)
+    parser.add_argument('-o','--outfile', help='Output file', default='outfile')
+    parser.add_argument('-p','--pattern', help='Pattern string to be replaced', required=True)
+    parser.add_argument('-r','--repl', help='Replacement string', required=True)
+    args = parser.parse_args()
+
+    infilename=args.infile
+    outfilename=args.outfile
+    pattern=args.pattern
+    repl=args.repl
+
+    infile=open(infilename,'r')
+    outfile=open(outfilename,'w')
+
+    lines=infile.readlines()
+
+    for line in lines :
+        line_out=re.sub(pattern,repl,line)
+        outfile.write(line_out)
+
     outfile.close()
\ No newline at end of file
diff --git a/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh b/utils/phaeoexplorer-change_pep_fasta_header.sh
similarity index 96%
rename from ext_scripts/phaeoexplorer-change_pep_fasta_header.sh
rename to utils/phaeoexplorer-change_pep_fasta_header.sh
index 0de7b9b..3cf614f 100755
--- a/ext_scripts/phaeoexplorer-change_pep_fasta_header.sh
+++ b/utils/phaeoexplorer-change_pep_fasta_header.sh
@@ -1,17 +1,19 @@
-#!/usr/bin/env bash
-
-INFILE=$1
-OUTFILE=tmpfile
-
-FILE_HEADER_START=$(grep ">" $INFILE | cut -c 1-6 | sort | uniq)
-HEADER_START_STRING=">mRNA."
-
-if [[ "$FILE_HEADER_START" == "$HEADER_START_STRING" ]]
-then
-    /usr/local/genome2/mmo/scripts/common/common-stringSubstitute.py -i $INFILE -o $OUTFILE -p '^>mRNA' -r '>protein'
-    mv $OUTFILE $INFILE
-    echo "'>mRNA' replaced by '>protein'"
-else 
-    echo "Abort. Not all headers start with '>mRNA.':"
-    echo "$FILE_HEADER_START"
+#!/usr/bin/env bash
+
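+# Example usage (illustrative filename): ./phaeoexplorer-change_pep_fasta_header.sh organism_pep.fasta
+# Rewrites '>mRNA.' headers to '>protein.' in place, only when every header in the file matches.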
+INFILE=$1
+OUTFILE=tmpfile
+
+FILE_HEADER_START=$(grep ">" $INFILE | cut -c 1-6 | sort | uniq)
+HEADER_START_STRING=">mRNA."
+
+if [[ "$FILE_HEADER_START" == "$HEADER_START_STRING" ]]
+then
+    /usr/local/genome2/mmo/scripts/common/common-stringSubstitute.py -i $INFILE -o $OUTFILE -p '^>mRNA' -r '>protein'
+    mv $OUTFILE $INFILE
+    echo "'>mRNA' replaced by '>protein'"
+else 
+    echo "Abort. Not all headers start with '>mRNA.':"
+    echo "$FILE_HEADER_START"
 fi
\ No newline at end of file
diff --git a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh b/utils/phaeoexplorer-change_transcript_fasta_header.sh
similarity index 100%
rename from ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh
rename to utils/phaeoexplorer-change_transcript_fasta_header.sh
diff --git a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak b/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak
similarity index 97%
rename from ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak
rename to utils/phaeoexplorer-change_transcript_fasta_header.sh.bak
index 196675b..12ce4e5 100755
--- a/ext_scripts/phaeoexplorer-change_transcript_fasta_header.sh.bak
+++ b/utils/phaeoexplorer-change_transcript_fasta_header.sh.bak
@@ -1,7 +1,7 @@
-#!/usr/bin/env bash
-
-INFILE=$1
-OUTFILE=tmpfile
-/home/fr2424/sib/alebars/gga_load_data/ext_scripts/common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA'
-mv $OUTFILE $INFILE
-echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1"
+#!/usr/bin/env bash
+
+INFILE=$1
+OUTFILE=tmpfile
+/home/fr2424/sib/alebars/gga_load_data/ext_scripts/common-stringSubsitute.py -i $INFILE -o $OUTFILE -p '^>\d+ mRNA' -r '>mRNA'
+mv $OUTFILE $INFILE
+echo "'>[0-9]+ mRNA' replaced by '>mRNA' in $1"
-- 
GitLab