From 19a008f88e30ccf893a15bfe56c57b285afa7d26 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Tue, 10 Feb 2026 12:12:45 -0500 Subject: [PATCH 1/5] Dockerize cst --- .dockerignore | 59 ++++++++++++++++++++++++++++++++ .env.shared | 5 +++ compose.yaml | 23 +++++++++++++ docker-desktop-container.png | Bin 0 -> 17299 bytes docs/source/install.rst | 63 ++++++++++++++++++++++++----------- uta-setup.sql | 27 +++++++++++++++ 6 files changed, 158 insertions(+), 19 deletions(-) create mode 100644 .dockerignore create mode 100644 .env.shared create mode 100644 compose.yaml create mode 100644 docker-desktop-container.png create mode 100644 uta-setup.sql diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..698dd01 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,59 @@ +# Bytecode files +__pycache__/ +*.pyc +*.pyo + +# Virtual environment +venv/ +.venv/ + +# Debug and log files +*.log +debug*.txt + +# Data and media +data/ +media/ + +# Temporary files and directories +tmp/ +temp/ +*.swp + +# IDE and OS files +.idea/ +.vscode/ +.DS_Store + +# Distribution files +dist/ +build/ +*.egg-info/ + +# Secrets +.env +.env.* +*.pem +*.key +*.crt +*.p12 +*.jks +secrets/ + +# Git +.git/ +.gitignore + +# Tests & coverage +tests/ +test/ +.pytest_cache/ +.coverage +htmlcov/ + +# Docs / misc +README* +docs/ + +# CI +.github/ diff --git a/.env.shared b/.env.shared new file mode 100644 index 0000000..174e3ed --- /dev/null +++ b/.env.shared @@ -0,0 +1,5 @@ +SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2024-12-20 +AWS_ACCESS_KEY_ID=dummy +AWS_SECRET_ACCESS_KEY=dummy +AWS_SESSION_TOKEN=dummy +UTA_DB_URL=postgresql://anonymous@localhost:5432/uta/uta_20241220 diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..de2f586 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,23 @@ +services: + uta: + # Test: + # psql -XAt postgres://anonymous@localhost/uta -c 'select count(*) from uta_20241220.transcript' + # 329090 + image: biocommons/uta:uta_20241220 + environment: + - POSTGRES_PASSWORD=some-password-that-you-make-up + volumes: + - uta_vol:/var/lib/postgresql/data + - type: bind + source: ./uta_20241220.pgd.gz + target: /tmp/uta_20241220.pgd.gz + read_only: true + bind: + create_host_path: false + - ./uta-setup.sql:/docker-entrypoint-initdb.d/uta-setup.sql + ports: + - 127.0.0.1:5432:5432 + +volumes: + uta_vol: + external: true diff --git a/docker-desktop-container.png b/docker-desktop-container.png new file mode 100644 index 0000000000000000000000000000000000000000..500e5fbd5986b2e8391beb149e18a61400ac6f7c GIT binary patch literal 17299 zcmd7(WmH>h*9HosMT<+JNN{&6?(Xhx#oaZy6^8;vixs!xu7O}JZpEb(cZZWc`gTA2 z`|*u&ew>Vz(yx&Tk)fcVUdzfzs6j!&sYAkQh_4{O89bk8prDZJ zZN$Y@WW~h+Dy~jeHujcKP%_bJ9}zUv7x5w{JNL$qCgcUtps?YdeE zxL(|=Y@c6(4p!KTlGta34@Najf44H9g7+3*xnB?gqX$YLAC^4w6P;N^#4DHqtG({! zhvg_9Gakkgv!{oL7uy(tKx%v_TAU3DBWfPljl$uK?U(^wsC+cCHc_SY_c2c7m5fqY z65bXuDc0VuG1#R9ika8ZsMcimh)}r$8&N6H09`UZrca;Z2FMhTEUC7U!Vqi37nmr_ zdC@l|cQy!Z$qU$q=lc8 zY6F2A!pi+k zShvIeaJq-Xy1UlC(qr}BJ}vfZlx(rQqeN#0Xvtq`Gjsc_M>qFMC5Faan*e+|>H2yS zJ=y5;C9zZ+^$n-tUAb-#_ru3z6c;Sk__daH4Qk|17B@Yaqj@YA6N6)MX*)w(em;|T z5+je2y1Dh868i=0L4j>G3X@~;iB;obLKCfx1@-(2JGYS&?58lR01Th^!SMD#;#$$% zH|sfrljB)@sCKQi%8W8fBcIulVz#Ci#Hx4vvp%GKfQ3@O;=l0e=;Sy2m`ER}G#U(D zn^RsU=|uYXZZVI7SqV zaw@`pjX(y#8UPjtqo|3z!U!d*MXp4U%!jjwUKZv00_!J2@eUC!6wMhX8ydd{*ZK7d z40r^xR}nZtPc zHJxGop&TP8d$iZ}YjFah6AGc^(28TG#C7xE(Bjd<;U>t5MXTDW41ST*fGx+J4gV$i zLqt}UMK$l64kxanxPHFjx4@sYocNtEov#dIs-<{7FMr?r#`7)oC)=eqDnVg58|a<+ zLWYh4J2irOSlPf~KT&@+Xr&L;eAT>8N0X8GAnK}r$W*K}Bjo{wKKs6)HA$REz}Q_p~>%3RKlJTA2FLd(r68PCMJ%mpj?pecN!Z z2ixV_Xd{NHZ&M#6)W|cFbczNkk!(0@gv&m;CtRj0P`^zP96sA-JhyVizGJ*&yDJpX zTvf|unoKcfFs4{z4l;aBgAwhO0hZNjkQo=KZAA5HhJVC?8O-SW>`ZG%%ayv7%Ae{Y zS^l}vEXpjxtjw%olWGu?Dm%F@xfx5jOvNT)O1GGdKE_tWQnhvF^-Sl?#f+(CCA(o} zRd%_-XMdGeV;==c*+C^LJu2;Dshy%24cubeO0brqHj}zwrLX=~Lsk=^`iGh(*(%MW zwk3ospI;=u{IATexUcko<#D|yTqU$2)aT;kB(eF-HO|@3>Bi}0mpkvztL&3IDsbGf z=@Ys4V?LyD>}y}6mhI8^U(k6~l*$il@up@4Ywo)$x{A zU13_mE7OQ0bo_8!ZOnG8I;(#dZ5QUka2NK1?ILFvWshg9+g!BXM9)J1s}8DOx!x;1 zJRMJ+Epq7U-b0+0CG6;|#y)yJF@gy`fhN2SS`BTTDn?yKgGNuQa;vJVv#SXKmjTHEdj3QH zM*-URo)4yvFpo|5KTPc!S`VqOGr9|3Y+u}?7)qr=y!teFJ^_vu$ zm^w=%BAQ0pqxj&+p&Me}%fI1fm9A+KswN%1yFNLjSDrnNn5o zJM#C~!cU)t2fPPUK{Dp}1J&`JSfvCsQ3r9LL21xQEUEE!=lh4x8-v%ev3P4nV#dks z+f!=e#ryKg%<4Krx}KEKDHti=vROxvHaMM!uot5nqsE0!q>>F&nyC^>T1quaI&f;?W<%!70?RzgNTt6J(u6d#oufi16u=ro;Bq z4;Uv6g#hgMxY0!n+1mETY}9BNC!=BPA1yqs=qNs@aLb_Slr z>c-Q-O~vn6#%L(0P-%BI-1z3zwbqC;f*&1wZ129Pb0c`~*L(By$)4wE<+3qD+rM?7 zyr|xyK2_iM4&i{bJn*)O*TyX>-mz^~7wcd)FnEQbaZPZpmHQo6mn-U#j&lztr&dY@2betPsac zL*-uY;z9$@WO&YjFsbwNCVU=>wO?juDNCD?fXm3;)Y)j8SDHXw|&$c9Zymq=96#IwFY-*XdV)4HN&J@*Cw6-Hp!x)%5Jh+SB-q6w{CS`X zqVV>iP4x8_GmgSbAFk;NiJ*F>7r;w=r%Cy6kJDDZ_ubo1lLW(`pBp83Ru=NFZtiMn>FD;}$vsNxU>TCtoQ;N#yN;p)pShC*v#EuXnI*HggY)ks zPy*h3kf4L5yD7li!QRo0&s&iEA2IkK;opy0$N~R|;%+BMuA`^|5O;F51aLF6GP9Bk zAp-ya0aptvJ~auceV;A^G^#2k3x4{3CQ0Ko1 z+1OZl{%_L%Bl>GnO*czdaVG~zk?unO9hrY6{=dS1B@|%!-SYp_6aVbye>{csvk;EbKU$yCmqb<3W9QdQG{}dh31B68wvs&mG82VjjDoWQ}Dc2l*Awo&|pSJyOt;iQFL_LMf zt404sg8Ymrk!ZU4n*)+^#s!xD=~JMZjLFVWT(Hz^lZ}SRj&6%LQc?Bu>~$DhmHy%} zOH7VvG@X>p{z7%D2*AY*!zjI4hvtjC=pYhXI@w=l2Q(HfEwI-dU7pnpJQG=gcR3TT zM{2Iq%BGH09mh1|DX(Xaq1J@nI@z@Rfx3%vkm?H@V+r6ZLLXvSs?w3(0$Pt_Dbg7* zKZJYCuf4uFU=4hve}$p#xJ-{^=$C%I0j|_-C{inybG_U7_z_%pyxgozt&~lRF8o+K zFffn`W^+~f&ouZQP{BfiqbM@T#$ZnZBVzE!UtVaysxetWriey&CkG0 z2ET`*&!EQT4OTcaaW-wKZ3@r7>6c<4vWYCrq6ASS_ zAE6Q57{1y@Wb6R>lB)~PgrRtshUw*1t|0sQMdGvIJY0-9FB4Cfq|Fcs3_BQ`c5c`T zg-cE;^JA#Jn#|ayF3OoT!?0*1!r73WdWcnPYpMegSgoC>jDqLy>E~$1k0@*B=7<|j6D*>wb z&mwQ;5!_of9FVRQE^)`wYjL5I&_1JDmY@l6t4 z0?9eIehT=#;DE<7kn7L#%-G$}BT-8*8IKuu^aqOzovrAu3>JG!7 z8-YkKVx>-<7~qS*)8%w>R+e7vkesXoq)SY@aJF=l-&wYCUSh3o`9$-4Jzg~2!M|$a`U+ZZRTNar3>%-9Zu*)qjg^y1+YrcN-)rM0YK5jmn(KHrUzh+%Ws6Gyd zq6>XV*dV1rH3)AN{c|@}F@X{Ey~G&fwVkWfU2j?t*(Y0fz$HCE%t@_FNB3;kh@n%L z)|kzunWGehvIxWRF3wkQSu7&m#hWjK5kl8kU3P3&EK$m#hs=ZGu^fKwSbUbw%Vfuc zaD1c0*KfIpuaA~@s+w1miTOR>pS*T^+{+7^vt=_{3wj9vMxu1clX$N~O9uc!pfDut z?OAQ(w0R>x#cxVSOO1{AsDwQ}&Vh(@)1}JuPvZ639g&?E6a0FIy3LL`3b_KhFZ;0E zFYZT6nnQ5}g$W|>_Sd?*S4jlCYron|OQDEhq~kH^6-~x=^*pkD3%^DGb6P+l;gCT~ zn3%-NW6NgqiC4C2)EXL)$7~qW58|LssH^a5iXq>Jz%?k1M6ZAe1}rfYYuX=rzmVpPBWBz*ld*CggM;WG8<_Fk(c(XHGplv^N~ONAD17 z^EfDZQK6=%my0~^X}4VZYNHn*wS2VP?A4fHQO99DF|$g8p;c=(vD9dHb;j$q`@SYU zDhlz5B!De~eb>NSQBICM(xB!iMTb5{n?{qpZYQa!&<8>;tb;cJY|dO-ac^6Mtn8&0 z=!d2+G>TK9PAb%VO?ery06=o8o|6gk!O=B)KteB(5u?T7j3Q@kdr`CK6ouh za`a+42Y!|B-avmh`KZ(aG`Kyc|J^$(4rJA(SW!E z+_-H&>8iMh5xETBcn628Oa>2PZa-x2XhHQ+SQiZupuH7;hq-h-QkH|Y|cC>qCZ83;(< zVyp+?3e9IxP(jXJ0hq^QMuBwuXgC2RfY%1syulVn>RN-6O+Z3%qMPP!4x30Dk+5MdY1t{f+0uLb~aJx?cn!9X_Tt*h6lUpa}10LY$}8c)ps%Q6!aZ+oRv&@r%YfHJEZ|qew+{5QT6AA#eO+{85&CsV5Um8NIyLBelGC zWeW6DoBY$X9fC&lSKJ|FVd_c*F?rfIb1n(-@py=6xe?27capXo_PsO0L};|EQ28^R5*kI=7-lN5`e?;d;d*=DoHMPE!zv z{i=ki>#rII8uT{n0PKv&>u9~CuZ;liE7L|b`zY^F6UJd!Wb`z32Lv>tQ(hj%m}iIg zYr1A>&QmJUv|XqSPn1fT%zMnn+Z8&%1o&(YcybxoD+E=z2nU)b^=Q^Fq30XSMZwxG zG+7?5A3CBDzr=P>b%o)io>7ObXJB7wlJ4D57v(#6kHeXH?j&6l=cB^x!;b!?c8S5* zFghCWn~Az3G&pd$G*3dfmTKBhdW@y7breKvsBmrd;&x$KZBMUytN-chF3hwu>Bo-T121q5OioCC5{OehiC{k-G6hYKy4uh)! zC;|-7q@u3UVQ7A9cILwF+p>G~yi@m2L%-Gm4y2kWgMACcd+4JG>MFh-!tSVZrGIPf zJ4E^;%T#+9cwBYXdy|3GWW_4?8zGOz8k?}3^Mk+$D6+*T`4nq^ohLYc!Y3au@^d5_MD;U~8Y=&=f{)sGr-vAgB zt3(AZ@W{>hgH#!}zqka|{l6oHnwNsz`j0z$9Gbf+k5NMPBgHBIFN^@1k*U1Nr5l`5 z&84hFgy67>(ed8Dj-SW~xd>aag<*~JzC)%n0WC=O&(Zid3=J2Bz&ukGi?@Hkf`3a* ze}ljU16#DzKNsOYQD8nRq;xve82iw_lLG92*X@)1IPkA^`-kd{^9nkKE?hLp;V(Pb zKX1ccL!_o|WxOhX_SrxC0oVtCG>(;PbN`Hq!3~jyjTi5VKu-NXu*pAekwc*dQ~UCVx&K!LdD0gFk=C}(5YhfIqyE)G z5mSu(47E2ni?9Axx&V=4aevH$g&bCYH7Y=kFH{&Db>vX;<^L4{fUIQB>pehLg0dS^ zHHEW@28vvb(R~F=Z6bx=){kz&gA%zhF^9NIPA)96nWGG&z zX1E4)9~TS-KaoLu!SH4#rzA0A(o9kANt8%Pw2(5Vd$E0p-Qg7Yb1j9z0@L1dzz|w3 zm#9LgS#NAoCAQsbFJljVb+l~nruBS9Rq}?c*t~rg1~?Ec>aE*mmXps6k4rP4v#WtI zJo1hbQ&&!g*XLZFyeK;cc#-|zQG|?^iX0LuiJwjgrh18x0asC!2`U4Rm0Vw19ocNn z<}qqm>XGg0o5y&bO!wszS@%pdS2|MkUcwY|++v+IMF z;6U1PlbqOmZgD^hm6boQ%|jZGoLuyNjkPoc8Ze;XF(yWm_zqtr2IggFGj4$rK0?rj za=B(jdQDvS)*U@uEd>XMIynUe>=#S})gaUmJ1&1)OUqb=Cwj?yLn5Zeiu6X}+i2|1 zosG1dOPd=?k^n&pX|ObbET3>Pr-#52=-ByJwo;NvAe*_oJ)fWqCz)^4-PWUIj$ND{36aWYnRv z>VKig>;5y*|9-#Js`;pb<&}IoW75Agm+cAuldn~z$NUZTZWdFA=2K@Kv^8d6ij)*8 zW^S?Ejz-s(EoARU{u|5DZ)cH~*FOo>hCmV{oE;8M=^9lUQso-%DhnFO>sRk>;}7|> znXGe3E|sC18v>r(vN|3|*kl#J=_Rkpi?!R|w_Z)X`AB=dJg4Aedyl!;$|7&Zkoq0; z>L%WUc&%1YJ=f>qi~UL(9alwa+OY$8bX};fX)EtcF{b0pGN$7u&vvRs$>=2|At%u( zy>#S|%I5s^Lt4>JrVsgXF9pr^qF4Oz=SBjos<6b^$%N!(<_~EN2YYjo!{_IvpzT6Z zH7W}A?GQKeou%71`%C2=cG-E)?gmDxmdWwCiPPn2X&|RE${hV|VlUinsRhj22azU& zr=Eu(YWD*wD2E`4^(pDq3D{vuTSK`!uydx>qrYtPJ@U4}^|$K6ExOKlx1Wye7) z)~qx)rOfZ|O4qOvc1W)XnwJ=Ev`2WL`ICLcUd8cIR!zpFTE7DAZ(_Kw=6s~THE?>N zk$>rSc1zH)3GiIC2CrGK`WiW=h}6QJzGWvP6BWAKPRY;u|I5NN@&K=fvNRYbJHl6JuDj613JpoaaDggk32XQ7bmXQX#- zI`S@yF-=3aBOM)>O&9chS6Z=VRVbZ6;<{-0F|%37n9K;3=$*=H;DbZJ5y0&7WX+h{ zVTlrgXutL8NJTsZIsR6WZ;%OvT(_fs1MGMbL3)TSD$V!m*PTFKO(;hAD|8Y@Py#8w z_sF*p8rI8looEds*sncE{#~~%?&{c-G-AklK|g0LTD$b&37lmW)@j^S%=b_B*>`^w z);o(SA`}PU169uRY(~y+$J2J|ufNy3sz>|^O2!H$-v%q;!YMbA`ZCF)pnLb-H1OTH zoe-zFy-iP18Xq04+v0US38Q2(P-v-%F)`ycGfO6t9Z7omvBA9J@Is@HLU)SRtL|@} zaVt8;TjNq>zmW9eCZvYM3yRN6yvZ#uf$L>w_ua+xSx=+Pvqa5dk7L%*jC!%meLKnwJmnHV zI#gD(&93K(Xpe;R4qWA^ceR*_qkutYL|hn>>hzIFR#*CYf4br;g%rfmC7T9vEH8?^ zZ5LML-^I=QU>Lo~?rLIbj(mH-N+TD83@^4gqemEE0dN-W6@Iw zgUv;*ol^I=y&t+q6DFVavo3!!IK0{m-(vK6M&fPp>AcM0)8`Jji016>^ys=i^n5(+ zM?b%q6du*J&MD5-+j&lGNDl?Tuq4Z+(aS;%2bGbRO*LCwkk;2i%t4g=UdJ=@-#E7q z_?V5m3?|-fp&lWHo+k+1e~BaDkXjdnVWVPZzS;k1gYtRg;iS7J9RH9(zo}9ogH;p) zeCd=0&cZ5v;{H;7$AFKw z+Mx;w!RTD6{D+qMDP12}Kv=)+>MwH`A-@q9!QN+|fQUoEjH=uiDOrZAOZU-S##yU~ z8SRvn_?H-0B!PoxFpfg~#`n6*8`Lr)Y_cDzk+9S>!c~ge*b&(G(I@*{=l2O+>t*a< z^Uu>~9p{cI38aCF^Cu~@1A*has5XUIQ(W394i&^QLS!hKtzfX-9{ElgMxg*{WkkZ> zM_a}eWu>pqU7Fy4TVyKEHxH^;HCActuD)0G^(G!|M4H$yjdDd_G`S38S!p9ZS>wA? z=jBE@3X6)(RCl+_uD@`zZ6s*VLvaUbwsC6sPn3UrSfNx^yvBPQr#hwLLs$E)#hzCeE=S){+an%olK>DC zE*~9F;aDYoG-fL_wdyzSP1z9rPq+IzUmeW1hCFIVGbhlZGx!fZ{m9GB3tE}KWd=PN zP)s~nd*4N%QaM@;Q&kl*bz)xeGf77nX@1}N;UPb%!(`^Tlvdo?`WUeLgRr5aS3NL{ zzNaz-DGb{U6}f!?Y`F$Cnypmg$t{uFu+97BUP^6tBs9S{)~Z7Y3aqZyPRGN)kDQxjXUybpoo)%-sZS9}Phe%ge#T=1KR< zoZ{2{K@E&D&S7%{&#Je07MI;FR)8}}7@X+#snzqF{8gv!C+eE+=Qf4xcdFsoEy~W; zletU~tMhcoOkX%k<87$6`E&dy*Wx4Gf;T8yrTelE4u$CZwMZO8==ddIqEWb;tI33s zs!5`$T&N661OD@TfWY9wit$ttM7OYrzOQ?(D_`C!fDIvi8uFiwtUc>)j{oVsGiX~M zKXSTW7wP`xl>qW-;qyzX^AJa&sCsAK~OX3L=-*=?;6gzTT3;B zy-Rt0_UqE7rRloJVfNLNi;q}%q>s=b5CJbt&%B#{qp1t4>VwX&ljqCF$3o$qnX1%v#`JU>i?*;Vv1tcwyU}4km0RmE)uaOwdLhfM$|2LiI}h*z zCxP?mLkRh4)Fc=B04D-<0qc$HC$vtHb4vZ&pTdAkV_CTq`r}$i9;r!kZdBT23f=AE z2<0R3r0`1z5r2uFA-dNI(McsPn#@W-~y%~8NDg8ediY$-RGtEHZJ{!SOo%1CaSm$vi2XQ zEhbrFi$y_>^+zD5BFE+UdN-HnN3vBDKoFvs*?KE5te!QB4&sGy*Yw#=l6IGwh$IdC z(uWNG?cSylxOz*iQqt7LdSeGg1liFsR-LG-CRij`NuO6V@P* zM}gYy-!EkWh&@DMs6djq?S!55qcc@W?YP3(d&FO#*lzvK$%f!?X1g=8=_S z?#h;6;DT_a7}et}&tCh}xz7F%vYST?$u5mMR*WoFMIVJjEyu$E5<$Re$(I`Hy;?ao zON)rCHxxujx%ABfelo&m0YBReyzXd9%fF#gr5jXs(6FbOggPfbR990rUxRb<`x^r; z#|oFs$+`KwLV&UtEvB08feAPGMdclYpeqZQJwJUPmP_?qtdb|Ta^`_TJ7r&+d{i<2 zYS*8gP~V}epBPdxrY=IxtL=bnb7~}SSyr)2`{e*y27CgleuX^`+e61}%r!{a(;p3g z6o+V3Vvq$L4!ee9=09rfoL#Ip;$C^5k`S%tBbE?qGDCFa^y2Y~oooCW>f{c^R*^Y;B!*SD_*!o= z$_hdG=ZfECpf=&VrU4ifVF;TCq4QeF%SwSYBE!L@BN({!`-+$HnwvxKG@YKV8IMdO z6M39=eS&-gpULp7%G@>|s?+b4fHXuMyNMiwZzMLXUW08Wo2leZ8yiNzu@*;js8@zr zR9lgQJJ^N_#FLX9i%tQOZ_sngm%~p)-UC6%@!|?>s|<7ZE7Z_f!-_GZv;Gj}eV~lP z9WmidL)0iO%L|RZDb4>I%8fC1hj?rl6xJ9Y7Yzxc(lYX_YlcBL#ALxY@^hw49X=OZ zRaMo@)xF0Ko&^84!50bV#yd_s=BndV30Mjiqxy1N?;sq=aJA%>v^2gf6NbfpU2V8Q zpIaqXNa)pR(8Q3CmQ8dub646br_nm#u`=?igJ~VteGpf>B__{pf|q#`#rwnK`6vra z0tvQI_!g_T^)3Ig?dA3>i8{!!s1!@Ho6&uMm_X^>4e_~a@p&CHTIU5c;aN;go#)qeA%Am(bbRZ^saquK9eJ!1L{v>$D=zzC_EU;7#^*k?7A%wjhvV z>(n)}>3|MCE_=PNTS=gp({<*&COwN3aT?M0RM0Q%jK>;{jkg)^Og_g;#9%PRZ6N(* zqUl72ECLTgz0IeM{cj38i>%mgVQ)oGd$5hoPq|rr4CAy#X>i-jQAbpH7@Sbw2uQHj zbrm_Rx*$r+(kIag7{_sG7bp$Ym6ah|1Lslt5>58X5=p}(74uR)OXh$at07DTzzc{4 zT=5D?7sYJYkM+L#-4WYaCi3{5{dbpW4ds}sT#=H5-rPO3Gz`1~=F|7R-s`;e4olC&%y7-g7Ho4ljf42CKUfmETmmrkV6v9y85|xGp`*!S!v|03v=O zE3hWeTK}^dwR0-b+Q%u+XVTT%Y0tw9S1sP)b9iKLMWY#ay zf6lvb9oL9H`q$(HMpuK|2n=sWKm#MOup0hilXHB^@nA4WQ#IQ9 z!cVb%%mb*s4 zc7CiHu?*3SX^4WfB9_^uCMNG+@t()DSG#>R-gNN8!Hb_tBwH^90xur<%*!TC&Ul9= z%KMi{@YZg9B>Hf+rA-#J01J!_Y2U;XJse7|^xO(&nx?Ufvd-&V`2{DgRDx@@V_SO+ByBW|nqrI-_qHE#(WSBz6zK z>HCh+cU`Hcy4Ph#KXL&cMc;_my6_Cr8#Q^`_u=Y+ptUCJ3u`|-{R)B@s~B0`S9$@h zZ={!)KJ~S)S;Wl)QndkE#91Y91Yx#)4@0)ALKk!F&rO1YTD4-)n3|8XJ7=c=C>F0Q zEQ(p{xi=l^?^luDs17OxpBXWnQuuUZ!PJZrq9K`#nA5D%DS_V!Sk~dj#>dYoKeO?A z*=EfArdjRXzY+BPib}vPfdXrl95!n1e{?6cE=Ug*9j8KfFr}IviD>|j zU;I`{my1n!5B#q_q=*LjUUTEk&9XGCwMW;LJtLkJN{ z<{8DbXQK9A5yG6XgphVTBd>nF>x$QBAWQvX4vL>D*JLrcCIV*k*N4(-s+3-m zh)C3U(rHk^7Ij0Xp+C0IPPNx&=)m*$jo^LfqAU8fXf_2rR-nr!G*A}7wd5;5joc9R zz(~j$mu7q@B5}Og(u09dqJc-!!wsrLF>3grcXLL4mr=L+h+bU>OP@jFBhDQv;C=x~ z>OGBA4y+LmaKR$Y8h(|o(6OeH#$#A~vnpWH^M+K2Ok5){Ygw}z9(&FbJfrD@hf!jl z^X6FVCWLDRnsGfTM1{Ii#EdKMZFEG^A#Tb@xSgxuN}~MUWRB1LgdWK8t*K2$N7uE9 zWK8M%Wh@dPGirKpJVNHF+@j;?j_N30Jb7AEeV zr<`{&W%bMKPV}bd>Gs={xH#=$^0iB9P4oX&u@Atb|4_x>!H6h zDhxy|%02|;f-uS|ZXv3ZcqMe{wZa!a_4NbkcSJ=X;yDnE9#U*~gt`H!{V2>!f0`96 z6Ef>ni^F_Kgv%UeK;jKfh~{)f7U1U9ZO__j>At5`pNT+1N$<_Ml#Dsls6aFQ%qmHh zJ&2BXAhD(|e0u3M)-G+a5-Dw%ix2*;RC6D~hhfFa)=Mv;)lVc zZ!VNai-JyIS8r4~mg3JStvsAagsa@fW-5*)`-WO4v7t>0Gf5nMy)|k zS5}qbCuK!+8e65-MWkL1(gWLf@%JG%YpHd9l0gyi!i+y=!LswlYNCD|WKM9;CJr;x zoQN_k{>}<(3~Lp$F?f}7>hh?2tNK5ugRChJO0QnpDU|$pGVc*bTV;2+32NZbE_iHJ zj&Hd8hQ^+(=2;Ge!lX6=rAUy&7&ETN)xEtjP_^ZWg^Ry4nuqi(q&)|m}uYY@L$lwe{&O~7$G$JXLLVIUtFNq;A}epRpR_^Lr>Iq5|IlI^0kipehHP3 z?&6xEYa#YOoF}#B`V41=3&`EZhp>iD`cyQyc8@|JV`wL=d-BpAI)Z-Al^g-|V_wvX z7f|3UlO^K$Yw0~)wQkk-1)}*TKu`f&1lD&5UlhW10q8t0aIhc?r~!)d6=XRng(P** z($@y->rYK;1NTK0r5te(Sr7?CnQ{Et$i;c6tA7(GN9VG#=>{hNODj; z-*t~o78d0DXlGK2%8Wqc$J-05=_}v6+m3qKngTkkTF)8E9@3OdHTxPh-aM*J#neLcpXbjt652qfPgS(O;u zKd@1dDk><%8G#q)U4o82&TLtilfGzwnRQr?h%dbeeb}yE-{vK$96LXM(!8R)?YskB zr3AKwt$9c~`fMWxn7+p)PM>;63bmy|Ov~(c_@J58WtHiWkHG0&yjgxAZ zwuF0}abtT=)5cVdgE4}msWRb2sHcIVpO@hpvoPS|QGuynTZ2M+8^~V`tn6aIVd!hu+sd`dnE`8UF$A80D9qg8Z!m(_^+3128p)E zfZoB*cwyo`4ZNfAc4x0)viFh)FF><1Db00Atv4{~W<&H8h}NuPKa7K{k@^Zua!!7n zcxLTstobfdig+??uaA%B5vBCoCdVGwb5U+RO;)8fzJpON$HaDfVPUldu$qooB8F0E z++u|_>}j|z$$=k`R|G!7@Lb&z8o|}3vZVvNC953MizJkz0F&q+rq_^^WCUuu7?MhV zE|sGF>VSCRBu3z<>@y5TF;4vdX{@Z#i6N?J=q*IfGxqj3?Z)oEQBS?kZBwSgl_sG; z?2!p{=)lSG-_6O6w3O+=hh_{8j{M4r1qIBQJ2pDkVJ0vYunHRIcfxN7t0%LH&%6nL zA}qk^UI9s=>900g=lP#5`v0b-yW7t9nO3cs{r@CAl@`U&=FV@g_lAZ%(s)KtC2a(# z6-pf^_@|0yAPeA)skM7v#kd(OEd@ruOFAGn-*YjW$5$pBtX5EYFeBuzjz~-HnGKWm z(F(gk4}+$>Yvc$59sSa|;cbmSeH77ARm~KB^*j;#$bk6a-0C^ruR=TdrvEJz^M+=s zelvEcmpE10OW)(^O{JTH51)i&`E2D3whsS=_FA=)=}Blaw16VO9nSo8S7ZV*XcW*6=#M&dnyOnYfwj$X#_6qVf~wVzZQ&i> zV?H!-ibLEgUpbGKE0r5y>va>@%t|~xXNRug5>0NESEH>kmUuF#ALKv#I^6trHjFKU za`-2*{pou#j}NNApO%lkRthiXZcef_&3uZ7gK!UTNREfYZgP5FwbB@}p%1V23* zTY+06MmB~JtEjW*IoNA0{xBgYVnbyGPwKQcASWEHUNYJ`itodqM*9H!TtoWzgfANf z&I3i6UhgE6xr_dYu)JFo+}NmUDqe!fuaR;|(Ug*`3lLlGy*;iGL)X}vi?N5x*oX(C z92tDTDG18aKa+(F{_2tVX4X@dZ|*!kfU2yWs-#A6_I$8-@mLkfy#EKreeh0;&4w(5 zD0)0k%#b^N@5FVJ^NU#PXRj@#!{kJ*;Vc5~*!l1sy&D2jV(_!S2vHv?BMw}eN+)Ak zMIcjpH9TWUzVk=8Ley~`3T`)zdUwIK;P zrmiwIIQ;A@_>9orsq2%77{Dg;UkG|Nv|xSh3%ZzVd9hFbFVV~t{JhGFj(;wfHK}9n zhO&LaATi!*9?jdV@UI0SQqNa#bV~Pl)s{+W8D`$4)z>6o z^_ggEMbZ%D3%0bozhf>HC;@Ofc*Sy5Wq%pFAW?T^z;xuDc12dUmoI@83j%5Rp_MT&Yv;zhNXn z$^b(Brhh@<#r`e(D-miCXBGAjI?sQYS{P6e!Vz?jnd;vcnxE0<_jlAR2`_: * ``tests`` includes packages for running tests * ``docs`` includes packages for writing and building documentation -Set up UTA ----------- - -Cool-Seq-Tool requires an available instance of the Universal Transcript Archive (UTA) database. Complete installation instructions (via Docker or a local server) are available at the `UTA GitHub repository `_. For local usage, we recommend the following: - -.. long-term, it would be best to move this over to the UTA repo to avoid duplication - -.. code-block:: - - createuser -U postgres uta_admin - createuser -U postgres anonymous - createdb -U postgres -O uta_admin uta - - export UTA_VERSION=uta_20241220.pgd.gz # most recent as of 2025/03/10 - curl -O https://dl.biocommons.org/uta/$UTA_VERSION - gzip -cdq ${UTA_VERSION} | psql -h localhost -U uta_admin --echo-errors --single-transaction -v ON_ERROR_STOP=1 -d uta -p 5432 - -By default, Cool-Seq-Tool expects to connect to the UTA database via a PostgreSQL connection served local on port 5432, under the PostgreSQL username ``uta_admin`` and the schema ``uta_20241220``. - Set up SeqRepo -------------- @@ -79,6 +60,50 @@ Try moving data manually with ``sudo``: See `mirroring documentation `_ on the SeqRepo GitHub repo for instructions and additional troubleshooting. +Set up using Docker +---------- + +FUSOR's dependencies can be installed using a Docker container. + +.. important:: + + This section assumes you have a local + `SeqRepo `_ + installed at ``/usr/local/share/seqrepo/2024-12-20``. + If you have it installed elsewhere, please add a + ``SEQREPO_ROOT_DIR`` environment variable in ``.env.shared``. + + You must download `uta_20241220.pgd.gz` from + using a web browser and + move it to the root of the repository. + + If you're using Docker Desktop, you must go to + **Settings → Resources → File sharing** and add + ``/usr/local/share/seqrepo`` under the *Virtual file shares* + section. Otherwise, you will get the following error:: + + OSError: Unable to open SeqRepo directory /usr/local/share/seqrepo/2024-12-20 + +To build, (re)create, and start containers: + +.. code-block:: shell + + docker volume create uta_vol + docker compose up + +.. tip:: + + If you want a clean slate, run ``docker compose down -v`` to remove + containers and volumes, then run + ``docker compose up --build`` to rebuild and start fresh containers. + +In Docker Desktop, you should see the following for a successful setup: + +.. figure:: ../../docker-desktop-container.png + :alt: Docker Desktop Container + :align: center + + Check data availability ----------------------- diff --git a/uta-setup.sql b/uta-setup.sql new file mode 100644 index 0000000..a902f7b --- /dev/null +++ b/uta-setup.sql @@ -0,0 +1,27 @@ +\c uta; +CREATE TABLE uta_20241220.genomic AS +SELECT t.hgnc, aes.alt_ac, aes.alt_aln_method, + aes.alt_strand, ae.start_i AS alt_start_i, + ae.end_i AS alt_end_i +FROM (((((uta_20241220.transcript t + JOIN uta_20241220.exon_set tes ON (((t.ac = tes.tx_ac) + AND (tes.alt_aln_method = 'transcript'::text)))) + JOIN uta_20241220.exon_set aes ON (((t.ac = aes.tx_ac) + AND (aes.alt_aln_method <> 'transcript'::text)))) + JOIN uta_20241220.exon te ON + ((tes.exon_set_id = te.exon_set_id))) + JOIN uta_20241220.exon ae ON + (((aes.exon_set_id = ae.exon_set_id) + AND (te.ord = ae.ord)))) + LEFT JOIN uta_20241220.exon_aln ea ON + (((te.exon_id = ea.tx_exon_id) AND + (ae.exon_id = ea.alt_exon_id)))); + +CREATE INDEX alt_pos_index ON uta_20241220.genomic (alt_ac, alt_start_i, alt_end_i); +CREATE INDEX gene_alt_index ON uta_20241220.genomic (hgnc, alt_ac); +CREATE INDEX alt_ac_index ON uta_20241220.genomic (alt_ac); + +GRANT CONNECT ON DATABASE uta TO anonymous; +GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA uta_20241220 TO anonymous; +ALTER DATABASE uta OWNER TO anonymous; +ALTER SCHEMA uta_20241220 OWNER to anonymous; From 03c4598adf09c79333efd2a78987c146076e84c8 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Mon, 16 Feb 2026 09:16:34 -0500 Subject: [PATCH 2/5] Make edits --- .gitignore | 3 +++ docs/source/install.rst | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index b32ab81..74540cc 100644 --- a/.gitignore +++ b/.gitignore @@ -138,3 +138,6 @@ tpm3.fasta # VSCode .vscode + +# UTA download +uta_*pgd.gz diff --git a/docs/source/install.rst b/docs/source/install.rst index f6539ed..d4f9294 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -61,12 +61,14 @@ Try moving data manually with ``sudo``: See `mirroring documentation `_ on the SeqRepo GitHub repo for instructions and additional troubleshooting. Set up using Docker ----------- +------------------- -FUSOR's dependencies can be installed using a Docker container. +Cool-Seq-Tool's dependencies can be installed using a Docker container. We only provide guidance on setting up external dependencies using Docker. .. important:: + Please see :ref:`setup-seqrepo` to follow the SeqRepo setup instructions. + This section assumes you have a local `SeqRepo `_ installed at ``/usr/local/share/seqrepo/2024-12-20``. From 39f4afb82c1e01552f753e7e19e559089e0cb267 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Mon, 16 Feb 2026 09:28:55 -0500 Subject: [PATCH 3/5] Add SeqRepo link out --- docs/source/install.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/install.rst b/docs/source/install.rst index d4f9294..63be58d 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -67,13 +67,12 @@ Cool-Seq-Tool's dependencies can be installed using a Docker container. We only .. important:: - Please see :ref:`setup-seqrepo` to follow the SeqRepo setup instructions. - This section assumes you have a local `SeqRepo `_ installed at ``/usr/local/share/seqrepo/2024-12-20``. If you have it installed elsewhere, please add a ``SEQREPO_ROOT_DIR`` environment variable in ``.env.shared``. + See the `SeqRepo setup section <#set-up-seqrepo>`_ for additional information. You must download `uta_20241220.pgd.gz` from using a web browser and From adf053a3b5f765d2535194e62d70a805dfed5d17 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Mon, 16 Feb 2026 10:48:40 -0500 Subject: [PATCH 4/5] Remove .env.shared file and reference in docs --- .env.shared | 5 ----- docs/source/install.rst | 2 -- 2 files changed, 7 deletions(-) delete mode 100644 .env.shared diff --git a/.env.shared b/.env.shared deleted file mode 100644 index 174e3ed..0000000 --- a/.env.shared +++ /dev/null @@ -1,5 +0,0 @@ -SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2024-12-20 -AWS_ACCESS_KEY_ID=dummy -AWS_SECRET_ACCESS_KEY=dummy -AWS_SESSION_TOKEN=dummy -UTA_DB_URL=postgresql://anonymous@localhost:5432/uta/uta_20241220 diff --git a/docs/source/install.rst b/docs/source/install.rst index 63be58d..110535d 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -70,8 +70,6 @@ Cool-Seq-Tool's dependencies can be installed using a Docker container. We only This section assumes you have a local `SeqRepo `_ installed at ``/usr/local/share/seqrepo/2024-12-20``. - If you have it installed elsewhere, please add a - ``SEQREPO_ROOT_DIR`` environment variable in ``.env.shared``. See the `SeqRepo setup section <#set-up-seqrepo>`_ for additional information. You must download `uta_20241220.pgd.gz` from From 795e3575d5b7e4da1fe68d5e4942cb55920a12cb Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Mon, 16 Feb 2026 16:04:15 -0500 Subject: [PATCH 5/5] Update UTA_DB_URL --- docs/source/usage.rst | 2 +- src/cool_seq_tool/sources/uta_database.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index d335c14..8aff261 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -80,7 +80,7 @@ Individual classes will accept arguments upon initialization to set parameters r * - ``SEQREPO_ROOT_DIR`` - Path to SeqRepo directory (i.e. contains ``aliases.sqlite3`` database file, and ``sequences`` directory). Used by :py:class:`SeqRepoAccess `. If not defined, defaults to ``/usr/local/share/seqrepo/latest``. * - ``UTA_DB_URL`` - - A `libpq connection string `_, i.e. of the form ``postgresql://:@://``, used by the :py:class:`UtaDatabase ` class. By default, it is set to ``postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220``. + - A `libpq connection string `_, i.e. of the form ``postgresql://:@://``, used by the :py:class:`UtaDatabase ` class. By default, it is set to ``postgresql://anonymous@localhost:5432/uta/uta_20241220``. * - ``LIFTOVER_CHAIN_37_TO_38`` - A path to a `chainfile `_ for lifting from GRCh37 to GRCh38. Used by the :py:class:`LiftOver ` class as input to `agct `_. If not provided, agct will fetch it automatically from UCSC. * - ``LIFTOVER_CHAIN_38_TO_37`` diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py index 45aba11..e993ef6 100644 --- a/src/cool_seq_tool/sources/uta_database.py +++ b/src/cool_seq_tool/sources/uta_database.py @@ -27,7 +27,7 @@ UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase") UTA_DB_URL = environ.get( - "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220" + "UTA_DB_URL", "postgresql://anonymous@localhost:5432/uta/uta_20241220" ) _logger = logging.getLogger(__name__)