diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..698dd01
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,59 @@
+# Bytecode files
+__pycache__/
+*.pyc
+*.pyo
+
+# Virtual environment
+venv/
+.venv/
+
+# Debug and log files
+*.log
+debug*.txt
+
+# Data and media
+data/
+media/
+
+# Temporary files and directories
+tmp/
+temp/
+*.swp
+
+# IDE and OS files
+.idea/
+.vscode/
+.DS_Store
+
+# Distribution files
+dist/
+build/
+*.egg-info/
+
+# Secrets
+.env
+.env.*
+*.pem
+*.key
+*.crt
+*.p12
+*.jks
+secrets/
+
+# Git
+.git/
+.gitignore
+
+# Tests & coverage
+tests/
+test/
+.pytest_cache/
+.coverage
+htmlcov/
+
+# Docs / misc
+README*
+docs/
+
+# CI
+.github/
diff --git a/.gitignore b/.gitignore
index b32ab81..74540cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,3 +138,6 @@ tpm3.fasta
# VSCode
.vscode
+
+# UTA download
+uta_*pgd.gz
diff --git a/compose.yaml b/compose.yaml
new file mode 100644
index 0000000..de2f586
--- /dev/null
+++ b/compose.yaml
@@ -0,0 +1,23 @@
+services:
+  uta:
+    # Smoke test once the container is up (expected output on the last line):
+    #   psql -XAt postgres://anonymous@localhost/uta -c 'select count(*) from uta_20241220.transcript'
+    #   329090
+    image: biocommons/uta:uta_20241220
+    environment:
+      - POSTGRES_PASSWORD=some-password-that-you-make-up
+    volumes:
+      - uta_vol:/var/lib/postgresql/data
+      - type: bind
+        source: ./uta_20241220.pgd.gz
+        target: /tmp/uta_20241220.pgd.gz
+        read_only: true
+        bind:
+          create_host_path: false  # error out instead of creating an empty directory if the dump is missing
+      - ./uta-setup.sql:/docker-entrypoint-initdb.d/uta-setup.sql
+    ports:
+      - 127.0.0.1:5432:5432  # published on the loopback interface only
+
+volumes:
+  uta_vol:
+    external: true  # not created by Compose; run `docker volume create uta_vol` first
diff --git a/docker-desktop-container.png b/docker-desktop-container.png
new file mode 100644
index 0000000..500e5fb
Binary files /dev/null and b/docker-desktop-container.png differ
diff --git a/docs/source/install.rst b/docs/source/install.rst
index 07e4f8c..110535d 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -27,25 +27,6 @@ Install Cool-Seq-Tool from `PyPI `_:
* ``tests`` includes packages for running tests
* ``docs`` includes packages for writing and building documentation
-Set up UTA
-----------
-
-Cool-Seq-Tool requires an available instance of the Universal Transcript Archive (UTA) database. Complete installation instructions (via Docker or a local server) are available at the `UTA GitHub repository `_. For local usage, we recommend the following:
-
-.. long-term, it would be best to move this over to the UTA repo to avoid duplication
-
-.. code-block::
-
- createuser -U postgres uta_admin
- createuser -U postgres anonymous
- createdb -U postgres -O uta_admin uta
-
- export UTA_VERSION=uta_20241220.pgd.gz # most recent as of 2025/03/10
- curl -O https://dl.biocommons.org/uta/$UTA_VERSION
- gzip -cdq ${UTA_VERSION} | psql -h localhost -U uta_admin --echo-errors --single-transaction -v ON_ERROR_STOP=1 -d uta -p 5432
-
-By default, Cool-Seq-Tool expects to connect to the UTA database via a PostgreSQL connection served local on port 5432, under the PostgreSQL username ``uta_admin`` and the schema ``uta_20241220``.
-
Set up SeqRepo
--------------
@@ -79,6 +60,49 @@ Try moving data manually with ``sudo``:
See `mirroring documentation `_ on the SeqRepo GitHub repo for instructions and additional troubleshooting.
+Set up using Docker
+-------------------
+
+Cool-Seq-Tool's dependencies can be installed using a Docker container. We only provide guidance on setting up external dependencies using Docker.
+
+.. important::
+
+ This section assumes you have a local
+ `SeqRepo <https://github.com/biocommons/biocommons.seqrepo>`_
+ installed at ``/usr/local/share/seqrepo/2024-12-20``.
+ See the `SeqRepo setup section <#set-up-seqrepo>`_ for additional information.
+
+ You must download ``uta_20241220.pgd.gz`` from
+ https://dl.biocommons.org/uta/ using a web browser and
+ move it to the root of the repository.
+
+ If you're using Docker Desktop, you must go to
+ **Settings → Resources → File sharing** and add
+ ``/usr/local/share/seqrepo`` under the *Virtual file shares*
+ section. Otherwise, you will get the following error::
+
+     OSError: Unable to open SeqRepo directory /usr/local/share/seqrepo/2024-12-20
+
+To build, (re)create, and start containers:
+
+.. code-block:: shell
+
+ docker volume create uta_vol
+ docker compose up
+
+.. tip::
+
+ If you want a clean slate, run ``docker compose down`` to remove the containers.
+ ``uta_vol`` is an external volume, so ``down -v`` will not delete it: run
+ ``docker volume rm uta_vol``, then repeat the two commands above to start fresh.
+
+In Docker Desktop, you should see the following for a successful setup:
+
+.. figure:: ../../docker-desktop-container.png
+ :alt: Docker Desktop Container
+ :align: center
+
+
Check data availability
-----------------------
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index d335c14..8aff261 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -80,7 +80,7 @@ Individual classes will accept arguments upon initialization to set parameters r
* - ``SEQREPO_ROOT_DIR``
- Path to SeqRepo directory (i.e. contains ``aliases.sqlite3`` database file, and ``sequences`` directory). Used by :py:class:`SeqRepoAccess `. If not defined, defaults to ``/usr/local/share/seqrepo/latest``.
* - ``UTA_DB_URL``
- - A `libpq connection string `_, i.e. of the form ``postgresql://:@://``, used by the :py:class:`UtaDatabase ` class. By default, it is set to ``postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220``.
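+ - A `libpq connection string <https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING>`_, i.e. of the form ``postgresql://<user>:<password>@<host>:<port>/<database>/<schema>``, used by the :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>` class. By default, it is set to ``postgresql://anonymous@localhost:5432/uta/uta_20241220``.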
* - ``LIFTOVER_CHAIN_37_TO_38``
- A path to a `chainfile `_ for lifting from GRCh37 to GRCh38. Used by the :py:class:`LiftOver ` class as input to `agct `_. If not provided, agct will fetch it automatically from UCSC.
* - ``LIFTOVER_CHAIN_38_TO_37``
diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py
index 45aba11..e993ef6 100644
--- a/src/cool_seq_tool/sources/uta_database.py
+++ b/src/cool_seq_tool/sources/uta_database.py
@@ -27,7 +27,7 @@
UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")
UTA_DB_URL = environ.get(
- "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220"
+ "UTA_DB_URL", "postgresql://anonymous@localhost:5432/uta/uta_20241220"
)
_logger = logging.getLogger(__name__)
diff --git a/uta-setup.sql b/uta-setup.sql
new file mode 100644
index 0000000..a902f7b
--- /dev/null
+++ b/uta-setup.sql
@@ -0,0 +1,27 @@
+\c uta;
+-- Pair every exon of a transcript's own ('transcript') exon set with the
+-- same-ord exon of each of its other exon sets, keeping the latter's position.
+CREATE TABLE uta_20241220.genomic AS
+SELECT tx.hgnc, alt_set.alt_ac, alt_set.alt_aln_method, alt_set.alt_strand,
+       alt_exon.start_i AS alt_start_i, alt_exon.end_i AS alt_end_i
+FROM uta_20241220.transcript tx
+JOIN uta_20241220.exon_set tx_set
+  ON tx.ac = tx_set.tx_ac AND tx_set.alt_aln_method = 'transcript'::text
+JOIN uta_20241220.exon_set alt_set
+  ON tx.ac = alt_set.tx_ac AND alt_set.alt_aln_method <> 'transcript'::text
+JOIN uta_20241220.exon tx_exon
+  ON tx_set.exon_set_id = tx_exon.exon_set_id
+JOIN uta_20241220.exon alt_exon
+  ON alt_set.exon_set_id = alt_exon.exon_set_id AND tx_exon.ord = alt_exon.ord
+-- Outer join: exon pairs are kept even when no exon_aln row matches them.
+LEFT JOIN uta_20241220.exon_aln aln
+  ON tx_exon.exon_id = aln.tx_exon_id AND alt_exon.exon_id = aln.alt_exon_id;
+
+CREATE INDEX alt_pos_index ON uta_20241220.genomic (alt_ac, alt_start_i, alt_end_i);  -- composite: alt_ac first, then the start/end columns
+CREATE INDEX gene_alt_index ON uta_20241220.genomic (hgnc, alt_ac);  -- composite: hgnc first, then alt_ac
+CREATE INDEX alt_ac_index ON uta_20241220.genomic (alt_ac);  -- NOTE(review): alt_ac is already the leading column of alt_pos_index; confirm this index is still needed
+
+GRANT CONNECT ON DATABASE uta TO anonymous;  -- anonymous is the role used by the default UTA_DB_URL
+GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA uta_20241220 TO anonymous;  -- applies to tables that exist at this point, including genomic created above
+ALTER DATABASE uta OWNER TO anonymous;  -- NOTE(review): ownership is broader than the grants above; confirm it is required
+ALTER SCHEMA uta_20241220 OWNER to anonymous;  -- changes the schema's owner only; this statement does not alter table ownership