Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Bytecode files
__pycache__/
*.pyc
*.pyo

# Virtual environment
venv/
.venv/

# Debug and log files
*.log
debug*.txt

# Data and media
data/
media/

# Temporary files and directories
tmp/
temp/
*.swp

# IDE and OS files
.idea/
.vscode/
.DS_Store

# Distribution files
dist/
build/
*.egg-info/

# Secrets
.env
.env.*
*.pem
*.key
*.crt
*.p12
*.jks
secrets/

# Git
.git/
.gitignore

# Tests & coverage
tests/
test/
.pytest_cache/
.coverage
htmlcov/

# Docs / misc
README*
docs/

# CI
.github/
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,6 @@ tpm3.fasta

# VSCode
.vscode

# UTA download
uta_*pgd.gz
23 changes: 23 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Compose definition for a local UTA (Universal Transcript Archive) PostgreSQL
# container based on the biocommons/uta image.
services:
uta:
# Test:
# psql -XAt postgres://anonymous@localhost/uta -c 'select count(*) from uta_20241220.transcript'
# 329090
image: biocommons/uta:uta_20241220
environment:
# Placeholder value: replace it with a password of your own before starting.
- POSTGRES_PASSWORD=some-password-that-you-make-up
volumes:
# Named volume (declared at the bottom of this file) mounted at
# /var/lib/postgresql/data, so the database files live outside the container.
- uta_vol:/var/lib/postgresql/data
# Read-only bind mount of the UTA dump, which must already exist next to this
# file. With create_host_path set to false, Compose reports an error instead
# of creating an empty directory when the dump is missing.
- type: bind
source: ./uta_20241220.pgd.gz
target: /tmp/uta_20241220.pgd.gz
read_only: true
bind:
create_host_path: false
# Mounted into /docker-entrypoint-initdb.d.
# NOTE(review): presumably the image entrypoint runs scripts in this directory
# only when the data directory is first initialised -- confirm for biocommons/uta.
- ./uta-setup.sql:/docker-entrypoint-initdb.d/uta-setup.sql
ports:
# Publish PostgreSQL on the host loopback interface only.
- 127.0.0.1:5432:5432

volumes:
uta_vol:
# External volume: Compose neither creates nor removes it. Create it first with
# `docker volume create uta_vol`.
external: true
Binary file added docker-desktop-container.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
62 changes: 43 additions & 19 deletions docs/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,6 @@ Install Cool-Seq-Tool from `PyPI <https://pypi.org/project/cool-seq-tool/>`_:
* ``tests`` includes packages for running tests
* ``docs`` includes packages for writing and building documentation

Set up UTA
----------

Cool-Seq-Tool requires an available instance of the Universal Transcript Archive (UTA) database. Complete installation instructions (via Docker or a local server) are available at the `UTA GitHub repository <https://github.com/biocommons/uta>`_. For local usage, we recommend the following:

.. long-term, it would be best to move this over to the UTA repo to avoid duplication

.. code-block::

createuser -U postgres uta_admin
createuser -U postgres anonymous
createdb -U postgres -O uta_admin uta

export UTA_VERSION=uta_20241220.pgd.gz # most recent as of 2025/03/10
curl -O https://dl.biocommons.org/uta/$UTA_VERSION
gzip -cdq ${UTA_VERSION} | psql -h localhost -U uta_admin --echo-errors --single-transaction -v ON_ERROR_STOP=1 -d uta -p 5432

By default, Cool-Seq-Tool expects to connect to the UTA database via a PostgreSQL connection served locally on port 5432, under the PostgreSQL username ``uta_admin`` and the schema ``uta_20241220``.

Set up SeqRepo
--------------

Expand Down Expand Up @@ -79,6 +60,49 @@ Try moving data manually with ``sudo``:

See `mirroring documentation <https://github.com/biocommons/biocommons.seqrepo/blob/main/docs/mirror.rst>`_ on the SeqRepo GitHub repo for instructions and additional troubleshooting.

Set up using Docker
-------------------

Cool-Seq-Tool's dependencies can be installed using a Docker container. We only provide guidance on setting up external dependencies using Docker.

.. important::

This section assumes you have a local
`SeqRepo <https://github.com/biocommons/biocommons.seqrepo>`_
installed at ``/usr/local/share/seqrepo/2024-12-20``.
See the `SeqRepo setup section <#set-up-seqrepo>`_ for additional information.

You must download ``uta_20241220.pgd.gz`` from
https://dl.biocommons.org/uta/ using a web browser and
move it to the root of the repository.

If you're using Docker Desktop, you must go to
**Settings → Resources → File sharing** and add
``/usr/local/share/seqrepo`` under the *Virtual file shares*
section. Otherwise, you will get the following error::

OSError: Unable to open SeqRepo directory /usr/local/share/seqrepo/2024-12-20

To build, (re)create, and start containers:

.. code-block:: shell

docker volume create uta_vol
docker compose up

.. tip::

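If you want a clean slate, run ``docker compose down`` to remove the
containers, then ``docker volume rm uta_vol`` to remove the UTA data volume.
The volume is declared ``external`` in ``compose.yaml``, so
``docker compose down -v`` does not remove it. Afterwards, run
``docker volume create uta_vol`` and ``docker compose up`` to start
fresh containers.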

In Docker Desktop, you should see the following for a successful setup:

.. figure:: ../../docker-desktop-container.png
:alt: Docker Desktop Container
:align: center


Check data availability
-----------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Individual classes will accept arguments upon initialization to set parameters r
* - ``SEQREPO_ROOT_DIR``
- Path to SeqRepo directory (i.e. contains ``aliases.sqlite3`` database file, and ``sequences`` directory). Used by :py:class:`SeqRepoAccess <cool_seq_tool.handlers.seqrepo_access.SeqRepoAccess>`. If not defined, defaults to ``/usr/local/share/seqrepo/latest``.
* - ``UTA_DB_URL``
- A `libpq connection string <https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING>`_, i.e. of the form ``postgresql://<user>:<password>@<host>:<port>/<database>/<schema>``, used by the :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>` class. By default, it is set to ``postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220``.
- A `libpq connection string <https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING>`_, i.e. of the form ``postgresql://<user>:<password>@<host>:<port>/<database>/<schema>``, used by the :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>` class. By default, it is set to ``postgresql://anonymous@localhost:5432/uta/uta_20241220``.
* - ``LIFTOVER_CHAIN_37_TO_38``
- A path to a `chainfile <https://genome.ucsc.edu/goldenPath/help/chain.html>`_ for lifting from GRCh37 to GRCh38. Used by the :py:class:`LiftOver <cool_seq_tool.mappers.liftover.LiftOver>` class as input to `agct <https://pypi.org/project/agct/>`_. If not provided, agct will fetch it automatically from UCSC.
* - ``LIFTOVER_CHAIN_38_TO_37``
Expand Down
2 changes: 1 addition & 1 deletion src/cool_seq_tool/sources/uta_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")

UTA_DB_URL = environ.get(
"UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220"
"UTA_DB_URL", "postgresql://anonymous@localhost:5432/uta/uta_20241220"
)

_logger = logging.getLogger(__name__)
Expand Down
27 changes: 27 additions & 0 deletions uta-setup.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Setup script for the UTA database: builds the derived ``genomic`` table in
-- schema uta_20241220, indexes it, and adjusts privileges and ownership for
-- the ``anonymous`` role.

-- psql meta-command: connect to the ``uta`` database so that every statement
-- below runs against it.
\c uta;

-- Build uta_20241220.genomic from the transcript, exon_set, exon and exon_aln
-- tables. Each transcript (t) is joined to its exon set whose alt_aln_method is
-- 'transcript' (tes) and to its exon sets with any other alt_aln_method (aes).
-- Exons of the two sets (te, ae) are paired by equal ``ord``. The output keeps
-- the transcript's hgnc value plus the accession, alignment method, strand and
-- start/end coordinates from the non-'transcript' side.
CREATE TABLE uta_20241220.genomic AS
SELECT t.hgnc, aes.alt_ac, aes.alt_aln_method,
aes.alt_strand, ae.start_i AS alt_start_i,
ae.end_i AS alt_end_i
FROM (((((uta_20241220.transcript t
JOIN uta_20241220.exon_set tes ON (((t.ac = tes.tx_ac)
AND (tes.alt_aln_method = 'transcript'::text))))
JOIN uta_20241220.exon_set aes ON (((t.ac = aes.tx_ac)
AND (aes.alt_aln_method <> 'transcript'::text))))
JOIN uta_20241220.exon te ON
((tes.exon_set_id = te.exon_set_id)))
JOIN uta_20241220.exon ae ON
(((aes.exon_set_id = ae.exon_set_id)
AND (te.ord = ae.ord))))
-- NOTE(review): no column of ``ea`` is selected. As a LEFT JOIN it cannot drop
-- rows, but it would repeat a row if several exon_aln rows match the same exon
-- pair -- confirm whether this join is still needed.
LEFT JOIN uta_20241220.exon_aln ea ON
(((te.exon_id = ea.tx_exon_id) AND
(ae.exon_id = ea.alt_exon_id))));

-- Indexes on the new table: by accession and coordinate range, by
-- (hgnc, accession), and by accession alone.
CREATE INDEX alt_pos_index ON uta_20241220.genomic (alt_ac, alt_start_i, alt_end_i);
CREATE INDEX gene_alt_index ON uta_20241220.genomic (hgnc, alt_ac);
CREATE INDEX alt_ac_index ON uta_20241220.genomic (alt_ac);

-- Let ``anonymous`` connect to the database and read or modify every table that
-- currently exists in the schema, including ``genomic`` created above. Then
-- transfer ownership of the database and of the schema to ``anonymous``.
GRANT CONNECT ON DATABASE uta TO anonymous;
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA uta_20241220 TO anonymous;
ALTER DATABASE uta OWNER TO anonymous;
ALTER SCHEMA uta_20241220 OWNER to anonymous;