Skip to content

Commit c2904e1

Browse files
Nativu5Copilot
andauthored
🔨 Add LMDB maintenance utility (#5)
* Add LMDB rotation script * Update scripts/rotate_lmdb.py Co-authored-by: Copilot <[email protected]> --------- Co-authored-by: Copilot <[email protected]>
1 parent a487b1e commit c2904e1

File tree

3 files changed

+172
-0
lines changed

3 files changed

+172
-0
lines changed

scripts/USAGE.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Scripts Usage
2+
3+
This directory contains maintenance utilities for the project.
4+
5+
## Table of Contents
6+
7+
- [dump_lmdb.py](#dump_lmdbpy)
8+
- [rotate_lmdb.py](#rotate_lmdbpy)
9+
10+
## dump_lmdb.py
11+
12+
Dump records from an LMDB database as a JSON array. If no keys are provided, the script outputs every record. When keys are supplied, only the specified records are returned.
13+
14+
### Usage
15+
16+
Dump all entries:
17+
18+
```bash
19+
python scripts/dump_lmdb.py /path/to/lmdb
20+
```
21+
22+
Dump specific keys:
23+
24+
```bash
25+
python scripts/dump_lmdb.py /path/to/lmdb key1 key2
26+
```
27+
28+
## rotate_lmdb.py
29+
30+
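Delete LMDB records older than a given duration, or remove all records. The duration is an integer followed by `d` (days) or `h` (hours), e.g. `14d` or `24h`. A record's age is read from its `updated_at` field, falling back to `created_at`. When rotating by duration, records that are not JSON or that carry no parsable timestamp are left untouched.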
31+
32+
### Usage
33+
34+
Delete entries older than 14 days:
35+
36+
```bash
37+
python scripts/rotate_lmdb.py /path/to/lmdb 14d
38+
```
39+
40+
Delete all entries:
41+
42+
```bash
43+
python scripts/rotate_lmdb.py /path/to/lmdb all
44+
```

scripts/dump_lmdb.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import argparse
2+
from pathlib import Path
3+
from typing import Any, Iterable, List
4+
5+
import lmdb
6+
import orjson
7+
8+
9+
def _decode_value(value: bytes) -> Any:
    """Turn a raw LMDB value into a Python object.

    The bytes are parsed as JSON first. Payloads that are not valid JSON are
    returned as text instead, decoded as UTF-8 with undecodable bytes
    replaced.
    """
    try:
        parsed = orjson.loads(value)
    except orjson.JSONDecodeError:
        # Not JSON: fall back to a lossy text rendering of the bytes.
        return value.decode("utf-8", errors="replace")
    return parsed
15+
16+
17+
def _dump_all(txn: lmdb.Transaction) -> List[dict[str, Any]]:
    """Return every record in the database as ``{"key", "value"}`` dicts.

    Keys are decoded as UTF-8 with undecodable bytes replaced, the same lossy
    policy applied to non-JSON values, so a single binary key cannot abort
    the whole dump with ``UnicodeDecodeError``.

    Args:
        txn: Transaction to read from.

    Returns:
        One dict per record, in cursor iteration order.
    """
    return [
        {
            "key": key.decode("utf-8", errors="replace"),
            "value": _decode_value(value),
        }
        for key, value in txn.cursor()
    ]
23+
24+
25+
def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> List[dict[str, Any]]:
    """Look up each requested key and return the records that exist.

    Keys absent from the database are skipped; the output follows the order
    in which the keys were supplied.
    """
    return [
        {"key": name, "value": _decode_value(payload)}
        for name in keys
        if (payload := txn.get(name.encode("utf-8"))) is not None
    ]
33+
34+
35+
def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None:
    """Print selected or all key-value pairs from the LMDB database as JSON.

    Args:
        path: Directory of the LMDB environment; opened read-only and
            without locking.
        keys: Keys to look up. When ``None`` or empty, every record is
            dumped.
    """
    env = lmdb.open(str(path), readonly=True, lock=False)
    try:
        with env.begin() as txn:
            records = _dump_selected(txn, keys) if keys else _dump_all(txn)
    finally:
        # Release the environment even when reading or decoding fails.
        env.close()

    print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode())
46+
47+
48+
def main() -> None:
    """Parse the command line and dump the requested LMDB records."""
    cli = argparse.ArgumentParser(description="Dump LMDB records as JSON")
    cli.add_argument("path", type=Path, help="Path to LMDB directory")
    cli.add_argument("keys", nargs="*", help="Keys to retrieve")
    options = cli.parse_args()

    dump_lmdb(options.path, options.keys)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

scripts/rotate_lmdb.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import argparse
2+
from datetime import datetime, timedelta
3+
from pathlib import Path
4+
from typing import Any
5+
6+
import lmdb
7+
import orjson
8+
9+
10+
def _parse_duration(value: str) -> timedelta:
11+
"""Parse duration in the format '14d' or '24h'."""
12+
if value.endswith("d"):
13+
return timedelta(days=int(value[:-1]))
14+
if value.endswith("h"):
15+
return timedelta(hours=int(value[:-1]))
16+
raise ValueError("Invalid duration format. Use Nd or Nh")
17+
18+
19+
def _should_delete(record: dict[str, Any], threshold: datetime) -> bool:
20+
"""Check if the record is older than the threshold."""
21+
timestamp = record.get("updated_at") or record.get("created_at")
22+
if not timestamp:
23+
return False
24+
try:
25+
ts = datetime.fromisoformat(timestamp)
26+
except ValueError:
27+
return False
28+
return ts < threshold
29+
30+
31+
def rotate_lmdb(path: Path, keep: str) -> None:
    """Remove records older than the retention period, or every record.

    Args:
        path: Directory of the LMDB environment.
        keep: Retention period such as ``'14d'`` or ``'24h'``, or the
            literal ``'all'`` to delete every record.

    Raises:
        ValueError: If ``keep`` is neither ``'all'`` nor a valid duration.
    """
    # Validate the argument before touching the database, so a malformed
    # period fails without an environment having been opened.
    threshold = None if keep == "all" else datetime.now() - _parse_duration(keep)

    env = lmdb.open(str(path), writemap=True, readahead=False, meminit=False)
    try:
        # The transaction commits on a clean exit and aborts on an exception.
        with env.begin(write=True) as txn:
            # Select first and delete afterwards, so the outcome does not
            # depend on how a live cursor reacts to deletions made through
            # the transaction while it is iterating.
            doomed = [
                key
                for key, value in txn.cursor()
                if threshold is None or _is_expired(value, threshold)
            ]
            for key in doomed:
                txn.delete(key)
    finally:
        # Close the environment even if reading or deleting fails.
        env.close()


def _is_expired(value: bytes, threshold: datetime) -> bool:
    """Return True if the raw value is a JSON object older than threshold."""
    try:
        record = orjson.loads(value)
    except orjson.JSONDecodeError:
        # Values that are not JSON carry no timestamp; keep them.
        return False
    # Valid JSON that is not an object (list, number, ...) has no fields.
    return isinstance(record, dict) and _should_delete(record, threshold)
55+
56+
57+
def main() -> None:
    """Parse the command line and rotate the given LMDB database."""
    cli = argparse.ArgumentParser(description="Remove outdated LMDB records")
    cli.add_argument("path", type=Path, help="Path to LMDB directory")
    retention_help = "Retention period, e.g. 14d or 24h. Use 'all' to delete every record"
    cli.add_argument("keep", help=retention_help)
    options = cli.parse_args()

    rotate_lmdb(options.path, options.keep)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)