diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b76334a..c236a4d 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index 37eac2e..664b8d6 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -107,7 +107,7 @@ jobs: python -c 'import imgdd' - name: Upload sdist artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: sdist path: target/wheels/*.tar.gz @@ -147,7 +147,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -185,7 +185,7 @@ jobs: ls -l target/wheels/ - name: Upload wheel artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: wheels path: target/wheels/*.whl @@ -200,7 +200,7 @@ jobs: if: ${{ !inputs.dry-run }} steps: - name: Download all artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: root @@ -224,7 +224,7 @@ jobs: if: ${{ inputs.dry-run }} steps: - name: Download all artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: root diff --git a/Cargo.lock b/Cargo.lock index bef7b35..ed57238 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -492,7 +492,7 @@ dependencies = [ [[package]] name = "imgdd" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "criterion", @@ -503,7 +503,7 @@ dependencies = [ [[package]] name = "imgddcore" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "codspeed-criterion-compat", @@ -517,7 +517,7 @@ dependencies = [ [[package]] name = "imgddpy" -version = "0.1.0" +version = "0.1.3" 
dependencies = [ "image", "imgddcore", diff --git a/Cargo.toml b/Cargo.toml index 1c60793..f869de9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,9 +16,11 @@ repository = "https://github.com/aastopher/imgdd" documentation = "https://github.com/aastopher/imgdd" homepage = "https://github.com/aastopher/imgdd" -keywords = ["imagehash", "hash", "perceptual hash", "deduplication", "image deduplication"] +keywords = ["imagehash", "hash", "perceptual", "deduplication", "wavelet"] readme = "./README.md" -categories = ["image-processing", "utilities", "image-hash", "hashing"] +categories = ["algorithms", "computer-vision", "data-structures", "mathematics"] + + [workspace.dependencies] diff --git a/README.md b/README.md index 04d9492..7aa2066 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +[![imgdd pypi](https://img.shields.io/pypi/v/imgdd?label=imgdd%20pypi)](https://pypi.org/project/imgdd) +[![imgdd crate](https://img.shields.io/crates/v/imgdd?label=imgdd)](https://crates.io/crates/imgdd) +[![imgddcore crate](https://img.shields.io/crates/v/imgddcore?label=imgddcore)](https://crates.io/crates/imgddcore) [![codecov](https://codecov.io/gh/aastopher/imgdd/graph/badge.svg?token=XZ1O2X04SO)](https://codecov.io/gh/aastopher/imgdd) [![Documentation Status](https://img.shields.io/badge/docs-online-brightgreen)](https://aastopher.github.io/imgdd/) [![DeepSource](https://app.deepsource.com/gh/aastopher/imgdd.svg/?label=active+issues&show_trend=true&token=IiuhCO6n1pK-GAJ800k6Z_9t)](https://app.deepsource.com/gh/aastopher/imgdd/) diff --git a/crates/imgdd/Cargo.toml b/crates/imgdd/Cargo.toml index 87c4abb..e7a040d 100644 --- a/crates/imgdd/Cargo.toml +++ b/crates/imgdd/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "imgdd" -version = "0.1.0" +version = "0.1.1" edition.workspace = true license.workspace = true authors.workspace = true @@ -9,12 +9,11 @@ repository.workspace = true documentation.workspace = true homepage.workspace = true keywords.workspace = true -readme.workspace 
= true categories.workspace = true - +readme = "README.md" [dependencies] -imgddcore = { path = "../imgddcore", version = "0.1.0" } +imgddcore = { path = "../imgddcore", version = "0.1.1" } image.workspace = true anyhow.workspace = true criterion = { version = "0.5.1", optional = true } diff --git a/crates/imgdd/README.md b/crates/imgdd/README.md new file mode 100644 index 0000000..e7c3dbb --- /dev/null +++ b/crates/imgdd/README.md @@ -0,0 +1,85 @@ +[![codecov](https://codecov.io/gh/aastopher/imgdd/graph/badge.svg?token=XZ1O2X04SO)](https://codecov.io/gh/aastopher/imgdd) +[![Documentation Status](https://img.shields.io/badge/docs-online-brightgreen)](https://aastopher.github.io/imgdd/) +[![DeepSource](https://app.deepsource.com/gh/aastopher/imgdd.svg/?label=active+issues&show_trend=true&token=IiuhCO6n1pK-GAJ800k6Z_9t)](https://app.deepsource.com/gh/aastopher/imgdd/) + +# imgdd: Image DeDuplication + +`imgdd` is a performance-first perceptual hashing library that combines Rust's speed with Python's accessibility, making it perfect for handling large datasets. Designed to quickly process nested folder structures, commonly found in image datasets. + +## Features +- **Multiple Hashing Algorithms**: Supports `aHash`, `dHash`, `mHash`, `pHash`, `wHash`. +- **Multiple Filter Types**: Supports `Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3`. +- **Identify Duplicates**: Quickly identify duplicate hash pairs. +- **Simplicity**: Simple interface, robust performance. + +## Why imgdd? + +`imgdd` has been inspired by [imagehash](https://github.com/JohannesBuchner/imagehash) and aims to be a lightning-fast replacement with additional features. To ensure enhanced performance, `imgdd` has been benchmarked against `imagehash`. In Python, **imgdd consistently outperforms imagehash by ~60%–95%**, demonstrating a significant reduction in hashing time per image. 
+ +--- + +# Quick Start + +## Installation + +```bash +cargo add imgdd +``` + +## Usage Examples + +### Hash Images + +```rust +use imgdd::*; +use std::path::PathBuf; + +let result = hash( + PathBuf::from("path/to/images"), + Some("Triangle"), // Optional: default = "Triangle" + Some("dHash"), // Optional: default = "dHash" + Some(false), // Optional: default = false +); +println!("{:#?}", result); +``` + +### Find Duplicates + +```rust +use imgdd::*; +use std::path::PathBuf; + +let result = dupes( + PathBuf::from("path/to/images"), + Some("Triangle"), // Optional: default = "Triangle" + Some("dHash"), // Optional: default = "dHash" + false, +); +println!("{:#?}", result); +``` + +## Supported Algorithms +- **aHash**: Average Hash +- **mHash**: Median Hash +- **dHash**: Difference Hash +- **pHash**: Perceptual Hash +- **wHash**: Wavelet Hash + +## Supported Filters +- `Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3` + +## Contributing +Contributions are always welcome! 🚀 + +Found a bug or have a question? Open a GitHub issue. Pull requests for new features or fixes are encouraged! 
+ +## Similar projects +- https://github.com/JohannesBuchner/imagehash +- https://github.com/commonsmachinery/blockhash-python +- https://github.com/acoomans/instagram-filters +- https://pippy360.github.io/transformationInvariantImageSearch/ +- https://www.phash.org/ +- https://pypi.org/project/dhash/ +- https://github.com/thorn-oss/perception (based on imagehash code, depends on opencv) +- https://docs.opencv.org/3.4/d4/d93/group__img__hash.html diff --git a/crates/imgddcore/Cargo.toml b/crates/imgddcore/Cargo.toml index dd10ac9..8e08008 100644 --- a/crates/imgddcore/Cargo.toml +++ b/crates/imgddcore/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "imgddcore" -version = "0.1.0" +version = "0.1.1" edition.workspace = true license.workspace = true authors.workspace = true @@ -8,6 +8,8 @@ description.workspace = true repository.workspace = true documentation.workspace = true homepage.workspace = true +keywords.workspace = true +categories.workspace = true readme = "README.md" [dependencies] diff --git a/crates/imgddpy/Cargo.toml b/crates/imgddpy/Cargo.toml index 810cb29..f86625d 100644 --- a/crates/imgddpy/Cargo.toml +++ b/crates/imgddpy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "imgddpy" -version = "0.1.0" +version = "0.1.3" edition.workspace = true license.workspace = true authors.workspace = true @@ -9,9 +9,10 @@ description.workspace = true repository.workspace = true documentation.workspace = true homepage.workspace = true +readme = "README.md" [dependencies] -imgddcore = { path = "../imgddcore", version = "0.1.0" } +imgddcore = { path = "../imgddcore", version = "0.1.1" } pyo3 = { version = "0.23", features = ["extension-module", "abi3-py39"] } image.workspace = true diff --git a/crates/imgddpy/README.md b/crates/imgddpy/README.md new file mode 100644 index 0000000..7aa2066 --- /dev/null +++ b/crates/imgddpy/README.md @@ -0,0 +1,85 @@ +[![imgdd pypi](https://img.shields.io/pypi/v/imgdd?label=imgdd%20pypi)](https://pypi.org/project/imgdd) +[![imgdd 
crate](https://img.shields.io/crates/v/imgdd?label=imgdd)](https://crates.io/crates/imgdd) +[![imgddcore crate](https://img.shields.io/crates/v/imgddcore?label=imgddcore)](https://crates.io/crates/imgddcore) +[![codecov](https://codecov.io/gh/aastopher/imgdd/graph/badge.svg?token=XZ1O2X04SO)](https://codecov.io/gh/aastopher/imgdd) +[![Documentation Status](https://img.shields.io/badge/docs-online-brightgreen)](https://aastopher.github.io/imgdd/) +[![DeepSource](https://app.deepsource.com/gh/aastopher/imgdd.svg/?label=active+issues&show_trend=true&token=IiuhCO6n1pK-GAJ800k6Z_9t)](https://app.deepsource.com/gh/aastopher/imgdd/) + +# imgdd: Image DeDuplication + +`imgdd` is a performance-first perceptual hashing library that combines Rust's speed with Python's accessibility, making it perfect for handling large datasets. Designed to quickly process nested folder structures, commonly found in image datasets. + +## Features +- **Multiple Hashing Algorithms**: Supports `aHash`, `dHash`, `mHash`, `pHash`, `wHash`. +- **Multiple Filter Types**: Supports `Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3`. +- **Identify Duplicates**: Quickly identify duplicate hash pairs. +- **Simplicity**: Simple interface, robust performance. + +## Why imgdd? + +`imgdd` has been inspired by [imagehash](https://github.com/JohannesBuchner/imagehash) and aims to be a lightning-fast replacement with additional features. To ensure enhanced performance, `imgdd` has been benchmarked against `imagehash`. In Python, **imgdd consistently outperforms imagehash by ~60%–95%**, demonstrating a significant reduction in hashing time per image. 
+ +--- + +# Quick Start + +## Installation + +```bash +pip install imgdd +``` + +## Usage Examples + +### Hash Images + +```python +import imgdd as dd + +results = dd.hash( + path="path/to/images", + algo="dhash", # Optional: default = dhash + filter="triangle", # Optional: default = triangle + sort=False # Optional: default = False +) +print(results) +``` + +### Find Duplicates + +```python +import imgdd as dd + +duplicates = dd.dupes( + path="path/to/images", + algo="dhash", # Optional: default = dhash + filter="triangle", # Optional: default = triangle + remove=False # Optional: default = False +) +print(duplicates) +``` + +## Supported Algorithms +- **aHash**: Average Hash +- **mHash**: Median Hash +- **dHash**: Difference Hash +- **pHash**: Perceptual Hash +- **wHash**: Wavelet Hash + +## Supported Filters +- `Nearest`, `Triangle`, `CatmullRom`, `Gaussian`, `Lanczos3` + +## Contributing +Contributions are always welcome! 🚀 + +Found a bug or have a question? Open a GitHub issue. Pull requests for new features or fixes are encouraged! 
+ +## Similar projects +- https://github.com/JohannesBuchner/imagehash +- https://github.com/commonsmachinery/blockhash-python +- https://github.com/acoomans/instagram-filters +- https://pippy360.github.io/transformationInvariantImageSearch/ +- https://www.phash.org/ +- https://pypi.org/project/dhash/ +- https://github.com/thorn-oss/perception (based on imagehash code, depends on opencv) +- https://docs.opencv.org/3.4/d4/d93/group__img__hash.html diff --git a/crates/imgddpy/docs/index.md b/crates/imgddpy/docs/index.md index 4f36093..801eebb 100644 --- a/crates/imgddpy/docs/index.md +++ b/crates/imgddpy/docs/index.md @@ -1,3 +1,5 @@ +[![imgdd crate](https://img.shields.io/crates/v/imgdd?label=imgdd)](https://crates.io/crates/imgdd) +[![imgddcore crate](https://img.shields.io/crates/v/imgddcore?label=imgddcore)](https://crates.io/crates/imgddcore) [![codecov](https://codecov.io/gh/aastopher/imgdd/graph/badge.svg?token=XZ1O2X04SO)](https://codecov.io/gh/aastopher/imgdd) [![Documentation Status](https://img.shields.io/badge/docs-online-brightgreen)](https://aastopher.github.io/imgdd/) [![DeepSource](https://app.deepsource.com/gh/aastopher/imgdd.svg/?label=active+issues&show_trend=true&token=IiuhCO6n1pK-GAJ800k6Z_9t)](https://app.deepsource.com/gh/aastopher/imgdd/) diff --git a/crates/imgddpy/pyproject.toml b/crates/imgddpy/pyproject.toml index 8631ec0..67b6dc0 100644 --- a/crates/imgddpy/pyproject.toml +++ b/crates/imgddpy/pyproject.toml @@ -4,13 +4,14 @@ build-backend = "maturin" [project] name = "imgdd" -version = "0.1.0" +version = "0.1.3" description = """ Performance-first perceptual hashing library; perfect for handling large datasets. 
Designed to quickly process nested folder structures, commonly found in image datasets """ requires-python = ">=3.9" license = { file = "LICENSE" } +dynamic = ["readme"] keywords = [ "rust", "imagehash", @@ -41,6 +42,9 @@ classifiers = [ "Topic :: Scientific/Engineering", ] +[tool.setuptools.dynamic] +readme = { file = "README.md", content-type = "text/markdown"} + [project.urls] homepage = "https://github.com/aastopher/imgdd" documentation = "https://github.com/aastopher/imgdd" @@ -65,4 +69,6 @@ test = [ [tool.maturin] bindings = "pyo3" -module-name = "imgdd" \ No newline at end of file +module-name = "imgdd" +include = ["LICENSE", "README.md"] +sdist-include = ["LICENSE", "README.md"] \ No newline at end of file