Skip to content

Commit b9d7bd7

Browse files
committed
Deploying to gh-pages from @ 5bb61cf 🚀
1 parent d16c2fc commit b9d7bd7

File tree

6 files changed

+62
-8
lines changed

6 files changed

+62
-8
lines changed

assets/python/pca_migration.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import time
2+
import faiss
3+
import numpy as np
4+
from sklearn.datasets import fetch_openml
5+
from sklearn.model_selection import train_test_split
6+
from sklearn.decomposition import PCA
7+
8+
9+
def sklearn_pca_to_faiss(skl_pca) -> faiss.PCAMatrix:
    """Build a ``faiss.PCAMatrix`` that reproduces a fitted scikit-learn PCA.

    The returned transform is populated by hand (no Faiss training): its
    matrix ``A`` holds the principal components as rows, and its bias ``b``
    folds in the mean-centering, so that ``X @ A.T + b`` equals
    ``(X - mean) @ A.T``.

    Args:
        skl_pca: A fitted PCA-like estimator exposing ``components_``,
            ``mean_``, ``n_components_`` and, when ``whiten`` is true,
            ``explained_variance_``.

    Returns:
        A ``faiss.PCAMatrix`` marked as trained, with ``A``, ``b`` and
        ``mean`` filled from ``skl_pca``.
    """
    # Keep float64 until the last moment: Faiss stores float32, and rounding
    # the inputs before doing arithmetic on them compounds the error.
    components = np.asarray(skl_pca.components_, dtype=np.float64)
    mean = np.asarray(skl_pca.mean_, dtype=np.float64)
    d_in = components.shape[1]
    d_out = skl_pca.n_components_

    # Build A: rows are components; include whitening if requested.
    if getattr(skl_pca, "whiten", False):
        scale = np.sqrt(np.asarray(skl_pca.explained_variance_, dtype=np.float64))
        # A component with (near-)zero variance would otherwise turn into
        # inf/NaN rows; clamp the divisor to machine epsilon instead.
        scale = np.maximum(scale, np.finfo(scale.dtype).eps)
        components = components / scale[:, None]
    A = np.ascontiguousarray(components, dtype=np.float32)

    # Choose bias so that X @ A^T + b == (X - mean) @ A^T.
    # It is derived from the *rounded* A (the matrix Faiss will really apply)
    # but accumulated in float64, so the centering cancels as exactly as
    # float32 storage allows.
    b = (-(A.astype(np.float64) @ mean)).astype(np.float32)  # shape (d_out,)

    faiss_pca = faiss.PCAMatrix(d_in, d_out, 0.0, False)  # eigen_power handled manually
    faiss.copy_array_to_vector(A.reshape(-1), faiss_pca.A)
    faiss.copy_array_to_vector(mean.astype(np.float32).reshape(-1), faiss_pca.mean)
    faiss.copy_array_to_vector(b.reshape(-1), faiss_pca.b)

    # A and b were filled in manually, so flag the transform as usable.
    faiss_pca.is_trained = True
    return faiss_pca
32+
33+
# Fetch the OpenML dataset and hold out 3,000 samples, stratified by label.
X, y = fetch_openml(data_id=41082, as_frame=False, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=3_000
)

# Fit the reference projection with scikit-learn.
skl_pca = PCA(n_components=32, random_state=42)
skl_pca.fit(X_train)

# Port the fitted projection to Faiss without retraining.
faiss_pca = sklearn_pca_to_faiss(skl_pca)

# Large synthetic batch used for both the equivalence check and the benchmark.
# It is drawn from a seeded generator (like every other random step here) so
# the run is reproducible, and sampled directly as float32 so no float64 array
# of the same shape has to be materialised first.
# NOTE: `X` is deliberately rebound; the dataset itself is no longer needed.
rng = np.random.default_rng(42)
X = rng.standard_normal((1_000_000, faiss_pca.d_in), dtype=np.float32)

# Both implementations must agree on synthetic, training and held-out data.
np.testing.assert_allclose(skl_pca.transform(X), faiss_pca.apply_py(X), atol=1e-5)
np.testing.assert_allclose(skl_pca.transform(X_train), faiss_pca.apply_py(X_train), atol=1e-5)
np.testing.assert_allclose(skl_pca.transform(X_test), faiss_pca.apply_py(X_test), atol=1e-5)
print("OK: sklearn == faiss")

# sklearn
t0 = time.perf_counter()
_ = skl_pca.transform(X)
t1 = time.perf_counter()

# faiss
t2 = time.perf_counter()
_ = faiss_pca.apply_py(X)
t3 = time.perf_counter()

print(f"sklearn.transform: {(t1-t0):.3f}s | {(X.shape[0]/(t1-t0)):.0f} vec/s")
print(f"faiss.apply_py : {(t3-t2):.3f}s | {(X.shape[0]/(t3-t2)):.0f} vec/s")
print(f"Speedup: {((t1-t0)/(t3-t2)):.1f}x")

blog/2024/dvc-fix/index.html

Lines changed: 2 additions & 2 deletions
Large diffs are not rendered by default.

blog/2025/sklearn-faiss/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,6 @@
7373
<span class="nf">print</span><span class="p">(</span><span class="sa">f</span><span class="sh">"</span><span class="s">sklearn.transform: </span><span class="si">{</span><span class="p">(</span><span class="n">t1</span><span class="o">-</span><span class="n">t0</span><span class="p">)</span><span class="si">:</span><span class="p">.</span><span class="mi">3</span><span class="n">f</span><span class="si">}</span><span class="s">s | </span><span class="si">{</span><span class="p">(</span><span class="n">X</span><span class="p">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">/</span><span class="p">(</span><span class="n">t1</span><span class="o">-</span><span class="n">t0</span><span class="p">))</span><span class="si">:</span><span class="p">.</span><span class="mi">0</span><span class="n">f</span><span class="si">}</span><span class="s"> vec/s</span><span class="sh">"</span><span class="p">)</span>
7474
<span class="nf">print</span><span class="p">(</span><span class="sa">f</span><span class="sh">"</span><span class="s">faiss.apply_py : </span><span class="si">{</span><span class="p">(</span><span class="n">t3</span><span class="o">-</span><span class="n">t2</span><span class="p">)</span><span class="si">:</span><span class="p">.</span><span class="mi">3</span><span class="n">f</span><span class="si">}</span><span class="s">s | </span><span class="si">{</span><span class="p">(</span><span class="n">X</span><span class="p">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">/</span><span class="p">(</span><span class="n">t3</span><span class="o">-</span><span class="n">t2</span><span class="p">))</span><span class="si">:</span><span class="p">.</span><span class="mi">0</span><span class="n">f</span><span class="si">}</span><span class="s"> vec/s</span><span class="sh">"</span><span class="p">)</span>
7575
<span class="nf">print</span><span class="p">(</span><span class="sa">f</span><span class="sh">"</span><span class="s">Speedup: </span><span class="si">{</span><span class="p">((</span><span class="n">t1</span><span class="o">-</span><span class="n">t0</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">t3</span><span class="o">-</span><span class="n">t2</span><span class="p">))</span><span class="si">:</span><span class="p">.</span><span class="mi">1</span><span class="n">f</span><span class="si">}</span><span class="s">x</span><span class="sh">"</span><span class="p">)</span>
76-
</code></pre></div></div> <p>In this example, a 1.2x speedup was achieved. See the complete code <a href="https://github.com/barufa/barufa.github.io/blob/main/assets/pca_migration.py" rel="external nofollow noopener" target="_blank">here</a>.</p> <h2 id="conclusion">Conclusion</h2> <p>Migrating from <code class="language-plaintext highlighter-rouge">scikit-learn</code> to <code class="language-plaintext highlighter-rouge">Faiss</code> for PCA application is a straightforward optimization with real-world impact. You can keep sklearn for training and validation, then deploy the exact same projection using Faiss—boosting inference performance without retraining.</p> <p>This method is simple, deterministic, and production-ready. And with just a few lines of code, you bridge the gap between experimentation and scalable deployment.</p> </div> </article> <br> <hr> <br> <ul class="list-disc pl-8"></ul> <h2 class="text-3xl font-semibold mb-4 mt-12">Enjoy Reading This Article?</h2> <p class="mb-2">Here are some more articles you might like to read next:</p> <li class="my-2"> <a class="text-pink-700 underline font-semibold hover:text-pink-800" href="/blog/2024/start-ml-project/">How to Start a Machine Learning Project Before Starting a Machine Learning Project</a> </li> <li class="my-2"> <a class="text-pink-700 underline font-semibold hover:text-pink-800" href="/blog/2024/dvc-fix/">DVC + Many Files: A Strategy for Efficient Large Dataset Management</a> </li> </div> </div> <footer class="fixed-bottom" role="contentinfo"> <div class="container mt-0"> © Copyright 2025 Bruno A. Bruno Baruffaldi. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. 
</div> </footer> <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/jquery.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script> <script src="/assets/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/[email protected]/js/mdb.min.js" integrity="sha256-NdbiivsvWt7VYCt6hYNT3h/th9vSTL4EDWeGs5SN3DA=" crossorigin="anonymous"></script> <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/masonry.pkgd.min.js" integrity="sha256-Nn1q/fx0H7SNLZMQ5Hw5JLaTRZp0yILA/FRexe19VdI=" crossorigin="anonymous"></script> <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/imagesloaded.pkgd.min.js" integrity="sha256-htrLFfZJ6v5udOG+3kNLINIKh2gvoKqwEhHYfTTMICc=" crossorigin="anonymous"></script> <script defer src="/assets/js/masonry.js?a0db7e5d5c70cc3252b3138b0c91dcaf" type="text/javascript"></script> <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/medium-zoom.min.js" integrity="sha256-ZgMyDAIYDYGxbcpJcfUnYwNevG/xi9OHKaR/8GK+jWc=" crossorigin="anonymous"></script> <script defer src="/assets/js/zoom.js?85ddb88934d28b74e78031fd54cf8308"></script> <script src="/assets/js/no_defer.js?2781658a0a2b13ed609542042a859126"></script> <script defer src="/assets/js/common.js?e0514a05c5c95ac1a93a8dfd5249b92e"></script> <script defer src="/assets/js/copy_code.js?c8a01c11a92744d44b093fc3bda915df" type="text/javascript"></script> <script defer src="/assets/js/jupyter_new_tab.js?d9f17b6adc2311cbabd747f4538bb15f"></script> <script async src="https://d1bxh8uas1mnw7.cloudfront.net/assets/embed.js"></script> <script async src="https://badge.dimensions.ai/badge.js"></script> <script defer type="text/javascript" id="MathJax-script" src="https://cdn.jsdelivr.net/npm/[email protected]/es5/tex-mml-chtml.js" integrity="sha256-MASABpB4tYktI2Oitl4t+78w/lyA+D7b/s9GEP0JOGI=" crossorigin="anonymous"></script> <script 
src="/assets/js/mathjax-setup.js?a5bb4e6a542c546dd929b24b8b236dfd"></script> <script defer src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6" crossorigin="anonymous"></script> <script defer src="/assets/js/progress-bar.js?2f30e0e6801ea8f5036fa66e1ab0a71a" type="text/javascript"></script> <script src="/assets/js/vanilla-back-to-top.min.js?eaf77346e117baa09987a278a117b9a7"></script> <script>
76+
</code></pre></div></div> <p>See the complete code <a href="https://github.com/barufa/barufa.github.io/blob/main/assets/python/pca_migration.py" rel="external nofollow noopener" target="_blank">here</a>.</p> <p>Benchmarking on an <a href="https://www.asus.com/us/laptops/for-home/zenbook/zenbook-14-q407/techspec/" rel="external nofollow noopener" target="_blank">Asus Zenbook 14</a> showed throughput rising from 803,290 vec/s to 1,418,897 vec/s — roughly a 1.77× improvement.</p> <h2 id="conclusion">Conclusion</h2> <p>Migrating from <code class="language-plaintext highlighter-rouge">scikit-learn</code> to <code class="language-plaintext highlighter-rouge">Faiss</code> for PCA application is a straightforward optimization with real-world impact. You can keep sklearn for training and validation, then deploy the exact same projection using Faiss—boosting inference performance without retraining.</p> <p>This method is simple, deterministic, and production-ready. And with just a few lines of code, you bridge the gap between experimentation and scalable deployment.</p> </div> </article> <br> <hr> <br> <ul class="list-disc pl-8"></ul> <h2 class="text-3xl font-semibold mb-4 mt-12">Enjoy Reading This Article?</h2> <p class="mb-2">Here are some more articles you might like to read next:</p> <li class="my-2"> <a class="text-pink-700 underline font-semibold hover:text-pink-800" href="/blog/2024/start-ml-project/">How to Start a Machine Learning Project Before Starting a Machine Learning Project</a> </li> <li class="my-2"> <a class="text-pink-700 underline font-semibold hover:text-pink-800" href="/blog/2024/dvc-fix/">DVC + Many Files: A Strategy for Efficient Large Dataset Management</a> </li> </div> </div> <footer class="fixed-bottom" role="contentinfo"> <div class="container mt-0"> © Copyright 2025 Bruno A. Bruno Baruffaldi. 
Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div> </footer> <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/jquery.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script> <script src="/assets/js/bootstrap.bundle.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/[email protected]/js/mdb.min.js" integrity="sha256-NdbiivsvWt7VYCt6hYNT3h/th9vSTL4EDWeGs5SN3DA=" crossorigin="anonymous"></script> <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/masonry.pkgd.min.js" integrity="sha256-Nn1q/fx0H7SNLZMQ5Hw5JLaTRZp0yILA/FRexe19VdI=" crossorigin="anonymous"></script> <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/imagesloaded.pkgd.min.js" integrity="sha256-htrLFfZJ6v5udOG+3kNLINIKh2gvoKqwEhHYfTTMICc=" crossorigin="anonymous"></script> <script defer src="/assets/js/masonry.js?a0db7e5d5c70cc3252b3138b0c91dcaf" type="text/javascript"></script> <script defer src="https://cdn.jsdelivr.net/npm/[email protected]/dist/medium-zoom.min.js" integrity="sha256-ZgMyDAIYDYGxbcpJcfUnYwNevG/xi9OHKaR/8GK+jWc=" crossorigin="anonymous"></script> <script defer src="/assets/js/zoom.js?85ddb88934d28b74e78031fd54cf8308"></script> <script src="/assets/js/no_defer.js?2781658a0a2b13ed609542042a859126"></script> <script defer src="/assets/js/common.js?e0514a05c5c95ac1a93a8dfd5249b92e"></script> <script defer src="/assets/js/copy_code.js?c8a01c11a92744d44b093fc3bda915df" type="text/javascript"></script> <script defer src="/assets/js/jupyter_new_tab.js?d9f17b6adc2311cbabd747f4538bb15f"></script> <script async src="https://d1bxh8uas1mnw7.cloudfront.net/assets/embed.js"></script> <script async 
src="https://badge.dimensions.ai/badge.js"></script> <script defer type="text/javascript" id="MathJax-script" src="https://cdn.jsdelivr.net/npm/[email protected]/es5/tex-mml-chtml.js" integrity="sha256-MASABpB4tYktI2Oitl4t+78w/lyA+D7b/s9GEP0JOGI=" crossorigin="anonymous"></script> <script src="/assets/js/mathjax-setup.js?a5bb4e6a542c546dd929b24b8b236dfd"></script> <script defer src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6" crossorigin="anonymous"></script> <script defer src="/assets/js/progress-bar.js?2f30e0e6801ea8f5036fa66e1ab0a71a" type="text/javascript"></script> <script src="/assets/js/vanilla-back-to-top.min.js?eaf77346e117baa09987a278a117b9a7"></script> <script>
7777
addBackToTop();
7878
</script> <script type="module" src="/assets/js/search/ninja-keys.min.js?f8abf2f636f242d077f24149a0a56c96"></script> <ninja-keys hidebreadcrumbs noautoloadmdicons placeholder="Type to start searching"></ninja-keys> <script src="/assets/js/search-setup.js?6c304f7b1992d4b60f7a07956e52f04a"></script> <script src="/assets/js/search-data.js"></script> <script src="/assets/js/shortcut-key.js?6f508d74becd347268a7f822bca7309d"></script> </body> </html>

0 commit comments

Comments
 (0)