|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Demo of the new JSON-only classification architecture. |
| 4 | + |
| 5 | +This example shows the clean JSON API that replaces DataFrames. |
| 6 | +""" |
| 7 | + |
| 8 | +import sys |
| 9 | +from pathlib import Path |
| 10 | + |
| 11 | +# Add project root to path |
| 12 | +sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 13 | + |
| 14 | +try: |
| 15 | + from piedomains import DataCollector, DomainClassifier |
| 16 | +except ImportError as e: |
| 17 | + print(f"Import error: {e}") |
| 18 | + print("This demo requires the piedomains package to be installed.") |
| 19 | + sys.exit(1) |
| 20 | + |
| 21 | + |
def demo_json_api():
    """Demonstrate the new JSON-only API.

    Builds a ``DomainClassifier``, classifies a small fixed list of domains
    three ways (``classify``, ``classify_by_text``, ``classify_by_images``),
    prints every result, and dumps the first combined result as JSON.

    Any exception raised along the way is reported with a traceback and a
    short list of likely causes instead of being propagated, so the caller
    can carry on with the next demo.
    """
    import json
    import traceback

    def _summary(result):
        """Return the one-line summary shared by the text and image listings."""
        # NOTE(review): a failed domain presumably carries explicit None
        # values rather than omitting the keys -- confirm against the
        # piedomains result schema. `or` keeps the intended fallbacks and
        # stops the ":.3f" format from raising TypeError on None.
        domain = result.get("domain", "unknown")
        category = result.get("category") or "error"
        confidence = result.get("confidence") or 0
        model = result.get("model_used", "unknown")
        return f" {domain}: {category} ({confidence:.3f}) - {model}"

    print("🚀 Piedomains JSON-Only Architecture Demo")
    print("=" * 50)

    # Test domains
    domains = ["example.com", "httpbin.org"]

    try:
        # Built inside the try block so that a setup failure is reported by
        # the handler below rather than crashing the script.
        classifier = DomainClassifier(cache_dir="demo_cache")

        print(f"\n🔤 Testing JSON API with {len(domains)} domains...")
        print("Domains:", domains)

        # Test the new JSON-only classify method
        results = classifier.classify(domains)

        print("\n✅ Classification complete!")
        print(f"Result type: {type(results)}")
        print(f"Number of results: {len(results)}")

        print("\n📊 Results:")
        for position, result in enumerate(results, start=1):
            # None-safe for the same reason as in _summary().
            confidence = result.get("confidence") or 0.0

            print(f"\n{position}. Domain: {result.get('domain', 'unknown')}")
            print(f" URL: {result.get('url', 'unknown')}")
            print(f" Category: {result.get('category', 'unknown')}")
            print(f" Confidence: {confidence:.3f}")
            print(f" Model Used: {result.get('model_used', 'unknown')}")
            print(
                f" Data Collection Time: {result.get('date_time_collected', 'unknown')}"
            )
            print(f" Text Path: {result.get('text_path', 'none')}")
            print(f" Image Path: {result.get('image_path', 'none')}")

            if result.get("error"):
                print(f" ❌ Error: {result['error']}")
            else:
                print(" ✅ Success")

        # Test different classification methods
        print("\n🔤 Testing text-only classification...")
        text_results = classifier.classify_by_text(domains)
        print(f"Text results: {len(text_results)} domains")
        for result in text_results:
            print(_summary(result))

        print("\n🖼️ Testing image-only classification...")
        image_results = classifier.classify_by_images(domains)
        print(f"Image results: {len(image_results)} domains")
        for result in image_results:
            print(_summary(result))

        # Show JSON structure
        print("\n📋 JSON Schema Example:")
        if results:
            print(json.dumps(results[0], indent=2))

        print("\n✅ Demo completed successfully!")
        print("\nKey improvements:")
        print("- 🗂️ Pure JSON output (no pandas dependency)")
        print("- 🔄 Unified data collection → inference pipeline")
        print("- 📁 Clear data file paths for debugging")
        print("- ♻️ Data reuse across multiple classification approaches")
        print("- 🌐 Language-agnostic JSON format")

    except Exception as e:
        # Top-level demo boundary: report the failure and keep the script alive.
        print(f"❌ Demo failed: {e}")
        traceback.print_exc()
        print("\nThis is expected if:")
        print("- Dependencies are missing")
        print("- Network is unavailable")
        print("- ML models aren't downloaded")
        print("\nThe demo shows the API structure even without full functionality.")
| 107 | + |
| 108 | + |
def demo_separated_workflow():
    """Show the separated data collection and inference workflow.

    Step 1 collects data for a fixed domain with ``DataCollector.collect``
    and prints the collection id and success/failure counts. Step 2 passes
    that collection to ``DomainClassifier.classify_from_collection`` with
    ``method="text"`` and prints one summary line per result.

    Any exception is reported with a traceback instead of being propagated.
    """
    print("\n" + "=" * 50)
    print("🔧 Separated Data Collection & Inference Demo")
    print("=" * 50)

    domains = ["httpbin.org"]

    try:
        print("\n📦 Step 1: Data Collection")
        collector = DataCollector(cache_dir="demo_separated")
        collection_data = collector.collect(domains)

        print("✅ Collection complete!")
        print(f" Collection ID: {collection_data['collection_id']}")
        print(f" Successful: {collection_data['summary']['successful']}")
        print(f" Failed: {collection_data['summary']['failed']}")

        print("\n🧠 Step 2: Classification")
        classifier = DomainClassifier()

        print("Running text classification on collected data...")
        results = classifier.classify_from_collection(collection_data, method="text")

        print("✅ Inference complete!")
        for result in results:
            # NOTE(review): a failed domain presumably carries explicit None
            # values rather than omitting the keys -- confirm against the
            # piedomains result schema. `or` keeps the intended fallbacks and
            # stops the ":.3f" format from raising TypeError on None.
            domain = result.get("domain", "unknown")
            category = result.get("category") or "error"
            confidence = result.get("confidence") or 0
            print(f" {domain}: {category} ({confidence:.3f})")

        print("\n♻️ Data Reuse: The same collected data can now be used with:")
        print(" - Different ML model versions")
        print(" - LLM-based classification")
        print(" - Ensemble approaches")
        print(" - External analysis tools")

    except Exception as e:
        # Top-level demo boundary: report the failure, with a traceback so
        # the cause can be diagnosed, and let the script finish normally.
        import traceback

        print(f"❌ Separated workflow demo failed: {e}")
        traceback.print_exc()
| 148 | + |
| 149 | + |
if __name__ == "__main__":
    # Run each demo in order; both handle and report their own failures.
    for run_demo in (demo_json_api, demo_separated_workflow):
        run_demo()
0 commit comments