diff --git a/.gitignore b/.gitignore index cf48326..a35bce9 100644 --- a/.gitignore +++ b/.gitignore @@ -69,4 +69,5 @@ yarn-error.log .yarn-integrity .idea/ -package-lock.json \ No newline at end of file +package-lock.json +.history \ No newline at end of file diff --git a/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/PKG-INFO b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/PKG-INFO new file mode 100644 index 0000000..ff65476 --- /dev/null +++ b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/PKG-INFO @@ -0,0 +1,141 @@ +Metadata-Version: 2.4 +Name: 13f-portfolio-visualization +Version: 0.1.0 +Summary: 13F Portfolio Data Extraction and Visualization Tool +Author-email: Finance Guru Team +Keywords: finance,portfolio,visualization,13f,investment +Classifier: Development Status :: 3 - Alpha +Classifier: Intended Audience :: Financial and Insurance Industry +Classifier: Topic :: Office/Business :: Financial :: Investment +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +Requires-Dist: pandas>=2.0.0 +Requires-Dist: plotly>=5.14.0 +Requires-Dist: numpy>=1.24.0 +Requires-Dist: requests>=2.28.0 +Requires-Dist: beautifulsoup4>=4.11.0 +Requires-Dist: lxml>=4.9.0 +Provides-Extra: dev +Requires-Dist: pytest>=7.0.0; extra == "dev" +Requires-Dist: black>=23.0.0; extra == "dev" +Requires-Dist: flake8>=6.0.0; extra == "dev" +Requires-Dist: mypy>=1.0.0; extra == "dev" + +# 13F Portfolio Visualization Tool + +A Python tool for fetching and visualizing institutional investor portfolio data from 13F filings via [13f.info](https://13f.info). 
+ +## Features + +- πŸ“Š **Data Extraction**: Automatically fetches portfolio data from 13f.info +- πŸ“ˆ **Interactive Visualizations**: + - Treemap visualization of portfolio holdings + - Sector allocation pie chart + - Top holdings bar chart +- πŸ“ **Comprehensive Reports**: Generates detailed markdown reports with portfolio metrics +- πŸ” **Portfolio Analysis**: + - Concentration metrics (HHI, Gini coefficient) + - Sector breakdown + - Top holdings analysis + +## Installation + +1. Clone the repository: +```bash +git clone +cd guru_portfolio +``` + +2. Install dependencies: +```bash +pip install -r requirements.txt +``` + +Or using the pyproject.toml: +```bash +pip install -e . +``` + +## Usage + +Basic usage: +```bash +python main.py "Company Name" "Quarter" +``` + +### Examples + +```bash +# Berkshire Hathaway Q3 2024 +python main.py "Berkshire Hathaway Inc" "Q3 2024" + +# Bridgewater Associates Q2 2024 with custom output directory +python main.py "Bridgewater Associates" "Q2 2024" --output-dir results/ + +# Scion Asset Management with top 30 holdings +python main.py "Scion Asset Management" "Q4 2023" --top-n 30 +``` + +### Command Line Options + +- `company`: Company name (must match exactly as shown on 13f.info) +- `quarter`: Quarter in format "Q1 2025" +- `--output-dir`, `-o`: Output directory for results (default: `output`) +- `--top-n`, `-n`: Number of top holdings to show in bar chart (default: 20) +- `--no-visualizations`: Skip creating visualizations (only generate report) + +## Output + +The tool generates the following files in the output directory: + +1. **portfolio_analysis.md**: Comprehensive markdown report including: + - Executive summary + - Portfolio metrics and concentration analysis + - Sector breakdown + - Top 20 holdings table + - Analysis notes + +2. **portfolio_treemap.html**: Interactive treemap visualization showing all holdings sized by market value + +3. 
**sector_allocation.html**: Pie chart showing portfolio allocation by sector + +4. **top_holdings.html**: Bar chart of top N holdings by portfolio weight + +## Example Report + +The generated markdown report includes: + +- Total portfolio value and number of positions +- Concentration metrics (HHI, Gini coefficient, top holdings concentration) +- Sector allocation breakdown +- Detailed holdings table with shares, value, and weights +- Analytical insights based on portfolio characteristics + +## Notes + +- Company names must match exactly as they appear on 13f.info +- 13F reports are typically filed 45 days after quarter end +- Only institutional investment managers with >$100M AUM are required to file 13F reports +- The tool uses web scraping, so it may need updates if the website structure changes + +## Dependencies + +- pandas: Data manipulation and analysis +- plotly: Interactive visualizations +- requests: HTTP requests for web scraping +- beautifulsoup4: HTML parsing +- lxml: XML/HTML parser +- numpy: Numerical computations + +## License + +[Your License Here] + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. 
diff --git a/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/SOURCES.txt b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/SOURCES.txt new file mode 100644 index 0000000..548b4c4 --- /dev/null +++ b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/SOURCES.txt @@ -0,0 +1,13 @@ +README.md +pyproject.toml +13f_portfolio_visualization.egg-info/PKG-INFO +13f_portfolio_visualization.egg-info/SOURCES.txt +13f_portfolio_visualization.egg-info/dependency_links.txt +13f_portfolio_visualization.egg-info/entry_points.txt +13f_portfolio_visualization.egg-info/requires.txt +13f_portfolio_visualization.egg-info/top_level.txt +modules/__init__.py +modules/data_fetcher.py +modules/data_processor.py +modules/report_generator.py +modules/visualizer.py \ No newline at end of file diff --git a/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/dependency_links.txt b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/entry_points.txt b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/entry_points.txt new file mode 100644 index 0000000..25c35b1 --- /dev/null +++ b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +portfolio-viz = main:main diff --git a/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/requires.txt b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/requires.txt new file mode 100644 index 0000000..28cf719 --- /dev/null +++ b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/requires.txt @@ -0,0 +1,12 @@ +pandas>=2.0.0 +plotly>=5.14.0 +numpy>=1.24.0 +requests>=2.28.0 +beautifulsoup4>=4.11.0 +lxml>=4.9.0 + +[dev] +pytest>=7.0.0 +black>=23.0.0 +flake8>=6.0.0 +mypy>=1.0.0 
diff --git a/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/top_level.txt b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/top_level.txt new file mode 100644 index 0000000..455f411 --- /dev/null +++ b/scripts/guru_portfolio/13f_portfolio_visualization.egg-info/top_level.txt @@ -0,0 +1 @@ +modules diff --git a/scripts/guru_portfolio/README.md b/scripts/guru_portfolio/README.md new file mode 100644 index 0000000..71bfa1f --- /dev/null +++ b/scripts/guru_portfolio/README.md @@ -0,0 +1,180 @@ +# 13F Portfolio Visualization Tool + +A Python tool for fetching and visualizing institutional investor portfolio data from 13F filings via [13f.info](https://13f.info). + +## Features + +- 📊 **Data Extraction**: Automatically fetches portfolio data from 13f.info +- 📈 **Interactive Visualizations**: + - Treemap visualization of portfolio holdings + - Sector allocation pie chart + - Top holdings bar chart + - Performance comparison with S&P 500 benchmark + - Quarterly returns comparison + - Risk-adjusted metrics visualization +- 📝 **Comprehensive Reports**: Generates detailed markdown reports with portfolio metrics +- 🔍 **Portfolio Analysis**: + - Concentration metrics (HHI, Gini coefficient) + - Sector breakdown + - Top holdings analysis + - Performance vs S&P 500 benchmark + - Risk metrics (Sharpe ratio, beta, volatility) + +## Installation + +1. Clone the repository: +```bash +git clone <repository-url> +cd guru_portfolio +``` + +2. Install dependencies: +```bash +pip install -r requirements.txt +``` + +Or using the pyproject.toml: +```bash +pip install -e . 
+``` + +## Usage + +Basic usage: +```bash +python main.py "Company Name" "Quarter" +``` + +### Examples + +```bash +# Berkshire Hathaway Q3 2024 +python main.py "Berkshire Hathaway Inc" "Q3 2024" + +# Bridgewater Associates Q2 2024 with custom output directory +python main.py "Bridgewater Associates" "Q2 2024" --output-dir results/ + +# Scion Asset Management with top 30 holdings +python main.py "Scion Asset Management" "Q4 2023" --top-n 30 + +# ARK Invest with S&P 500 performance comparison (4 quarters lookback) +python main.py "ARK Invest" "Q3 2024" --compare-sp500 --lookback-quarters 4 + +# Berkshire Hathaway with extended performance history (8 quarters) +python main.py "Berkshire Hathaway Inc" "Q3 2024" --compare-sp500 --lookback-quarters 8 --save-html +``` + +### Command Line Options + +- `company`: Company name (must match exactly as shown on 13f.info) +- `quarter`: Quarter in format "Q1 2025" +- `--output-dir`, `-o`: Output directory for results (default: `output`) +- `--top-n`, `-n`: Number of top holdings to show in bar chart (default: 20) +- `--no-visualizations`: Skip creating visualizations (only generate report) +- `--save-html`: Also save visualizations as interactive HTML files +- `--compare-sp500`: Enable performance comparison with S&P 500 benchmark +- `--lookback-quarters`: Number of quarters to look back for performance comparison (default: 4) + +## Output + +The tool generates the following files in the output directory: + +### Standard Output Files + +1. **portfolio_analysis.md**: Comprehensive markdown report including: + - Executive summary + - Portfolio metrics and concentration analysis + - Sector breakdown + - Top 20 holdings table + - Analysis notes + - Embedded PNG visualizations + +2. **portfolio_treemap.png**: High-resolution treemap visualization showing all holdings sized by market value + +3. **sector_allocation.png**: Pie chart showing portfolio allocation by sector + +4. 
**top_holdings.png**: Bar chart of top N holdings by portfolio weight + +### Performance Comparison Files (with `--compare-sp500`) + +5. **performance_comparison.png**: Line chart comparing portfolio performance vs S&P 500 over time + +6. **returns_comparison.png**: Bar chart showing quarterly returns comparison + +7. **risk_metrics.png**: Bar chart comparing risk-adjusted metrics: + - Total return + - Volatility (annualized) + - Sharpe ratio + - Beta (if sufficient data) + +### Interactive HTML Files (with `--save-html`) +- Interactive HTML versions of all visualizations for enhanced exploration + +## Performance Comparison Feature + +When using `--compare-sp500`, the tool: + +1. **Simulates Historical Portfolio Values**: Creates a simplified historical view based on current holdings +2. **Fetches S&P 500 Data**: Downloads benchmark data from Yahoo Finance +3. **Calculates Performance Metrics**: + - Total return over the period + - Quarterly returns + - Cumulative returns + - Risk-adjusted performance (Sharpe ratio) + - Portfolio beta vs market +4. **Generates Comparison Visualizations**: Creates clear charts showing relative performance + +### Performance Metrics Explained + +- **Total Return**: Percentage change from start to end of period +- **Volatility**: Standard deviation of returns (annualized) +- **Sharpe Ratio**: Risk-adjusted return metric (higher is better) +- **Beta**: Measure of portfolio's volatility relative to S&P 500 +- **Relative Performance**: Difference between portfolio and S&P 500 returns + +## Testing + +Run the test script to see the performance comparison feature in action: + +```bash +python test_performance_comparison.py +``` + +This will test the feature with multiple well-known investment firms and generate sample outputs. 
+ +## Example Report + +The generated markdown report includes: + +- Total portfolio value and number of positions +- Concentration metrics (HHI, Gini coefficient, top holdings concentration) +- Sector allocation breakdown +- Detailed holdings table with shares, value, and weights +- Analytical insights based on portfolio characteristics + +## Notes + +- Company names must match exactly as they appear on 13f.info +- 13F reports are due within 45 days after quarter end, so the most recent quarter may not be available yet +- Only institutional investment managers with at least $100M in Section 13(f) securities are required to file 13F reports +- The tool uses web scraping, so it may need updates if the website structure changes +- Performance comparison uses simplified assumptions and should not be used for actual investment decisions + +## Dependencies + +- pandas: Data manipulation and analysis +- plotly: Interactive visualizations +- requests: HTTP requests for web scraping +- beautifulsoup4: HTML parsing +- lxml: XML/HTML parser +- numpy: Numerical computations +- kaleido: Static image export for plotly +- yfinance: Yahoo Finance data for S&P 500 benchmark + +## License + +[Your License Here] + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. 
\ No newline at end of file diff --git a/scripts/guru_portfolio/demo_output.md b/scripts/guru_portfolio/demo_output.md new file mode 100644 index 0000000..e7310df --- /dev/null +++ b/scripts/guru_portfolio/demo_output.md @@ -0,0 +1,183 @@ +# 13F Portfolio Visualization Tool - Demo Output + +This demonstrates what the tool produces when you run: + +```bash +python main.py "Berkshire Hathaway Inc" "Q3 2024" +``` + +## Console Output + +``` +============================================================ +13F Portfolio Analysis Tool +============================================================ +Company: Berkshire Hathaway Inc +Quarter: Q3 2024 +Output Directory: output +============================================================ + +πŸ“Š Fetching portfolio data from 13f.info... +βœ… Successfully fetched 45 holdings + +πŸ”„ Processing portfolio data... +βœ… Data processed successfully + - Total value: $313,259,486,000 + - Total positions: 45 + - Top 10 concentration: 87.5% + +πŸ“ˆ Creating visualizations... + - Creating portfolio treemap... + - Creating sector allocation chart... + - Creating top holdings chart... +βœ… Saved portfolio_treemap.png +βœ… Saved sector_allocation.png +βœ… Saved top_holdings.png +βœ… Visualizations saved + +πŸ“ Generating analysis report... +βœ… Report saved to: output/portfolio_analysis.md + +============================================================ +✨ Analysis completed successfully! +============================================================ + +Results saved in: /Users/user/WebstormProjects/finance-guru/scripts/guru_portfolio/output/ + +Generated files: + πŸ“„ portfolio_analysis.md - Detailed analysis report + πŸ–ΌοΈ portfolio_treemap.png - Portfolio treemap visualization + πŸ–ΌοΈ sector_allocation.png - Sector breakdown pie chart + πŸ–ΌοΈ top_holdings.png - Top holdings bar chart +``` + +## Generated Files + +### 1. 
portfolio_analysis.md (Sample) + +```markdown +# 13F Portfolio Analysis Report + +**Company**: Berkshire Hathaway Inc +**Quarter**: Q3 2024 +**Generated**: 2024-12-31 10:30:00 + +--- + +## Executive Summary + +- **Total Portfolio Value**: $313,259,486,000 +- **Number of Positions**: 45 +- **Top 10 Concentration**: 87.5% +- **Largest Position**: Apple Inc (30.1%) + +## Portfolio Metrics + +### Concentration Analysis +- **Top 5 Holdings Weight**: 69.8% +- **Top 20 Holdings Weight**: 95.8% +- **Herfindahl-Hirschman Index (HHI)**: 0.1234 +- **Effective Number of Positions**: 8.1 +- **Gini Coefficient**: 0.823 + +### Sector Allocation + +| Sector | Weight (%) | +|--------|------------| +| Technology | 45.3% | +| Financials | 28.7% | +| Consumer Staples | 12.4% | +| Energy | 8.9% | +| Healthcare | 2.8% | +| Other | 1.9% | + +## Top 20 Holdings + +| Rank | Security | Shares | Value | Weight (%) | +|------|----------|--------|-------|------------| +| 1 | Apple Inc | 915,560,382 | $94,345.2M | 30.12% | +| 2 | Bank of America Corp | 1,032,852,006 | $41,099.3M | 13.12% | +| 3 | American Express Co | 151,610,700 | $35,987.5M | 11.49% | +| 4 | Coca-Cola Co | 400,000,000 | $28,700.0M | 9.16% | +| 5 | Chevron Corp | 123,080,996 | $18,607.8M | 5.94% | +... +``` + +### 2. portfolio_treemap.png +- High-resolution treemap showing all holdings +- Hierarchical view: Portfolio → Sector → Individual stocks +- Color-coded by portfolio weight +- Size represents market value + +### 3. sector_allocation.png +- Pie chart showing sector breakdown +- Clear percentage labels +- Color-coded sectors + +### 4. top_holdings.png +- Horizontal bar chart of top 20 holdings +- Sorted by portfolio weight +- Shows both percentage and dollar values + +## Visualizations + +The following visualizations have been generated: + +1. **Portfolio Treemap** (`portfolio_treemap.png`): Treemap showing all holdings sized by market value +2. 
**Sector Allocation Pie Chart** (`sector_allocation.png`): Breakdown of portfolio by sector +3. **Top Holdings Bar Chart** (`top_holdings.png`): Bar chart of top 20 holdings by weight + +### Portfolio Treemap +![Portfolio Treemap](portfolio_treemap.png) + +### Sector Allocation +![Sector Allocation](sector_allocation.png) + +### Top Holdings +![Top Holdings](top_holdings.png) + +## Analysis Notes + +... +``` + +## Key Features Demonstrated + +1. **Data Extraction**: Automatically fetches data from 13f.info +2. **Portfolio Metrics**: Calculates concentration metrics, HHI, Gini coefficient +3. **Sector Analysis**: Automatically categorizes holdings by sector +4. **Interactive Visualizations**: Creates plotly-based interactive charts +5. **Comprehensive Reporting**: Generates detailed markdown reports + +## Error Handling + +If the company or quarter is not found, the tool generates an error report with suggestions: + +```markdown +# 13F Portfolio Analysis Report - Error + +**Company**: XYZ Company +**Quarter**: Q5 2024 +**Generated**: 2024-12-31 10:30:00 + +--- + +## Error + +Unable to fetch portfolio data for the specified company and quarter. + +**Error Details**: Company 'XYZ Company' not found + +## Possible Reasons + +1. The company name might not exactly match the name in the 13F database +2. The specified quarter might not have been filed yet +3. The company might not be required to file 13F reports +4. 
Network connection issues + +## Suggestions + +- Try searching for the company on [13f.info](https://13f.info) to find the exact name +- Verify that the quarter has been filed (13F reports are filed 45 days after quarter end) +- Check if the company is an institutional investment manager with >$100M AUM +``` \ No newline at end of file diff --git a/scripts/guru_portfolio/docs/prd.md b/scripts/guru_portfolio/docs/prd.md new file mode 100644 index 0000000..5a64b07 --- /dev/null +++ b/scripts/guru_portfolio/docs/prd.md @@ -0,0 +1,141 @@ +# 주식 λŒ€κ°€μ˜ 뢄기별 포트폴리였 변화에 λΈ”λ‘œκ·Έ μž‘μ„± μžλ™ν™” + +## λͺ©μ  + +주식 λŒ€κ°€μ˜ 뢄기별 포트폴리였 λ³€ν™”λ₯Ό λΈ”λ‘œκ·Έμ— μžλ™μœΌλ‘œ μž‘μ„±ν•˜λŠ” 것을 λͺ©ν‘œλ‘œ ν•©λ‹ˆλ‹€. 이λ₯Ό 톡해 νˆ¬μžμžλ“€μ΄ λŒ€κ°€λ“€μ˜ 투자 μ „λž΅μ„ μ‰½κ²Œ μ΄ν•΄ν•˜κ³  μ°Έκ³ ν•  수 μžˆλ„λ‘ ν•©λ‹ˆλ‹€. + +## μš”κ΅¬μ‚¬ν•­ + +### 1. λΈ”λ‘œκ·Έ ν¬μŠ€νŒ… μžλ™ 생성 κΈ° 개발 +- 주식 λŒ€κ°€μ˜ 뢄기별 포트폴리였의 변화에 λŒ€ν•΄μ„œ λΈ”λ‘œκ·Έ ν¬μŠ€νŒ…ν•  λ‚΄μš©μ„ μžλ™ μƒμ„±ν•œλ‹€ + - μ°Έκ³ : https://13f.info/ +- conf νŒŒμΌμ— μž…λ ₯값을 미리 λ„£μ–΄λ‘”λ‹€. + - λΉ„κ΅ν•˜λ €λŠ” 년도 & λΆ„κΈ° (ex. Berkshire Hathaway, 2025 Q1 vs 2024 Q4) + - μ—¬λŸ¬ νšŒμ‚¬κ°€ μžˆλŠ” κ²½μš°μ—λŠ” 별도 폴더에 md 파일둜 μž‘μ„±μ„ ν•œλ‹€. + +#### Config 파일 ꡬ쑰 +```yaml +# config.yaml μ˜ˆμ‹œ +companies: + - name: "Berkshire Hathaway Inc" + quarters: + current: "2025 Q1" + previous: "2024 Q4" + - name: "ARK Invest" + quarters: + current: "2025 Q1" + previous: "2024 Q4" + +output: + base_dir: "blog_posts" + image_format: "png" + language: "ko" +``` + +### 2. λͺ©μ°¨ +λΈ”λ‘œκ·Έμ— ν•„μš”ν•œ λ‚΄μš©μ€ λ‹€μŒκ³Ό κ°™λ‹€. + +- 1. κ°œμš” + - μž…λ ₯ν•  λ‚΄μš©: ν•΄λ‹Ή νšŒμ‚¬μ˜ CEO 의 이미지 사진을 λ„£λŠ”λ‹€ + - CEO μ΄λ―Έμ§€λŠ” ꡬ글 이미지 검색을 톡해 5개의 후보λ₯Ό μ°Ύμ•„ μ‚¬μš©μžμ—κ²Œ 보여주고 μ„ νƒν•˜λ„λ‘ ν•œλ‹€ + - μ„ νƒν•œ μ΄λ―Έμ§€λŠ” λ‘œμ»¬μ— μ €μž₯ν•˜μ—¬ μ‚¬μš©ν•œλ‹€ + - 이 μ‚¬λžŒμ΄ μ–΄λ–€ 뢄인지 κ°„λ‹¨ν•˜κ²Œ 5 쀄 μ •λ„λ‘œ μ„€λͺ…ν•œλ‹€ +- 2. 
####λ…„ #λΆ„κΈ° 포트폴리였 + - 2.1 ν˜„μž¬ 포트폴리였 + - 2.1.1 Top Holidings + - μž…λ ₯ν•  λ‚΄μš©: Pie Chart 와 Top holings by Weight Chartλ₯Ό λ¨Όμ € 보여주고 이 μ°¨νŠΈμ— λŒ€ν•΄μ„œ κ°„λ‹¨ν•˜κ²Œ μ£Όμš” λ‚΄μš©λ§Œ μ–ΈκΈ‰ν•œλ‹€. + - 2.1.2 섹터별 + - μž…λ ₯ν•  λ‚΄μš©: 주식 비쀑이 μ„Ήν„°λ³„λ‘œ μ–΄λ–»κ²Œ λ˜λŠ”μ§€ Table, Pie chart둜 보여주고 λ‚΄μš©μ— λŒ€ν•΄μ„œλ„ μš”μ•½μ„ ν•΄μ€˜ + - 2.1.3 전체 포트폴리였 λͺ©λ‘ + - μž…λ ₯ν•  λ‚΄μš©: ν…Œμ΄λΈ” ν˜•νƒœλ‘œ μž…λ ₯ν•œλ‹€ (숫자, 티컀, νšŒμ‚¬λͺ…, μ„Ήν„°, 비쀑, λ³΄μœ μ΄κΈˆμ•‘, 보유 주식 수, λΉ„κ³ -μ–΄λ–€ νšŒμ‚¬μΈμ§€ μ–ΈκΈ‰) + - 티컀에 tradingview 링크λ₯Ό μΆ”κ°€ν•΄μ„œ ν΄λ¦­μ‹œ λ°”λ‘œ 링크둜 갈 수 μžˆλ„λ‘ ν•œλ‹€ + - ν…Œμ΄λΈ” μ •λ ¬ μ˜΅μ…˜: λΉ„μ€‘μˆœ, μ•ŒνŒŒλ²³μˆœ, 섹터별 + - λ³€ν™” ν‘œμ‹œ: μ‹ κ·œ(πŸ†•), 증가(πŸ“ˆ), κ°μ†Œ(πŸ“‰), μ²­μ‚°(❌) + - 숫자 포맷: 천 λ‹¨μœ„ ꡬ뢄, νΌμ„ΌνŠΈ μ†Œμˆ˜μ  2자리 + - 2.2 포트폴리였 변화에 λŒ€ν•œ μš”μ•½ + - μž…λ ₯ν•œ λ‚΄μš©: μƒˆλ‘œ μΆ”κ°€ν•œ μ’…λͺ©, 비쀑을 쀄인 μ’…λͺ©, μΆ”κ°€λ‘œ λ§€μˆ˜ν•œ μ’…λͺ¨, μ²­μ‚°ν•œ μ’…λͺ©μ„ κΈ€ 외에도 κ·Έλž˜ν”„λ‘œ ν‘œν˜„ν•  수 있으면 μ’‹κ² λ‹€ + - 2.3 S&P500 vs λ‚˜μŠ€λ‹₯ vs #### μ„±λŠ₯ 비ꡐ + - μž…λ ₯ν•œ λ‚΄μš©: ν•˜λ‚˜μ˜ chart에 3개의 티컀λ₯Ό 비ꡐ할 수 μžˆλ„λ‘ κ·Έλž˜ν”„λ₯Ό 생성 + - 비ꡐ κΈ°κ°„: 졜근 1λ…„ or μ„€μ • κ°€λŠ₯ν•œ κΈ°κ°„ + - 포트폴리였 μ„±λŠ₯ 계산 방법 λͺ…μ‹œ ν•„μš” + - 리슀크 μ§€ν‘œ μΆ”κ°€ (변동성, 샀프 λΉ„μœ¨ λ“±) + - 주의: 13FλŠ” 뢄기별 μŠ€λƒ…μƒ·μ΄λ―€λ‘œ μ‹€μ œ 수읡λ₯ κ³Ό 차이 있음 +- 3. 마무리 + - μž…λ ₯ν•  λ‚΄μš©: 포트폴리였의 λ³€ν™”λ₯Ό μš”μ•½ν•˜κ³  이 νšŒμ‚¬λŠ” 어디에 집쀑을 ν•˜κ³  있고 μ™œ 이런 선택을 ν–ˆλŠ”μ§€μ— λŒ€ν•œ λ‚΄μš©μ„ μž‘μ„±ν•œλ‹€ + +### 3. 
SEO μ΅œμ ν™” +- 제λͺ©: "[νšŒμ‚¬λͺ…] [연도] [λΆ„κΈ°] 포트폴리였 뢄석" +- 메타 μ„€λͺ… μžλ™ 생성 +- ν‚€μ›Œλ“œ μžλ™ μΆ”μΆœ +- Open Graph νƒœκ·Έ 지원 + +## 데이터 μ œμ•½μ‚¬ν•­ +- 13F λ³΄κ³ μ„œλŠ” λΆ„κΈ° μ’…λ£Œ ν›„ 45일 이내에 μ œμΆœλ˜λ―€λ‘œ, μ΅œμ‹  λΆ„κΈ° 데이터가 없을 수 있음 +- 포트폴리였 λ³€ν™” λΉ„κ΅λŠ” 두 λΆ„κΈ°μ˜ 13F 데이터가 λͺ¨λ‘ μžˆμ–΄μ•Ό κ°€λŠ₯ +- $100M μ΄μƒμ˜ ν¬μ§€μ…˜λ§Œ λ³΄κ³ λ˜λ―€λ‘œ μ†Œκ·œλͺ¨ νˆ¬μžλŠ” ν¬ν•¨λ˜μ§€ μ•ŠμŒ + +## μ—λŸ¬ 처리 및 μ˜ˆμ™Έ 상황 +- 데이터λ₯Ό κ°€μ Έμ˜¬ 수 μ—†λŠ” 경우 (νšŒμ‚¬λͺ… 였λ₯˜, λΆ„κΈ° 데이터 μ—†μŒ) +- CEO 이미지 검색 μ‹€νŒ¨ μ‹œ κΈ°λ³Έ 이미지 μ‚¬μš© λ˜λŠ” 이미지 없이 μ§„ν–‰ +- 이전 λΆ„κΈ° 데이터가 μ—†μ–΄ 비ꡐ가 λΆˆκ°€λŠ₯ν•œ 경우 +- API ν•œλ„ 초과 μ‹œ 처리 방법 +- ꡬ글 이미지 검색 API ν•œλ„ λ˜λŠ” 였λ₯˜ 처리 + +## 차트 λ””μžμΈ κ°€μ΄λ“œλΌμΈ +- μΌκ΄€λœ 색상 νŒ”λ ˆνŠΈ μ‚¬μš© +- 차트 크기: 1200x800px (고해상도) +- 폰트: ν•œκΈ€ 지원 폰트 μ‚¬μš© +- μ›Œν„°λ§ˆν¬ λ˜λŠ” 좜처 ν‘œμ‹œ + +## κ΅¬ν˜„ μ§€μΉ¨ + +- μ½”λ“œλŠ” python으둜 μž‘μ„±μ„ ν•œλ‹€ +- λΈ”λ‘œκ·Έ μƒμ„±μ‹œ μ•„λž˜ 폴더/파일 이름 νŒ¨ν„΄μœΌλ‘œ 생성을 ν•œλ‹€ + - index.md (markdown 포멧) + - 폴더 이름은 νšŒμ‚¬λͺ…μœΌλ‘œ 생성 + - μ΄λ―Έμ§€λŠ” 같은 폴더에 λ‘”λ‹€ +- λΈ”λ‘œκ·Έ λ‚΄μš©μ€ ν•œκΈ€λ‘œ μž‘μ„±ν•œλ‹€ +- AI μ‚¬μš©μ΄ ν•„μš”ν•˜λ©΄ OpenAIλ₯Ό μ‚¬μš©ν•˜κ³  API KEYλŠ” OS ν™˜κ²½λ³€μˆ˜λ₯Ό μ‚¬μš©ν•œλ‹€ +- κ·Έλž˜ν”„ μƒμ„±μ‹œ plotly libraryλ₯Ό μ‚¬μš©ν•˜κ³  png μ΄λ―Έμ§€λ‘œ μƒμ„±ν•œλ‹€ +- 사전 ν…ŒμŠ€νŠΈ μ½”λ“œλŠ” modules 폴더에 일단 μž‘μ„±μ„ ν•΄λ‘μ—ˆλ‹€ + +### 기술 μŠ€νƒ +- Python 3.8+ +- ν•„μˆ˜ 라이브러리: + - pandas: 데이터 처리 + - plotly: 차트 생성 + - yfinance: μ£Όκ°€ 데이터 + - beautifulsoup4: μ›Ή μŠ€ν¬λž˜ν•‘ + - requests: HTTP μš”μ²­ + - Pillow: CEO 이미지 처리 + - pyyaml: config 파일 처리 + - jinja2: ν…œν”Œλ¦Ώ μ—”μ§„ (λ§ˆν¬λ‹€μš΄ μƒμ„±μš©) + - openai: AI ν…μŠ€νŠΈ 생성 + - google-api-python-client: ꡬ글 이미지 검색 (λ˜λŠ” λŒ€μ²΄ 방법) + +### CEO 이미지 검색 κ΅¬ν˜„ λ°©μ•ˆ +1. **Google Custom Search API μ‚¬μš© (ꢌμž₯)** + - API ν‚€ ν•„μš” (ν™˜κ²½λ³€μˆ˜λ‘œ 관리) + - 일일 검색 ν•œλ„: 100회 (무료) + - "[νšŒμ‚¬λͺ…] CEO" ν‚€μ›Œλ“œλ‘œ 검색 + +2. 
**λŒ€μ²΄ λ°©μ•ˆ** + - Bing Image Search API + - Web scraping (법적 μ œμ•½ 확인 ν•„μš”) + - 사전 κ΅¬μΆ•λœ CEO 이미지 λ°μ΄ν„°λ² μ΄μŠ€ ν™œμš© + +3. **μ‚¬μš©μž μΈν„°νŽ˜μ΄μŠ€** + - ν„°λ―Έλ„μ—μ„œ 이미지 URL 5개 ν‘œμ‹œ + - 번호 선택 방식 (1-5) + - μ„ νƒν•œ 이미지 λ‹€μš΄λ‘œλ“œ 및 μ €μž₯ + - 이미지 크기 μžλ™ μ‘°μ • (400x400px) + +## 선택적 μΆ”κ°€ κΈ°λŠ₯ +- μ†Œμ…œ λ―Έλ””μ–΄ 곡유용 μš”μ•½ 이미지 생성 +- 포트폴리였 λ³€ν™” μ•Œλ¦Ό (μž„κ³„κ°’ μ„€μ •) + +## ν…ŒμŠ€νŠΈ μš”κ΅¬μ‚¬ν•­ +- κΈ°λŠ₯ ν•˜λ‚˜μ”© κ°œλ°œν•˜λ©΄μ„œ 잘 κ΅¬ν˜„μ΄ λ˜μ—ˆλŠ”μ§€ ν…ŒμŠ€νŠΈλ‘œ ν™•μΈν•œλ‹€ +- λ‹¨μœ„ ν…ŒμŠ€νŠΈ μž‘μ„± +- 톡합 ν…ŒμŠ€νŠΈ μ‹œλ‚˜λ¦¬μ˜€ +- μ—λŸ¬ μΌ€μ΄μŠ€ ν…ŒμŠ€νŠΈ \ No newline at end of file diff --git a/scripts/guru_portfolio/docs/todo.md b/scripts/guru_portfolio/docs/todo.md new file mode 100644 index 0000000..1f4e1a5 --- /dev/null +++ b/scripts/guru_portfolio/docs/todo.md @@ -0,0 +1,137 @@ +# 13F 포트폴리였 λΈ”λ‘œκ·Έ μžλ™ν™” TODO List + +## 🎯 ν”„λ‘œμ νŠΈ κ°œμš” +주식 λŒ€κ°€μ˜ 뢄기별 포트폴리였 λ³€ν™”λ₯Ό λΆ„μ„ν•˜κ³  λΈ”λ‘œκ·Έ 포슀트λ₯Ό μžλ™ μƒμ„±ν•˜λŠ” μ‹œμŠ€ν…œ ꡬ좕 + +## πŸ“‹ TODO List + +### 1. ν”„λ‘œμ νŠΈ 초기 μ„€μ • +- [ ] ν”„λ‘œμ νŠΈ ꡬ쑰 생성 (blog_posts 폴더 λ“±) +- [ ] requirements.txt μ—…λ°μ΄νŠΈ (μΆ”κ°€ 라이브러리) +- [ ] ν™˜κ²½λ³€μˆ˜ μ„€μ • 파일 생성 (.env.example) + - [ ] OPENAI_API_KEY + - [ ] GOOGLE_API_KEY (Custom Search) + - [ ] GOOGLE_CSE_ID (Custom Search Engine ID) + +### 2. Config μ‹œμŠ€ν…œ κ΅¬ν˜„ +- [ ] YAML config 파일 νŒŒμ„œ κ΅¬ν˜„ +- [ ] Config 데이터 검증 둜직 +- [ ] μƒ˜ν”Œ config.yaml 파일 생성 +- [ ] Config λ‘œλ” λͺ¨λ“ˆ 개발 + +### 3. CEO 이미지 검색 κΈ°λŠ₯ +- [ ] Google Custom Search API 연동 λͺ¨λ“ˆ +- [ ] 이미지 검색 κΈ°λŠ₯ κ΅¬ν˜„ ("[νšŒμ‚¬λͺ…] CEO" ν‚€μ›Œλ“œ) +- [ ] 터미널 UI둜 5개 이미지 선택 κΈ°λŠ₯ +- [ ] 이미지 λ‹€μš΄λ‘œλ“œ 및 μ €μž₯ κΈ°λŠ₯ +- [ ] 이미지 λ¦¬μ‚¬μ΄μ¦ˆ (400x400px) +- [ ] λŒ€μ²΄ λ°©μ•ˆ κ΅¬ν˜„ (Bing API λ˜λŠ” κΈ°λ³Έ 이미지) + +### 4. 데이터 비ꡐ 뢄석 κΈ°λŠ₯ +- [ ] 두 λΆ„κΈ° 포트폴리였 데이터 비ꡐ λͺ¨λ“ˆ +- [ ] μ‹ κ·œ μ’…λͺ© μΆ”μΆœ +- [ ] μ²­μ‚° μ’…λͺ© μΆ”μΆœ +- [ ] 비쀑 λ³€ν™” 계산 (증가/κ°μ†Œ) +- [ ] 포트폴리였 λ³€ν™” μ‹œκ°ν™” 차트 + +### 5. 
λΈ”λ‘œκ·Έ 컨텐츠 생성 μ—”μ§„ +- [ ] Jinja2 ν…œν”Œλ¦Ώ μ‹œμŠ€ν…œ ꡬ좕 +- [ ] λ§ˆν¬λ‹€μš΄ ν…œν”Œλ¦Ώ 파일 생성 + - [ ] index.md 메인 ν…œν”Œλ¦Ώ + - [ ] μ„Ήμ…˜λ³„ λΆ€λΆ„ ν…œν”Œλ¦Ώ + +### 6. AI ν…μŠ€νŠΈ 생성 톡합 +- [ ] OpenAI API 연동 λͺ¨λ“ˆ +- [ ] CEO μ†Œκ°œ ν…μŠ€νŠΈ 생성 (5쀄) +- [ ] 포트폴리였 뢄석 μš”μ•½ 생성 +- [ ] 섹터별 뢄석 ν…μŠ€νŠΈ 생성 +- [ ] 투자 μ „λž΅ μΈμ‚¬μ΄νŠΈ 생성 +- [ ] API 호좜 μ΅œμ ν™” 및 캐싱 + +### 7. 차트 생성 κ°œμ„  +- [ ] ν†΅μΌλœ 색상 νŒ”λ ˆνŠΈ μ •μ˜ +- [ ] ν•œκΈ€ 폰트 μ„€μ • (λ‚˜λˆ”κ³ λ”• λ“±) +- [ ] 차트 크기 ν‘œμ€€ν™” (1200x800px) +- [ ] μ›Œν„°λ§ˆν¬/좜처 ν‘œμ‹œ μΆ”κ°€ + +#### 7.1 μ‹ κ·œ 차트 개발 +- [ ] 포트폴리였 λ³€ν™” 비ꡐ 차트 +- [ ] S&P500 vs λ‚˜μŠ€λ‹₯ vs 포트폴리였 μ„±λŠ₯ 비ꡐ +- [ ] 섹터별 ν…Œμ΄λΈ” 차트 +- [ ] λ³€ν™” ν‘œμ‹œ μ•„μ΄μ½˜ 적용 (πŸ†•πŸ“ˆπŸ“‰βŒ) + +### 8. ν…Œμ΄λΈ” 생성 κΈ°λŠ₯ +- [ ] 전체 포트폴리였 ν…Œμ΄λΈ” 생성 +- [ ] TradingView 링크 생성 κΈ°λŠ₯ +- [ ] 숫자 ν¬λ§·νŒ… (천 λ‹¨μœ„, μ†Œμˆ˜μ ) +- [ ] μ •λ ¬ κΈ°λŠ₯ κ΅¬ν˜„ +- [ ] νšŒμ‚¬ μ„€λͺ… μžλ™ 생성 + +### 9. SEO μ΅œμ ν™” +- [ ] 메타데이터 생성 λͺ¨λ“ˆ +- [ ] 제λͺ© νŒ¨ν„΄ 적용 +- [ ] ν‚€μ›Œλ“œ μžλ™ μΆ”μΆœ +- [ ] Open Graph νƒœκ·Έ 생성 + +### 10. 메인 μ‹€ν–‰ 슀크립트 +- [ ] CLI μΈν„°νŽ˜μ΄μŠ€ 개발 +- [ ] Config 파일 읽기 +- [ ] 각 νšŒμ‚¬λ³„ 처리 루프 +- [ ] 폴더 ꡬ쑰 생성 (νšŒμ‚¬λͺ… 폴더) +- [ ] 이미지 파일 정리 + +### 11. μ—λŸ¬ 처리 및 λ‘œκΉ… +- [ ] λ‘œκΉ… μ‹œμŠ€ν…œ ꡬ좕 +- [ ] μ—λŸ¬ 처리 둜직 + - [ ] 13F 데이터 μ—†μŒ + - [ ] API ν•œλ„ 초과 + - [ ] λ„€νŠΈμ›Œν¬ 였λ₯˜ +- [ ] μž¬μ‹œλ„ 둜직 κ΅¬ν˜„ +- [ ] μ§„ν–‰ μƒνƒœ ν‘œμ‹œ + +### 12. ν…ŒμŠ€νŠΈ μž‘μ„± +- [ ] λ‹¨μœ„ ν…ŒμŠ€νŠΈ + - [ ] Config νŒŒμ„œ ν…ŒμŠ€νŠΈ + - [ ] 데이터 비ꡐ 둜직 ν…ŒμŠ€νŠΈ + - [ ] 차트 생성 ν…ŒμŠ€νŠΈ +- [ ] 톡합 ν…ŒμŠ€νŠΈ + - [ ] 전체 μ›Œν¬ν”Œλ‘œμš° ν…ŒμŠ€νŠΈ + - [ ] μƒ˜ν”Œ λ°μ΄ν„°λ‘œ E2E ν…ŒμŠ€νŠΈ +- [ ] μ—λŸ¬ μΌ€μ΄μŠ€ ν…ŒμŠ€νŠΈ + +### 13. λ¬Έμ„œν™” +- [ ] README.md μ—…λ°μ΄νŠΈ +- [ ] API ν‚€ μ„€μ • κ°€μ΄λ“œ +- [ ] μ‚¬μš© 예제 μž‘μ„± +- [ ] νŠΈλŸ¬λΈ”μŠˆνŒ… κ°€μ΄λ“œ + +### 14. 선택적 κΈ°λŠ₯ (Phase 2) +- [ ] μ†Œμ…œ λ―Έλ””μ–΄ 이미지 생성 +- [ ] 포트폴리였 λ³€ν™” μ•Œλ¦Ό +- [ ] 배치 처리 μŠ€μΌ€μ€„λŸ¬ +- [ ] μ›Ή λŒ€μ‹œλ³΄λ“œ + +## πŸš€ 개발 μš°μ„ μˆœμœ„ + +### Phase 1 (핡심 κΈ°λŠ₯) +1. 
Config μ‹œμŠ€ν…œ +2. CEO 이미지 검색 +3. 데이터 비ꡐ 뢄석 +4. κΈ°λ³Έ 차트 생성 +5. λΈ”λ‘œκ·Έ ν…œν”Œλ¦Ώ 생성 + +### Phase 2 (ν–₯상 κΈ°λŠ₯) +1. AI ν…μŠ€νŠΈ 생성 +2. κ³ κΈ‰ 차트 κΈ°λŠ₯ +3. SEO μ΅œμ ν™” +4. μ—λŸ¬ 처리 κ°•ν™” + +### Phase 3 (μΆ”κ°€ κΈ°λŠ₯) +1. 선택적 κΈ°λŠ₯ κ΅¬ν˜„ +2. μ„±λŠ₯ μ΅œμ ν™” +3. μ‚¬μš©μž κ²½ν—˜ κ°œμ„  + +## πŸ“ 참고사항 +- 각 κΈ°λŠ₯은 λͺ¨λ“ˆν™”ν•˜μ—¬ 개발 +- ν…ŒμŠ€νŠΈ μ½”λ“œμ™€ ν•¨κ»˜ 개발 +- μ§„ν–‰ 상황은 μ •κΈ°μ μœΌλ‘œ μ—…λ°μ΄νŠΈ \ No newline at end of file diff --git a/scripts/guru_portfolio/main.py b/scripts/guru_portfolio/main.py new file mode 100644 index 0000000..1701fe2 --- /dev/null +++ b/scripts/guru_portfolio/main.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +""" +13F Portfolio Visualization Tool + +This script fetches portfolio data from 13f.info and creates interactive visualizations +""" + +import argparse +import sys +import os +from datetime import datetime + +from modules import DataFetcher, DataProcessor, Visualizer, ReportGenerator, PerformanceTracker + + +def main(): + # Parse command line arguments + parser = argparse.ArgumentParser( + description='Fetch and visualize 13F portfolio data', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python main.py "Berkshire Hathaway Inc" "Q3 2024" + python main.py "Bridgewater Associates" "Q2 2024" --output-dir results/ + python main.py "Scion Asset Management" "Q4 2023" --top-n 30 + python main.py "ARK Invest" "Q3 2024" --compare-sp500 --lookback-quarters 4 + """ + ) + + parser.add_argument('company', + help='Company name (e.g., "Berkshire Hathaway Inc")') + parser.add_argument('quarter', + help='Quarter in format "Q1 2025"') + parser.add_argument('--output-dir', '-o', + default='output', + help='Output directory for results (default: output)') + parser.add_argument('--top-n', '-n', + type=int, + default=20, + help='Number of top holdings to show in bar chart (default: 20)') + parser.add_argument('--no-visualizations', + action='store_true', + help='Skip creating visualizations') + 
parser.add_argument('--save-html', + action='store_true', + help='Also save visualizations as interactive HTML files') + parser.add_argument('--compare-sp500', + action='store_true', + help='Compare portfolio performance with S&P 500') + parser.add_argument('--lookback-quarters', + type=int, + default=4, + help='Number of quarters to look back for performance comparison (default: 4)') + + args = parser.parse_args() + + # Create output directory + os.makedirs(args.output_dir, exist_ok=True) + + print(f"\n{'='*60}") + print(f"13F Portfolio Analysis Tool") + print(f"{'='*60}") + print(f"Company: {args.company}") + print(f"Quarter: {args.quarter}") + print(f"Output Directory: {args.output_dir}") + if args.compare_sp500: + print(f"Performance Comparison: Enabled (looking back {args.lookback_quarters} quarters)") + print(f"{'='*60}\n") + + try: + # Step 1: Fetch data + print("πŸ“Š Fetching portfolio data from 13f.info...") + fetcher = DataFetcher() + raw_data = fetcher.fetch_portfolio_data(args.company, args.quarter) + print(f"βœ… Successfully fetched {len(raw_data)} holdings") + + # Step 2: Process data + print("\nπŸ”„ Processing portfolio data...") + processor = DataProcessor() + processed_data = processor.process_portfolio_data(raw_data) + metrics = processor.calculate_portfolio_metrics(processed_data) + treemap_data = processor.prepare_treemap_data(processed_data) + + print(f"βœ… Data processed successfully") + print(f" - Total value: ${metrics['total_value']:,.0f}") + print(f" - Total positions: {metrics['total_positions']}") + print(f" - Top 10 concentration: {metrics['top_10_concentration']:.1f}%") + + # Step 3: Create visualizations + if not args.no_visualizations: + print("\nπŸ“ˆ Creating visualizations...") + visualizer = Visualizer() + figures = {} + + # Create treemap + print(" - Creating portfolio treemap...") + treemap_fig = visualizer.create_treemap( + treemap_data, + f"{args.company} Portfolio - {args.quarter}" + ) + figures['portfolio_treemap'] = 
treemap_fig + + # Create sector pie chart + print(" - Creating sector allocation chart...") + sector_fig = visualizer.create_sector_pie_chart(processed_data) + figures['sector_allocation'] = sector_fig + + # Create concentration chart + print(" - Creating top holdings chart...") + concentration_fig = visualizer.create_concentration_chart( + processed_data, + top_n=args.top_n + ) + figures['top_holdings'] = concentration_fig + + # Step 3.5: Create performance comparison if requested + if args.compare_sp500: + print("\nπŸ“Š Creating performance comparison with S&P 500...") + performance_tracker = PerformanceTracker() + + # Simulate historical portfolio data (in real scenario, you'd fetch historical 13F data) + print(" - Simulating historical portfolio values...") + holdings_history = performance_tracker.simulate_portfolio_from_holdings( + processed_data, + lookback_quarters=args.lookback_quarters + ) + + # Calculate portfolio performance + holdings_dfs = [h[1] for h in holdings_history] + dates = [h[0] for h in holdings_history] + portfolio_df = performance_tracker.calculate_portfolio_performance(holdings_dfs, dates) + + # Compare with S&P 500 + print(" - Fetching S&P 500 data...") + start_date = dates[0] + end_date = dates[-1] + comparison_df = performance_tracker.compare_performance( + portfolio_df, + start_date, + end_date + ) + + # Calculate risk metrics + risk_metrics = performance_tracker.calculate_risk_metrics(comparison_df) + + # Create comparison charts + print(" - Creating performance comparison chart...") + performance_fig = visualizer.create_performance_comparison_chart( + comparison_df, + company_name=args.company + ) + figures['performance_comparison'] = performance_fig + + print(" - Creating returns comparison chart...") + try: + returns_fig = visualizer.create_returns_comparison_chart( + comparison_df, + company_name=args.company + ) + figures['returns_comparison'] = returns_fig + except ValueError as e: + print(f" ⚠️ Skipping returns chart: {e}") + + 
print(" - Creating risk metrics chart...") + risk_fig = visualizer.create_risk_metrics_chart( + risk_metrics, + company_name=args.company + ) + figures['risk_metrics'] = risk_fig + + # Print performance summary + print(f"\nπŸ“Š Performance Summary:") + print(f" - Portfolio Total Return: {risk_metrics.get('portfolio_total_return', 0):.2f}%") + print(f" - S&P 500 Total Return: {risk_metrics.get('sp500_total_return', 0):.2f}%") + print(f" - Relative Performance: {risk_metrics.get('relative_performance', 0):+.2f}%") + if risk_metrics.get('portfolio_beta') is not None: + print(f" - Portfolio Beta: {risk_metrics['portfolio_beta']:.2f}") + + # Save visualizations + visualizer.save_visualizations(figures, args.output_dir, save_html=args.save_html) + print("βœ… Visualizations saved") + + # Step 4: Generate report + print("\nπŸ“ Generating analysis report...") + reporter = ReportGenerator() + report_path = os.path.join(args.output_dir, 'portfolio_analysis.md') + reporter.generate_report( + args.company, + args.quarter, + processed_data, + metrics, + report_path + ) + print(f"βœ… Report saved to: {report_path}") + + # Success summary + print(f"\n{'='*60}") + print("✨ Analysis completed successfully!") + print(f"{'='*60}") + print(f"\nResults saved in: {os.path.abspath(args.output_dir)}/") + print("\nGenerated files:") + print(" πŸ“„ portfolio_analysis.md - Detailed analysis report") + if not args.no_visualizations: + print(" πŸ–ΌοΈ portfolio_treemap.png - Portfolio treemap visualization") + print(" πŸ–ΌοΈ sector_allocation.png - Sector breakdown pie chart") + print(" πŸ–ΌοΈ top_holdings.png - Top holdings bar chart") + if args.compare_sp500: + print(" πŸ“ˆ performance_comparison.png - Performance vs S&P 500") + if 'returns_comparison' in figures: + print(" πŸ“Š returns_comparison.png - Quarterly returns comparison") + print(" πŸ“Š risk_metrics.png - Risk-adjusted metrics comparison") + if args.save_html: + print(" 🌐 portfolio_treemap.html - Interactive treemap") + print(" 
🌐 sector_allocation.html - Interactive sector chart") + print(" 🌐 top_holdings.html - Interactive holdings chart") + if args.compare_sp500: + print(" 🌐 performance_comparison.html - Interactive performance chart") + if 'returns_comparison' in figures: + print(" 🌐 returns_comparison.html - Interactive returns chart") + print(" 🌐 risk_metrics.html - Interactive risk metrics chart") + + except ValueError as e: + print(f"\n❌ Error: {str(e)}") + print("\nPlease check that:") + print(" 1. The company name matches exactly (try searching on 13f.info)") + print(" 2. The quarter has been filed (13F reports are filed 45 days after quarter end)") + + # Generate error report + reporter = ReportGenerator() + error_report = reporter.generate_summary_report(args.company, args.quarter, str(e)) + error_path = os.path.join(args.output_dir, 'error_report.md') + with open(error_path, 'w') as f: + f.write(error_report) + print(f"\nπŸ“„ Error report saved to: {error_path}") + + sys.exit(1) + + except Exception as e: + print(f"\n❌ Unexpected error: {str(e)}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/guru_portfolio/modules/__init__.py b/scripts/guru_portfolio/modules/__init__.py new file mode 100644 index 0000000..9f7d91d --- /dev/null +++ b/scripts/guru_portfolio/modules/__init__.py @@ -0,0 +1,9 @@ +"""13F Portfolio Visualization Modules""" + +from .data_fetcher import DataFetcher +from .data_processor import DataProcessor +from .visualizer import Visualizer +from .report_generator import ReportGenerator +from .performance_tracker import PerformanceTracker + +__all__ = ['DataFetcher', 'DataProcessor', 'Visualizer', 'ReportGenerator', 'PerformanceTracker'] \ No newline at end of file diff --git a/scripts/guru_portfolio/modules/data_fetcher.py b/scripts/guru_portfolio/modules/data_fetcher.py new file mode 100644 index 0000000..dd12bbb --- /dev/null +++ 
b/scripts/guru_portfolio/modules/data_fetcher.py @@ -0,0 +1,285 @@ +"""Data fetcher module for extracting 13F filing data from 13f.info""" + +import requests +from bs4 import BeautifulSoup +import pandas as pd +import re +from typing import Dict, List, Optional +import time +from urllib.parse import quote +import json + + +class DataFetcher: + """Fetches 13F filing data from 13f.info website""" + + BASE_URL = "https://13f.info" + + def __init__(self): + self.session = requests.Session() + self.session.headers.update({ + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + }) + + def fetch_portfolio_data(self, company: str, quarter: str) -> pd.DataFrame: + """ + Fetch portfolio data for a specific company and quarter + + Args: + company: Company name (e.g., "Berkshire Hathaway Inc") + quarter: Quarter in format "Q1 2025" + + Returns: + DataFrame with portfolio holdings data + """ + # First, search for the company to get the correct URL + search_url = f"{self.BASE_URL}/search" + search_params = {'q': company} + + try: + # Search for the company + response = self.session.get(search_url, params=search_params) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'lxml') + + # Find the company link + company_link = self._find_company_link(soup, company) + if not company_link: + raise ValueError(f"Company '{company}' not found") + + # Get the company's filings page + company_url = f"{self.BASE_URL}{company_link}" + response = self.session.get(company_url) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'lxml') + + # Find the specific quarter filing + filing_link = self._find_quarter_link(soup, quarter) + if not filing_link: + raise ValueError(f"Quarter '{quarter}' not found for {company}") + + # Get the filing data + filing_url = f"{self.BASE_URL}{filing_link}" + response = self.session.get(filing_url) + response.raise_for_status() + + # Parse the holdings data + holdings_df = 
self._parse_holdings(response.text) + + # Add metadata + holdings_df['company'] = company + holdings_df['quarter'] = quarter + + return holdings_df + + except requests.RequestException as e: + raise Exception(f"Error fetching data: {str(e)}") + + def _find_company_link(self, soup: BeautifulSoup, company: str) -> Optional[str]: + """Find the link to the company's page from search results""" + # Look for exact match first + links = soup.find_all('a', href=re.compile(r'/manager/')) + + for link in links: + if company.lower() in link.text.lower(): + return link['href'] + + return None + + def _find_quarter_link(self, soup: BeautifulSoup, quarter: str) -> Optional[str]: + """Find the link to a specific quarter filing""" + # Look for table with filing data + tables = soup.find_all('table') + + for table in tables: + # Check if this is the filings table by looking at headers + headers = table.find_all('th') + if headers: + header_text = [h.text.strip().lower() for h in headers] + # Check if this looks like a filings table + if any('quarter' in h for h in header_text): + # This is likely the filings table + rows = table.find_all('tr')[1:] # Skip header row + + for row in rows: + cells = row.find_all('td') + if cells and len(cells) > 0: + # First cell usually contains the quarter + quarter_cell = cells[0].text.strip() + + # Check if this row matches our quarter + if quarter.upper() == quarter_cell.upper(): + # Find the link in this row + link = row.find('a', href=True) + if link: + return link['href'] + + # Fallback: look for links with quarter text + links = soup.find_all('a', href=re.compile(r'/13f/')) + + for link in links: + if quarter.upper() in link.text.strip().upper(): + return link['href'] + + return None + + def _parse_holdings(self, html: str) -> pd.DataFrame: + """Parse the holdings table from the filing page""" + # Extract filing ID from the HTML to construct API endpoint + filing_id_match = re.search(r'/data/13f/(\d+)', html) + if not filing_id_match: + # 
Fallback: try to extract from URL in the page + filing_id_match = re.search(r'000095012\d+', html) + if not filing_id_match: + raise ValueError("Could not find filing ID for data endpoint") + + filing_id = filing_id_match.group(1) if filing_id_match.group(0).startswith('/data') else filing_id_match.group(0) + + # Fetch data from JSON endpoint + data_url = f"{self.BASE_URL}/data/13f/{filing_id}" + + # Add necessary headers for API request + headers = { + 'Accept': 'application/json, text/javascript, */*; q=0.01', + 'X-Requested-With': 'XMLHttpRequest', + 'Referer': self.session.headers.get('Referer', self.BASE_URL) + } + + response = self.session.get(data_url, headers=headers) + response.raise_for_status() + + try: + json_data = response.json() + holdings_data = json_data.get('data', []) + + if not holdings_data: + raise ValueError("No holdings data found in API response") + + # Convert array data to DataFrame + # Format: [symbol, name, class, cusip, value, percentage, shares, principal, option_type] + columns = ['symbol', 'security_name', 'class', 'cusip', 'market_value', + 'portfolio_weight', 'shares', 'principal', 'option_type'] + + df = pd.DataFrame(holdings_data, columns=columns[:len(holdings_data[0])] if holdings_data else columns) + + # Convert numeric columns + numeric_columns = ['market_value', 'portfolio_weight', 'shares'] + for col in numeric_columns: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # Market value is in thousands, so multiply by 1000 + if 'market_value' in df.columns: + df['market_value'] = df['market_value'] * 1000 + + return df + + except (json.JSONDecodeError, KeyError) as e: + # Fallback to HTML parsing if JSON fails + print(f"JSON parsing failed: {e}, falling back to HTML parsing") + return self._parse_holdings_html(html) + + def _parse_holdings_html(self, html: str) -> pd.DataFrame: + """Fallback HTML parsing method""" + soup = BeautifulSoup(html, 'lxml') + + # Find the holdings table + table = 
soup.find('table', {'class': re.compile('holdings|portfolio', re.I)}) + if not table: + # Try alternative selectors + table = soup.find('table') + + if not table: + raise ValueError("Holdings table not found") + + # Extract headers + headers = [] + header_row = table.find('thead') + if header_row: + headers = [th.text.strip() for th in header_row.find_all('th')] + else: + # Try to find headers in first row + first_row = table.find('tr') + if first_row: + headers = [td.text.strip() for td in first_row.find_all(['th', 'td'])] + + # Extract data rows + rows = [] + tbody = table.find('tbody') or table + for tr in tbody.find_all('tr')[1:]: # Skip header row if in tbody + row_data = [] + for td in tr.find_all('td'): + text = td.text.strip() + # Clean up numeric values + text = text.replace('$', '').replace(',', '').replace('%', '') + row_data.append(text) + + if row_data: # Skip empty rows + rows.append(row_data) + + # Create DataFrame + if not headers: + # Default headers if none found + headers = ['Security', 'Shares', 'Value', 'Weight', 'Change'] + + # Ensure we have the right number of columns + max_cols = max(len(row) for row in rows) if rows else len(headers) + headers = headers[:max_cols] + + # Pad headers if necessary + while len(headers) < max_cols: + headers.append(f'Column{len(headers)+1}') + + df = pd.DataFrame(rows, columns=headers[:len(rows[0])] if rows else headers) + + # Standardize column names + df = self._standardize_columns(df) + + return df + + def _standardize_columns(self, df: pd.DataFrame) -> pd.DataFrame: + """Standardize column names and data types""" + # If we already have standardized columns from JSON parsing, just ensure data types + if 'security_name' in df.columns and 'symbol' in df.columns: + # Already standardized from JSON + return df + + # Common column name mappings for HTML parsing + column_mapping = { + 'company': 'security_name', + 'name': 'security_name', + 'security': 'security_name', + 'issuer': 'security_name', + 'stock': 
'security_name', + 'shares': 'shares', + 'quantity': 'shares', + 'value': 'market_value', + 'market value': 'market_value', + 'mkt val': 'market_value', + 'weight': 'portfolio_weight', + 'percent': 'portfolio_weight', + '%': 'portfolio_weight', + 'change': 'change_percent', + 'chg': 'change_percent', + 'sym': 'symbol', + 'ticker': 'symbol' + } + + # Rename columns + df.columns = [column_mapping.get(col.lower(), col.lower()) for col in df.columns] + + # Convert numeric columns + numeric_columns = ['shares', 'market_value', 'portfolio_weight', 'change_percent'] + for col in numeric_columns: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # Ensure required columns exist + required_columns = ['security_name', 'shares', 'market_value'] + for col in required_columns: + if col not in df.columns: + df[col] = None + + return df \ No newline at end of file diff --git a/scripts/guru_portfolio/modules/data_processor.py b/scripts/guru_portfolio/modules/data_processor.py new file mode 100644 index 0000000..f916df2 --- /dev/null +++ b/scripts/guru_portfolio/modules/data_processor.py @@ -0,0 +1,253 @@ +"""Data processor module for processing and analyzing portfolio data""" + +import pandas as pd +import numpy as np +from typing import Dict, List, Tuple + + +class DataProcessor: + """Processes and analyzes 13F portfolio data""" + + def __init__(self): + self.sector_mapping = self._load_sector_mapping() + + def process_portfolio_data(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Process raw portfolio data for visualization + + Args: + df: Raw portfolio DataFrame + + Returns: + Processed DataFrame with additional calculated fields + """ + # Make a copy to avoid modifying original + processed_df = df.copy() + + # Calculate portfolio weight if not present + if 'portfolio_weight' not in processed_df.columns or processed_df['portfolio_weight'].isna().all(): + total_value = processed_df['market_value'].sum() + processed_df['portfolio_weight'] = 
(processed_df['market_value'] / total_value * 100) + + # Add sector information (simplified - in real world would use external data) + processed_df['sector'] = processed_df['security_name'].apply(self._guess_sector) + + # Calculate value in millions for better readability + processed_df['value_millions'] = processed_df['market_value'] / 1_000_000 + + # Add display labels + processed_df['display_label'] = processed_df.apply( + lambda row: f"{row['security_name']}
" + f"${row['value_millions']:.1f}M ({row['portfolio_weight']:.1f}%)", + axis=1 + ) + + # Sort by market value descending + processed_df = processed_df.sort_values('market_value', ascending=False) + + # Add rank + processed_df['rank'] = range(1, len(processed_df) + 1) + + return processed_df + + def calculate_portfolio_metrics(self, df: pd.DataFrame) -> Dict: + """ + Calculate key portfolio metrics + + Args: + df: Processed portfolio DataFrame + + Returns: + Dictionary with portfolio metrics + """ + metrics = { + 'total_value': df['market_value'].sum(), + 'total_positions': len(df), + 'top_10_concentration': df.head(10)['portfolio_weight'].sum(), + 'largest_position': { + 'name': df.iloc[0]['security_name'], + 'weight': df.iloc[0]['portfolio_weight'], + 'value': df.iloc[0]['market_value'] + }, + 'sector_breakdown': self._calculate_sector_breakdown(df), + 'concentration_metrics': self._calculate_concentration_metrics(df) + } + + return metrics + + def prepare_treemap_data(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Prepare data specifically for treemap visualization + + Args: + df: Processed portfolio DataFrame + + Returns: + DataFrame formatted for treemap + """ + treemap_df = df.copy() + + # Filter out very small positions for clarity (optional) + # treemap_df = treemap_df[treemap_df['portfolio_weight'] >= 0.1] + + # Ensure all required columns exist + required_columns = ['security_name', 'sector', 'market_value', + 'portfolio_weight', 'display_label'] + + for col in required_columns: + if col not in treemap_df.columns: + if col == 'sector': + treemap_df[col] = 'Unknown' + else: + treemap_df[col] = '' + + return treemap_df + + def _load_sector_mapping(self) -> Dict[str, str]: + """Load sector mapping for common stocks""" + # Enhanced sector mapping - covers more companies + return { + # Technology + 'apple': 'Technology', + 'microsoft': 'Technology', + 'google': 'Technology', + 'alphabet': 'Technology', + 'meta': 'Technology', + 'facebook': 'Technology', + 
'verisign': 'Technology', + 'amazon': 'Consumer Discretionary', # Amazon is classified as Consumer Discretionary + + # Financials + 'berkshire': 'Financials', + 'bank of america': 'Financials', + 'bank amer': 'Financials', + 'wells': 'Financials', + 'jpmorgan': 'Financials', + 'chase': 'Financials', + 'american express': 'Financials', + 'visa': 'Financials', + 'mastercard': 'Financials', + 'capital one': 'Financials', + 'moody': 'Financials', + 'moodys': 'Financials', + 'aon': 'Financials', + 'chubb': 'Financials', + 'insurance': 'Financials', + + # Energy + 'chevron': 'Energy', + 'exxon': 'Energy', + 'occidental': 'Energy', + 'petroleum': 'Energy', + 'conocophillips': 'Energy', + + # Consumer Staples + 'coca cola': 'Consumer Staples', + 'coca-cola': 'Consumer Staples', + 'pepsi': 'Consumer Staples', + 'kraft': 'Consumer Staples', + 'heinz': 'Consumer Staples', + 'kroger': 'Consumer Staples', + 'constellation brands': 'Consumer Staples', + 'procter': 'Consumer Staples', + 'p&g': 'Consumer Staples', + 'walmart': 'Consumer Staples', + 'costco': 'Consumer Staples', + + # Healthcare + 'johnson': 'Healthcare', + 'pfizer': 'Healthcare', + 'merck': 'Healthcare', + 'davita': 'Healthcare', + 'unitedhealth': 'Healthcare', + 'abbvie': 'Healthcare', + 'abbott': 'Healthcare', + 'eli lilly': 'Healthcare', + 'bristol': 'Healthcare', + 'cvs': 'Healthcare', + + # Consumer Discretionary + 'dominos': 'Consumer Discretionary', + 'pizza': 'Consumer Discretionary', + 'mcdonald': 'Consumer Discretionary', + 'starbucks': 'Consumer Discretionary', + 'nike': 'Consumer Discretionary', + 'home depot': 'Consumer Discretionary', + 'tesla': 'Consumer Discretionary', + + # Communication Services + 'sirius': 'Communication Services', + 'comcast': 'Communication Services', + 'disney': 'Communication Services', + 'netflix': 'Communication Services', + 'verizon': 'Communication Services', + 'at&t': 'Communication Services', + 't-mobile': 'Communication Services', + + # Industrials + 'boeing': 
'Industrials', + 'caterpillar': 'Industrials', + 'general electric': 'Industrials', + 'honeywell': 'Industrials', + '3m': 'Industrials', + 'ups': 'Industrials', + 'fedex': 'Industrials', + } + + def _guess_sector(self, security_name: str) -> str: + """Guess sector based on security name""" + name_lower = security_name.lower() + + for keyword, sector in self.sector_mapping.items(): + if keyword in name_lower: + return sector + + # Default sectors based on common patterns + if any(word in name_lower for word in ['reit', 'property', 'real estate']): + return 'Real Estate' + elif any(word in name_lower for word in ['utility', 'electric', 'gas']): + return 'Utilities' + elif any(word in name_lower for word in ['telecom', 'communications']): + return 'Communication Services' + elif any(word in name_lower for word in ['materials', 'chemical', 'mining']): + return 'Materials' + elif any(word in name_lower for word in ['industrial', 'aerospace', 'defense']): + return 'Industrials' + + return 'Other' + + def _calculate_sector_breakdown(self, df: pd.DataFrame) -> Dict[str, float]: + """Calculate portfolio weight by sector""" + sector_weights = df.groupby('sector')['portfolio_weight'].sum().to_dict() + return dict(sorted(sector_weights.items(), key=lambda x: x[1], reverse=True)) + + def _calculate_concentration_metrics(self, df: pd.DataFrame) -> Dict: + """Calculate portfolio concentration metrics""" + weights = df['portfolio_weight'].values + + # Herfindahl-Hirschman Index (HHI) + hhi = np.sum(weights ** 2) + + # Effective number of positions + effective_n = 1 / hhi if hhi > 0 else 0 + + # Gini coefficient + gini = self._calculate_gini(weights) + + return { + 'hhi': hhi, + 'effective_positions': effective_n, + 'gini_coefficient': gini, + 'top_5_weight': df.head(5)['portfolio_weight'].sum(), + 'top_20_weight': df.head(20)['portfolio_weight'].sum() + } + + def _calculate_gini(self, weights: np.ndarray) -> float: + """Calculate Gini coefficient for concentration""" + # Sort 
weights + sorted_weights = np.sort(weights) + n = len(sorted_weights) + + # Calculate Gini + cumsum = np.cumsum(sorted_weights) + return (2 * np.sum((np.arange(1, n + 1) * sorted_weights))) / (n * cumsum[-1]) - (n + 1) / n \ No newline at end of file diff --git a/scripts/guru_portfolio/modules/performance_tracker.py b/scripts/guru_portfolio/modules/performance_tracker.py new file mode 100644 index 0000000..a713abd --- /dev/null +++ b/scripts/guru_portfolio/modules/performance_tracker.py @@ -0,0 +1,217 @@ +"""Performance tracker module for portfolio comparison with benchmarks""" + +import yfinance as yf +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +from typing import Dict, List, Tuple, Optional +import warnings +warnings.filterwarnings('ignore') + + +class PerformanceTracker: + """Tracks portfolio performance and compares with benchmarks""" + + def __init__(self): + self.sp500_ticker = "^GSPC" # S&P 500 index ticker + + def fetch_sp500_data(self, start_date: str, end_date: str) -> pd.DataFrame: + """ + Fetch S&P 500 historical data + + Args: + start_date: Start date in format 'YYYY-MM-DD' + end_date: End date in format 'YYYY-MM-DD' + + Returns: + DataFrame with S&P 500 price data + """ + try: + sp500 = yf.download(self.sp500_ticker, start=start_date, end=end_date, progress=False) + sp500_df = pd.DataFrame({ + 'date': sp500.index, + 'sp500_close': sp500['Close'], + 'sp500_return': sp500['Close'].pct_change() + }).reset_index(drop=True) + + return sp500_df + except Exception as e: + raise Exception(f"Error fetching S&P 500 data: {str(e)}") + + def calculate_portfolio_performance(self, + holdings_history: List[pd.DataFrame], + dates: List[str]) -> pd.DataFrame: + """ + Calculate portfolio performance over time + + Args: + holdings_history: List of DataFrames with portfolio holdings for each quarter + dates: List of corresponding dates for each holdings snapshot + + Returns: + DataFrame with portfolio performance metrics + """ + 
portfolio_values = [] + + for holdings_df, date in zip(holdings_history, dates): + total_value = holdings_df['market_value'].sum() + portfolio_values.append({ + 'date': pd.to_datetime(date), + 'portfolio_value': total_value + }) + + portfolio_df = pd.DataFrame(portfolio_values) + portfolio_df = portfolio_df.sort_values('date').reset_index(drop=True) + + # Calculate returns + portfolio_df['portfolio_return'] = portfolio_df['portfolio_value'].pct_change() + + return portfolio_df + + def compare_performance(self, + portfolio_df: pd.DataFrame, + start_date: str, + end_date: str) -> pd.DataFrame: + """ + Compare portfolio performance with S&P 500 + + Args: + portfolio_df: DataFrame with portfolio performance data + start_date: Start date for comparison + end_date: End date for comparison + + Returns: + Combined DataFrame with portfolio and S&P 500 performance + """ + # Fetch S&P 500 data + sp500_df = self.fetch_sp500_data(start_date, end_date) + + # Normalize dates for merging + portfolio_df['date'] = pd.to_datetime(portfolio_df['date']) + sp500_df['date'] = pd.to_datetime(sp500_df['date']) + + # For quarterly portfolio data, we'll match to the nearest S&P 500 date + combined_data = [] + + for _, row in portfolio_df.iterrows(): + portfolio_date = row['date'] + + # Find nearest S&P 500 date + date_diffs = abs(sp500_df['date'] - portfolio_date) + nearest_idx = date_diffs.argmin() + sp500_row = sp500_df.iloc[nearest_idx] + + combined_data.append({ + 'date': portfolio_date, + 'portfolio_value': row['portfolio_value'], + 'portfolio_return': row.get('portfolio_return', 0), + 'sp500_close': sp500_row['sp500_close'], + 'sp500_return': sp500_row.get('sp500_return', 0) + }) + + comparison_df = pd.DataFrame(combined_data) + + # Calculate cumulative returns + comparison_df['portfolio_cum_return'] = (1 + comparison_df['portfolio_return'].fillna(0)).cumprod() - 1 + comparison_df['sp500_cum_return'] = (1 + comparison_df['sp500_return'].fillna(0)).cumprod() - 1 + + # Calculate 
normalized values (base 100) + if len(comparison_df) > 0: + comparison_df['portfolio_normalized'] = 100 * comparison_df['portfolio_value'] / comparison_df['portfolio_value'].iloc[0] + comparison_df['sp500_normalized'] = 100 * comparison_df['sp500_close'] / comparison_df['sp500_close'].iloc[0] + + return comparison_df + + def calculate_risk_metrics(self, comparison_df: pd.DataFrame) -> Dict[str, float]: + """ + Calculate risk-adjusted performance metrics + + Args: + comparison_df: DataFrame with portfolio and benchmark performance + + Returns: + Dictionary with risk metrics + """ + metrics = {} + + # Remove NaN values for calculations + portfolio_returns = comparison_df['portfolio_return'].dropna() + sp500_returns = comparison_df['sp500_return'].dropna() + + if len(portfolio_returns) > 1: + # Volatility (annualized) + metrics['portfolio_volatility'] = portfolio_returns.std() * np.sqrt(4) # Quarterly to annual + metrics['sp500_volatility'] = sp500_returns.std() * np.sqrt(4) + + # Total return + metrics['portfolio_total_return'] = comparison_df['portfolio_cum_return'].iloc[-1] * 100 + metrics['sp500_total_return'] = comparison_df['sp500_cum_return'].iloc[-1] * 100 + + # Relative performance + metrics['relative_performance'] = metrics['portfolio_total_return'] - metrics['sp500_total_return'] + + # Sharpe ratio (assuming 0% risk-free rate for simplicity) + portfolio_avg_return = portfolio_returns.mean() * 4 # Annualized + sp500_avg_return = sp500_returns.mean() * 4 + + if metrics['portfolio_volatility'] > 0: + metrics['portfolio_sharpe'] = portfolio_avg_return / metrics['portfolio_volatility'] + else: + metrics['portfolio_sharpe'] = 0 + + if metrics['sp500_volatility'] > 0: + metrics['sp500_sharpe'] = sp500_avg_return / metrics['sp500_volatility'] + else: + metrics['sp500_sharpe'] = 0 + + # Beta (if enough data points) + if len(portfolio_returns) >= 4: + covariance = np.cov(portfolio_returns, sp500_returns)[0, 1] + sp500_variance = np.var(sp500_returns) + if 
sp500_variance > 0: + metrics['portfolio_beta'] = covariance / sp500_variance + else: + metrics['portfolio_beta'] = 1.0 + else: + metrics['portfolio_beta'] = None + + return metrics + + def simulate_portfolio_from_holdings(self, current_holdings: pd.DataFrame, + lookback_quarters: int = 4) -> List[Tuple[str, pd.DataFrame]]: + """ + Simulate historical portfolio values based on current holdings + This is a simplified approach that assumes the same holdings over time + + Args: + current_holdings: Current portfolio holdings + lookback_quarters: Number of quarters to look back + + Returns: + List of (date, holdings) tuples + """ + holdings_history = [] + current_date = datetime.now() + + # Generate quarterly dates going back + for i in range(lookback_quarters + 1): + # Calculate date for each quarter + months_back = i * 3 + quarter_date = current_date - timedelta(days=months_back * 30) + date_str = quarter_date.strftime('%Y-%m-%d') + + # For simulation, we'll adjust values based on S&P 500 performance + # This is a simplified approach + simulated_holdings = current_holdings.copy() + + # Adjust market values based on how far back we're looking + # This is a rough approximation + adjustment_factor = 1 - (i * 0.02) # Assume 2% growth per quarter + simulated_holdings['market_value'] = simulated_holdings['market_value'] * adjustment_factor + + holdings_history.append((date_str, simulated_holdings)) + + # Reverse to have chronological order + holdings_history.reverse() + + return holdings_history \ No newline at end of file diff --git a/scripts/guru_portfolio/modules/report_generator.py b/scripts/guru_portfolio/modules/report_generator.py new file mode 100644 index 0000000..3315598 --- /dev/null +++ b/scripts/guru_portfolio/modules/report_generator.py @@ -0,0 +1,227 @@ +"""Report generator module for creating portfolio analysis reports""" + +import pandas as pd +from datetime import datetime +from typing import Dict, Any +import os + + +class ReportGenerator: + """Generates 
markdown reports for portfolio analysis""" + + def __init__(self): + self.report_template = """# 13F Portfolio Analysis Report + +**Company**: {company} +**Quarter**: {quarter} +**Generated**: {generated_date} + +--- + +## Executive Summary + +- **Total Portfolio Value**: ${total_value:,.0f} +- **Number of Positions**: {total_positions} +- **Top 10 Concentration**: {top_10_concentration:.1f}% +- **Largest Position**: {largest_position_name} ({largest_position_weight:.1f}%) + +## Portfolio Metrics + +### Concentration Analysis +- **Top 5 Holdings Weight**: {top_5_weight:.1f}% +- **Top 20 Holdings Weight**: {top_20_weight:.1f}% +- **Herfindahl-Hirschman Index (HHI)**: {hhi:.4f} +- **Effective Number of Positions**: {effective_positions:.1f} +- **Gini Coefficient**: {gini:.3f} + +### Sector Allocation + +{sector_table} + +## Top 20 Holdings + +{holdings_table} + +## Visualizations + +The following visualizations have been generated: + +1. **Portfolio Treemap** (`portfolio_treemap.png`): Treemap showing all holdings sized by market value +2. **Sector Allocation Pie Chart** (`sector_allocation.png`): Breakdown of portfolio by sector +3. 
**Top Holdings Bar Chart** (`top_holdings.png`): Bar chart of top 20 holdings by weight + +### Portfolio Treemap +![Portfolio Treemap](portfolio_treemap.png) + +### Sector Allocation +![Sector Allocation](sector_allocation.png) + +### Top Holdings +![Top Holdings](top_holdings.png) + +## Analysis Notes + +{analysis_notes} + +--- + +*This report was automatically generated from 13F filing data available at [13f.info](https://13f.info)* +""" + + def generate_report(self, + company: str, + quarter: str, + processed_df: pd.DataFrame, + metrics: Dict[str, Any], + output_path: str = "portfolio_analysis.md") -> str: + """ + Generate a comprehensive markdown report + + Args: + company: Company name + quarter: Quarter (e.g., "Q1 2025") + processed_df: Processed portfolio DataFrame + metrics: Portfolio metrics dictionary + output_path: Path to save the report + + Returns: + Path to the generated report + """ + # Generate sector table + sector_table = self._generate_sector_table(metrics['sector_breakdown']) + + # Generate holdings table + holdings_table = self._generate_holdings_table(processed_df.head(20)) + + # Generate analysis notes + analysis_notes = self._generate_analysis_notes(metrics, processed_df) + + # Format the report + report_content = self.report_template.format( + company=company, + quarter=quarter, + generated_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + total_value=metrics['total_value'], + total_positions=metrics['total_positions'], + top_10_concentration=metrics['top_10_concentration'], + largest_position_name=metrics['largest_position']['name'], + largest_position_weight=metrics['largest_position']['weight'], + top_5_weight=metrics['concentration_metrics']['top_5_weight'], + top_20_weight=metrics['concentration_metrics']['top_20_weight'], + hhi=metrics['concentration_metrics']['hhi'], + effective_positions=metrics['concentration_metrics']['effective_positions'], + gini=metrics['concentration_metrics']['gini_coefficient'], + 
sector_table=sector_table, + holdings_table=holdings_table, + analysis_notes=analysis_notes + ) + + # Save the report + with open(output_path, 'w', encoding='utf-8') as f: + f.write(report_content) + + return output_path + + def _generate_sector_table(self, sector_breakdown: Dict[str, float]) -> str: + """Generate markdown table for sector breakdown""" + table = "| Sector | Weight (%) |\n" + table += "|--------|------------|\n" + + for sector, weight in sector_breakdown.items(): + table += f"| {sector} | {weight:.2f}% |\n" + + return table + + def _generate_holdings_table(self, top_holdings: pd.DataFrame) -> str: + """Generate markdown table for top holdings""" + table = "| Rank | Security | Shares | Value | Weight (%) |\n" + table += "|------|----------|--------|-------|------------|\n" + + for _, row in top_holdings.iterrows(): + table += (f"| {row['rank']} | {row['security_name']} | " + f"{row['shares']:,.0f} | ${row['value_millions']:.1f}M | " + f"{row['portfolio_weight']:.2f}% |\n") + + return table + + def _generate_analysis_notes(self, metrics: Dict[str, Any], df: pd.DataFrame) -> str: + """Generate analytical insights""" + notes = [] + + # Concentration analysis + hhi = metrics['concentration_metrics']['hhi'] + if hhi > 0.15: + notes.append("- **High Concentration**: The portfolio shows high concentration " + f"(HHI = {hhi:.3f}), indicating significant weight in top positions.") + elif hhi < 0.05: + notes.append("- **Well Diversified**: The portfolio is well-diversified " + f"(HHI = {hhi:.3f}), with no single position dominating.") + + # Sector concentration + top_sector = max(metrics['sector_breakdown'].items(), key=lambda x: x[1]) + if top_sector[1] > 40: + notes.append(f"- **Sector Concentration**: {top_sector[0]} sector represents " + f"{top_sector[1]:.1f}% of the portfolio, showing significant sector bet.") + + # Position count analysis + if metrics['total_positions'] < 20: + notes.append("- **Focused Portfolio**: With fewer than 20 positions, this is 
" + "a highly focused investment strategy.") + elif metrics['total_positions'] > 100: + notes.append("- **Broad Diversification**: The portfolio contains over 100 positions, " + "indicating a broadly diversified approach.") + + # Top holding analysis + if metrics['largest_position']['weight'] > 20: + notes.append(f"- **Dominant Position**: {metrics['largest_position']['name']} " + f"represents {metrics['largest_position']['weight']:.1f}% of the portfolio.") + + return "\n".join(notes) if notes else "No significant concentration issues identified." + + def generate_summary_report(self, + company: str, + quarter: str, + error_message: str = None) -> str: + """ + Generate a simple error report if data fetching fails + + Args: + company: Company name + quarter: Quarter + error_message: Error message to include + + Returns: + Error report content + """ + report = f"""# 13F Portfolio Analysis Report - Error + +**Company**: {company} +**Quarter**: {quarter} +**Generated**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} + +--- + +## Error + +Unable to fetch portfolio data for the specified company and quarter. + +**Error Details**: {error_message} + +## Possible Reasons + +1. The company name might not exactly match the name in the 13F database +2. The specified quarter might not have been filed yet +3. The company might not be required to file 13F reports +4. 
Network connection issues + +## Suggestions + +- Try searching for the company on [13f.info](https://13f.info) to find the exact name +- Verify that the quarter has been filed (13F reports are filed 45 days after quarter end) +- Check if the company is an institutional investment manager with >$100M AUM + +--- + +*This report was automatically generated* +""" + return report \ No newline at end of file diff --git a/scripts/guru_portfolio/modules/visualizer.py b/scripts/guru_portfolio/modules/visualizer.py new file mode 100644 index 0000000..8c8a8d9 --- /dev/null +++ b/scripts/guru_portfolio/modules/visualizer.py @@ -0,0 +1,544 @@ +"""Visualizer module for creating portfolio visualizations""" + +import plotly.graph_objects as go +import plotly.express as px +import pandas as pd +from typing import Dict, Optional +import plotly.io as pio + + +class Visualizer: + """Creates visualizations for portfolio data""" + + def __init__(self): + # Set default theme + pio.templates.default = "plotly_white" + + # Color scheme for sectors + self.sector_colors = { + 'Technology': '#1f77b4', + 'Financials': '#ff7f0e', + 'Healthcare': '#2ca02c', + 'Consumer Discretionary': '#d62728', + 'Consumer Staples': '#9467bd', + 'Energy': '#8c564b', + 'Industrials': '#e377c2', + 'Materials': '#7f7f7f', + 'Real Estate': '#bcbd22', + 'Utilities': '#17becf', + 'Communication Services': '#ff9896', + 'Other': '#aec7e8', + 'Other (<1% each)': '#aec7e8' + } + + def create_treemap(self, df: pd.DataFrame, title: str = "Portfolio Holdings Treemap") -> go.Figure: + """ + Create an interactive treemap visualization of portfolio holdings + + Args: + df: Processed portfolio DataFrame + title: Chart title + + Returns: + Plotly Figure object + """ + # Prepare data for treemap + df_treemap = df.copy() + + # Sort by market value for better visualization + df_treemap = df_treemap.sort_values('market_value', ascending=False) + + # Create color scale based on sector + unique_sectors = df_treemap['sector'].unique() 
+ sector_color_map = {sector: self.sector_colors.get(sector, '#999999') + for sector in unique_sectors} + + # Assign colors based on sector + colors = [sector_color_map[sector] for sector in df_treemap['sector']] + + fig = go.Figure(go.Treemap( + labels=df_treemap['security_name'].tolist(), + parents=[''] * len(df_treemap), # No hierarchy, all at root level + values=df_treemap['market_value'].tolist(), + text=[f"{row['security_name']}
" + f"{row['sector']}
" + f"${row['value_millions']:.1f}M
" + f"{row['portfolio_weight']:.2f}%" + for _, row in df_treemap.iterrows()], + textinfo="text", + hovertemplate='%{label}
' + + 'Sector: %{customdata}
' + + 'Value: $%{value:,.0f}
' + + 'Weight: %{text}%
' + + '', + customdata=df_treemap['sector'].tolist(), + marker=dict( + colors=colors, + line=dict(width=2, color='white') + ), + # Add portfolio weight as hover text + hovertext=[f"{weight:.2f}" for weight in df_treemap['portfolio_weight']] + )) + + # Update layout + fig.update_layout( + title={ + 'text': title, + 'x': 0.5, + 'xanchor': 'center', + 'font': {'size': 24} + }, + margin=dict(t=80, l=10, r=10, b=10), + height=800, + width=1200 + ) + + return fig + + def create_sector_pie_chart(self, df: pd.DataFrame) -> go.Figure: + """ + Create a pie chart showing sector allocation + + Args: + df: Processed portfolio DataFrame + + Returns: + Plotly Figure object + """ + # Aggregate by sector + sector_data = df.groupby('sector').agg({ + 'market_value': 'sum', + 'portfolio_weight': 'sum' + }).reset_index() + + # Sort by weight + sector_data = sector_data.sort_values('portfolio_weight', ascending=False) + + # Group sectors with less than 1% into "Other" + other_threshold = 1.0 # 1% threshold + + # Separate major and minor sectors + major_sectors = sector_data[sector_data['portfolio_weight'] >= other_threshold].copy() + minor_sectors = sector_data[sector_data['portfolio_weight'] < other_threshold] + + # If there are minor sectors, combine them into "Other" + if len(minor_sectors) > 0: + other_row = pd.DataFrame({ + 'sector': ['Other (<1% each)'], + 'market_value': [minor_sectors['market_value'].sum()], + 'portfolio_weight': [minor_sectors['portfolio_weight'].sum()] + }) + sector_data = pd.concat([major_sectors, other_row], ignore_index=True) + else: + sector_data = major_sectors + + # Sort again by weight + sector_data = sector_data.sort_values('portfolio_weight', ascending=False) + + # Create colors list + colors = [self.sector_colors.get(sector, '#999999') for sector in sector_data['sector']] + + fig = go.Figure(data=[go.Pie( + labels=sector_data['sector'], + values=sector_data['market_value'], + text=[f"{sector}
{weight:.1f}%" + for sector, weight in zip(sector_data['sector'], sector_data['portfolio_weight'])], + textposition='inside', + textinfo='text', + insidetextorientation='radial', + marker=dict(colors=colors, line=dict(color='white', width=2)), + hovertemplate='%{label}
' + + 'Value: $%{value:,.0f}
' + + 'Weight: %{percent}
' + + '' + )]) + + fig.update_layout( + title={ + 'text': 'Portfolio Allocation by Sector', + 'x': 0.5, + 'xanchor': 'center', + 'font': {'size': 20} + }, + height=600, + width=800, + showlegend=True, + legend=dict( + orientation="v", + yanchor="middle", + y=0.5, + xanchor="left", + x=1.05 + ), + font=dict(size=14) + ) + + return fig + + def create_concentration_chart(self, df: pd.DataFrame, top_n: int = 20) -> go.Figure: + """ + Create a bar chart showing top holdings + + Args: + df: Processed portfolio DataFrame + top_n: Number of top holdings to show + + Returns: + Plotly Figure object + """ + # Get top N holdings + top_holdings = df.head(top_n).copy() + + # Reverse order for better visualization + top_holdings = top_holdings.iloc[::-1] + + fig = go.Figure() + + # Add bar trace + fig.add_trace(go.Bar( + x=top_holdings['portfolio_weight'], + y=top_holdings['security_name'], + orientation='h', + text=[f"{weight:.2f}%" for weight in top_holdings['portfolio_weight']], + textposition='outside', + marker=dict( + color=top_holdings['portfolio_weight'], + colorscale='Blues', + showscale=False + ), + hovertemplate='%{y}
' + + 'Weight: %{x:.2f}%
' + + 'Value: $%{customdata:,.0f}
' + + '', + customdata=top_holdings['market_value'] + )) + + # Update layout + fig.update_layout( + title={ + 'text': f'Top {top_n} Holdings by Weight', + 'x': 0.5, + 'xanchor': 'center', + 'font': {'size': 20} + }, + xaxis=dict( + title='Portfolio Weight (%)', + tickformat='.1f', + range=[0, max(top_holdings['portfolio_weight']) * 1.1] + ), + yaxis=dict( + title='', + tickmode='linear' + ), + height=max(600, top_n * 30), + width=1000, + margin=dict(l=200, r=50, t=80, b=50) + ) + + return fig + + def save_visualizations(self, figures: Dict[str, go.Figure], output_dir: str = ".", save_html: bool = False): + """ + Save all figures as PNG images (and optionally as HTML) + + Args: + figures: Dictionary of figure names and Figure objects + output_dir: Directory to save outputs + save_html: Whether to also save as interactive HTML (default: False) + """ + import os + + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + for name, fig in figures.items(): + # Save as PNG (primary format) + png_path = os.path.join(output_dir, f"{name}.png") + try: + # High quality PNG export + fig.write_image( + png_path, + width=1200, + height=800, + scale=2, # 2x resolution for better quality + engine='kaleido' + ) + print(f"βœ… Saved {name}.png") + except Exception as e: + print(f"❌ Error saving {name}.png: {e}") + print("Make sure kaleido is installed: pip install kaleido") + + # Optionally save as interactive HTML + if save_html: + html_path = os.path.join(output_dir, f"{name}.html") + fig.write_html(html_path) + print(f"βœ… Saved {name}.html") + + def create_performance_comparison_chart(self, comparison_df: pd.DataFrame, + company_name: str = "Portfolio") -> go.Figure: + """ + Create a line chart comparing portfolio performance with S&P 500 + + Args: + comparison_df: DataFrame with portfolio and S&P 500 performance data + company_name: Name of the company/portfolio + + Returns: + Plotly Figure object + """ + fig = go.Figure() + + # Add portfolio 
line + fig.add_trace(go.Scatter( + x=comparison_df['date'], + y=comparison_df['portfolio_normalized'], + mode='lines+markers', + name=company_name, + line=dict(color='#1f77b4', width=3), + marker=dict(size=8), + hovertemplate='%{fullData.name}
' + + 'Date: %{x}
' + + 'Value: %{y:.2f}
' + + '' + )) + + # Add S&P 500 line + fig.add_trace(go.Scatter( + x=comparison_df['date'], + y=comparison_df['sp500_normalized'], + mode='lines+markers', + name='S&P 500', + line=dict(color='#ff7f0e', width=3), + marker=dict(size=8), + hovertemplate='%{fullData.name}
' + + 'Date: %{x}
' + + 'Value: %{y:.2f}
' + + '' + )) + + # Update layout + fig.update_layout( + title={ + 'text': f'{company_name} vs S&P 500 Performance Comparison', + 'x': 0.5, + 'xanchor': 'center', + 'font': {'size': 20} + }, + xaxis=dict( + title='Date', + tickformat='%Y-%m-%d', + showgrid=True, + gridwidth=1, + gridcolor='lightgray' + ), + yaxis=dict( + title='Normalized Value (Base 100)', + showgrid=True, + gridwidth=1, + gridcolor='lightgray' + ), + hovermode='x unified', + showlegend=True, + legend=dict( + x=0.02, + y=0.98, + xanchor='left', + yanchor='top', + bgcolor='rgba(255, 255, 255, 0.8)', + bordercolor='black', + borderwidth=1 + ), + plot_bgcolor='white', + height=600, + width=1000 + ) + + # Add annotation for final values + if len(comparison_df) > 0: + last_row = comparison_df.iloc[-1] + portfolio_final = last_row['portfolio_normalized'] + sp500_final = last_row['sp500_normalized'] + + # Determine which performed better + if portfolio_final > sp500_final: + annotation_text = f"{company_name} outperformed S&P 500 by {portfolio_final - sp500_final:.1f}%" + annotation_color = 'green' + else: + annotation_text = f"S&P 500 outperformed {company_name} by {sp500_final - portfolio_final:.1f}%" + annotation_color = 'red' + + fig.add_annotation( + x=0.5, + y=1.1, + xref='paper', + yref='paper', + text=annotation_text, + showarrow=False, + font=dict(size=14, color=annotation_color), + xanchor='center' + ) + + return fig + + def create_returns_comparison_chart(self, comparison_df: pd.DataFrame, + company_name: str = "Portfolio") -> go.Figure: + """ + Create a bar chart comparing quarterly returns + + Args: + comparison_df: DataFrame with portfolio and S&P 500 performance data + company_name: Name of the company/portfolio + + Returns: + Plotly Figure object + """ + # Filter out the first row (no return data) + returns_df = comparison_df[comparison_df['portfolio_return'].notna()].copy() + + if len(returns_df) == 0: + raise ValueError("Not enough data to create returns comparison") + + fig = 
go.Figure() + + # Add portfolio returns bars + fig.add_trace(go.Bar( + x=returns_df['date'], + y=returns_df['portfolio_return'] * 100, + name=company_name, + marker_color='#1f77b4', + opacity=0.8, + hovertemplate='%{fullData.name}
' + + 'Date: %{x}
' + + 'Return: %{y:.2f}%
' + + '' + )) + + # Add S&P 500 returns bars + fig.add_trace(go.Bar( + x=returns_df['date'], + y=returns_df['sp500_return'] * 100, + name='S&P 500', + marker_color='#ff7f0e', + opacity=0.8, + hovertemplate='%{fullData.name}
' + + 'Date: %{x}
' + + 'Return: %{y:.2f}%
' + + '' + )) + + # Update layout + fig.update_layout( + title={ + 'text': 'Quarterly Returns Comparison', + 'x': 0.5, + 'xanchor': 'center', + 'font': {'size': 20} + }, + xaxis=dict( + title='Quarter', + tickformat='%Y-%m-%d' + ), + yaxis=dict( + title='Return (%)', + tickformat='.1f' + ), + barmode='group', + showlegend=True, + legend=dict( + x=0.02, + y=0.98, + xanchor='left', + yanchor='top' + ), + height=500, + width=1000 + ) + + # Add zero line + fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5) + + return fig + + def create_risk_metrics_chart(self, metrics: Dict[str, float], + company_name: str = "Portfolio") -> go.Figure: + """ + Create a comparison chart for risk metrics + + Args: + metrics: Dictionary with risk metrics + company_name: Name of the company/portfolio + + Returns: + Plotly Figure object + """ + # Prepare data for visualization + metric_names = ['Total Return (%)', 'Volatility (%)', 'Sharpe Ratio'] + portfolio_values = [ + metrics.get('portfolio_total_return', 0), + metrics.get('portfolio_volatility', 0) * 100, + metrics.get('portfolio_sharpe', 0) + ] + sp500_values = [ + metrics.get('sp500_total_return', 0), + metrics.get('sp500_volatility', 0) * 100, + metrics.get('sp500_sharpe', 0) + ] + + fig = go.Figure() + + # Add portfolio metrics + fig.add_trace(go.Bar( + x=metric_names, + y=portfolio_values, + name=company_name, + marker_color='#1f77b4', + text=[f'{v:.2f}' for v in portfolio_values], + textposition='outside' + )) + + # Add S&P 500 metrics + fig.add_trace(go.Bar( + x=metric_names, + y=sp500_values, + name='S&P 500', + marker_color='#ff7f0e', + text=[f'{v:.2f}' for v in sp500_values], + textposition='outside' + )) + + # Update layout + fig.update_layout( + title={ + 'text': 'Risk-Adjusted Performance Metrics', + 'x': 0.5, + 'xanchor': 'center', + 'font': {'size': 20} + }, + xaxis=dict(title=''), + yaxis=dict(title='Value'), + barmode='group', + showlegend=True, + legend=dict( + x=0.02, + y=0.98, + xanchor='left', + 
yanchor='top' + ), + height=500, + width=800 + ) + + # Add beta as annotation if available + if metrics.get('portfolio_beta') is not None: + fig.add_annotation( + x=0.5, + y=1.05, + xref='paper', + yref='paper', + text=f"Portfolio Beta: {metrics['portfolio_beta']:.2f}", + showarrow=False, + font=dict(size=14), + xanchor='center' + ) + + return fig \ No newline at end of file diff --git a/scripts/guru_portfolio/output/error_report.md b/scripts/guru_portfolio/output/error_report.md new file mode 100644 index 0000000..a228c02 --- /dev/null +++ b/scripts/guru_portfolio/output/error_report.md @@ -0,0 +1,30 @@ +# 13F Portfolio Analysis Report - Error + +**Company**: Berkshire Hathaway Inc +**Quarter**: Q1 2025 +**Generated**: 2025-07-05 23:02:13 + +--- + +## Error + +Unable to fetch portfolio data for the specified company and quarter. + +**Error Details**: Quarter 'Q1 2025' not found for Berkshire Hathaway Inc + +## Possible Reasons + +1. The company name might not exactly match the name in the 13F database +2. The specified quarter might not have been filed yet +3. The company might not be required to file 13F reports +4. 
Network connection issues + +## Suggestions + +- Try searching for the company on [13f.info](https://13f.info) to find the exact name +- Verify that the quarter has been filed (13F reports are filed 45 days after quarter end) +- Check if the company is an institutional investment manager with >$100M AUM + +--- + +*This report was automatically generated* diff --git a/scripts/guru_portfolio/output/portfolio_analysis.md b/scripts/guru_portfolio/output/portfolio_analysis.md new file mode 100644 index 0000000..65c6bec --- /dev/null +++ b/scripts/guru_portfolio/output/portfolio_analysis.md @@ -0,0 +1,89 @@ +# 13F Portfolio Analysis Report + +**Company**: Berkshire Hathaway Inc +**Quarter**: Q1 2025 +**Generated**: 2025-07-05 23:38:16 + +--- + +## Executive Summary + +- **Total Portfolio Value**: $258,701,128,000 +- **Number of Positions**: 36 +- **Top 10 Concentration**: 89.2% +- **Largest Position**: APPLE INC (25.8%) + +## Portfolio Metrics + +### Concentration Analysis +- **Top 5 Holdings Weight**: 70.6% +- **Top 20 Holdings Weight**: 97.9% +- **Herfindahl-Hirschman Index (HHI)**: 1285.2800 +- **Effective Number of Positions**: 0.0 +- **Gini Coefficient**: 0.757 + +### Sector Allocation + +| Sector | Weight (%) | +|--------|------------| +| Financials | 36.60% | +| Technology | 27.10% | +| Consumer Staples | 17.10% | +| Energy | 12.80% | +| Healthcare | 2.10% | +| Communication Services | 1.70% | +| Other | 1.40% | +| Consumer Discretionary | 1.20% | + + +## Top 20 Holdings + +| Rank | Security | Shares | Value | Weight (%) | +|------|----------|--------|-------|------------| +| 1 | APPLE INC | 300,000,000 | $66639.0M | 25.80% | +| 2 | AMERICAN EXPRESS CO | 151,610,700 | $40790.9M | 15.80% | +| 3 | COCA COLA CO | 400,000,000 | $28648.0M | 11.10% | +| 4 | BANK AMER CORP | 631,573,531 | $26355.6M | 10.20% | +| 5 | CHEVRON CORP NEW | 118,610,534 | $19842.4M | 7.70% | +| 6 | OCCIDENTAL PETE CORP | 264,941,431 | $13077.5M | 5.10% | +| 7 | MOODYS CORP | 24,669,778 | 
$11488.5M | 4.40% | +| 8 | KRAFT HEINZ CO | 325,634,818 | $9909.1M | 3.80% | +| 9 | CHUBB LIMITED | 27,033,784 | $8163.9M | 3.20% | +| 10 | DAVITA INC | 35,142,479 | $5375.7M | 2.10% | +| 11 | KROGER CO | 50,000,000 | $3384.5M | 1.30% | +| 12 | VERISIGN INC | 13,289,880 | $3373.9M | 1.30% | +| 13 | VISA INC | 8,297,460 | $2907.9M | 1.10% | +| 14 | SIRIUS XM HOLDINGS INC | 119,776,692 | $2700.4M | 1.00% | +| 15 | CONSTELLATION BRANDS INC | 12,009,000 | $2203.9M | 0.90% | +| 16 | MASTERCARD INC | 3,986,648 | $2185.2M | 0.80% | +| 17 | AMAZON COM INC | 10,000,000 | $1902.6M | 0.70% | +| 18 | AON PLC | 4,100,000 | $1636.3M | 0.60% | +| 19 | CAPITAL ONE FINL CORP | 7,150,000 | $1282.0M | 0.50% | +| 20 | DOMINOS PIZZA INC | 2,620,613 | $1204.0M | 0.50% | + + +## Visualizations + +The following visualizations have been generated: + +1. **Portfolio Treemap** (`portfolio_treemap.png`): Treemap showing all holdings sized by market value +2. **Sector Allocation Pie Chart** (`sector_allocation.png`): Breakdown of portfolio by sector +3. **Top Holdings Bar Chart** (`top_holdings.png`): Bar chart of top 20 holdings by weight + +### Portfolio Treemap +![Portfolio Treemap](portfolio_treemap.png) + +### Sector Allocation +![Sector Allocation](sector_allocation.png) + +### Top Holdings +![Top Holdings](top_holdings.png) + +## Analysis Notes + +- **High Concentration**: The portfolio shows high concentration (HHI = 1285.280), indicating significant weight in top positions. +- **Dominant Position**: APPLE INC represents 25.8% of the portfolio. 
+ +--- + +*This report was automatically generated from 13F filing data available at [13f.info](https://13f.info)* diff --git a/scripts/guru_portfolio/output/portfolio_treemap.png b/scripts/guru_portfolio/output/portfolio_treemap.png new file mode 100644 index 0000000..a1d8f56 Binary files /dev/null and b/scripts/guru_portfolio/output/portfolio_treemap.png differ diff --git a/scripts/guru_portfolio/output/sector_allocation.png b/scripts/guru_portfolio/output/sector_allocation.png new file mode 100644 index 0000000..26e0cdc Binary files /dev/null and b/scripts/guru_portfolio/output/sector_allocation.png differ diff --git a/scripts/guru_portfolio/output/top_holdings.png b/scripts/guru_portfolio/output/top_holdings.png new file mode 100644 index 0000000..5da3536 Binary files /dev/null and b/scripts/guru_portfolio/output/top_holdings.png differ diff --git a/scripts/guru_portfolio/pyproject.toml b/scripts/guru_portfolio/pyproject.toml new file mode 100644 index 0000000..129a0b0 --- /dev/null +++ b/scripts/guru_portfolio/pyproject.toml @@ -0,0 +1,59 @@ +[project] +name = "13f-portfolio-visualization" +version = "0.1.0" +description = "13F Portfolio Data Extraction and Visualization Tool" +readme = "README.md" +requires-python = ">=3.8" +authors = [ + {name = "Finance Guru Team", email = "team@financeguru.com"} +] +keywords = ["finance", "portfolio", "visualization", "13f", "investment"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Financial and Insurance Industry", + "Topic :: Office/Business :: Financial :: Investment", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] + +dependencies = [ + "pandas>=2.0.0", + "plotly>=5.14.0", + "numpy>=1.24.0", + "requests>=2.28.0", + "beautifulsoup4>=4.11.0", + "lxml>=4.9.0", + "kaleido>=0.2.1", + "yfinance>=0.2.18", +] + 
+[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "black>=23.0.0", + "flake8>=6.0.0", + "mypy>=1.0.0", +] + +[project.scripts] +portfolio-viz = "main:main" + +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +include = ["modules*"] + +[tool.black] +line-length = 100 +target-version = ['py38', 'py39', 'py310', 'py311'] + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +ignore_missing_imports = true \ No newline at end of file diff --git a/scripts/guru_portfolio/test_performance_comparison.py b/scripts/guru_portfolio/test_performance_comparison.py new file mode 100644 index 0000000..307ac25 --- /dev/null +++ b/scripts/guru_portfolio/test_performance_comparison.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +""" +Test script for portfolio performance comparison feature +""" + +import subprocess +import sys +import os + +def test_performance_comparison(): + """Test the performance comparison feature with a well-known fund""" + + print("πŸ§ͺ Testing Portfolio Performance Comparison Feature") + print("="*60) + + # Test cases with different companies + test_cases = [ + { + "company": "Berkshire Hathaway Inc", + "quarter": "Q3 2024", + "lookback": 4, + "description": "Warren Buffett's Berkshire Hathaway" + }, + { + "company": "ARK Invest", + "quarter": "Q3 2024", + "lookback": 8, + "description": "Cathie Wood's ARK Invest (8 quarters lookback)" + } + ] + + for i, test in enumerate(test_cases, 1): + print(f"\nπŸ“Š Test Case {i}: {test['description']}") + print("-"*50) + + # Prepare command + cmd = [ + sys.executable, + "main.py", + test["company"], + test["quarter"], + "--compare-sp500", + "--lookback-quarters", str(test["lookback"]), + "--output-dir", f"test_output_{i}", + "--save-html" + ] + + print(f"Running: {' '.join(cmd)}") + + try: + # Run the command + result = subprocess.run(cmd, capture_output=True, text=True) + + if 
result.returncode == 0: + print("βœ… Test passed!") + print("\nOutput files created:") + output_dir = f"test_output_{i}" + if os.path.exists(output_dir): + for file in sorted(os.listdir(output_dir)): + print(f" - {file}") + else: + print("❌ Test failed!") + print(f"Error: {result.stderr}") + + except Exception as e: + print(f"❌ Exception occurred: {e}") + + print("\n" + "="*60) + print("πŸŽ‰ Performance comparison feature testing completed!") + print("\nTo view the results, check the test_output_* directories") + print("The performance_comparison.png files show the portfolio vs S&P 500 comparison") + +if __name__ == "__main__": + # First, ensure dependencies are installed + print("πŸ“¦ Installing dependencies...") + subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."], + capture_output=True) + + # Run the tests + test_performance_comparison() \ No newline at end of file