Skip to content

Commit 38713ca

Browse files
committed
Changes to config generation
1 parent 863013e commit 38713ca

File tree

4 files changed

+626
-61
lines changed

4 files changed

+626
-61
lines changed

config_generator.py

Lines changed: 137 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969

7070

7171
# Constants
72-
EXTENSIONS = ['v', 'sv', 'vhdl', 'vhd']
72+
EXTENSIONS = ['v', 'sv', 'vhdl', 'vhd'] # Note: .vm files are FPGA netlists, not RTL
7373
DESTINATION_DIR = './temp'
7474
UTILITY_PATTERNS = (
7575
"gen_", "dff", "buf", "full_handshake", "fifo", "mux", "regfile"
@@ -521,13 +521,16 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
521521
Rank module candidates to identify the best top module.
522522
Analyzes both module connectivity and instantiation patterns to distinguish CPU cores from SoC tops.
523523
"""
524-
children_of = _ensure_mapping(module_graph_inverse)
525-
parents_of = _ensure_mapping(module_graph)
524+
# module_graph: A -> [B, C] means A instantiates B and C
525+
# module_graph_inverse: B -> [A] means B is instantiated by A
526+
527+
instantiates = _ensure_mapping(module_graph) # What each module instantiates (its children)
528+
instantiated_by = _ensure_mapping(module_graph_inverse) # What instantiates each module (its parents)
526529

527-
nodes = set(children_of.keys()) | set(parents_of.keys())
530+
nodes = set(instantiated_by.keys()) | set(instantiates.keys())
528531
for n in nodes:
529-
children_of.setdefault(n, [])
530-
parents_of.setdefault(n, [])
532+
instantiated_by.setdefault(n, [])
533+
instantiates.setdefault(n, [])
531534

532535
# Filter out Verilog keywords and invalid module names
533536
valid_modules = []
@@ -539,9 +542,12 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
539542
(module.replace('_', '').isalnum())):
540543
valid_modules.append(module)
541544

542-
# Find candidates: modules with few parents are preferred
543-
zero_parent_modules = [m for m in valid_modules if not parents_of.get(m, [])]
544-
low_parent_modules = [m for m in valid_modules if len(parents_of.get(m, [])) <= 2]
545+
# Find candidates: modules with few parents (few modules instantiate them) are preferred as top modules
546+
zero_parent_modules = [m for m in valid_modules if not instantiated_by.get(m, [])]
547+
low_parent_modules = [m for m in valid_modules if len(instantiated_by.get(m, [])) <= 2]
548+
549+
# Always include standalone 'core', 'cpu', and 'processor' modules as candidates
550+
core_cpu_modules = [m for m in valid_modules if m.lower() in ['core', 'cpu', 'processor']]
545551

546552
# Include repo name matches even if they have many parents
547553
repo_name_matches = []
@@ -562,7 +568,6 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
562568
repo_lower in module_lower or
563569
module_lower in repo_lower):
564570
repo_name_matches.append(module)
565-
print_green(f"[REPO-MATCH] Found repo name match: {module} (parents: {len(parents_of.get(module, []))})")
566571

567572
# Also check for common variations
568573
repo_variations = [repo_lower, repo_lower.upper(), repo_lower.capitalize()]
@@ -575,7 +580,8 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
575580
# Enhanced CPU core detection using instantiation patterns
576581
if (any(pattern in module_lower for pattern in [repo_lower, 'cpu', 'core', 'risc', 'processor', 'microcontroller']) and
577582
module not in zero_parent_modules and module not in low_parent_modules and
578-
(module_lower == 'microcontroller' or not any(bad_pattern in module_lower for bad_pattern in
583+
(module_lower == 'microcontroller' or module_lower == 'core' or module_lower == 'cpu' or
584+
not any(bad_pattern in module_lower for bad_pattern in
579585
['div', 'mul', 'alu', 'fpu', 'cache', 'mem', 'bus', '_ctrl', 'ctrl_', 'reg', 'decode', 'fetch', 'exec', 'forward', 'hazard', 'pred',
580586
'sm3', 'sha', 'aes', 'des', 'rsa', 'ecc', 'crypto', 'hash', 'cipher', 'encrypt', 'decrypt', 'uart', 'spi', 'i2c', 'gpio',
581587
'timer', 'interrupt', 'dma', 'pll', 'clk', 'pwm', 'aon', 'hclk', 'oitf', 'wrapper', 'regs'])) and
@@ -601,27 +607,31 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
601607
if is_cpu_core and module not in repo_name_matches:
602608
cpu_core_matches.append(module)
603609

604-
candidates = list(set(zero_parent_modules + low_parent_modules + repo_name_matches + cpu_core_matches))
610+
candidates = list(set(zero_parent_modules + low_parent_modules + core_cpu_modules + repo_name_matches + cpu_core_matches))
605611

606612
if not candidates:
607613
candidates = valid_modules
608614

609615
repo_lower = (repo_name or "").lower()
610616
scored = []
611617

618+
# Normalize repo name: remove hyphens and underscores for matching
619+
repo_normalized = repo_lower.replace('-', '').replace('_', '')
620+
612621
for c in candidates:
613-
reach = _reachable_size(children_of, c)
622+
reach = _reachable_size(instantiates, c) # How many modules does this one instantiate (directly or indirectly)
614623
score = reach * 10 # Base score from connectivity
615624
name_lower = c.lower()
625+
name_normalized = name_lower.replace('_', '')
616626

617627
# REPOSITORY NAME MATCHING (Highest Priority)
618628
# Only apply repo matching if the module actually exists in the dependency graph
619-
if repo_lower and len(repo_lower) > 2 and c in module_graph:
620-
if repo_lower == name_lower:
629+
if repo_normalized and len(repo_normalized) > 2 and c in module_graph:
630+
if repo_normalized == name_normalized:
621631
score += 50000
622-
elif repo_lower in name_lower:
632+
elif repo_normalized in name_normalized:
623633
score += 40000
624-
elif name_lower in repo_lower:
634+
elif name_normalized in repo_normalized:
625635
score += 35000
626636
else:
627637
# Check initialism matching (e.g., "black-parrot" → "bp")
@@ -696,6 +706,9 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
696706
# Exception: don't penalize microcontroller
697707
if "microcontroller" not in name_lower:
698708
score -= 15000
709+
# Penalize subsystem cores - they're usually wrappers around the actual core
710+
elif "subsys" in name_lower or "subsystem" in name_lower:
711+
score -= 8000
699712
# Strong boost for exact core modules like "repo_core"
700713
elif name_lower == f"{repo_lower}_core" or name_lower == f"core_{repo_lower}":
701714
score += 25000
@@ -772,13 +785,21 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
772785
utility_prefixes = ['bsg_', 'common_', 'util_', 'lib_', 'helper_']
773786
if any(name_lower.startswith(prefix) for prefix in utility_prefixes):
774787
score -= 35000
775-
print_green(f"[PENALTY] Utility module {c} penalized (project uses {initialism}_* modules)")
776788

777789
# STRUCTURAL HEURISTICS
778-
num_children = len(children_of.get(c, []))
779-
num_parents = len(parents_of.get(c, []))
790+
num_children = len(instantiates.get(c, [])) # What this module instantiates
791+
num_parents = len(instantiated_by.get(c, [])) # Who instantiates this module
780792

781-
if num_children > 10 and num_parents == 0:
793+
# Boost CPU cores (modules with few parents and "core"/"cpu"/"processor" in name)
794+
# These are better targets for testing than SoC tops
795+
# Can have multiple parents (different top-level wrappers, test harnesses, etc.)
796+
is_likely_core = (num_parents >= 1 and num_parents <= 3 and
797+
any(pattern in name_lower for pattern in ['core', 'cpu', 'processor']) and
798+
not any(bad in name_lower for bad in ['_top', 'top_', 'soc', 'system', 'wrapper']))
799+
800+
if is_likely_core and num_children > 2:
801+
score += 25000 # Very strong preference for CPU cores
802+
elif num_children > 10 and num_parents == 0:
782803
score += 1000
783804
elif num_children > 5 and num_parents <= 1:
784805
score += 500
@@ -836,6 +857,67 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
836857

837858
scored.append((score, reach, c))
838859

860+
# CONDITIONAL TOP MODULE PENALTY
861+
# Check if there are any "core" or "cpu" candidates in the list that are better choices than "top"
862+
# This includes:
863+
# 1. Modules with "core", "cpu", "processor", "riscv", or "atom" in their names (excluding names containing _top/top_, soc, system, or wrapper)
864+
# 2. Exact matches like "CPU", "Core", "Processor", "RISCV" (standalone names)
865+
# 3. Exclude peripheral cores (SPI, UART, I2C, GPIO, etc.)
866+
peripheral_patterns = ['spi', 'uart', 'i2c', 'gpio', 'timer', 'pwm', 'adc', 'dac', 'can', 'usb', 'eth', 'pci']
867+
868+
has_core_candidates = any(
869+
(any(pattern in c.lower() for pattern in ['core', 'cpu', 'processor', 'riscv', 'atom']) or
870+
c in ['CPU', 'Core', 'Processor', 'CORE', 'RISCV'])
871+
and not any(bad in c.lower() for bad in ['_top', 'top_', 'soc', 'system', 'wrapper'])
872+
and not any(periph in c.lower() for periph in peripheral_patterns)
873+
for score, reach, c in scored
874+
)
875+
876+
# If core candidates exist, apply penalty to "top" modules and boost to core/cpu modules
877+
if has_core_candidates:
878+
adjusted_scored = []
879+
for score, reach, c in scored:
880+
name_lower = c.lower()
881+
# Check if this is a top-level wrapper
882+
num_parents = len(instantiated_by.get(c, []))
883+
884+
# Penalize only when the module has no parents (nothing instantiates it) and it either:
885+
# 1. Has "_top" or "top_" pattern (like e203_cpu_top, ibex_top)
886+
# 2. Is exactly named "top" (generic top module)
887+
is_top_wrapper = (num_parents == 0 and
888+
(any(pattern in name_lower for pattern in ['_top', 'top_']) or
889+
name_lower == 'top'))
890+
891+
# Boost if this is a CPU/core/RISCV module (exact matches or with cpu/core/riscv/atom in name)
892+
# Exclude peripheral cores (SPI_core, UART_core, etc.)
893+
is_cpu_core = (
894+
(c in ['CPU', 'Core', 'Processor', 'CORE', 'RISCV'] or
895+
any(pattern in name_lower for pattern in ['_cpu', 'cpu_', '_core', 'core_', 'riscv', 'atom']))
896+
and not any(periph in name_lower for periph in peripheral_patterns)
897+
)
898+
899+
# Check if this is a bus wrapper (has bus protocol suffix)
900+
bus_wrapper_patterns = ['_wb', '_axi', '_ahb', '_apb', '_obi', '_tilelink']
901+
is_bus_wrapper = any(pattern in name_lower for pattern in bus_wrapper_patterns)
902+
903+
# Always penalize top wrappers when core candidates exist, even if they have core/cpu/riscv in name
904+
# (e.g., RISCV_TOP should be penalized in favor of RISCV)
905+
if is_top_wrapper:
906+
# Apply a strong penalty to prefer cores over wrappers
907+
score -= 15000 # Strong penalty to overcome structural advantage
908+
print_yellow(f"[RANKING] Applying top-wrapper penalty to {c} (core/cpu candidates available)")
909+
elif is_cpu_core and is_bus_wrapper:
910+
# Bus wrappers get a smaller boost (prefer the unwrapped core)
911+
score += 5000 # Moderate boost for bus-wrapped cores
912+
print_yellow(f"[RANKING] Applying bus-wrapper boost to {c}")
913+
elif is_cpu_core and not any(bad in name_lower for bad in ['_top', 'top_', 'soc', 'system', 'wrapper']):
914+
# Pure cores get the full boost
915+
score += 10000 # Significant boost for CPU/core modules
916+
print_yellow(f"[RANKING] Applying CPU/core boost to {c}")
917+
918+
adjusted_scored.append((score, reach, c))
919+
scored = adjusted_scored
920+
839921
# Sort by score (descending), then by reach (descending), then by name
840922
scored.sort(reverse=True, key=lambda t: (t[0], t[1], t[2]))
841923

@@ -1363,7 +1445,6 @@ def build_and_log_graphs(files: list, modules: list, destination_path: str = Non
13631445
absolute_files = [os.path.join(destination_path, f) if not os.path.isabs(f) else f for f in files]
13641446
else:
13651447
absolute_files = files
1366-
13671448
module_graph, module_graph_inverse = build_module_graph(absolute_files, modules)
13681449
print_green('[LOG] Grafos construídos com sucesso\n')
13691450
return module_graph, module_graph_inverse
@@ -1400,6 +1481,7 @@ def generate_processor_config(
14001481
add_to_config: bool = False,
14011482
no_llama: bool = False,
14021483
model: str = 'qwen2.5:32b',
1484+
local_repo: str = None,
14031485
) -> dict:
14041486
"""
14051487
Main function to generate a processor configuration.
@@ -1411,11 +1493,18 @@ def generate_processor_config(
14111493
add_to_config: Whether to add to central config
14121494
no_llama: Skip OLLAMA processing
14131495
model: OLLAMA model to use
1496+
local_repo: Path to local repository (skips cloning if provided)
14141497
"""
14151498
repo_name = extract_repo_name(url)
1416-
destination_path = clone_and_validate_repo(url, repo_name)
1417-
if not destination_path:
1418-
return {}
1499+
1500+
# Use local repo if provided, otherwise clone
1501+
if local_repo and os.path.exists(local_repo):
1502+
destination_path = os.path.abspath(local_repo)
1503+
print_green(f"[LOG] Using local repository: {destination_path}")
1504+
else:
1505+
destination_path = clone_and_validate_repo(url, repo_name)
1506+
if not destination_path:
1507+
return {}
14191508

14201509
files, extension = find_and_log_files(destination_path)
14211510
modulename_list, modules = extract_and_log_modules(files, destination_path)
@@ -1446,12 +1535,18 @@ def generate_processor_config(
14461535
pass
14471536
include_dirs = find_and_log_include_dirs(destination_path)
14481537
module_graph, module_graph_inverse = build_and_log_graphs(non_tb_files, modules, destination_path)
1449-
14501538
filtered_files, top_module = process_files_with_llama(
14511539
no_llama, non_tb_files, tb_files, modules, module_graph, repo_name, model,
14521540
)
14531541
language_version = determine_language_version(extension, filtered_files, destination_path)
14541542

1543+
# Processor-specific Verilator flags
1544+
verilator_flags = ['-Wno-lint', '-Wno-fatal', '-Wno-style', '-Wno-UNOPTFLAT', '-Wno-UNDRIVEN', '-Wno-UNUSED', '-Wno-TIMESCALEMOD', '-Wno-PROTECTED', '-Wno-MODDUP', '-Wno-REDEFMACRO', '-Wno-BLKANDNBLK', '-Wno-SYMRSVDWORD']
1545+
1546+
# orv64: Define FPGA to use pre-synthesized .vm module implementations instead of missing DW IP
1547+
if 'orv64' in repo_name.lower():
1548+
verilator_flags.append('-DFPGA')
1549+
14551550
final_files, final_include_dirs, last_log, top_module, is_simulable = interactive_simulate_and_minimize(
14561551
repo_root=destination_path,
14571552
repo_name=repo_name,
@@ -1464,7 +1559,7 @@ def generate_processor_config(
14641559
module_graph_inverse=module_graph_inverse,
14651560
language_version=language_version,
14661561
maximize_attempts=6,
1467-
verilator_extra_flags=['-Wno-lint', '-Wno-fatal', '-Wno-style', '-Wno-UNOPTFLAT', '-Wno-UNDRIVEN', '-Wno-UNUSED', '-Wno-TIMESCALEMOD', '-Wno-PROTECTED', '-Wno-MODDUP', '-Wno-REDEFMACRO', '-Wno-BLKANDNBLK', '-Wno-SYMRSVDWORD'],
1562+
verilator_extra_flags=verilator_flags,
14681563
ghdl_extra_flags=['--std=08', '-frelaxed'],
14691564
)
14701565

@@ -1557,10 +1652,13 @@ def generate_processor_config(
15571652
except Exception as e:
15581653
print_yellow(f'[WARN] Falha ao salvar o log: {e}')
15591654

1560-
# Cleanup
1561-
print_green('[LOG] Removendo o repositório clonado\n')
1562-
remove_repo(repo_name)
1563-
print_green('[LOG] Repositório removido com sucesso\n')
1655+
# Cleanup - only remove if we cloned it (not using local repo)
1656+
if not local_repo:
1657+
print_green('[LOG] Removendo o repositório clonado\n')
1658+
remove_repo(repo_name)
1659+
print_green('[LOG] Repositório removido com sucesso\n')
1660+
else:
1661+
print_green('[LOG] Mantendo repositório local (não foi clonado)\n')
15641662

15651663
# Plot graph if requested
15661664
if plot_graph:
@@ -1621,6 +1719,13 @@ def main() -> None:
16211719
default='qwen2.5:32b',
16221720
help='OLLAMA model to use'
16231721
)
1722+
parser.add_argument(
1723+
'-l',
1724+
'--local-repo',
1725+
type=str,
1726+
default=None,
1727+
help='Path to local repository (skips cloning if provided)'
1728+
)
16241729

16251730
args = parser.parse_args()
16261731

@@ -1632,6 +1737,7 @@ def main() -> None:
16321737
args.add_to_config,
16331738
args.no_llama,
16341739
args.model,
1740+
args.local_repo,
16351741
)
16361742
print('Result: ')
16371743
print(json.dumps(config, indent=4))

core/file_manager.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,11 @@ def should_exclude_file(file_path: str, base_directory: str = None) -> bool:
225225
if exclude_dir in rel_path:
226226
return True
227227

228+
# Exclude duplicated lib/lib directory (e.g., rtl/lib/lib/* in orv64)
229+
# This handles repositories with nested duplicate directory structures
230+
if '/lib/lib/' in rel_path.replace('\\', '/'):
231+
return True
232+
228233
# For vendor directories, be more selective - exclude obvious problematic files
229234
if 'vendor' in rel_path:
230235
# Exclude Google RISC-V DV completely
@@ -311,7 +316,11 @@ def find_files_with_extension(
311316
for ext in extensions:
312317
found_files = glob.glob(f'{directory}/**/*.{ext}', recursive=True)
313318

314-
for file_path in found_files:
319+
for file_path in found_files:
320+
# Skip broken symlinks
321+
if os.path.islink(file_path) and not os.path.exists(file_path):
322+
continue
323+
315324
if not should_exclude_file(file_path, directory):
316325
files.append(file_path)
317326

@@ -498,6 +507,7 @@ def find_missing_module_files(directory: str, missing_module_names: list) -> lis
498507

499508
try:
500509
# Search for .v/.sv files that might contain the missing modules
510+
# Note: .vm files are FPGA netlists (not RTL) and cannot be used with Verilator
501511
for extension in ['**/*.v', '**/*.sv']:
502512
source_files = glob.glob(os.path.join(directory, extension), recursive=True)
503513

@@ -553,6 +563,9 @@ def extract_modules(files: list[str]) -> list[tuple[str, str]]:
553563
entity_pattern_vhdl = re.compile(r'^\s*entity\s+(\w+)\s+is', re.IGNORECASE | re.MULTILINE)
554564

555565
for file_path in files:
566+
# Convert to absolute path to ensure consistency
567+
abs_file_path = os.path.abspath(file_path)
568+
556569
with open(file_path, 'r', errors='ignore', encoding='utf-8') as f:
557570
content = f.read()
558571

@@ -565,7 +578,7 @@ def extract_modules(files: list[str]) -> list[tuple[str, str]]:
565578
verilog_matches = module_pattern_verilog.findall(content)
566579
modules.extend(
567580
[
568-
(module_name, os.path.relpath(file_path))
581+
(module_name, abs_file_path) # Use absolute path for consistency
569582
for module_name in verilog_matches
570583
]
571584
)
@@ -574,7 +587,7 @@ def extract_modules(files: list[str]) -> list[tuple[str, str]]:
574587
vhdl_matches = entity_pattern_vhdl.findall(content)
575588
modules.extend(
576589
[
577-
(entity_name, os.path.relpath(file_path))
590+
(entity_name, abs_file_path) # Use absolute path for consistency
578591
for entity_name in vhdl_matches
579592
]
580593
)

0 commit comments

Comments
 (0)