6969
7070
7171# Constants
72- EXTENSIONS = ['v' , 'sv' , 'vhdl' , 'vhd' ]
72+ EXTENSIONS = ['v' , 'sv' , 'vhdl' , 'vhd' ] # Note: .vm files are FPGA netlists, not RTL
7373DESTINATION_DIR = './temp'
7474UTILITY_PATTERNS = (
7575 "gen_" , "dff" , "buf" , "full_handshake" , "fifo" , "mux" , "regfile"
@@ -521,13 +521,16 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
521521 Rank module candidates to identify the best top module.
522522 Analyzes both module connectivity and instantiation patterns to distinguish CPU cores from SoC tops.
523523 """
524- children_of = _ensure_mapping (module_graph_inverse )
525- parents_of = _ensure_mapping (module_graph )
524+ # module_graph: A -> [B, C] means A instantiates B and C
525+ # module_graph_inverse: B -> [A] means B is instantiated by A
526+
527+ instantiates = _ensure_mapping (module_graph ) # What each module instantiates (its children)
528+ instantiated_by = _ensure_mapping (module_graph_inverse ) # What instantiates each module (its parents)
526529
527- nodes = set (children_of .keys ()) | set (parents_of .keys ())
530+ nodes = set (instantiated_by .keys ()) | set (instantiates .keys ())
528531 for n in nodes :
529- children_of .setdefault (n , [])
530- parents_of .setdefault (n , [])
532+ instantiated_by .setdefault (n , [])
533+ instantiates .setdefault (n , [])
531534
532535 # Filter out Verilog keywords and invalid module names
533536 valid_modules = []
@@ -539,9 +542,12 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
539542 (module .replace ('_' , '' ).isalnum ())):
540543 valid_modules .append (module )
541544
542- # Find candidates: modules with few parents are preferred
543- zero_parent_modules = [m for m in valid_modules if not parents_of .get (m , [])]
544- low_parent_modules = [m for m in valid_modules if len (parents_of .get (m , [])) <= 2 ]
545+ # Find candidates: modules with few parents (few modules instantiate them) are preferred as top modules
546+ zero_parent_modules = [m for m in valid_modules if not instantiated_by .get (m , [])]
547+ low_parent_modules = [m for m in valid_modules if len (instantiated_by .get (m , [])) <= 2 ]
548+
549+ # Always include modules named exactly 'core', 'cpu', or 'processor' (case-insensitive) as candidates
550+ core_cpu_modules = [m for m in valid_modules if m .lower () in ['core' , 'cpu' , 'processor' ]]
545551
546552 # Include repo name matches even if they have many parents
547553 repo_name_matches = []
@@ -562,7 +568,6 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
562568 repo_lower in module_lower or
563569 module_lower in repo_lower ):
564570 repo_name_matches .append (module )
565- print_green (f"[REPO-MATCH] Found repo name match: { module } (parents: { len (parents_of .get (module , []))} )" )
566571
567572 # Also check for common variations
568573 repo_variations = [repo_lower , repo_lower .upper (), repo_lower .capitalize ()]
@@ -575,7 +580,8 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
575580 # Enhanced CPU core detection using instantiation patterns
576581 if (any (pattern in module_lower for pattern in [repo_lower , 'cpu' , 'core' , 'risc' , 'processor' , 'microcontroller' ]) and
577582 module not in zero_parent_modules and module not in low_parent_modules and
578- (module_lower == 'microcontroller' or not any (bad_pattern in module_lower for bad_pattern in
583+ (module_lower == 'microcontroller' or module_lower == 'core' or module_lower == 'cpu' or
584+ not any (bad_pattern in module_lower for bad_pattern in
579585 ['div' , 'mul' , 'alu' , 'fpu' , 'cache' , 'mem' , 'bus' , '_ctrl' , 'ctrl_' , 'reg' , 'decode' , 'fetch' , 'exec' , 'forward' , 'hazard' , 'pred' ,
580586 'sm3' , 'sha' , 'aes' , 'des' , 'rsa' , 'ecc' , 'crypto' , 'hash' , 'cipher' , 'encrypt' , 'decrypt' , 'uart' , 'spi' , 'i2c' , 'gpio' ,
581587 'timer' , 'interrupt' , 'dma' , 'pll' , 'clk' , 'pwm' , 'aon' , 'hclk' , 'oitf' , 'wrapper' , 'regs' ])) and
@@ -601,27 +607,31 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
601607 if is_cpu_core and module not in repo_name_matches :
602608 cpu_core_matches .append (module )
603609
604- candidates = list (set (zero_parent_modules + low_parent_modules + repo_name_matches + cpu_core_matches ))
610+ candidates = list (set (zero_parent_modules + low_parent_modules + core_cpu_modules + repo_name_matches + cpu_core_matches ))
605611
606612 if not candidates :
607613 candidates = valid_modules
608614
609615 repo_lower = (repo_name or "" ).lower ()
610616 scored = []
611617
618+ # Normalize repo name: remove hyphens and underscores for matching
619+ repo_normalized = repo_lower .replace ('-' , '' ).replace ('_' , '' )
620+
612621 for c in candidates :
613- reach = _reachable_size (children_of , c )
622+ reach = _reachable_size (instantiates , c ) # How many modules does this one instantiate (directly or indirectly )
614623 score = reach * 10 # Base score from connectivity
615624 name_lower = c .lower ()
625+ name_normalized = name_lower .replace ('_' , '' )
616626
617627 # REPOSITORY NAME MATCHING (Highest Priority)
618628 # Only apply repo matching if the module actually exists in the dependency graph
619- if repo_lower and len (repo_lower ) > 2 and c in module_graph :
620- if repo_lower == name_lower :
629+ if repo_normalized and len (repo_normalized ) > 2 and c in module_graph :
630+ if repo_normalized == name_normalized :
621631 score += 50000
622- elif repo_lower in name_lower :
632+ elif repo_normalized in name_normalized :
623633 score += 40000
624- elif name_lower in repo_lower :
634+ elif name_normalized in repo_normalized :
625635 score += 35000
626636 else :
627637 # Check initialism matching (e.g., "black-parrot" → "bp")
@@ -696,6 +706,9 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
696706 # Exception: don't penalize microcontroller
697707 if "microcontroller" not in name_lower :
698708 score -= 15000
709+ # Penalize subsystem cores - they're usually wrappers around the actual core
710+ elif "subsys" in name_lower or "subsystem" in name_lower :
711+ score -= 8000
699712 # Strong boost for exact core modules like "repo_core"
700713 elif name_lower == f"{ repo_lower } _core" or name_lower == f"core_{ repo_lower } " :
701714 score += 25000
@@ -772,13 +785,21 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
772785 utility_prefixes = ['bsg_' , 'common_' , 'util_' , 'lib_' , 'helper_' ]
773786 if any (name_lower .startswith (prefix ) for prefix in utility_prefixes ):
774787 score -= 35000
775- print_green (f"[PENALTY] Utility module { c } penalized (project uses { initialism } _* modules)" )
776788
777789 # STRUCTURAL HEURISTICS
778- num_children = len (children_of .get (c , []))
779- num_parents = len (parents_of .get (c , []))
790+ num_children = len (instantiates .get (c , [])) # What this module instantiates
791+ num_parents = len (instantiated_by .get (c , [])) # Who instantiates this module
780792
781- if num_children > 10 and num_parents == 0 :
793+ # Boost CPU cores (1-3 parents, more than 2 children, "core"/"cpu"/"processor" in name, and not a top/SoC/system/wrapper module)
794+ # These are better targets for testing than SoC tops
795+ # Can have multiple parents (different top-level wrappers, test harnesses, etc.)
796+ is_likely_core = (num_parents >= 1 and num_parents <= 3 and
797+ any (pattern in name_lower for pattern in ['core' , 'cpu' , 'processor' ]) and
798+ not any (bad in name_lower for bad in ['_top' , 'top_' , 'soc' , 'system' , 'wrapper' ]))
799+
800+ if is_likely_core and num_children > 2 :
801+ score += 25000 # Very strong preference for CPU cores
802+ elif num_children > 10 and num_parents == 0 :
782803 score += 1000
783804 elif num_children > 5 and num_parents <= 1 :
784805 score += 500
@@ -836,6 +857,67 @@ def rank_top_candidates(module_graph, module_graph_inverse, repo_name=None, modu
836857
837858 scored .append ((score , reach , c ))
838859
860+ # CONDITIONAL TOP MODULE PENALTY
861+ # Check if there are any "core" or "cpu" candidates in the list that are better choices than "top"
862+ # This includes:
863+ # 1. Modules with "core", "cpu", "processor", "riscv", or "atom" in their names (excluding names containing _top/top_, soc, system, or wrapper)
864+ # 2. Exact matches like "CPU", "Core", "Processor", "RISCV" (standalone names)
865+ # 3. Exclude peripheral cores (SPI, UART, I2C, GPIO, etc.)
866+ peripheral_patterns = ['spi' , 'uart' , 'i2c' , 'gpio' , 'timer' , 'pwm' , 'adc' , 'dac' , 'can' , 'usb' , 'eth' , 'pci' ]
867+
868+ has_core_candidates = any (
869+ (any (pattern in c .lower () for pattern in ['core' , 'cpu' , 'processor' , 'riscv' , 'atom' ]) or
870+ c in ['CPU' , 'Core' , 'Processor' , 'CORE' , 'RISCV' ])
871+ and not any (bad in c .lower () for bad in ['_top' , 'top_' , 'soc' , 'system' , 'wrapper' ])
872+ and not any (periph in c .lower () for periph in peripheral_patterns )
873+ for score , reach , c in scored
874+ )
875+
876+ # If core candidates exist, apply penalty to "top" modules and boost to core/cpu modules
877+ if has_core_candidates :
878+ adjusted_scored = []
879+ for score , reach , c in scored :
880+ name_lower = c .lower ()
881+ # Check if this is a top-level wrapper
882+ num_parents = len (instantiated_by .get (c , []))
883+
884+ # Penalize only modules with no parents (num_parents == 0) whose name:
885+ # 1. Has "_top" or "top_" pattern (like e203_cpu_top, ibex_top)
886+ # 2. Is exactly named "top" (generic top module)
887+ is_top_wrapper = (num_parents == 0 and
888+ (any (pattern in name_lower for pattern in ['_top' , 'top_' ]) or
889+ name_lower == 'top' ))
890+
891+ # Boost if this is a CPU/core/RISCV module (exact matches or with cpu/core/riscv/atom in name)
892+ # Exclude peripheral cores (SPI_core, UART_core, etc.)
893+ is_cpu_core = (
894+ (c in ['CPU' , 'Core' , 'Processor' , 'CORE' , 'RISCV' ] or
895+ any (pattern in name_lower for pattern in ['_cpu' , 'cpu_' , '_core' , 'core_' , 'riscv' , 'atom' ]))
896+ and not any (periph in name_lower for periph in peripheral_patterns )
897+ )
898+
899+ # Check if this is a bus wrapper (has bus protocol suffix)
900+ bus_wrapper_patterns = ['_wb' , '_axi' , '_ahb' , '_apb' , '_obi' , '_tilelink' ]
901+ is_bus_wrapper = any (pattern in name_lower for pattern in bus_wrapper_patterns )
902+
903+ # Always penalize top wrappers when core candidates exist, even if they have core/cpu/riscv in name
904+ # (e.g., RISCV_TOP should be penalized in favor of RISCV)
905+ if is_top_wrapper :
906+ # Apply a strong penalty to prefer cores over wrappers
907+ score -= 15000 # Strong penalty to overcome structural advantage
908+ print_yellow (f"[RANKING] Applying top-wrapper penalty to { c } (core/cpu candidates available)" )
909+ elif is_cpu_core and is_bus_wrapper :
910+ # Bus wrappers get a smaller boost (prefer the unwrapped core)
911+ score += 5000 # Moderate boost for bus-wrapped cores
912+ print_yellow (f"[RANKING] Applying bus-wrapper boost to { c } " )
913+ elif is_cpu_core and not any (bad in name_lower for bad in ['_top' , 'top_' , 'soc' , 'system' , 'wrapper' ]):
914+ # Pure cores get the full boost
915+ score += 10000 # Significant boost for CPU/core modules
916+ print_yellow (f"[RANKING] Applying CPU/core boost to { c } " )
917+
918+ adjusted_scored .append ((score , reach , c ))
919+ scored = adjusted_scored
920+
839921 # Sort by score (descending), then by reach (descending), then by name (also descending, since reverse=True applies to the whole key)
840922 scored .sort (reverse = True , key = lambda t : (t [0 ], t [1 ], t [2 ]))
841923
@@ -1363,7 +1445,6 @@ def build_and_log_graphs(files: list, modules: list, destination_path: str = Non
13631445 absolute_files = [os .path .join (destination_path , f ) if not os .path .isabs (f ) else f for f in files ]
13641446 else :
13651447 absolute_files = files
1366-
13671448 module_graph , module_graph_inverse = build_module_graph (absolute_files , modules )
13681449 print_green ('[LOG] Grafos construídos com sucesso\n ' )
13691450 return module_graph , module_graph_inverse
@@ -1400,6 +1481,7 @@ def generate_processor_config(
14001481 add_to_config : bool = False ,
14011482 no_llama : bool = False ,
14021483 model : str = 'qwen2.5:32b' ,
1484+ local_repo : str = None ,
14031485) -> dict :
14041486 """
14051487 Main function to generate a processor configuration.
@@ -1411,11 +1493,18 @@ def generate_processor_config(
14111493 add_to_config: Whether to add to central config
14121494 no_llama: Skip OLLAMA processing
14131495 model: OLLAMA model to use
1496+ local_repo: Path to local repository (skips cloning if provided and the path exists; otherwise the repository is cloned)
14141497 """
14151498 repo_name = extract_repo_name (url )
1416- destination_path = clone_and_validate_repo (url , repo_name )
1417- if not destination_path :
1418- return {}
1499+
1500+ # Use local repo if provided, otherwise clone
1501+ if local_repo and os .path .exists (local_repo ):
1502+ destination_path = os .path .abspath (local_repo )
1503+ print_green (f"[LOG] Using local repository: { destination_path } " )
1504+ else :
1505+ destination_path = clone_and_validate_repo (url , repo_name )
1506+ if not destination_path :
1507+ return {}
14191508
14201509 files , extension = find_and_log_files (destination_path )
14211510 modulename_list , modules = extract_and_log_modules (files , destination_path )
@@ -1446,12 +1535,18 @@ def generate_processor_config(
14461535 pass
14471536 include_dirs = find_and_log_include_dirs (destination_path )
14481537 module_graph , module_graph_inverse = build_and_log_graphs (non_tb_files , modules , destination_path )
1449-
14501538 filtered_files , top_module = process_files_with_llama (
14511539 no_llama , non_tb_files , tb_files , modules , module_graph , repo_name , model ,
14521540 )
14531541 language_version = determine_language_version (extension , filtered_files , destination_path )
14541542
1543+ # Default Verilator warning-suppression flags; processor-specific additions follow
1544+ verilator_flags = ['-Wno-lint' , '-Wno-fatal' , '-Wno-style' , '-Wno-UNOPTFLAT' , '-Wno-UNDRIVEN' , '-Wno-UNUSED' , '-Wno-TIMESCALEMOD' , '-Wno-PROTECTED' , '-Wno-MODDUP' , '-Wno-REDEFMACRO' , '-Wno-BLKANDNBLK' , '-Wno-SYMRSVDWORD' ]
1545+
1546+ # orv64: Define FPGA to use pre-synthesized .vm module implementations instead of missing DW IP
1547+ if 'orv64' in repo_name .lower ():
1548+ verilator_flags .append ('-DFPGA' )
1549+
14551550 final_files , final_include_dirs , last_log , top_module , is_simulable = interactive_simulate_and_minimize (
14561551 repo_root = destination_path ,
14571552 repo_name = repo_name ,
@@ -1464,7 +1559,7 @@ def generate_processor_config(
14641559 module_graph_inverse = module_graph_inverse ,
14651560 language_version = language_version ,
14661561 maximize_attempts = 6 ,
1467- verilator_extra_flags = [ '-Wno-lint' , '-Wno-fatal' , '-Wno-style' , '-Wno-UNOPTFLAT' , '-Wno-UNDRIVEN' , '-Wno-UNUSED' , '-Wno-TIMESCALEMOD' , '-Wno-PROTECTED' , '-Wno-MODDUP' , '-Wno-REDEFMACRO' , '-Wno-BLKANDNBLK' , '-Wno-SYMRSVDWORD' ] ,
1562+ verilator_extra_flags = verilator_flags ,
14681563 ghdl_extra_flags = ['--std=08' , '-frelaxed' ],
14691564 )
14701565
@@ -1557,10 +1652,13 @@ def generate_processor_config(
15571652 except Exception as e :
15581653 print_yellow (f'[WARN] Falha ao salvar o log: { e } ' )
15591654
1560- # Cleanup
1561- print_green ('[LOG] Removendo o repositório clonado\n ' )
1562- remove_repo (repo_name )
1563- print_green ('[LOG] Repositório removido com sucesso\n ' )
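1655+ # Cleanup - skipped whenever local_repo is set. NOTE(review): a local_repo path that does not exist still triggers a clone above, and that clone is never removed here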
1656+ if not local_repo :
1657+ print_green ('[LOG] Removendo o repositório clonado\n ' )
1658+ remove_repo (repo_name )
1659+ print_green ('[LOG] Repositório removido com sucesso\n ' )
1660+ else :
1661+ print_green ('[LOG] Mantendo repositório local (não foi clonado)\n ' )
15641662
15651663 # Plot graph if requested
15661664 if plot_graph :
@@ -1621,6 +1719,13 @@ def main() -> None:
16211719 default = 'qwen2.5:32b' ,
16221720 help = 'OLLAMA model to use'
16231721 )
1722+ parser .add_argument (
1723+ '-l' ,
1724+ '--local-repo' ,
1725+ type = str ,
1726+ default = None ,
1727+ help = 'Path to local repository (skips cloning if provided)'
1728+ )
16241729
16251730 args = parser .parse_args ()
16261731
@@ -1632,6 +1737,7 @@ def main() -> None:
16321737 args .add_to_config ,
16331738 args .no_llama ,
16341739 args .model ,
1740+ args .local_repo ,
16351741 )
16361742 print ('Result: ' )
16371743 print (json .dumps (config , indent = 4 ))
0 commit comments