Skip to content

Commit b0462b6

Browse files
Release 2025.11.1 (#27)
* Release 2025.11.1 (#340) # 🎈 Minor Updates - Adds support for translating C code with self-references. - Adds a fallback `unimplemented!()` C FFI wrapper. # 🚧 Bug fixes - Fixed a bug related to the template for `ideas.wrapper`. --------- Co-authored-by: Cory Cornelius <[email protected]> * Release 2025.11.1.patch1 (#349) (#25) # 🎈 Minor Updates - Resolve source priority paths in `ideas.init`. - Changed default model temperature to `0.0`. - Removed slow AST C parsing functionality. --------- Co-authored-by: Cory Cornelius <[email protected]> * Release 2025.11.1.patch2 (#360) (#26) # 🎈 Minor Updates - Add all past translated symbols to the current symbol in `ideas.translate_recurrent` - Enable `unsafe` Rust compilation errors in `ideas.translate_recurrent` - Clean up `ideas.utils` # 🚧 Fixes - Fix a bug related to `enum` constants not being captured as standalone symbols - Fix `ideas.wrapper` prompt to correctly reference the final on-disk location --------- Co-authored-by: Cory Cornelius <[email protected]>
1 parent a84ba74 commit b0462b6

File tree

11 files changed

+172
-171
lines changed

11 files changed

+172
-171
lines changed

IDEAS.mk

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,9 @@ GREEN_COL := \033[1;32m
3333
PROJECT_C_FILES = $(shell jq -r 'map(.file) | .[] | @text' build-ninja/compile_commands.json)
3434
C_FILES = $(subst ${CURDIR}/test_case/,,${PROJECT_C_FILES})
3535
TEST_FILES := $(wildcard test_vectors/*.json)
36-
TARGETS := $(shell find build-ninja -maxdepth 1 -type f -executable -exec basename {} \; | cut -d. -f1 | sed -e "s/^lib//gi")
37-
ARTIFACTS := $(shell find build-ninja -maxdepth 1 -type f -executable -exec basename {} \;)
36+
TARGETS ?= $(shell find build-ninja -maxdepth 1 -type f -executable -exec basename {} \; | cut -d. -f1 | sed -e "s/^lib//gi")
3837
ifeq (${TARGETS},)
39-
ifneq (${MAKECMDGOALS},cmake)
38+
ifeq ($(filter cmake clean,$(MAKECMDGOALS)),)
4039
$(error No TARGETS found! You need to run cmake!)
4140
endif
4241
endif
@@ -264,7 +263,7 @@ wrapper: $(patsubst %,${TRANSLATION_DIR}/%/wrapper.log,${TARGETS}) ;
264263
${TRANSLATION_DIR}/%/wrapper.log: ${TRANSLATION_DIR}/%/translate.log | build-ninja/lib%.so.symbols
265264
@mkdir -p $(@D)/src/wrapper
266265
-@cat build-ninja/lib$*.so.symbols | xargs -t -I{} bindgen --disable-header-comment --no-doc-comments --no-layout-tests $(@D)/src/lib.c --allowlist-function {} -o $(@D)/src/wrapper/{}.rs
267-
-@cat build-ninja/lib$*.so.symbols | xargs -t -I{} sed -zEe 's/\nunsafe extern "C" \{\s+(.*);\s+}/\n\#[unsafe(export_name = "{}")]\1 {\n unimplemented!();\n}/gi' -i $(@D)/src/wrapper/{}.rs
266+
-@cat build-ninja/lib$*.so.symbols | xargs -t -I{} sed -zEe 's/unsafe extern "C" \{\s+(.*);\s+}/\n\#[unsafe(export_name = "{}")]\n\1 {\n unimplemented!();\n}/gi' -i $(@D)/src/wrapper/{}.rs
268267
-@cat build-ninja/lib$*.so.symbols | xargs -t -I{} sed -e 's/pub fn/pub extern "C" fn/gi' -i $(@D)/src/wrapper/{}.rs
269268
-@cat build-ninja/lib$*.so.symbols | xargs -t -I{} rustfmt ${@D}/src/wrapper/{}.rs
270269
-uv run python -m ideas.wrapper model.name=${PROVIDER}/${MODEL} \
@@ -328,4 +327,4 @@ repair: ${TRANSLATION_DIR}/translate.log \
328327
clean:
329328
rm -rf $(addprefix test_case/,$(addsuffix .i,${C_FILES}))
330329
rm -rf build-ninja
331-
rm -rf ${TRANSLATION_DIR}
330+
find . -name Cargo.toml -exec cargo clean --quiet --manifest-path {} \;

src/ideas/ast.py

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from clang.cindex import conf, c_object_p
1212
from ctypes import pointer, c_size_t, c_char_p, c_uint, c_int
1313

14-
from ideas.utils import Symbol, filter_edges_by_set, get_all_deps
14+
from ideas.utils import Symbol
1515

1616
FILENAME = "file.c"
1717

@@ -48,18 +48,6 @@ class TreeResult:
4848
complete_graph: dict[str, list[Symbol]] = field(
4949
default_factory=lambda: defaultdict(lambda: list())
5050
)
51-
top_level_ref_graph: dict[str, list[Symbol]] = field(
52-
default_factory=lambda: defaultdict(lambda: list())
53-
)
54-
55-
def get_top_level_symbols_for_name(self, name: str) -> list[str]:
56-
ref_fn = [
57-
self.symbols[symbol.name].decl
58-
for symbol in self.top_level_ref_graph[name]
59-
if symbol.name in self.symbols
60-
]
61-
62-
return ref_fn
6351

6452

6553
def create_translation_unit(code: str) -> TranslationUnit:
@@ -93,6 +81,7 @@ def extract_info_c(tu: TranslationUnit) -> TreeResult:
9381
for child in node.get_children():
9482
# Register a dependency on the underlying enum for each enumerator
9583
if child.kind == CursorKind.ENUM_CONSTANT_DECL: # type: ignore[reportAttributeAccess]
84+
result.symbols[child.get_usr()] = Symbol(child.get_usr(), child)
9685
result.complete_graph[child.get_usr()].append(result.symbols[usr])
9786

9887
# Typedefs
@@ -119,6 +108,7 @@ def extract_info_c(tu: TranslationUnit) -> TreeResult:
119108

120109
# Register a dependency on the typedef for each (possibly deeply nested) enumerator
121110
if child.kind == CursorKind.ENUM_CONSTANT_DECL: # type: ignore[reportAttributeAccessIssue]
111+
result.symbols[child.get_usr()] = Symbol(child.get_usr(), child)
122112
result.complete_graph[child.get_usr()].append(result.symbols[usr])
123113

124114
# All other declarations
@@ -137,16 +127,6 @@ def extract_info_c(tu: TranslationUnit) -> TreeResult:
137127
# All referenced symbols
138128
result.complete_graph[usr] = extract_referenced_symbols(node)
139129

140-
# Resolve top-level dependencies
141-
cache = {}
142-
for name in result.symbols.keys():
143-
# Get all dependencies of this symbol
144-
expanded_deps = get_all_deps(result.complete_graph, name, cache=cache)
145-
# Add the dependencies to the top-level graph
146-
result.top_level_ref_graph[name] = filter_edges_by_set(
147-
expanded_deps, result.symbols.keys()
148-
)
149-
150130
return result
151131

152132

@@ -159,6 +139,9 @@ def extract_referenced_symbols(node: Cursor) -> list[Symbol]:
159139
# Ignore internal references to, e.g., function parameters
160140
if child_node.referenced is None:
161141
continue
142+
# Ignore self references
143+
if child_node.referenced.get_usr() == node.get_usr():
144+
continue
162145
symbol_uses.append(Symbol(child_node.referenced.get_usr(), child_node))
163146

164147
return symbol_uses

src/ideas/init.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ def get_symbols_and_dependencies(
8686
export_symbols = [c14n_symbol_name(name, project_symbols) for name in export_symbols]
8787
dependencies = reachable_subgraph(project_dependencies, export_symbols)
8888
symbols = filter_symbols(
89-
project_symbols, filter_tag_definitions=True, filter_function_declarations=True
89+
project_symbols,
90+
filter_tag_definitions=True,
91+
filter_function_declarations=True,
92+
filter_enum_constants=True,
9093
)
9194

9295
return symbols, dependencies
@@ -128,6 +131,7 @@ def filter_symbols(
128131
filter_system: bool = True,
129132
filter_tag_definitions: bool = False,
130133
filter_function_declarations: bool = False,
134+
filter_enum_constants: bool = False,
131135
) -> dict[str, Symbol]:
132136
filtered_symbols = {}
133137
for name, symbol in symbols.items():
@@ -159,6 +163,11 @@ def filter_symbols(
159163
):
160164
continue
161165

166+
# Filter enum constants since they should be contained within an ENUM_DECL
167+
if filter_enum_constants:
168+
if symbol.cursor.kind == CursorKind.ENUM_CONSTANT_DECL: # type: ignore[reportAttributeAccessIssue]
169+
continue
170+
162171
filtered_symbols[name] = symbols[name]
163172
return filtered_symbols
164173

@@ -281,7 +290,9 @@ def main(cfg: InitConfig) -> None:
281290

282291
source_priority = None
283292
if isinstance(cfg.source_priority, Path):
284-
source_priority = [Path(path) for path in cfg.source_priority.read_text().splitlines()]
293+
source_priority = [
294+
Path(path).resolve() for path in cfg.source_priority.read_text().splitlines()
295+
]
285296

286297
init(
287298
cfg.filename,

src/ideas/ltu.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,13 @@ def build_unit(
3131
ast_info: TreeResult, type: str = "functional_maximal"
3232
) -> list[LLMTranslationUnit]:
3333
if type == "functional_maximal":
34-
return build_functional_maximal_unit(ast_info)
34+
raise ValueError("ltu-max not implemented!")
3535
elif type == "functional_minimal":
3636
return build_functional_minimal_unit(ast_info)
3737
else:
3838
raise ValueError(f"Unknown unit type: {type}")
3939

4040

41-
def build_functional_maximal_unit(ast_info: TreeResult) -> list[LLMTranslationUnit]:
42-
definitions = [
43-
(name, definition) for name, definition in ast_info.fn_definitions.items() if definition
44-
]
45-
units = []
46-
for name, definition in definitions:
47-
unit = LLMTranslationUnit(
48-
symbol_name=name,
49-
symbol_definition=definition,
50-
)
51-
# Add unique symbols referenced by this function
52-
# NOTE: We use dict.fromkeys to preserve the order of appearance
53-
unit.ref_symbols = list(dict.fromkeys(ast_info.get_top_level_symbols_for_name(name)))
54-
units.append(unit)
55-
return units
56-
57-
5841
def build_functional_minimal_unit(ast_info: TreeResult) -> list[LLMTranslationUnit]:
5942
definitions = [
6043
(name, definition) for name, definition in ast_info.fn_definitions.items() if definition

src/ideas/model.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ class ModelConfig:
2323
@dataclass
2424
class GenerateConfig:
2525
max_new_tokens: int = 32000
26-
do_sample: bool = False
27-
temperature: float = 1.0
26+
temperature: float = 0.0
2827
top_p: float = 1.0
2928
top_k: int | None = None
3029

src/ideas/translate_recurrent.py

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# SPDX-License-Identifier: Apache-2.0
55
#
66

7+
import os
78
import io
89
import json
910
import logging
@@ -73,38 +74,28 @@ def forward(self, filename: Path, crate: Crate) -> dspy.Prediction:
7374
# Translate symbol by symbol
7475
translations: dict[str, str] = OrderedDict()
7576
for symbol_name, symbol_code in sources.items():
76-
ref_names = [
77-
name for name in bfs(symbol_name, dependencies) if name in translations
78-
]
7977
dep_names = [
8078
name for name in bfs(symbol_name, references, max_depth=1) if name in sources
8179
]
82-
# move dependent names that has a translation to reference names
83-
for dep_name in dep_names:
84-
assert dep_name not in translations, (
85-
f"Dependency {dep_name} should not already be translated"
86-
)
87-
for ref_name in bfs(dep_name, dependencies):
88-
if ref_name in translations and ref_name not in ref_names:
89-
ref_names.append(ref_name)
9080

9181
# Gather reference and dependent code in order of translations and sources, respectively
92-
ref_translations = "\n\n".join(
93-
[translation for name, translation in translations.items() if name in ref_names]
94-
)
82+
ref_translations = "\n\n".join(translations.values())
9583
dep_sources = "\n\n".join(
9684
[source for name, source in sources.items() if name in dep_names]
9785
)
9886

9987
logger.info(f"Translating `{symbol_name}` ...")
10088
logger.debug(f"```c\n{symbol_code}\n```")
10189

90+
# FIXME: Pass a Symbol here instead of symbol_code and is_snippet_main. Similarly,
91+
# dep_sources should probably be a list[Symbol] too.
10292
pred = self.translate_with_feedback(
10393
ref_translations,
10494
symbol_code,
10595
dep_sources,
10696
crate,
10797
max_iters=self.max_iters,
98+
is_snippet_main=symbol_name == "c:@F@main",
10899
)
109100
# pred = dspy.Prediction(translation=dspy.Code(code=""))
110101

@@ -115,16 +106,22 @@ def forward(self, filename: Path, crate: Crate) -> dspy.Prediction:
115106
# Update state
116107
translations[symbol_name] = pred.translation.code
117108
with crate.rust_src_path.with_suffix(".jsonl").open("a") as f:
118-
f.write(
119-
json.dumps(
109+
for prior_translation, feedback in zip(pred.prior_translations, pred.feedbacks):
110+
jsonl = json.dumps(
120111
{
121112
"name": symbol_name,
122-
"source": symbol_code,
113+
"reference_names": list(translations.keys()),
114+
"reference_code": ref_translations,
115+
"snippet": symbol_code,
116+
"dependent_names": dep_names,
117+
"dependent_code": dep_sources,
118+
"prior_translation": prior_translation,
119+
"feedback": feedback,
123120
"translation": pred.translation.code,
121+
"success": pred.success,
124122
}
125123
)
126-
+ "\n"
127-
)
124+
f.write(jsonl + "\n")
128125

129126
translation = "\n\n".join(translations.values())
130127
return dspy.Prediction(translation=translation)
@@ -146,8 +143,6 @@ class TranslateSignature(dspy.Signature):
146143
Use the `cargo build` feedback about the prior_translation, if provided, when generating the Rust translation.
147144
"""
148145

149-
# For example, reason about how a Rust translation of the dependent_code would inform a safe and idiomatic translation of the C snippet.
150-
151146
reference_code: dspy.Code["Rust"] = dspy.InputField() # noqa: F821
152147
snippet: dspy.Code["C"] = dspy.InputField() # noqa: F821
153148
dependent_code: dspy.Code["C"] = dspy.InputField() # noqa: F821
@@ -185,46 +180,54 @@ def translate_with_feedback(
185180
crate: Crate,
186181
*,
187182
max_iters: int = 0,
183+
is_snippet_main: bool = False,
188184
) -> dspy.Prediction:
189185
pred = self.translate(reference_code, snippet, dependent_code)
190-
i = 0
191-
for i in range(max_iters):
186+
success, prior_translations, feedbacks = False, [""], [""]
187+
for _ in range(max_iters):
192188
rust_src = ""
193189
if len(reference_code) > 0:
194190
rust_src += reference_code + "\n\n"
195191
rust_src += pred.translation.code + "\n\n"
196-
if crate.is_bin and "fn main()" not in rust_src:
192+
if crate.is_bin and not is_snippet_main:
197193
# Work around E0601 error
198194
rust_src += 'fn main() {\n println!("Hello, world!");\n}\n'
199195

200196
crate.rust_src_path.write_text(rust_src)
197+
env = os.environ.copy()
198+
env["RUSTFLAGS"] = (env.get("RUSTFLAGS", "") + " -D unsafe-code").strip()
201199
success, feedback = tools.run_subprocess(
202200
[
203201
"cargo",
204202
"build",
205203
"--quiet",
206204
"--color=never",
207205
f"--manifest-path={crate.cargo_toml}",
208-
]
206+
],
207+
env=env,
209208
)
210209
if success:
211210
break
212211
logger.debug(
213212
f"Feedback\n```rust\n{reference_code}\n{pred.translation.code}\n```\n\n# Feedback\n{feedback}\n\n# reasoning\n{pred.reasoning}"
214213
)
215214

215+
feedbacks.append(feedback)
216+
prior_translations.append(pred.translation.code)
216217
pred = self.translate(
217218
reference_code,
218219
snippet,
219220
dependent_code,
220-
prior_translation=pred.translation,
221+
prior_translation=pred.translation.code,
221222
feedback=feedback,
222223
)
223224
else:
224225
logger.warning(
225226
f"Translation failed to build after {max_iters} feedback iterations!"
226227
)
227-
pred["iters"] = i
228+
pred["feedbacks"] = feedbacks
229+
pred["prior_translations"] = prior_translations
230+
pred["success"] = success
228231
return pred
229232

230233
def get_history(self, n: int = 1, clear: bool = False) -> str:

0 commit comments

Comments
 (0)