Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 40 additions & 40 deletions cli/decompose/decompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,22 @@


class DecompVersion(StrEnum):
"""Available versions of the decomposition pipeline template.
"""Available template versions for generated decomposition programs.

Newer versions must be declared last to ensure ``latest`` always resolves to
the most recent template.
Newer concrete versions must be declared after older ones so that
``latest`` can resolve to the most recently declared template version.

Args:
latest (str): Sentinel value that resolves to the last declared version.
v1 (str): Version 1 of the decomposition pipeline template.
Attributes:
latest: Sentinel value that resolves to the last declared concrete
template version.
v1: Version 1 of the decomposition program template.
v2: Version 2 of the decomposition program template.
"""

latest = "latest"
v1 = "v1"
v2 = "v2"
# v3 = "v3"
v3 = "v3"


this_file_dir = Path(__file__).resolve().parent
Expand Down Expand Up @@ -225,44 +227,42 @@ def run(
),
] = LogMode.demo,
) -> None:
"""Decompose one or more user queries into subtasks with constraints and dependency metadata.

Reads user queries either from a file or interactively, runs the LLM
decomposition pipeline to produce subtask descriptions, Jinja2 prompt templates,
constraint lists, and dependency metadata, and writes one ``.json`` result file
plus one rendered ``.py`` script per task job to the output directory.
"""Runs the ``m decompose`` CLI workflow and writes generated outputs.

If ``input_file`` contains multiple non-empty lines, each line is treated as a
separate task job.
Reads user queries from a file or interactive input, runs the decomposition
pipeline for each task job, and writes one JSON file, one rendered Python
program, and any generated validation modules under a per-job output
directory.

Args:
out_dir: Path to an existing directory where output files are saved.
out_name: Base name (no extension) for the output files. Defaults to
``"m_decomp_result"``.
input_file: Optional path to a text file containing one or more user
queries. If the file contains multiple non-empty lines, each line is
treated as a separate task job. If omitted, the query is collected
interactively.
model_id: Model name or ID used for all decomposition pipeline steps.
backend: Inference backend -- ``"ollama"``, ``"openai"``, or ``"rits"``.
backend_req_timeout: Request timeout in seconds for model inference calls.
backend_endpoint: Base URL of the configured endpoint. Required when
``backend="openai"`` or ``backend="rits"``.
backend_api_key: API key for the configured endpoint. Required when
``backend="openai"`` or ``backend="rits"``.
version: Version of the decomposition pipeline template to use.
input_var: Optional list of user-input variable names (e.g. ``"DOC"``).
Each name must be a valid Python identifier. Pass this option
multiple times to define multiple variables.
log_mode: Logging detail mode for CLI and pipeline output.
out_dir: Existing directory under which per-job output directories are
created.
out_name: Base name used for the per-job output directory and generated
files.
input_file: Optional path to a text file containing one or more task
prompts. Each non-empty line is processed as a separate task job.
When omitted, the command prompts interactively for one task.
model_id: Model identifier used for all decomposition pipeline stages.
backend: Inference backend used to execute model calls.
backend_req_timeout: Request timeout in seconds for backend inference calls.
backend_endpoint: Endpoint URL or base URL required by remote backends.
backend_api_key: API key required by remote backends.
version: Template version used to render the generated Python program.
``latest`` resolves to the most recently declared concrete version.
input_var: Optional user input variable names to expose in generated
prompts and programs. Each name must be a valid non-keyword Python
identifier.
log_mode: Logging verbosity for CLI and pipeline execution.

Raises:
AssertionError: If ``out_name`` contains invalid characters, if
``out_dir`` does not exist or is not a directory, or if any
``input_var`` name is not a valid Python identifier.
ValueError: If the input file contains no non-empty task lines.
Exception: Re-raised from the decomposition pipeline after cleaning up
any partially written output directories.
AssertionError: If ``out_name`` is invalid, ``out_dir`` does not name an
existing directory, ``input_file`` does not name an existing file,
or any declared ``input_var`` is not a valid Python identifier.
ValueError: If ``input_file`` exists but contains no non-empty task
lines.
Exception: Propagates pipeline, rendering, parsing, or file-writing
failures. Any output directories created earlier in the run are
removed before the exception is re-raised.
"""
created_dirs: list[Path] = []

Expand Down
100 changes: 100 additions & 0 deletions cli/decompose/m_decomp_result_v3.py.jinja2
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{#- Jinja2 template (v3) for the generated decomposition program.
    Renders a standalone Python script that:
      * reads each declared user-input variable from the environment
        (raising SystemExit if any is missing),
      * runs one m.instruct(...) call per subtask, feeding earlier
        results into later ones through grounding_context via each
        subtask's depends_on list,
      * wraps every constraint that has a val_fn in req(...), importing
        the generated validator from the validations/ package,
      * prints the last subtask's value as the final answer.
    NOTE(review): model_id is hardcoded to "mistral-small3.2:latest"
    here; presumably the CLI's --model-id should be threaded through
    as a template variable -- confirm.
    NOTE(review): need_req is derived from subtasks' constraints, but
    the validator imports iterate identified_constraints -- confirm the
    two collections stay in sync, otherwise unused (or missing) imports
    can be emitted.
    This comment uses "-#}" so it strips its own trailing newline and
    the rendered output is unchanged. -#}
{% if user_inputs -%}
import os
{% endif -%}
import textwrap

import mellea

{%- set ns = namespace(need_req=false) -%}
{%- for item in subtasks -%}
{%- for c in item.constraints or [] -%}
{%- if c.val_fn -%}
{%- set ns.need_req = true -%}
{%- endif -%}
{%- endfor -%}
{%- endfor %}

{%- if ns.need_req %}
from mellea.stdlib.requirements import req
{%- for c in identified_constraints %}
{%- if c.val_fn and c.val_fn_name %}
from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }}
{%- endif %}
{%- endfor %}
{%- endif %}

m = mellea.start_session(model_id="mistral-small3.2:latest")
{%- if user_inputs %}


# User Input Variables
try:
{%- for var in user_inputs %}
    {{ var | lower }} = os.environ["{{ var | upper }}"]
{%- endfor %}
except KeyError as e:
    raise SystemExit(f"ERROR: One or more required environment variables are not set: {e}")
{%- endif %}
{%- for item in subtasks %}


{{ item.tag | lower }}_gnrl = textwrap.dedent(
    R"""
    {{ item.general_instructions | trim | indent(width=4, first=False) }}
    """.strip()
)
{{ item.tag | lower }} = m.instruct(
{%- if not (item.input_vars_required or []) %}
    {{ item.subtask[3:] | trim | tojson }},
{%- else %}
    textwrap.dedent(
        R"""
        {{ item.subtask[3:] | trim }}

        Here are the input variables and their content:
{%- for var in item.input_vars_required or [] %}

        - {{ var | upper }} = {{ "{{" }}{{ var | upper }}{{ "}}" }}
{%- endfor %}
        """.strip()
    ),
{%- endif %}
{%- if item.constraints %}
    requirements=[
{%- for c in item.constraints %}
{%- if c.val_fn and c.val_fn_name %}
        req(
            {{ c.constraint | tojson}},
            validation_fn={{ c.val_fn_name }},
        ),
{%- else %}
        {{ c.constraint | tojson}},
{%- endif %}
{%- endfor %}
    ],
{%- else %}
    requirements=None,
{%- endif %}
{%- if item.input_vars_required %}
    user_variables={
{%- for var in item.input_vars_required or [] %}
        {{ var | upper | tojson }}: {{ var | lower }},
{%- endfor %}
    },
{%- endif %}
    grounding_context={
        "GENERAL_INSTRUCTIONS": {{ item.tag | lower }}_gnrl,
{%- for var in item.depends_on or [] %}
        {{ var | upper | tojson }}: {{ var | lower }}.value,
{%- endfor %}
    },
)
assert {{ item.tag | lower }}.value is not None, 'ERROR: task "{{ item.tag | lower }}" execution failed'
{%- if loop.last %}


final_answer = {{ item.tag | lower }}.value

print(final_answer)
{%- endif -%}
{%- endfor -%}
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,41 @@
T = TypeVar("T")

RE_GENERAL_INSTRUCTIONS = re.compile(
r"<general_instructions>(.+?)</general_instructions>",
r"<general_instructions>(.*?)</general_instructions>",
flags=re.IGNORECASE | re.DOTALL,
)

RE_GENERAL_INSTRUCTIONS_OPEN = re.compile(
r"<general_instructions>(.*)", flags=re.IGNORECASE | re.DOTALL
)

RE_FINAL_SENTENCE = re.compile(
r"\n*All tags are closed and my assignment is finished\.\s*$", flags=re.IGNORECASE
)


@final
class _GeneralInstructions(PromptModule):
@staticmethod
def _default_parser(generated_str: str) -> str:
general_instructions_match = re.search(RE_GENERAL_INSTRUCTIONS, generated_str)

general_instructions_str: str | None = (
general_instructions_match.group(1).strip()
if general_instructions_match
else None
)

if general_instructions_str is None:
raise TagExtractionError(
'LLM failed to generate correct tags for extraction: "<general_instructions>"'
if general_instructions_match:
general_instructions_str = general_instructions_match.group(1).strip()
else:
# fallback: opening tag only (in case the closing tag is missing)
general_instructions_match = re.search(
RE_GENERAL_INSTRUCTIONS_OPEN, generated_str
)
if not general_instructions_match:
raise TagExtractionError(
'LLM failed to generate correct tags for extraction: "<general_instructions>"'
)
general_instructions_str = general_instructions_match.group(1).strip()

general_instructions_str = re.sub(
RE_FINAL_SENTENCE, "", general_instructions_str
).strip()

return general_instructions_str

Expand All @@ -50,20 +64,19 @@ def generate(

system_prompt = get_system_prompt()
user_prompt = get_user_prompt(task_prompt=input_str)

action = Message("user", user_prompt)

model_options = {
ModelOption.SYSTEM_PROMPT: system_prompt,
ModelOption.TEMPERATURE: 0,
ModelOption.MAX_NEW_TOKENS: max_new_tokens,
}

try:
gen_result = mellea_session.act(
action=action,
model_options={
ModelOption.SYSTEM_PROMPT: system_prompt,
ModelOption.TEMPERATURE: 0,
ModelOption.MAX_NEW_TOKENS: max_new_tokens,
},
).value
response = mellea_session.act(action=action, model_options=model_options)
gen_result = response.value
except Exception as e:
raise BackendGenerationError(f"LLM generation failed: {e}")
raise BackendGenerationError(f"LLM generation failed: {e}") from e

if gen_result is None:
raise BackendGenerationError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Do not write anything between </general_instructions> and the final sentence exc
Here are some complete examples to guide you on how to complete your assignment:

{% for item in icl_examples -%}
<example>
<example_{{ loop.index }}>
<task_prompt>
{{ item["task_prompt"] }}
</task_prompt>
Expand All @@ -22,7 +22,7 @@ Here are some complete examples to guide you on how to complete your assignment:
</general_instructions>

All tags are closed and my assignment is finished.
</example>
</example_{{ loop.index }}>

{% endfor -%}
That concludes the complete examples of your assignment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)

RE_ASSIGNED_CONS = re.compile(
r"<assigned_constraints>(.+?)</assigned_constraints>",
r"<assigned_constraints>(.*?)</assigned_constraints>",
flags=re.IGNORECASE | re.DOTALL,
)

Expand Down Expand Up @@ -57,12 +57,12 @@ def _default_parser(generated_str: str) -> list[SubtaskPromptConstraintsItem]:
For example
```
[ SubtaskPromptConstraintsItem(
subtask=<str>,
tag=<str>,
prompt_template=<str>
constraints=<list[str]>
),
...
subtask=<str>,
tag=<str>,
prompt_template=<str>
constraints=<list[str]>
),
...
]
```

Expand Down Expand Up @@ -92,18 +92,24 @@ def _default_parser(generated_str: str) -> list[SubtaskPromptConstraintsItem]:

subtask_constraint_assign_match = re.search(RE_ASSIGNED_CONS, data[3])

subtask_constraint_assign_str: str | None = (
# ===== fallback: use raw text when there is no tag =====
subtask_constraint_assign_str: str = (
subtask_constraint_assign_match.group(1).strip()
if subtask_constraint_assign_match
else None
else data[3].strip()
Copy link
Copy Markdown
Contributor

@planetf1 planetf1 Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This silently falls back to the raw text when the `<assigned_constraints>` tag is missing, with no indication that extraction failed. The general-instructions parser has a nicer graduated fallback (it first retries with an open-tag-only pattern). At a minimum, log a warning here — and how would we surface the failure to the caller?

)

if subtask_constraint_assign_str is None:
raise TagExtractionError(
'LLM failed to generate correct tags for extraction: "<assigned_constraints>"'
)
subtask_constraint_assign_str = re.sub(
Copy link
Copy Markdown
Contributor

@planetf1 planetf1 Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pattern is already compiled as RE_FINAL_SENTENCE in _general_instructions.py — it could be extracted to a shared module-level constant rather than being passed to re.sub as a string on every iteration.

r"\n*All tags are closed and my assignment is finished\.\s*$",
"",
subtask_constraint_assign_str,
flags=re.IGNORECASE,
).strip()

subtask_constraint_assign_str_upper = subtask_constraint_assign_str.upper()

subtask_constraint_assign: list[str] = []

if (
"N/A" in subtask_constraint_assign_str_upper
or "N / A" in subtask_constraint_assign_str_upper
Expand All @@ -112,10 +118,22 @@ def _default_parser(generated_str: str) -> list[SubtaskPromptConstraintsItem]:
):
subtask_constraint_assign = []
else:
subtask_constraint_assign = [
line.strip()[2:] if line.strip()[:2] == "- " else line.strip()
for line in subtask_constraint_assign_str.splitlines()
]
for line in subtask_constraint_assign_str.splitlines():
stripped = line.strip()

if not stripped:
continue

# Only keep lines starting with "- "
if stripped.startswith("- "):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This silently drops any constraint line not prefixed with "- " (e.g. items in a numbered list), whereas the old code kept those lines. It might be worth logging the skipped lines or broadening the heuristic a little.

value = stripped[2:].strip()
if value:
subtask_constraint_assign.append(value)

# Remove duplicates while preserving order
subtask_constraint_assign = list(
dict.fromkeys(subtask_constraint_assign)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably fine in practice, but dict.fromkeys will quietly collapse genuinely distinct constraints that happen to share identical text.

)

result.append(
SubtaskPromptConstraintsItem(
Expand Down
Loading
Loading