diff --git a/opto/optimizers/__init__.py b/opto/optimizers/__init__.py
index 482b1b2d..a41b6d34 100644
--- a/opto/optimizers/__init__.py
+++ b/opto/optimizers/__init__.py
@@ -4,7 +4,9 @@
from opto.optimizers.opro_v2 import OPROv2
from opto.optimizers.textgrad import TextGrad
from opto.optimizers.optoprime_v2 import OptoPrimeV2
+from opto.optimizers.optoprime_v3 import OptoPrimeV3
+from opto.optimizers.opro_v3 import OPROv3
OptoPrime = OptoPrimeV1
-__all__ = ["OPRO", "OptoPrime", "OptoPrimeMulti", "TextGrad", "OptoPrimeV2", "OptoPrimeV1", "OPROv2"]
\ No newline at end of file
+__all__ = ["OPRO", "OptoPrime", "OptoPrimeMulti", "TextGrad", "OptoPrimeV2", "OptoPrimeV1", "OPROv2", "OptoPrimeV3", "OPROv3"]
\ No newline at end of file
diff --git a/opto/optimizers/opro_v3.py b/opto/optimizers/opro_v3.py
new file mode 100644
index 00000000..20575b88
--- /dev/null
+++ b/opto/optimizers/opro_v3.py
@@ -0,0 +1,543 @@
+"""
+Key difference to v2:
+1. Use the new backbone conversation history manager
+2. Support multimodal node (both trainable and non-trainable)
+3. Break from the OptoPrime style template, support more customizable template from user, for brevity and streamlined usage.
+"""
+
+from textwrap import dedent
+from dataclasses import dataclass
+from typing import Dict, Optional, List, Union
+from opto.trace.nodes import ParameterNode
+
+from opto.optimizers.optoprime_v3 import OptoPrimeV3, OptimizerPromptSymbolSet
+from opto.utils.backbone import (
+ ContentBase, ImageContent, ContentBlockList,
+ DEFAULT_IMAGE_PLACEHOLDER
+)
+
+# Not inheriting from optoprime_v2 because this should have a smaller set
+class OPROPromptSymbolSet(OptimizerPromptSymbolSet):
+ """Prompt symbol set for OPRO optimizer.
+
+ This class defines the tags and symbols used in the OPRO optimizer's prompts
+ and output parsing. It provides a structured way to format problems and parse
+ responses from the language model.
+
+ Attributes
+ ----------
+ instruction_section_title : str
+ Title for the instruction section in prompts.
+ variables_section_title : str
+ Title for the variable/solution section in prompts.
+ feedback_section_title : str
+ Title for the feedback section in prompts.
+ node_tag : str
+ Tag used to identify constant nodes in the computation graph.
+ variable_tag : str
+ Tag used to identify variable nodes that can be optimized.
+ value_tag : str
+ Tag used to wrap the value of a node.
+ constraint_tag : str
+ Tag used to wrap constraint expressions for nodes.
+ reasoning_tag : str
+ Tag used to wrap reasoning in the output.
+ improved_variable_tag : str
+ Tag used to wrap improved variable values in the output.
+ name_tag : str
+ Tag used to wrap variable names.
+ expect_json : bool
+ Whether to expect JSON output format (default: False).
+
+ Methods
+ -------
+ default_prompt_symbols
+ Returns default prompt symbols dictionary.
+
+ Notes
+ -----
+ This class inherits from OptimizerPromptSymbolSet but defines a smaller,
+ more focused set of symbols specifically for OPRO optimization.
+ """
+
+ instruction_section_title = "# Instruction"
+ variables_section_title = "# Solution"
+ feedback_section_title = "# Feedback"
+ context_section_title = "# Context"
+
+ node_tag = "node" # nodes that are constants in the graph
+ variable_tag = "solution" # nodes that can be changed
+ value_tag = "value" # inside node, we have value tag
+ constraint_tag = "constraint" # inside node, we have constraint tag
+
+ # output format
+ # Note: we currently don't support extracting format's like "```code```" because we assume supplied tag is name-only, i.e.,
+ reasoning_tag = "reasoning"
+ improved_variable_tag = "variable"
+ name_tag = "name"
+
+ expect_json = False # this will stop `enforce_json` arguments passed to LLM calls
+
+ @property
+ def default_prompt_symbols(self) -> Dict[str, str]: # maps the four OPRO section keys to this set's section titles
+ return {
+ "variables": self.variables_section_title,
+ "feedback": self.feedback_section_title,
+ "instruction": self.instruction_section_title,
+ "context": self.context_section_title
+ }
+
+@dataclass
+class ProblemInstance:
+ """Represents a problem instance for OPRO optimization.
+
+ This dataclass encapsulates a complete problem instance including the
+ instruction, current variables/solution, and feedback received.
+
+ Supports multimodal content - variables can contain images.
+
+ Attributes
+ ----------
+ instruction : str
+ The instruction describing what needs to be done or the question to answer.
+ variables : Union[str, List[ContentBase]]
+ The current proposed solution that can be modified. Can contain images.
+ feedback : str
+ Feedback about the current solution.
+ context : Optional[ContentBlockList]
+ Optional context blocks (may be None) that might be useful to solve the problem.
+
+ optimizer_prompt_symbol_set : OPROPromptSymbolSet
+ The symbol set used for formatting the problem.
+ problem_template : str
+ Template for formatting the problem instance as a string.
+
+ Methods
+ -------
+ __repr__()
+ Returns a formatted string representation of the problem instance.
+ to_content_blocks()
+ Returns a ContentBlockList for multimodal prompts.
+ has_images()
+ Returns True if the problem instance contains images.
+
+ Notes
+ -----
+ The problem instance is formatted using the problem_template which
+ organizes the instruction, variables, and feedback into a structured format.
+ """
+ instruction: str
+ variables: Union[str, List[ContentBase]]
+ feedback: str
+ context: Optional[ContentBlockList]
+
+ optimizer_prompt_symbol_set: OPROPromptSymbolSet
+
+ problem_template = dedent(
+ """
+ # Instruction
+ {instruction}
+
+ # Solution
+ {variables}
+
+ # Feedback
+ {feedback}
+ """
+ )
+
+ @staticmethod
+ def _content_to_text(content: Union[str, List[ContentBase]]) -> str:
+     """Render ``content`` as plain text.
+
+     A ``str`` is handed back untouched; anything else is delegated to
+     ``ContentBlockList.blocks_to_text`` with ``DEFAULT_IMAGE_PLACEHOLDER``.
+     """
+     already_text = isinstance(content, str)
+     if not already_text:
+         return ContentBlockList.blocks_to_text(content, DEFAULT_IMAGE_PLACEHOLDER)
+     return content
+
+ def __repr__(self) -> str:
+     """Text-only rendering: the filled problem_template plus an optional context section."""
+     context_template = dedent("""
+
+         # Context
+         {context}
+     """)
+
+     rendered = self.problem_template.format(
+         instruction=self.instruction,
+         variables=self._content_to_text(self.variables),
+         feedback=self.feedback,
+     )
+
+     # Skip the context section when there is no context or its text is blank.
+     if self.context is None or self.context.to_text().strip() == "":
+         return rendered
+
+     return rendered + context_template.format(context=self.context.to_text())
+
+ def to_content_blocks(self) -> ContentBlockList:
+     """Convert the problem instance to a ContentBlockList for multimodal prompts.
+
+     Layout: instruction and solution header (text), the variables, the
+     feedback (text), then an optional context section.
+
+     Returns:
+         ContentBlockList: the assembled blocks; the context section is
+             included only when ``context`` is set and its text is non-blank.
+     """
+     blocks = ContentBlockList()
+
+     # Instruction section, followed by the header of the solution section
+     blocks.append(f"# Instruction\n{self.instruction}\n\n# Solution\n")
+
+     # Variables/Solution section (may contain images). A plain string is
+     # appended as one text entry instead of being iterated by extend().
+     if isinstance(self.variables, str):
+         blocks.append(self.variables)
+     else:
+         blocks.extend(self.variables)
+
+     blocks.append(f"\n\n# Feedback\n{self.feedback}")
+     if self.context is not None and self.context.to_text().strip() != "":
+         blocks.append("\n\n# Context\n")
+         blocks.extend(self.context)
+     return blocks
+
+ def has_images(self) -> bool:
+     """Report whether the ``variables`` field holds any image block.
+
+     Only ``variables`` is inspected, and only when it is a ``list``.
+
+     Returns:
+         bool: True if at least one element of ``variables`` is an ImageContent.
+     """
+     if not isinstance(self.variables, list):
+         return False
+     return any(isinstance(block, ImageContent) for block in self.variables)
+
+class OPROv3(OptoPrimeV3):
+ """OPRO (Optimization by PROmpting) optimizer version 3.
+
+ OPRO is an optimization algorithm that leverages large language models to
+ iteratively improve solutions based on feedback. It treats optimization as
+ a natural language problem where the LLM proposes improvements to variables
+ based on instruction and feedback.
+
+ Parameters
+ ----------
+ *args
+ Variable length argument list passed to parent class.
+ optimizer_prompt_symbol_set : OptimizerPromptSymbolSet, optional
+ The symbol set for formatting prompts and parsing outputs.
+ Defaults to OPROPromptSymbolSet().
+ include_example : bool, optional
+ Whether to include examples in the prompt. Default is False as
+ the default example in OptoPrimeV2 does not work well with OPRO.
+ memory_size : int, optional
+ Number of past optimization steps to remember. Default is 5.
+ **kwargs
+ Additional keyword arguments passed to parent class.
+
+ Attributes
+ ----------
+ representation_prompt : str
+ Template for explaining the problem representation to the LLM.
+ output_format_prompt_template : str
+ Template for specifying the expected output format.
+ user_prompt_template : str
+ Template for presenting the problem instance to the LLM.
+ final_prompt : str
+ Template for requesting the final revised solutions.
+ default_objective : str
+ Default objective when none is specified.
+
+ Methods
+ -------
+ problem_instance(summary, mask=None)
+ Creates a ProblemInstance from an optimization summary.
+ initialize_prompt()
+ Initializes and formats the prompt templates.
+
+ Notes
+ -----
+ OPRO differs from OptoPrime by focusing on simpler problem representations
+ and clearer feedback incorporation. It is particularly effective for
+ problems where the optimization can be expressed in natural language.
+
+ See Also
+ --------
+ OptoPrimeV3 : Parent class providing core optimization functionality.
+ OPROPromptSymbolSet : Symbol set used for formatting.
+
+ Examples
+ --------
+ >>> optimizer = OPROv3(memory_size=10)
+ >>> # Use optimizer to improve solutions based on feedback
+ """
+ representation_prompt = dedent(
+ """
+ You're tasked to change the proposed solution according to feedback.
+
+ Specifically, a problem will be composed of the following parts:
+ - {instruction_section_title}: the instruction which describes the things you need to do or the question you should answer.
+ - {variables_section_title}: the proposed solution that you can change/tweak (trainable).
+ - {feedback_section_title}: the feedback about the solution.
+ - {context_section_title}: the context information that might be useful to solve the problem.
+
+ If `data_type` is `code`, it means `{value_tag}` is the source code of a python code, which may include docstring and definitions.
+ """
+ )
+
+ output_format_prompt_template = dedent(
+ """
+ Output_format: Your output should be in the following XML/HTML format:
+
+ ```
+ {output_format}
+ ```
+
+ In <{reasoning_tag}>, explain the problem: 1. what the {instruction_section_title} means 2. what the {feedback_section_title} means to {variables_section_title} considering how {variables_section_title} follow {instruction_section_title}. 3. Reasoning about the suggested changes in {variables_section_title} (if needed) and the expected result.
+
+ If you need to suggest a change in the values of {variables_section_title}, write down the suggested values in <{improved_variable_tag}>. Remember you can change only the values in {variables_section_title}, not others. When `type` of a variable is `code`, you should write the new definition in the format of python code without syntax errors, and you should not change the function name or the function signature.
+
+ If no changes are needed, just output TERMINATE.
+ """
+ )
+
+ user_prompt_template = dedent(
+ """
+ Now you see problem instance:
+
+ ================================
+ {problem_instance}
+ ================================
+
+ """
+ )
+
+ context_prompt = dedent(
+ """
+ Here is some additional **context** to solving this problem:
+
+ {context}
+ """
+ )
+
+ final_prompt = dedent(
+ """
+ What are your revised solutions on {names}?
+
+ Your response:
+ """
+ )
+
+ # Default Objective becomes instruction for the next block
+ default_objective = "Propose a new solution that will incorporate the feedback."
+
+ def __init__(self, *args,
+ optimizer_prompt_symbol_set: Optional[OptimizerPromptSymbolSet] = None,
+ include_example=False, # default example in OptoPrimeV2 does not work in OPRO
+ memory_size=5,
+ problem_context: Optional[ContentBlockList] = None,
+ **kwargs):
+ """Initialize the OPROv3 optimizer.
+
+ Parameters
+ ----------
+ optimizer_prompt_symbol_set : OptimizerPromptSymbolSet, optional
+ The symbol set for formatting prompts and parsing outputs.
+ If None (or otherwise falsy), uses OPROPromptSymbolSet().
+ include_example : bool, optional
+ Whether to include examples in the prompt. Default is False.
+ memory_size : int, optional
+ Number of past optimization steps to remember. Default is 5.
+ problem_context : ContentBlockList, optional
+ Forwarded to the parent class unchanged. Default is None.
+ *args, **kwargs
+ Additional arguments passed through to the parent class.
+ """
+ optimizer_prompt_symbol_set = optimizer_prompt_symbol_set or OPROPromptSymbolSet()
+ super().__init__(*args, optimizer_prompt_symbol_set=optimizer_prompt_symbol_set,
+ include_example=include_example, memory_size=memory_size,
+ problem_context=problem_context,
+ **kwargs)
+
+ def parameter_check(self, parameters: List[ParameterNode]):
+     """Check if the parameters are valid.
+     This can be overloaded by subclasses to add more checks.
+
+     Args:
+         parameters: List[ParameterNode]
+             The parameters to check.
+
+     Raises:
+         AssertionError: If more than one parameter contains image data.
+
+     Notes:
+         OPROv3 supports image parameters, but only one parameter can be
+         an image at a time since LLMs can only generate one image per inference.
+     """
+     # Collect every parameter flagged as an image
+     image_params = [param for param in parameters if param.is_image]
+
+     if len(image_params) > 1:
+         param_names = ', '.join(f"'{p.name}'" for p in image_params)
+         raise AssertionError(
+             f"OPROv3 supports at most one image parameter, but found {len(image_params)}: "
+             f"{param_names}. LLMs can only generate one image at a time."
+         )
+
+ def problem_instance(self, summary, mask=None, use_content_blocks=False):
+     """Create a ProblemInstance from an optimization summary.
+
+     Parameters
+     ----------
+     summary : object
+         The optimization summary containing variables and feedback.
+     mask : list, optional
+         List of sections to mask/hide in the problem instance.
+         Can include "#Instruction" or any of the instruction/variables/feedback section titles.
+     use_content_blocks : bool, optional
+         If True, use content blocks for multimodal support (images).
+         If False, use text-only representation.
+
+     Returns
+     -------
+     ProblemInstance
+         A formatted problem instance ready for presentation to the LLM.
+
+     Notes
+     -----
+     The mask parameter allows selective hiding of problem components,
+     useful for ablation studies or specific optimization strategies.
+     """
+     mask = mask or []
+     symbols = self.optimizer_prompt_symbol_set
+
+     # The legacy "#Instruction" key and the configured section title both hide the instruction.
+     hide_instruction = "#Instruction" in mask or symbols.instruction_section_title in mask
+     hide_variables = symbols.variables_section_title in mask
+
+     if hide_variables:
+         # Masked: empty container matching the requested representation
+         variables_content = ContentBlockList() if use_content_blocks else ""
+     elif use_content_blocks:
+         # Content block representation for multimodal support
+         variables_content = self.repr_node_value_compact_as_content_blocks(
+             summary.variables,
+             node_tag=symbols.variable_tag,
+             value_tag=symbols.value_tag,
+             constraint_tag=symbols.constraint_tag,
+         )
+     else:
+         # Text-only representation (backward compatible)
+         variables_content = self.repr_node_value_compact(
+             summary.variables,
+             node_tag=symbols.variable_tag,
+             value_tag=symbols.value_tag,
+             constraint_tag=symbols.constraint_tag,
+         )
+
+     return ProblemInstance(
+         instruction="" if hide_instruction else self.objective,
+         variables=variables_content,
+         feedback="" if symbols.feedback_section_title in mask else summary.user_feedback,
+         context=getattr(self, 'problem_context', None),
+         optimizer_prompt_symbol_set=symbols,
+     )
+
+ def repr_node_value_compact_as_content_blocks(self, node_dict, node_tag="node",
+                                               value_tag="value", constraint_tag="constraint") -> ContentBlockList:
+     """Returns a ContentBlockList with compact representation, including images.
+
+     Node markup is XML-like with explicit closing tags (``</tag>``).
+     Non-image values are truncated. Images break the text flow.
+     """
+     from opto.optimizers.optoprime_v3 import value_to_image_content
+
+     blocks = ContentBlockList()
+
+     for k, v in node_dict.items():
+         value_data = v[0]
+         constraint = v[1]
+
+         if "__code" not in k:
+             # Check if this is an image
+             image_content = value_to_image_content(value_data)
+
+             if image_content is not None:
+                 # Image node: output XML structure, then image, then closing
+                 type_name = "image"
+                 constraint_expr = f"<{constraint_tag}>\n{constraint}\n</{constraint_tag}>" if constraint is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag else ""
+
+                 xml_text = f"<{node_tag} name=\"{k}\" type=\"{type_name}\">\n<{value_tag}>\n"
+                 blocks.append(xml_text)
+                 blocks.append(image_content)  # Image breaks the text flow
+
+                 closing_text = f"\n</{value_tag}>\n{constraint_expr}\n</{node_tag}>\n\n" if constraint_expr else f"\n</{value_tag}>\n</{node_tag}>\n\n"
+                 blocks.append(closing_text)
+             else:
+                 # Non-image node: truncated text representation
+                 node_value = self.truncate_expression(value_data, self.initial_var_char_limit)
+                 if constraint is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag:
+                     constraint_expr = f"<{constraint_tag}>\n{constraint}\n</{constraint_tag}>"
+                     blocks.append(
+                         f"<{node_tag} name=\"{k}\" type=\"{type(value_data).__name__}\">\n<{value_tag}>\n{node_value}\n</{value_tag}>\n{constraint_expr}\n</{node_tag}>\n\n"
+                     )
+                 else:
+                     blocks.append(
+                         f"<{node_tag} name=\"{k}\" type=\"{type(value_data).__name__}\">\n<{value_tag}>\n{node_value}\n</{value_tag}>\n</{node_tag}>\n\n"
+                     )
+         else:
+             # Code node (never an image); NOTE(review): assumes constraint is a str here — confirm
+             constraint_expr = f"<{constraint_tag}>\n{constraint}\n</{constraint_tag}>"
+             signature = constraint.replace("The code should start with:\n", "")
+             func_body = value_data.replace(signature, "")
+             node_value = self.truncate_expression(func_body, self.initial_var_char_limit)
+             blocks.append(
+                 f"<{node_tag} name=\"{k}\" type=\"code\">\n<{value_tag}>\n{signature}{node_value}\n</{value_tag}>\n{constraint_expr}\n</{node_tag}>\n\n"
+             )
+
+     return blocks
+
+ def initialize_prompt(self):
+     """Initialize and format the prompt templates.
+
+     Fills representation_prompt (stored back on the instance) and
+     output_format_prompt_template (stored as output_format_prompt) with the
+     tags and space-stripped section titles of optimizer_prompt_symbol_set.
+
+     Notes
+     -----
+     The default representation_prompt has no {variable_expression_format} field;
+     the value is still supplied, with closed tags, for templates that include one.
+     """
+     self.representation_prompt = self.representation_prompt.format(
+         variable_expression_format=dedent(f"""
+         <{self.optimizer_prompt_symbol_set.variable_tag} name="variable_name" type="data_type">
+         <{self.optimizer_prompt_symbol_set.value_tag}>
+         value
+         </{self.optimizer_prompt_symbol_set.value_tag}>
+         <{self.optimizer_prompt_symbol_set.constraint_tag}>
+         constraint_expression
+         </{self.optimizer_prompt_symbol_set.constraint_tag}>
+         </{self.optimizer_prompt_symbol_set.variable_tag}>
+         """),
+         value_tag=self.optimizer_prompt_symbol_set.value_tag,
+         variables_section_title=self.optimizer_prompt_symbol_set.variables_section_title.replace(" ", ""),
+         feedback_section_title=self.optimizer_prompt_symbol_set.feedback_section_title.replace(" ", ""),
+         instruction_section_title=self.optimizer_prompt_symbol_set.instruction_section_title.replace(" ", ""),
+         context_section_title=self.optimizer_prompt_symbol_set.context_section_title.replace(" ", "")
+     )
+     self.output_format_prompt = self.output_format_prompt_template.format(
+         output_format=self.optimizer_prompt_symbol_set.output_format,
+         reasoning_tag=self.optimizer_prompt_symbol_set.reasoning_tag,
+         improved_variable_tag=self.optimizer_prompt_symbol_set.improved_variable_tag,
+         instruction_section_title=self.optimizer_prompt_symbol_set.instruction_section_title.replace(" ", ""),
+         feedback_section_title=self.optimizer_prompt_symbol_set.feedback_section_title.replace(" ", ""),
+         variables_section_title=self.optimizer_prompt_symbol_set.variables_section_title.replace(" ", ""),
+         context_section_title=self.optimizer_prompt_symbol_set.context_section_title.replace(" ", "")
+     )
diff --git a/opto/optimizers/optoprime_v3.py b/opto/optimizers/optoprime_v3.py
new file mode 100644
index 00000000..0bab6bc9
--- /dev/null
+++ b/opto/optimizers/optoprime_v3.py
@@ -0,0 +1,1282 @@
+"""
+Key difference to v2:
+1. Use the new backbone conversation history manager
+2. Support multimodal node (both trainable and non-trainable)
+"""
+
+import re
+import json
+from typing import List, Union, Tuple, Optional
+from dataclasses import dataclass
+from opto.optimizers.optoprime import OptoPrime, node_to_function_feedback
+from opto.trace.utils import dedent
+from opto.optimizers.utils import truncate_expression, extract_xml_like_data, is_bedrock_model
+from opto.trace.nodes import ParameterNode, is_image
+from opto.trace.propagators import GraphPropagator
+from opto.trace.propagators.propagators import Propagator
+
+from opto.utils.llm import AbstractModel, LLM
+from opto.optimizers.buffers import FIFOBuffer
+from opto.utils.backbone import (
+ Chat, UserTurn, AssistantTurn, PromptTemplate,
+ TextContent, ImageContent, ContentBlockList,
+ DEFAULT_IMAGE_PLACEHOLDER, Content
+)
+import copy
+import pickle
+from typing import Dict, Any
+
+
+def value_to_image_content(value: Any) -> Optional[ImageContent]:
+    """Return an ImageContent for ``value`` if it is an image, otherwise None.
+
+    The value is first screened with ``is_image`` (from opto.trace.nodes); only
+    values that pass are handed to ``ImageContent.build``.
+
+    NOTE(review): the accepted image forms are decided by ``is_image`` — by the
+    earlier notes here these are base64 data URLs, image-extension HTTP(S) URLs,
+    PIL images and raw image bytes; confirm against that helper.
+    """
+    # Guard first: non-image values never reach ImageContent.build
+    looks_like_image = is_image(value)
+    if looks_like_image:
+        return ImageContent.build(value)
+    return None
+
+
+class OptimizerPromptSymbolSet:
+ """
+ By inheriting from this class and passing an instance to the optimizer, users can customize the optimizer's prompt documentation.
+
+ This divides into three parts:
+ - Section titles: the title of each section in the prompt
+ - Node tags: the tags that capture the graph structure (only tag names are allowed to be changed)
+ - Output format: the format of the output of the optimizer
+ """
+
+ # Titles should be written as markdown titles (space between # and title)
+ # In text, we automatically remove space in the title, so it will become `#Title`
+ variables_section_title = "# Variables"
+ inputs_section_title = "# Inputs"
+ outputs_section_title = "# Outputs"
+ others_section_title = "# Others"
+ feedback_section_title = "# Feedback"
+ instruction_section_title = "# Instruction"
+ code_section_title = "# Code"
+ documentation_section_title = "# Documentation"
+ context_section_title = "# Context"
+
+ node_tag = "node" # nodes that are constants in the graph
+ variable_tag = "variable" # nodes that can be changed
+ value_tag = "value" # inside node, we have value tag
+ constraint_tag = "constraint" # inside node, we have constraint tag
+
+ # output format
+ # Note: we currently don't support extracting format's like "```code```" because we assume supplied tag is name-only, i.e.,
+ reasoning_tag = "reasoning"
+ improved_variable_tag = "variable"
+ name_tag = "name"
+
+ # only used by JSON format
+ suggestion_tag = "suggestion"
+
+ expect_json = False # this will stop `enforce_json` arguments passed to LLM calls
+
+ # custom output format
+ # if this is not None, then the user needs to implement the following functions:
+ # - output_response_extractor
+ # - example_output
+ custom_output_format_instruction = None
+
+ @property
+ def output_format(self) -> str:
+     """
+     This function defines the input to:
+     ```
+     {output_format}
+     ```
+     in the optimizer's output_format_prompt_template.
+     """
+     if self.custom_output_format_instruction is None:
+         # we use a default XML like format (every opened tag is explicitly closed)
+         return dedent(f"""
+             <{self.reasoning_tag}>
+             reasoning
+             </{self.reasoning_tag}>
+             <{self.improved_variable_tag}>
+             <{self.name_tag}>variable_name</{self.name_tag}>
+             <{self.value_tag}>
+             value
+             </{self.value_tag}>
+             </{self.improved_variable_tag}>
+         """)
+     else:
+         return self.custom_output_format_instruction.strip()
+
+ def example_output(self, reasoning, variables):
+     """
+     reasoning: str; its block is omitted when this is the empty string
+     variables: dict of {variable_name: value}; each value is rendered with str()
+     """
+     if self.custom_output_format_instruction is not None:
+         raise NotImplementedError
+     # Build the output string in the same XML-like format as self.output_format
+     # (opening tag, content, matching closing tag), one entry per line.
+     output = []
+     if reasoning != "":
+         output.append(f"<{self.reasoning_tag}>")
+         output.append(reasoning)
+         output.append(f"</{self.reasoning_tag}>")
+     for var_name, value in variables.items():
+         output.append(f"<{self.improved_variable_tag}>")
+         output.append(f"<{self.name_tag}>{var_name}</{self.name_tag}>")
+         output.append(f"<{self.value_tag}>")
+         output.append(str(value))
+         output.append(f"</{self.value_tag}>")
+         output.append(f"</{self.improved_variable_tag}>")
+     return "\n".join(output)
+
+ def output_response_extractor(self, response: str) -> Dict[str, Any]:
+     """Parse the plain-text LLM ``response`` written in the default XML-like format.
+
+     Returns whatever ``extract_xml_like_data`` produces, after deleting from its
+     ``'variables'`` entry every ``__code`` key whose value is blank (not even a
+     function signature), so that empty code suggestions are ignored.
+     Raises NotImplementedError when a custom output format is configured.
+     """
+     if self.custom_output_format_instruction is not None:
+         raise NotImplementedError(
+             "If you supplied a custom output format prompt template, you need to implement your own response extractor")
+
+     parsed = extract_xml_like_data(response,
+                                    reasoning_tag=self.reasoning_tag,
+                                    improved_variable_tag=self.improved_variable_tag,
+                                    name_tag=self.name_tag,
+                                    value_tag=self.value_tag)
+     suggestions = parsed['variables']
+     # Snapshot the offending keys first; deleting while iterating items() is unsafe.
+     blank_code_keys = [name for name, body in suggestions.items()
+                        if "__code" in name and body.strip() == ""]
+     for name in blank_code_keys:
+         del suggestions[name]
+     return parsed
+
+ @property
+ def default_prompt_symbols(self) -> Dict[str, str]: # maps symbol keys to this set's section titles and its reasoning/suggestion tags
+ return {
+ "variables": self.variables_section_title,
+ "inputs": self.inputs_section_title,
+ "outputs": self.outputs_section_title,
+ "others": self.others_section_title,
+ "feedback": self.feedback_section_title,
+ "instruction": self.instruction_section_title,
+ "code": self.code_section_title,
+ "documentation": self.documentation_section_title,
+ "context": self.context_section_title,
+ "reasoning": self.reasoning_tag,
+ "suggestion": self.suggestion_tag
+ }
+
+
+class OptimizerPromptSymbolSetJSON(OptimizerPromptSymbolSet):
+ """We enforce a JSON output format extraction"""
+
+ expect_json = True
+
+ custom_output_format_instruction = dedent("""
+ {
+ "reasoning": ,
+ "suggestion": {
+ : ,
+ : ,
+ }
+ }
+ """)
+
+ def example_output(self, reasoning, variables):
+     """Serialize an example answer as indented JSON.
+
+     reasoning: str, stored under the "reasoning" key
+     variables: mapping of variable_name -> value, copied under "suggestion"
+     """
+     payload = {"reasoning": reasoning}
+     payload["suggestion"] = dict(variables.items())
+
+     # indent=2 keeps the example human-readable
+     serialized = json.dumps(payload, indent=2)
+     return serialized
+
+ def output_response_extractor(self, response: str) -> Dict[str, Any]:
+     """
+     Best-effort extraction of {"reasoning": ..., "variables": {...}} from a JSON-style LLM response; malformed JSON falls back to regex extraction and may yield an empty dict.
+     """
+     # Tag names are looked up in default_prompt_symbols, with the literal names as fallback
+     suggestion_tag = self.default_prompt_symbols.get("suggestion", "suggestion")
+     reasoning_tag = self.default_prompt_symbols.get("reasoning", "reasoning")
+
+     ignore_extraction_error = True
+
+     reasoning = "(Unable to extract, possibly due to parsing failure)"
+
+     if "```" in response:
+         # First try to extract from ```json ... ``` blocks
+         json_match = re.findall(r"```json\s*(.*?)```", response, re.DOTALL)
+         if len(json_match) > 0:
+             response = json_match[0].strip()
+         else:
+             # Fall back to regular ``` ... ``` blocks
+             match = re.findall(r"```(.*?)```", response, re.DOTALL)
+             if len(match) > 0:
+                 # Remove language identifier if present (e.g., "json", "python")
+                 content = match[0].strip()
+                 # Check if first line is a language identifier
+                 lines = content.split('\n', 1)
+                 if len(lines) > 1 and lines[0].strip().isalpha() and len(lines[0].strip()) < 20:
+                     response = lines[1].strip()
+                 else:
+                     response = content
+
+     json_extracted = {}
+     suggestion = {}
+     attempt_n = 0
+     while attempt_n < 2:
+         try:
+             json_extracted = json.loads(response)
+             if isinstance(json_extracted, dict):  # trim all whitespace keys in the json_extracted
+                 json_extracted = {k.strip(): v for k, v in json_extracted.items()}
+             suggestion = json_extracted.get(suggestion_tag, json_extracted)
+             reasoning = json_extracted.get(reasoning_tag, "")
+             break
+         except json.JSONDecodeError:
+             # Retry on the outermost {...} span; keep `response` a str when there is none
+             brace_spans = re.findall(r"{.*}", response, re.DOTALL)
+             response = brace_spans[0] if brace_spans else response
+             attempt_n += 1
+         except Exception:
+             attempt_n += 1
+
+     if not isinstance(suggestion, dict):
+         suggestion = json_extracted if isinstance(json_extracted, dict) else {}
+
+     if len(suggestion) == 0:
+         pattern = rf'"{suggestion_tag}"\s*:\s*\{{(.*?)\}}'
+         suggestion_match = re.search(pattern, str(response), re.DOTALL)
+         if suggestion_match:
+             suggestion = {}
+             suggestion_content = suggestion_match.group(1)
+             pair_pattern = r'"([a-zA-Z0-9_]+)"\s*:\s*"(.*)"'
+             pairs = re.findall(pair_pattern, suggestion_content, re.DOTALL)
+             for key, value in pairs:
+                 suggestion[key] = value
+
+     if len(suggestion) == 0 and not ignore_extraction_error:
+         print(f"Cannot extract {suggestion_tag} from LLM's response:\n{response}")
+
+     keys_to_remove = []
+     for key, value in suggestion.items():
+         if "__code" in key and isinstance(value, str) and value.strip() == "":  # non-str JSON values have no strip()
+             keys_to_remove.append(key)
+     for key in keys_to_remove:
+         del suggestion[key]
+
+     return {"reasoning": reasoning, "variables": suggestion}
+
+
+class OptimizerPromptSymbolSet2(OptimizerPromptSymbolSet): # variant with shorter tag names (const/var/data/reason); section titles match the base class
+ variables_section_title = "# Variables"
+ inputs_section_title = "# Inputs"
+ outputs_section_title = "# Outputs"
+ others_section_title = "# Others"
+ feedback_section_title = "# Feedback"
+ instruction_section_title = "# Instruction"
+ code_section_title = "# Code"
+ documentation_section_title = "# Documentation"
+ context_section_title = "# Context"
+
+ node_tag = "const" # nodes that are constants in the graph
+ variable_tag = "var" # nodes that can be changed
+ value_tag = "data" # inside node, we have value tag
+ constraint_tag = "constraint" # inside node, we have constraint tag
+
+ # output format
+ reasoning_tag = "reason"
+ improved_variable_tag = "var"
+ name_tag = "name"
+
+
+@dataclass
+class FunctionFeedback:
+ """Container for structured feedback from function execution traces.
+
+ Used by OptoPrime to organize execution traces into a format suitable
+ for LLM-based optimization.
+
+ Attributes
+ ----------
+ graph : list[tuple[int, str]]
+ Topologically sorted function calls with (depth, representation) pairs.
+ documentation : dict[str, str]
+ Mapping of function names to their documentation strings.
+ others : dict[str, Any]
+ Intermediate variables with (data, description) tuples.
+ roots : dict[str, Any]
+ Input/root variables with (data, description) tuples.
+ output : dict[str, Any]
+ Output/leaf variables with (data, description) tuples.
+ user_feedback : Union[str, ContentBlockList]
+ User-provided feedback about the execution. May include images.
+
+ Notes
+ -----
+ This structure separates the execution trace into logical components
+ that can be formatted into prompts for LLM-based optimization.
+ """
+
+ graph: List[
+ Tuple[int, str]
+ ] # Each item is a representation of a function call. The items are topologically sorted.
+ documentation: Dict[str, str] # Function name and its documentation string
+ others: Dict[str, Any] # Intermediate variable names and their data
+ roots: Dict[str, Any] # Root variable name and its data
+ output: Dict[str, Any] # Leaf variable name and its data
+ user_feedback: Union[str, ContentBlockList] # User feedback at the leaf of the graph (may include images)
+
+
+@dataclass
+class ProblemInstance:
+ """Problem instance with multimodal content support.
+
+ A composite of multiple ContentBlockLists representing different parts
+ of a problem. Uses ContentBlockList for variables, inputs, others, and
+ outputs to support both text and image content in a unified way.
+
+ The class provides:
+ - __repr__: Returns text-only representation for logging
+ - to_content_blocks(): Returns ContentBlockList for multimodal prompts
+ - has_images(): Check if any field contains images
+ """
+ instruction: str
+ code: str
+ documentation: str
+ variables: ContentBlockList
+ inputs: ContentBlockList
+ others: ContentBlockList
+ outputs: ContentBlockList
+ feedback: ContentBlockList # May contain images mixed with text
+ context: Optional[ContentBlockList]
+
+ optimizer_prompt_symbol_set: OptimizerPromptSymbolSet
+
+ def __post_init__(self):
+ # Normalize content fields so callers may pass plain strings (or None).
+ # ContentBlockList.ensure is idempotent for existing ContentBlockLists.
+ self.variables = ContentBlockList.ensure(self.variables)
+ self.inputs = ContentBlockList.ensure(self.inputs)
+ self.others = ContentBlockList.ensure(self.others)
+ self.outputs = ContentBlockList.ensure(self.outputs)
+ self.feedback = ContentBlockList.ensure(self.feedback)
+ if self.context is not None:
+ self.context = ContentBlockList.ensure(self.context)
+
+ problem_template = dedent(
+ """
+ # Instruction
+ {instruction}
+
+ # Code
+ {code}
+
+ # Documentation
+ {documentation}
+
+ # Variables
+ {variables}
+
+ # Inputs
+ {inputs}
+
+ # Others
+ {others}
+
+ # Outputs
+ {outputs}
+
+ # Context
+ {context}
+
+ # Feedback
+ {feedback}
+ """
+ )
+
+ def __repr__(self) -> str:
+ """Return text-only representation for backward compatibility.
+
+ Uses ContentBlockList.to_text() for fields that may contain images.
+ """
+ optimization_query = self.problem_template.format(
+ instruction=self.instruction,
+ code=self.code,
+ documentation=self.documentation,
+ variables=self.variables.to_text(),
+ inputs=self.inputs.to_text(),
+ outputs=self.outputs.to_text(),
+ others=self.others.to_text(),
+ context=self.context.to_text() if self.context is not None else "",
+ feedback=self.feedback.to_text()
+ )
+
+ return optimization_query
+
+ def to_content_blocks(self) -> ContentBlockList:
+ """Convert the problem instance to a list of ContentBlocks.
+
+ Consecutive TextContent blocks are merged into a single block for efficiency.
+ Images and other non-text blocks are kept separate.
+
+ Returns:
+ ContentBlockList: A list containing TextContent and ImageContent blocks
+ that represent the complete problem instance including any images
+ from variables, inputs, others, or outputs.
+ """
+ blocks = ContentBlockList()
+
+ # Header sections (always text)
+ header = dedent(f"""
+ # Instruction
+ {self.instruction}
+
+ # Code
+ {self.code}
+
+ # Documentation
+ {self.documentation}
+
+ # Variables
+ """)
+ blocks.append(header)
+
+ # Variables section (may contain images)
+ blocks.extend(self.variables)
+
+ # Inputs section
+ blocks.append("\n\n# Inputs\n")
+ blocks.extend(self.inputs)
+
+ # Others section
+ blocks.append("\n\n# Others\n")
+ blocks.extend(self.others)
+
+ # Outputs section
+ blocks.append("\n\n# Outputs\n")
+ blocks.extend(self.outputs)
+
+ # Context section (optional)
+ if self.context is not None and self.context.to_text().strip() != "":
+ blocks.append(f"\n\n# Context\n") # section name
+ blocks.extend(self.context) # extend the blocks
+
+ # Feedback section (may contain images)
+ blocks.append("\n\n# Feedback\n")
+ blocks.extend(self.feedback)
+
+ return blocks
+
+ def has_images(self) -> bool:
+ """Check if this problem instance contains any images.
+
+ Efficiently checks each ContentBlockList field directly
+ without building full content blocks.
+
+ Returns:
+ bool: True if any field contains ImageContent blocks.
+ """
+ return any(
+ field.has_images()
+ for field in [self.variables, self.inputs, self.others, self.outputs, self.feedback]
+ )
+
+
+
+
+
+# Two aliases of the Content class, provided purely for semantic convenience:
+# both names refer to the very same class object as Content.
+Context = Content
+Feedback = Content
+
+class OptoPrimeV3(OptoPrime):
+ """OptoPrime variant that builds its prompts as content blocks.
+
+ The class-level strings below are prompt templates. The ones containing
+ symbol-set placeholders (section titles, tags) are filled in from the
+ optimizer's prompt symbol set by ``initialize_instruct_prompt``.
+ """
+ # Generic representation prompt: it only explains how to read a problem.
+ # Placeholders are filled in by ``initialize_instruct_prompt``.
+ representation_prompt = dedent(
+ """You're tasked to solve a coding/algorithm problem. You will see the instruction, the code, the documentation of each function used in the code, and the feedback about the execution result.
+
+ Specifically, a problem will be composed of the following parts:
+ - {instruction_section_title}: the instruction which describes the things you need to do or the question you should answer.
+ - {code_section_title}: the code defined in the problem.
+ - {documentation_section_title}: the documentation of each function used in #Code. The explanation might be incomplete and just contain high-level description. You can use the values in #Others to help infer how those functions work.
+ - {variables_section_title}: the input variables that you can change/tweak (trainable).
+ - {inputs_section_title}: the values of fixed inputs to the code, which CANNOT be changed (fixed).
+ - {others_section_title}: the intermediate values created through the code execution.
+ - {outputs_section_title}: the result of the code output.
+ - {feedback_section_title}: the feedback about the code's execution result.
+ - {context_section_title}: the context information that might be useful to solve the problem.
+
+ In `{variables_section_title}`, `{inputs_section_title}`, `{outputs_section_title}`, and `{others_section_title}`, the format is:
+
+ For variables we express as this:
+ {variable_expression_format}
+
+ If `data_type` is `code`, it means `{value_tag}` is the source code of a python code, which may include docstring and definitions."""
+ )
+
+ # Default optimization objective, used when the caller does not pass ``objective``.
+ default_objective = "You need to change the `{value_tag}` of the variables in {variables_section_title} to improve the output in accordance to {feedback_section_title}."
+
+ # Output rules; appended to ``representation_prompt`` to form the system prompt.
+ output_format_prompt_template = dedent(
+ """
+ Output_format: Your output should be in the following XML or JSON format:
+
+ {output_format}
+
+ In <{reasoning_tag}>, explain the problem: 1. what the {instruction_section_title} means 2. what the {feedback_section_title} on {outputs_section_title} means to {variables_section_title} considering how {variables_section_title} are used in {code_section_title} and other values in {documentation_section_title}, {inputs_section_title}, {others_section_title}. 3. Reasoning about the suggested changes in {variables_section_title} (if needed) and the expected result.
+
+ If you need to suggest a change in the values of {variables_section_title}, write down the suggested values in <{improved_variable_tag}>. Remember you can change only the values in {variables_section_title}, not others. When `type` of a variable is `code`, you should write the new definition in the format of python code without syntax errors, and you should not change the function name or the function signature.
+
+ If no changes are needed, just output TERMINATE.
+ """
+ )
+
+ # Optional worked example, added to the user prompt when ``include_example`` is set.
+ example_problem_template = PromptTemplate(dedent(
+ """
+ Here is an example of problem instance and response:
+
+ ================================
+ {example_problem}
+ ================================
+
+ Your response:
+ {example_response}
+ """
+ ))
+
+ # Wrapper around the actual problem instance in the user prompt.
+ user_prompt_template = PromptTemplate(dedent(
+ """
+ Now you see problem instance:
+
+ ================================
+ {problem_instance}
+ ================================
+
+ """
+ ))
+
+ # Closing question of the user prompt; ``{names}`` lists the trainable variables.
+ final_prompt = dedent(
+ """
+ What are your suggestions on variables {names}?
+
+ Your response:
+ """
+ )
+
+ def __init__(
+ self,
+ parameters: List[ParameterNode],
+ llm: AbstractModel = None,
+ *args,
+ image_llm: AbstractModel = None,
+ propagator: Propagator = None,
+ objective: Union[None, str] = None,
+ ignore_extraction_error: bool = True,
+ # ignore the type conversion error when extracting updated values from LLM's suggestion
+ include_example=False,
+ memory_size=0, # Memory size to store the past feedback
+ max_tokens=8192,
+ log=True,
+ initial_var_char_limit=2000,
+ optimizer_prompt_symbol_set: OptimizerPromptSymbolSet = OptimizerPromptSymbolSet(),
+ use_json_object_format=True, # whether to use json object format for the response when calling LLM
+ truncate_expression=truncate_expression,
+ problem_context: Optional[ContentBlockList] = None,
+ **kwargs,
+ ):
+ super().__init__(parameters, *args, propagator=propagator, **kwargs)
+
+ self.truncate_expression = truncate_expression
+ self.problem_context: Optional[ContentBlockList] = problem_context
+ self.output_contains_image = False
+
+ self.use_json_object_format = use_json_object_format if optimizer_prompt_symbol_set.expect_json and use_json_object_format else False
+ self.ignore_extraction_error = ignore_extraction_error
+ self.llm = llm or LLM(mm_beta=True)
+ self.image_llm = image_llm
+
+ assert self.llm.mm_beta, "OptoPrimeV3 enables multi-modal LLM backbone by default. Please use LLM(model='...', mm_beta=True)."
+
+ self.objective = objective or self.default_objective.format(value_tag=optimizer_prompt_symbol_set.value_tag,
+ variables_section_title=optimizer_prompt_symbol_set.variables_section_title,
+ feedback_section_title=optimizer_prompt_symbol_set.feedback_section_title)
+ self.initial_var_char_limit = initial_var_char_limit
+ self.optimizer_prompt_symbol_set = optimizer_prompt_symbol_set
+
+ self.example_problem_summary = FunctionFeedback(graph=[(1, 'y = add(x=a,y=b)'), (2, "z = subtract(x=y, y=c)")],
+ documentation={'add': 'This is an add operator of x and y.',
+ 'subtract': "subtract y from x"},
+ others={'y': (6, None)},
+ roots={'a': (5, "a > 0"),
+ 'b': (1, None),
+ 'c': (5, None)},
+ output={'z': (1, None)},
+ user_feedback='The result of the code is not as expected. The result should be 10, but the code returns 1'
+ )
+ self.example_problem_summary.variables = {'a': (5, "a > 0")}
+ self.example_problem_summary.inputs = {'b': (1, None), 'c': (5, None)}
+
+ self.example_problem = self.problem_instance(self.example_problem_summary)
+ self.example_response = self.optimizer_prompt_symbol_set.example_output(
+ reasoning="In this case, the desired response would be to change the value of input a to 14, as that would make the code return 10.",
+ variables={
+ 'a': 10,
+ }
+ )
+
+ self.include_example = include_example
+ self.max_tokens = max_tokens
+ self.log = [] if log else None
+ self.summary_log = [] if log else None
+ self.memory = FIFOBuffer(memory_size)
+ self.conversation_history = Chat()
+ self.conversation_length = memory_size # Number of conversation turns to keep
+
+ self.default_prompt_symbols = self.optimizer_prompt_symbol_set.default_prompt_symbols
+
+ self.prompt_symbols = copy.deepcopy(self.default_prompt_symbols)
+ self.initialize_instruct_prompt()
+
+ def parameter_check(self, parameters: List[ParameterNode]):
+ """Check if the parameters are valid.
+ This can be overloaded by subclasses to add more checks.
+
+ Args:
+ parameters: List[ParameterNode]
+ The parameters to check.
+
+ Raises:
+ AssertionError: If more than one parameter contains image data.
+
+ Notes:
+ OptoPrimeV3 supports image parameters, but only one parameter can be
+ an image at a time since LLMs can only generate one image per inference.
+ """
+ # Count image parameters
+ image_params = [param for param in parameters if param.is_image]
+
+ if len(image_params) > 1:
+ param_names = ', '.join([f"'{p.name}'" for p in image_params])
+ raise AssertionError(
+ f"OptoPrimeV3 supports at most one image parameter, but found {len(image_params)}: "
+ f"{param_names}. LLMs can only generate one image at a time."
+ )
+ if len(image_params) == 1:
+ self.output_contains_image = True
+
+ def add_context(self, *args, images: Optional[List[Any]] = None, format: str = "PNG"):
+ """Add context to the optimizer, supporting both text and images.
+
+ Two usage patterns are supported:
+
+ **Usage 1: Variadic arguments (alternating text and images)**
+
+ optimizer.add_context("text part 1", image_link, "text part 2", image_file)
+
+ Each argument is either a string (text) or an image source.
+
+ **Usage 2: Template with placeholders**
+
+ optimizer.add_context(
+ "text part 1 [IMAGE] text part 2 [IMAGE]",
+ images=[image_link, image_file]
+ )
+
+ The text contains `[IMAGE]` placeholders that are replaced by images
+ from the `images` list in order. The number of placeholders must match
+ the number of images.
+
+ Args:
+ *args: Variable arguments. In Usage 1, alternating text and images.
+ In Usage 2, a single template string with placeholders.
+ images: Optional list of image sources for Usage 2. Each can be:
+ - URL string (http/https)
+ - Local file path
+ - PIL Image object
+ - Numpy array
+ format: Image format for numpy arrays (PNG, JPEG, etc.). Default: PNG
+
+ Raises:
+ ValueError: If using Usage 2 and the number of placeholders doesn't
+ match the number of images.
+
+ Examples:
+ # Usage 1: Alternating text and images
+ optimizer.add_context("Here's the diagram:", "diagram.png", "And here's another:", "other.png")
+
+ # Usage 2: Template with placeholders
+ optimizer.add_context("See [IMAGE] and compare with [IMAGE]", images=["a.png", "b.png"])
+
+ # Text-only context
+ optimizer.add_context("Important background information")
+ """
+ ctx = Content(*args, images=images, format=format)
+
+ # Store the context
+ if self.problem_context is None:
+ self.problem_context = ctx
+ else:
+ # Append to existing context with a newline separator
+ self.problem_context.append("\n\n")
+ self.problem_context.extend(ctx.to_content_blocks())
+
+ def initialize_instruct_prompt(self):
+ self.representation_prompt = self.representation_prompt.format(
+ variable_expression_format=dedent(f"""
+ <{self.optimizer_prompt_symbol_set.variable_tag} name="variable_name" type="data_type">
+ <{self.optimizer_prompt_symbol_set.value_tag}>
+ value
+ {self.optimizer_prompt_symbol_set.value_tag}>
+ <{self.optimizer_prompt_symbol_set.constraint_tag}>
+ constraint_expression
+ {self.optimizer_prompt_symbol_set.constraint_tag}>
+ {self.optimizer_prompt_symbol_set.variable_tag}>
+ """),
+ value_tag=self.optimizer_prompt_symbol_set.value_tag,
+ variables_section_title=self.optimizer_prompt_symbol_set.variables_section_title.replace(" ", ""),
+ inputs_section_title=self.optimizer_prompt_symbol_set.inputs_section_title.replace(" ", ""),
+ outputs_section_title=self.optimizer_prompt_symbol_set.outputs_section_title.replace(" ", ""),
+ feedback_section_title=self.optimizer_prompt_symbol_set.feedback_section_title.replace(" ", ""),
+ instruction_section_title=self.optimizer_prompt_symbol_set.instruction_section_title.replace(" ", ""),
+ code_section_title=self.optimizer_prompt_symbol_set.code_section_title.replace(" ", ""),
+ documentation_section_title=self.optimizer_prompt_symbol_set.documentation_section_title.replace(" ", ""),
+ others_section_title=self.optimizer_prompt_symbol_set.others_section_title.replace(" ", ""),
+ context_section_title=self.optimizer_prompt_symbol_set.context_section_title.replace(" ", "")
+ )
+ self.output_format_prompt = self.output_format_prompt_template.format(
+ output_format=self.optimizer_prompt_symbol_set.output_format,
+ reasoning_tag=self.optimizer_prompt_symbol_set.reasoning_tag,
+ improved_variable_tag=self.optimizer_prompt_symbol_set.improved_variable_tag,
+ instruction_section_title=self.optimizer_prompt_symbol_set.instruction_section_title.replace(" ", ""),
+ feedback_section_title=self.optimizer_prompt_symbol_set.feedback_section_title.replace(" ", ""),
+ outputs_section_title=self.optimizer_prompt_symbol_set.outputs_section_title.replace(" ", ""),
+ code_section_title=self.optimizer_prompt_symbol_set.code_section_title.replace(" ", ""),
+ documentation_section_title=self.optimizer_prompt_symbol_set.documentation_section_title.replace(" ", ""),
+ variables_section_title=self.optimizer_prompt_symbol_set.variables_section_title.replace(" ", ""),
+ inputs_section_title=self.optimizer_prompt_symbol_set.inputs_section_title.replace(" ", ""),
+ others_section_title=self.optimizer_prompt_symbol_set.others_section_title.replace(" ", ""),
+ )
+
+ def repr_node_value(self, node_dict, node_tag="node",
+ value_tag="value", constraint_tag="constraint") -> str:
+ """Returns text-only representation of node values (backward compatible)."""
+ temp_list = []
+ for k, v in node_dict.items():
+ if "__code" not in k:
+ # For images, use placeholder text
+ value_repr = "[IMAGE]" if is_image(v[0]) else str(v[0])
+ if v[1] is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag:
+ constraint_expr = f"<{constraint_tag}>\n{v[1]}\n{constraint_tag}>"
+ temp_list.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(v[0]).__name__}\">\n<{value_tag}>\n{value_repr}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n")
+ else:
+ temp_list.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(v[0]).__name__}\">\n<{value_tag}>\n{value_repr}\n{value_tag}>\n{node_tag}>\n")
+ else:
+ constraint_expr = f"\n{v[1]}\n"
+ signature = v[1].replace("The code should start with:\n", "")
+ func_body = v[0].replace(signature, "")
+ temp_list.append(
+ f"<{node_tag} name=\"{k}\" type=\"code\">\n<{value_tag}>\n{signature}{func_body}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n")
+ return "\n".join(temp_list)
+
+ def repr_node_value_compact(self, node_dict, node_tag="node",
+ value_tag="value", constraint_tag="constraint") -> str:
+ """Returns text-only compact representation of node values (backward compatible)."""
+ temp_list = []
+ for k, v in node_dict.items():
+ if "__code" not in k:
+ # For images, use placeholder text
+ if is_image(v[0]):
+ node_value = "[IMAGE]"
+ else:
+ node_value = self.truncate_expression(v[0], self.initial_var_char_limit)
+ if v[1] is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag:
+ constraint_expr = f"<{constraint_tag}>\n{v[1]}\n{constraint_tag}>"
+ temp_list.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(v[0]).__name__}\">\n<{value_tag}>\n{node_value}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n")
+ else:
+ temp_list.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(v[0]).__name__}\">\n<{value_tag}>\n{node_value}\n{value_tag}>\n{node_tag}>\n")
+ else:
+ constraint_expr = f"<{constraint_tag}>\n{v[1]}\n{constraint_tag}>"
+ # we only truncate the function body
+ signature = v[1].replace("The code should start with:\n", "")
+ func_body = v[0].replace(signature, "")
+ node_value = self.truncate_expression(func_body, self.initial_var_char_limit)
+ temp_list.append(
+ f"<{node_tag} name=\"{k}\" type=\"code\">\n<{value_tag}>\n{signature}{node_value}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n")
+ return "\n".join(temp_list)
+
+ def repr_node_value_as_content_blocks(self, node_dict, node_tag="node",
+ value_tag="value", constraint_tag="constraint") -> ContentBlockList:
+ """Returns a ContentBlockList representing node values, including images.
+
+ Consecutive TextContent blocks are merged for efficiency.
+ For image values, the text before and after the image are separate blocks.
+ """
+ blocks = ContentBlockList()
+
+ for k, v in node_dict.items():
+ value_data = v[0]
+ constraint = v[1]
+
+ if "__code" not in k:
+ # Check if this is an image
+ image_content = value_to_image_content(value_data)
+
+ if image_content is not None:
+ # Image node: output XML structure, then image, then closing
+ type_name = "image"
+ constraint_expr = f"<{constraint_tag}>\n{constraint}\n{constraint_tag}>" if constraint is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag else ""
+
+ xml_text = f"<{node_tag} name=\"{k}\" type=\"{type_name}\">\n<{value_tag}>\n"
+ blocks.append(xml_text)
+ blocks.append(image_content) # Image breaks the text flow
+
+ closing_text = f"\n{value_tag}>\n{constraint_expr}{node_tag}>\n\n" if constraint_expr else f"\n{value_tag}>\n{node_tag}>\n\n"
+ blocks.append(closing_text)
+ else:
+ # Non-image node: text representation
+ if constraint is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag:
+ constraint_expr = f"<{constraint_tag}>\n{constraint}\n{constraint_tag}>"
+ blocks.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(value_data).__name__}\">\n<{value_tag}>\n{value_data}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n\n"
+ )
+ else:
+ blocks.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(value_data).__name__}\">\n<{value_tag}>\n{value_data}\n{value_tag}>\n{node_tag}>\n\n"
+ )
+ else:
+ # Code node (never an image)
+ constraint_expr = f"<{constraint_tag}>\n{constraint}\n{constraint_tag}>"
+ signature = constraint.replace("The code should start with:\n", "")
+ func_body = value_data.replace(signature, "")
+ blocks.append(
+ f"<{node_tag} name=\"{k}\" type=\"code\">\n<{value_tag}>\n{signature}{func_body}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n\n"
+ )
+
+ return blocks
+
+ def repr_node_value_compact_as_content_blocks(self, node_dict, node_tag="node",
+ value_tag="value", constraint_tag="constraint") -> ContentBlockList:
+ """Returns a ContentBlockList with compact representation, including images.
+
+ Consecutive TextContent blocks are merged for efficiency.
+ Non-image values are truncated. Images break the text flow.
+ """
+ blocks = ContentBlockList()
+
+ for k, v in node_dict.items():
+ value_data = v[0]
+ constraint = v[1]
+
+ if "__code" not in k:
+ # Check if this is an image
+ image_content = value_to_image_content(value_data)
+
+ if image_content is not None:
+ # Image node: output XML structure, then image, then closing
+ type_name = "image"
+ constraint_expr = f"<{constraint_tag}>\n{constraint}\n{constraint_tag}>" if constraint is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag else ""
+
+ xml_text = f"<{node_tag} name=\"{k}\" type=\"{type_name}\">\n<{value_tag}>\n"
+ blocks.append(xml_text)
+ blocks.append(image_content) # Image breaks the text flow
+
+ closing_text = f"\n{value_tag}>\n{constraint_expr}{node_tag}>\n\n" if constraint_expr else f"\n{value_tag}>\n{node_tag}>\n\n"
+ blocks.append(closing_text)
+ else:
+ # Non-image node: truncated text representation
+ node_value = self.truncate_expression(value_data, self.initial_var_char_limit)
+ if constraint is not None and node_tag == self.optimizer_prompt_symbol_set.variable_tag:
+ constraint_expr = f"<{constraint_tag}>\n{constraint}\n{constraint_tag}>"
+ blocks.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(value_data).__name__}\">\n<{value_tag}>\n{node_value}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n\n"
+ )
+ else:
+ blocks.append(
+ f"<{node_tag} name=\"{k}\" type=\"{type(value_data).__name__}\">\n<{value_tag}>\n{node_value}\n{value_tag}>\n{node_tag}>\n\n"
+ )
+ else:
+ # Code node (never an image)
+ constraint_expr = f"<{constraint_tag}>\n{constraint}\n{constraint_tag}>"
+ signature = constraint.replace("The code should start with:\n", "")
+ func_body = value_data.replace(signature, "")
+ node_value = self.truncate_expression(func_body, self.initial_var_char_limit)
+ blocks.append(
+ f"<{node_tag} name=\"{k}\" type=\"code\">\n<{value_tag}>\n{signature}{node_value}\n{value_tag}>\n{constraint_expr}\n{node_tag}>\n\n"
+ )
+
+ return blocks
+
+ def summarize(self):
+ """Aggregate feedback from parameters into a structured summary.
+
+ Collects and organizes feedback from all trainable parameters into
+ a FunctionFeedback structure suitable for problem representation.
+
+ Returns
+ -------
+ FunctionFeedback
+ Structured feedback containing:
+ - variables: Trainable parameters with values and descriptions
+ - inputs: Non-trainable root nodes
+ - graph: Topologically sorted function calls
+ - others: Intermediate computation values
+ - output: Final output values
+ - documentation: Function documentation strings
+ - user_feedback: Aggregated user feedback
+
+ Notes
+ -----
+ The method performs several transformations:
+ 1. Aggregates feedback from all trainable parameters
+ 2. Converts the trace graph to FunctionFeedback structure
+ 3. Separates root nodes into variables (trainable) and inputs (non-trainable)
+ 4. Preserves the computation graph and intermediate values
+
+ Parameters without feedback (disconnected from output) are still
+ included in the summary but may not receive updates.
+ """
+ # Aggregate feedback from all the parameters
+ feedbacks = [
+ self.propagator.aggregate(node.feedback)
+ for node in self.parameters
+ if node.trainable
+ ]
+ summary = sum(feedbacks) # TraceGraph
+ # Construct variables and update others
+ # Some trainable nodes might not receive feedback, because they might not be connected to the output
+ summary = node_to_function_feedback(summary)
+ # Classify the root nodes into variables and others
+ # summary.variables = {p.py_name: p.data for p in self.parameters if p.trainable and p.py_name in summary.roots}
+
+ trainable_param_dict = {p.py_name: p for p in self.parameters if p.trainable}
+ summary.variables = {
+ py_name: data
+ for py_name, data in summary.roots.items()
+ if py_name in trainable_param_dict
+ }
+ summary.inputs = {
+ py_name: data
+ for py_name, data in summary.roots.items()
+ if py_name not in trainable_param_dict
+ } # non-variable roots
+
+ return summary
+
+ def construct_prompt(self, summary, mask=None, *args, **kwargs):
+ """Construct the system and user prompt.
+
+ The prompt for the optimizer agent is rather complex.
+ There are prompts that are automatically constructed through the Trace frontend (aka the bundle/node API).
+ However, we also allow the user to provide additional context to the optimizer agent.
+
+ We handle multimodal (MM) conversion implicitly for the automatic part (TraceGraph),
+ but we handle the user-provided context explicitly.
+
+ Args:
+ summary: The FunctionFeedback summary containing graph information.
+ mask: List of section titles to exclude from the problem instance.
+
+ Returns:
+ Tuple of (system_prompt: str, user_prompt: ContentBlockList)
+ - system_prompt is always a string
+ - user_prompt is a ContentBlockList for multimodal support
+ """
+ system_prompt = (
+ self.representation_prompt + self.output_format_prompt
+ ) # generic representation + output rule
+
+ problem_inst = self.problem_instance(summary, mask=mask)
+
+ # Build user prompt as ContentBlockList (auto-merges consecutive text)
+ user_content_blocks = ContentBlockList()
+
+ # Add example if included
+ if self.include_example:
+ example_text = self.example_problem_template.format(
+ example_problem=str(self.example_problem), # Example is always text
+ example_response=self.example_response,
+ )
+ user_content_blocks.append(example_text)
+
+ # Add problem instance template
+ # context is part of the problem instance
+ user_content_blocks.append(self.user_prompt_template.format(
+ problem_instance=problem_inst.to_content_blocks(),
+ ))
+
+ # Add final prompt
+ var_names = ", ".join(k for k in summary.variables.keys())
+ user_content_blocks.append(self.final_prompt.format(
+ names=var_names,
+ ))
+
+ return system_prompt, user_content_blocks
+
+ def problem_instance(self, summary: FunctionFeedback, mask=None):
+ """Create a ProblemInstance from the summary.
+
+ Args:
+ summary: The FunctionFeedback summary containing graph information.
+ mask: List of section titles to exclude from the problem instance.
+
+ Returns:
+ ProblemInstance with content block fields for multimodal support.
+ """
+ mask = mask or []
+
+ # Use content block representations for multimodal support
+ variables_content = (
+ self.repr_node_value_as_content_blocks(
+ summary.variables,
+ node_tag=self.optimizer_prompt_symbol_set.variable_tag,
+ value_tag=self.optimizer_prompt_symbol_set.value_tag,
+ constraint_tag=self.optimizer_prompt_symbol_set.constraint_tag
+ )
+ if self.optimizer_prompt_symbol_set.variables_section_title not in mask
+ else ContentBlockList()
+ )
+
+ # we add a temporary check here to ensure no more than 1 parameter is an image
+ variable_stats = variables_content.count_blocks()
+ if 'ImageContent' in variable_stats:
+ assert variable_stats['ImageContent'] <= 1, "Currently we do not support generating multiple images (more than 1 parameter is an image)"
+ self.output_contains_image = True
+
+ inputs_content = (
+ self.repr_node_value_compact_as_content_blocks(
+ summary.inputs,
+ node_tag=self.optimizer_prompt_symbol_set.node_tag,
+ value_tag=self.optimizer_prompt_symbol_set.value_tag,
+ constraint_tag=self.optimizer_prompt_symbol_set.constraint_tag
+ )
+ if self.optimizer_prompt_symbol_set.inputs_section_title not in mask
+ else ContentBlockList()
+ )
+ outputs_content = (
+ self.repr_node_value_compact_as_content_blocks(
+ summary.output,
+ node_tag=self.optimizer_prompt_symbol_set.node_tag,
+ value_tag=self.optimizer_prompt_symbol_set.value_tag,
+ constraint_tag=self.optimizer_prompt_symbol_set.constraint_tag
+ )
+ if self.optimizer_prompt_symbol_set.outputs_section_title not in mask
+ else ContentBlockList()
+ )
+ others_content = (
+ self.repr_node_value_compact_as_content_blocks(
+ summary.others,
+ node_tag=self.optimizer_prompt_symbol_set.node_tag,
+ value_tag=self.optimizer_prompt_symbol_set.value_tag,
+ constraint_tag=self.optimizer_prompt_symbol_set.constraint_tag
+ )
+ if self.optimizer_prompt_symbol_set.others_section_title not in mask
+ else ContentBlockList()
+ )
+
+ return ProblemInstance(
+ instruction=self.objective if "#Instruction" not in mask else "",
+ code=(
+ "\n".join([v for k, v in sorted(summary.graph)])
+ if self.optimizer_prompt_symbol_set.inputs_section_title not in mask
+ else ""
+ ),
+ documentation=(
+ "\n".join([f"[{k}] {v}" for k, v in summary.documentation.items()])
+ if self.optimizer_prompt_symbol_set.documentation_section_title not in mask
+ else ""
+ ),
+ variables=variables_content,
+ inputs=inputs_content,
+ outputs=outputs_content,
+ others=others_content,
+ feedback=Content(summary.user_feedback) if self.optimizer_prompt_symbol_set.feedback_section_title not in mask else Content(""),
+ context=self.problem_context,
+ optimizer_prompt_symbol_set=self.optimizer_prompt_symbol_set
+ )
+
+ def _step(
+     self, verbose=False, mask=None, *args, **kwargs
+ ) -> Dict[ParameterNode, Any]:
+     """Execute one optimization step.
+
+     Args:
+         verbose: If True, print prompts and responses.
+         mask: List of section titles to exclude from the problem instance.
+
+     Returns:
+         Dictionary mapping parameters to their updated values. An empty
+         dictionary is returned when the response text contains "TERMINATE".
+     """
+     assert isinstance(self.propagator, GraphPropagator)
+     summary = self.summarize()
+
+     system_prompt, user_content_blocks = self.construct_prompt(summary, mask=mask)
+
+     response = self.call_llm(
+         system_prompt=system_prompt,
+         user_prompt=user_content_blocks,
+         verbose=verbose,
+         max_tokens=self.max_tokens,
+     )
+
+     # Render the response text once; both checks below need it.
+     response_text = response.to_text()
+     if "TERMINATE" in response_text:
+         return {}
+
+     # A successful extraction has two keys: reasoning, and variables.
+     # extract_llm_suggestion returns an empty result when nothing could be
+     # parsed, so fall back to "no text updates" instead of raising KeyError.
+     suggestion = self.extract_llm_suggestion(response_text)
+     variables = suggestion["variables"] if suggestion and "variables" in suggestion else {}
+     update_dict = self.construct_update_dict(variables)
+
+     # for update_dict, we manually update the image according to the variable name
+     images = response.get_images()
+     if images.has_images():
+         assert len(images) == 1, "Currently we only allow at most one image parameter"
+         # find the variable name
+         image_param = [param for param in self.parameters if param.is_image][0]
+         update_dict[image_param] = images[0].as_image()  # parameter as PIL Image
+
+     if self.log is not None:
+         # Build the problem instance once and reuse it for both log entries;
+         # the "user_prompt" entry stores its text representation.
+         # NOTE(review): the instance is built without `mask`, so the logged
+         # prompt may differ from the one sent when a mask is given - confirm
+         # this is intended.
+         problem_instance = self.problem_instance(summary)
+         self.log.append(
+             {
+                 "system_prompt": system_prompt,
+                 "user_prompt": str(problem_instance),
+                 "response": response,
+             }
+         )
+         self.summary_log.append(
+             {"problem_instance": problem_instance, "summary": summary}
+         )
+
+     return update_dict
+
+ def extract_llm_suggestion(self, response: str):
+     """Parse the LLM response text with the symbol set's extractor.
+
+     The extractor's result is returned unchanged. When it is empty and
+     ``ignore_extraction_error`` is false, the raw response is printed for
+     inspection; no exception is raised.
+     """
+     extractor = self.optimizer_prompt_symbol_set.output_response_extractor
+     parsed = extractor(response)
+
+     nothing_extracted = len(parsed) == 0
+     if nothing_extracted and not self.ignore_extraction_error:
+         print("Cannot extract suggestion from LLM's response:")
+         print(response)
+
+     return parsed
+
+ def call_llm(
+     self,
+     system_prompt: str,
+     user_prompt: ContentBlockList,
+     verbose: Union[bool, str] = False,
+     max_tokens: int = 4096,
+ ) -> AssistantTurn:
+     """Send one user turn to the LLM and return its reply.
+
+     The system prompt is stored on ``self.conversation_history``, the user
+     prompt is appended to it as a new user turn, and the reply is appended
+     as an assistant turn before being returned.
+
+     Args:
+         system_prompt: The system prompt (always a string).
+         user_prompt: The user prompt as ContentBlockList for multimodal content.
+         verbose: The prompt is printed unless this is False or "output";
+             the response is printed for any truthy value.
+         max_tokens: Maximum tokens in the response.
+
+     Returns:
+         assistant_turn: AssistantTurn object
+     """
+     show_prompt = verbose not in (False, "output")
+     if show_prompt:
+         # Only text blocks are printed; images are indicated by a placeholder.
+         prompt_text = "".join(
+             block.text for block in user_prompt if isinstance(block, TextContent)
+         )
+         image_note = ""
+         if any(isinstance(block, ImageContent) for block in user_prompt):
+             image_note = f" [+ {DEFAULT_IMAGE_PLACEHOLDER}]"
+         print("Prompt\n", system_prompt + prompt_text + image_note)
+
+     # Record the system prompt and the new user turn in the history.
+     history = self.conversation_history
+     history.system_prompt = system_prompt
+     history.add_user_turn(UserTurn(user_prompt))
+
+     # conversation_length = n historical rounds (user+assistant pairs) to keep;
+     # a non-positive value is passed on as -1. The current user turn is
+     # automatically included by to_messages().
+     rounds_to_keep = self.conversation_length if self.conversation_length > 0 else -1
+     messages = history.to_messages(
+         n=rounds_to_keep,
+         truncate_strategy="from_start",
+         model_name=self.llm.model_name,
+     )
+
+     # Bedrock doesn't support response_format natively - LiteLLM adds tools which breaks the response
+     on_bedrock = hasattr(self.llm, 'model_name') and is_bedrock_model(self.llm.model_name)
+     if self.use_json_object_format and not on_bedrock:
+         response_format = {"type": "json_object"}
+     else:
+         response_format = None
+
+     request = dict(
+         messages=messages,
+         max_tokens=max_tokens,
+         response_format=response_format,
+     )
+
+     # Add image generation tool only for non-Gemini models when output contains image
+     needs_image_tool = self.output_contains_image and 'gemini' not in self.llm.model_name
+     if needs_image_tool:
+         request["tools"] = [{"type": "image_generation"}]
+
+     assistant_turn = self.llm(**request)
+
+     if verbose:
+         print("LLM response:\n", assistant_turn)
+
+     history.add_assistant_turn(assistant_turn)
+     return assistant_turn
+
+ def save(self, path: str):
+     """Pickle the optimizer's persistent attributes to ``path``.
+
+     The file holds a single dict keyed by attribute name, in the order
+     listed below.
+     """
+     persisted_fields = (
+         "truncate_expression",
+         "use_json_object_format",
+         "ignore_extraction_error",
+         "objective",
+         "initial_var_char_limit",
+         "optimizer_prompt_symbol_set",
+         "include_example",
+         "max_tokens",
+         "memory",
+         "conversation_history",
+         "conversation_length",
+         "default_prompt_symbols",
+         "prompt_symbols",
+         "representation_prompt",
+         "output_format_prompt",
+     )
+     with open(path, 'wb') as f:
+         # The snapshot is assembled after the file is opened, matching a
+         # field-by-field dump.
+         snapshot = {field: getattr(self, field) for field in persisted_fields}
+         pickle.dump(snapshot, f)
+
+ def load(self, path: str):
+     """Restore optimizer attributes from a pickle file at ``path``.
+
+     ``conversation_history`` and ``conversation_length`` fall back to
+     ``Chat()`` and ``0`` when absent from the file; every other key is
+     required and raises ``KeyError`` if missing.
+
+     Warning: unpickling can execute arbitrary code, so only load files
+     from a trusted source.
+     """
+     with open(path, 'rb') as f:
+         state = pickle.load(f)
+
+     # Attributes are restored in this fixed order, so a missing required
+     # key stops the restore at that field.
+     for name in (
+         "truncate_expression",
+         "use_json_object_format",
+         "ignore_extraction_error",
+         "objective",
+         "initial_var_char_limit",
+         "optimizer_prompt_symbol_set",
+         "include_example",
+         "max_tokens",
+         "memory",
+     ):
+         setattr(self, name, state[name])
+
+     # These two keys are optional in the file.
+     self.conversation_history = state.get("conversation_history", Chat())
+     self.conversation_length = state.get("conversation_length", 0)
+
+     for name in (
+         "default_prompt_symbols",
+         "prompt_symbols",
+         "representation_prompt",
+         "output_format_prompt",
+     ):
+         setattr(self, name, state[name])
diff --git a/tests/llm_optimizers_tests/test_optoprime_v3.py b/tests/llm_optimizers_tests/test_optoprime_v3.py
new file mode 100644
index 00000000..f124c5ec
--- /dev/null
+++ b/tests/llm_optimizers_tests/test_optoprime_v3.py
@@ -0,0 +1,510 @@
+import os
+import pytest
+from opto.trace import GRAPH
+from opto.utils.llm import LLM
+
+from opto.trace import node, bundle
+from opto.optimizers.optoprime_v3 import (
+ OptoPrimeV3, OptimizerPromptSymbolSet2, ProblemInstance,
+ OptimizerPromptSymbolSet, value_to_image_content
+)
+from opto.utils.backbone import TextContent, ImageContent
+
+# You can temporarily override this to test a specific optimizer, e.g. ALL_OPTIMIZERS = [TextGrad] # [OptoPrimeMulti] ALL_OPTIMIZERS = [OptoPrime]
+
+# Real LLM calls are opt-in: export RUN_LIVE_LLM_TESTS=1 to enable the tests
+# that make them. CI runs against a text-only stub that cannot satisfy the
+# multimodal optimizer steps, so those tests are skipped there.
+SKIP_REASON = "Live LLM test; set RUN_LIVE_LLM_TESTS=1 to run"
+HAS_CREDENTIALS = os.getenv("RUN_LIVE_LLM_TESTS") == "1"
+llm = LLM()
+
+
+@pytest.fixture(autouse=True)
+def clear_graph():
+ """Clear the global GRAPH before and after every test (autouse fixture)."""
+ GRAPH.clear()
+ # The test body runs at the yield; the second clear is the teardown step.
+ yield
+ GRAPH.clear()
+
+
+@pytest.mark.skipif(not HAS_CREDENTIALS, reason=SKIP_REASON)
+def test_response_extraction():
+ # Placeholder: no assertions yet. Skipped unless RUN_LIVE_LLM_TESTS=1.
+ pass
+
+
+def test_tag_template_change():
+ """Build a prompt with OptimizerPromptSymbolSet2 and check the rendered text."""
+ num_1 = node(1, trainable=True)
+ num_2 = node(2, trainable=True, description="<=5")
+ result = num_1 + num_2
+ optimizer = OptoPrimeV3([num_1, num_2], use_json_object_format=False,
+ ignore_extraction_error=False,
+ include_example=True,
+ optimizer_prompt_symbol_set=OptimizerPromptSymbolSet2())
+
+ optimizer.zero_feedback()
+ optimizer.backward(result, 'make this number bigger')
+
+ summary = optimizer.summarize()
+ system_prompt, user_prompt = optimizer.construct_prompt(summary)
+
+ # system_prompt is a string, user_prompt is a ContentBlockList
+ system_prompt = optimizer.replace_symbols(system_prompt, optimizer.prompt_symbols)
+
+ # Convert ContentBlockList to text for symbol replacement
+ user_prompt_text = "".join(block.text for block in user_prompt if isinstance(block, TextContent))
+ user_prompt_text = optimizer.replace_symbols(user_prompt_text, optimizer.prompt_symbols)
+
+ # NOTE(review): both checks below test for an empty string, which is contained
+ # in every string, so they can never fail. The tag literals referred to by the
+ # messages appear to be missing - restore them.
+ assert """""" in system_prompt, "Expected tag to be present in system_prompt"
+ assert """""" in user_prompt_text, "Expected tag to be present in user_prompt"
+
+ print(system_prompt)
+ print(user_prompt_text)
+
+
+# Non-trainable helper operator used by test_function_repr: returns num + 1.
+@bundle()
+def transform(num):
+ """Add number"""
+ return num + 1
+
+
+# Trainable helper operator. test_function_repr asserts on this function's
+# source text, so keep the definition itself unchanged.
+@bundle(trainable=True)
+def multiply(num):
+ return num * 5
+
+
+def test_function_repr():
+ """Check that the trainable `multiply` source appears in the user prompt text."""
+ num_1 = node(1, trainable=False)
+
+ result = multiply(transform(num_1))
+ # Only the trainable function's parameter is handed to the optimizer.
+ optimizer = OptoPrimeV3([multiply.parameter], use_json_object_format=False,
+ ignore_extraction_error=False,
+ include_example=True)
+
+ optimizer.zero_feedback()
+ optimizer.backward(result, 'make this number bigger')
+
+ summary = optimizer.summarize()
+ system_prompt, user_prompt = optimizer.construct_prompt(summary)
+
+ system_prompt = optimizer.replace_symbols(system_prompt, optimizer.prompt_symbols)
+ # Convert ContentBlockList to text for symbol replacement
+ user_prompt_text = "".join(block.text for block in user_prompt if isinstance(block, TextContent))
+ user_prompt_text = optimizer.replace_symbols(user_prompt_text, optimizer.prompt_symbols)
+
+ # Expected rendering of the function source followed by its signature hint;
+ # the comparison is an exact substring match, whitespace included.
+ function_repr = """
+
+def multiply(num):
+ return num * 5
+
+
+The code should start with:
+def multiply(num):
+
+"""
+
+ assert function_repr in user_prompt_text, "Expected function representation to be present in user_prompt"
+
+def test_big_data_truncation():
+    """A long non-trainable value shows up in the prompt in its truncated form."""
+    suffix = node("**2", trainable=True)
+    long_digits = node("12345691912338" * 10, trainable=False)
+    combined = long_digits + suffix
+
+    optimizer = OptoPrimeV3(
+        [suffix],
+        use_json_object_format=False,
+        ignore_extraction_error=False,
+        include_example=True,
+        initial_var_char_limit=10,
+    )
+    optimizer.zero_feedback()
+    optimizer.backward(combined, 'compute the expression')
+
+    system_prompt, user_prompt = optimizer.construct_prompt(optimizer.summarize())
+    system_prompt = optimizer.replace_symbols(system_prompt, optimizer.prompt_symbols)
+
+    # Only the text blocks take part in symbol replacement.
+    text_only = "".join(block.text for block in user_prompt if isinstance(block, TextContent))
+    user_prompt_text = optimizer.replace_symbols(text_only, optimizer.prompt_symbols)
+
+    # First 10 characters of the long value followed by the truncation marker.
+    expected = "1234569191...(skipped due to length limit)"
+    assert expected in user_prompt_text, "Expected truncated list representation to be present in user_prompt"
+
+def test_extraction_pipeline():
+ """Check construct_prompt's return types and extraction from a canned response."""
+ num_1 = node(1, trainable=True)
+ num_2 = node(2, trainable=True, description="<=5")
+ result = num_1 + num_2
+ optimizer = OptoPrimeV3([num_1, num_2], use_json_object_format=False,
+ ignore_extraction_error=False,
+ include_example=True,
+ optimizer_prompt_symbol_set=OptimizerPromptSymbolSet2())
+
+ optimizer.zero_feedback()
+ optimizer.backward(result, 'make this number bigger')
+
+ summary = optimizer.summarize()
+ system_prompt, user_prompt = optimizer.construct_prompt(summary)
+
+ # Verify construct_prompt returns expected types
+ assert isinstance(system_prompt, str)
+ assert isinstance(user_prompt, list)
+
+ # Test extraction from a mock response
+ # NOTE(review): the literal below contains no tag markup around the reasoning,
+ # names or values; if the extractor depends on such markup, confirm the
+ # literal has not lost it.
+ response = """
+The instruction suggests that the output, `add0`, needs to be made bigger than it currently is (3). The code performs an addition of `int0` and `int1` to produce `add0`. To increase `add0`, we can increase the values of `int0` or `int1`, or both. Given that `int1` has a constraint of being less than or equal to 5, we can set `int0` to a higher value, since it has no explicit constraint. By adjusting `int0` to a higher value, the output can be made larger in accordance with the feedback.
+
+
+
+int0
+
+5
+
+
+
+
+int1
+
+5
+
+"""
+ suggestion = optimizer.extract_llm_suggestion(response)
+
+ # Extracted values are compared as strings ('5'), not integers.
+ assert 'reasoning' in suggestion, "Expected 'reasoning' in suggestion"
+ assert 'variables' in suggestion, "Expected 'variables' in suggestion"
+ assert 'int0' in suggestion['variables'], "Expected 'int0' variable in suggestion"
+ assert 'int1' in suggestion['variables'], "Expected 'int1' variable in suggestion"
+ assert suggestion['variables']['int0'] == '5', "Expected int0 to be incremented to 5"
+ assert suggestion['variables']['int1'] == '5', "Expected int1 to be incremented to 5"
+
+
+# ==================== Multimodal / Content Block Tests ====================
+
+def test_problem_instance_text_only():
+    """A ProblemInstance built from text-only fields renders to text and blocks."""
+    from opto.utils.backbone import ContentBlockList
+
+    instance = ProblemInstance(
+        instruction="Test instruction",
+        code="y = add(x=a, y=b)",
+        documentation="[add] Adds two numbers",
+        variables=ContentBlockList("5"),
+        inputs=ContentBlockList("3"),
+        others=ContentBlockList(),
+        outputs=ContentBlockList("8"),
+        feedback="Result should be 10",
+        context="Some context",
+        optimizer_prompt_symbol_set=OptimizerPromptSymbolSet(),
+    )
+
+    # The string form must carry the instruction, code, feedback and context.
+    rendered = str(instance)
+    for expected in (
+        "Test instruction",
+        "y = add(x=a, y=b)",
+        "Result should be 10",
+        "Some context",
+    ):
+        assert expected in rendered
+
+    # The block form is a non-empty list of text/image content blocks.
+    blocks = instance.to_content_blocks()
+    assert isinstance(blocks, list)
+    assert len(blocks) > 0
+    for block in blocks:
+        assert isinstance(block, (TextContent, ImageContent))
+
+    # No image was supplied in any field.
+    assert not instance.has_images()
+
+
+def test_problem_instance_with_content_blocks():
+    """An image block passed in `variables` survives into the instance's output."""
+    from opto.utils.backbone import ContentBlockList
+
+    image_url = "https://example.com/test.jpg"
+    # One image block placed between two text blocks.
+    variables_blocks = ContentBlockList([
+        TextContent(text=""),
+        ImageContent(image_url=image_url),
+        TextContent(text=""),
+    ])
+
+    instance = ProblemInstance(
+        instruction="Analyze the image",
+        code="result = analyze(img)",
+        documentation="[analyze] Analyzes an image",
+        variables=variables_blocks,
+        inputs=ContentBlockList(),
+        others=ContentBlockList(),
+        outputs=ContentBlockList("cat"),
+        feedback="Result should be 'dog'",
+        context=None,
+        optimizer_prompt_symbol_set=OptimizerPromptSymbolSet(),
+    )
+
+    # The text rendering is expected to show an [IMAGE] placeholder.
+    rendered = str(instance)
+    assert "Analyze the image" in rendered
+    assert "[IMAGE]" in rendered
+
+    blocks = instance.to_content_blocks()
+    assert isinstance(blocks, list)
+
+    # Exactly one image block, still pointing at the original URL.
+    found_images = [block for block in blocks if isinstance(block, ImageContent)]
+    assert len(found_images) == 1
+    assert found_images[0].image_url == image_url
+
+    assert instance.has_images()
+
+
+def test_problem_instance_mixed_content():
+    """Images supplied in two different fields are both reported by the instance."""
+    from opto.utils.backbone import ContentBlockList
+
+    # `variables` carries a base64 image, `inputs` carries a URL image.
+    variables_blocks = ContentBlockList([
+        TextContent(text="Hello\n"),
+        TextContent(text=""),
+        ImageContent(image_data="base64data", media_type="image/png"),
+        TextContent(text=""),
+    ])
+    inputs_blocks = ContentBlockList([
+        TextContent(text=""),
+        ImageContent(image_url="https://example.com/ref.png"),
+        TextContent(text=""),
+    ])
+
+    instance = ProblemInstance(
+        instruction="Compare images",
+        code="result = compare(img, reference)",
+        documentation="[compare] Compares two images",
+        variables=variables_blocks,
+        inputs=inputs_blocks,
+        others=ContentBlockList(),
+        outputs=ContentBlockList("0.8"),
+        feedback="Similarity should be higher",
+        context="Context text",
+        optimizer_prompt_symbol_set=OptimizerPromptSymbolSet(),
+    )
+
+    assert instance.has_images()
+
+    # One image from variables plus one from inputs.
+    image_count = sum(
+        1 for block in instance.to_content_blocks() if isinstance(block, ImageContent)
+    )
+    assert image_count == 2
+
+
+def test_value_to_image_content_url():
+    """value_to_image_content converts an image URL and returns None for other strings."""
+    image_url = "https://example.com/image.jpg"
+    converted = value_to_image_content(image_url)
+    assert converted is not None
+    assert isinstance(converted, ImageContent)
+    assert converted.image_url == image_url
+
+    # A URL without an image extension, then a plain non-URL string.
+    for not_an_image in ("https://example.com/page.html", "just a regular string"):
+        assert value_to_image_content(not_an_image) is None
+
+
+def test_value_to_image_content_base64():
+    """A base64 data URL is split into its payload and media type."""
+    payload = "iVBORw0KGgoAAAANSUhEUg=="
+    converted = value_to_image_content(f"data:image/png;base64,{payload}")
+
+    assert converted is not None
+    assert isinstance(converted, ImageContent)
+    assert converted.image_data == payload
+    assert converted.media_type == "image/png"
+
+
+def test_value_to_image_content_non_image():
+    """Non-image values (int, list, dict, plain string) all yield None."""
+    non_image_values = (
+        42,
+        [1, 2, 3],
+        {"key": "value"},
+        "hello world",
+    )
+    for value in non_image_values:
+        assert value_to_image_content(value) is None
+
+
+def test_construct_prompt():
+    """construct_prompt yields a str system prompt and a list of content blocks."""
+    first = node(1, trainable=True)
+    second = node(2, trainable=True)
+    total = first + second
+
+    optimizer = OptoPrimeV3([first, second], use_json_object_format=False)
+    optimizer.zero_feedback()
+    optimizer.backward(total, 'make this number bigger')
+
+    system_prompt, user_prompt = optimizer.construct_prompt(optimizer.summarize())
+
+    assert isinstance(system_prompt, str)
+    assert isinstance(user_prompt, list)
+    for block in user_prompt:
+        assert isinstance(block, (TextContent, ImageContent))
+
+    # At least one of the parameter names must appear in the prompt text.
+    full_text = "".join(block.text for block in user_prompt if isinstance(block, TextContent))
+    assert any(name in full_text for name in ("int0", "int1"))
+
+
+def test_repr_node_value_as_content_blocks():
+    """Rendering non-image variables produces only text blocks."""
+    number = node(1, trainable=True)
+    incremented = number + 1
+
+    optimizer = OptoPrimeV3([number], use_json_object_format=False)
+    optimizer.zero_feedback()
+    optimizer.backward(incremented, 'test')
+
+    summary = optimizer.summarize()
+    symbols = optimizer.optimizer_prompt_symbol_set
+    blocks = optimizer.repr_node_value_as_content_blocks(
+        summary.variables,
+        node_tag=symbols.variable_tag,
+        value_tag=symbols.value_tag,
+        constraint_tag=symbols.constraint_tag,
+    )
+
+    assert isinstance(blocks, list)
+    assert len(blocks) > 0
+    # No image values were supplied, so every block must be text.
+    for block in blocks:
+        assert isinstance(block, TextContent)
+
+
+def test_repr_node_value_compact_as_content_blocks():
+    """Compact rendering of the summary inputs stays shorter than a 5000-char value."""
+    long_string = "x" * 5000
+    str_node = node(long_string, trainable=True)
+    combined = str_node + "!"
+
+    optimizer = OptoPrimeV3(
+        [str_node], use_json_object_format=False, initial_var_char_limit=100
+    )
+    optimizer.zero_feedback()
+    optimizer.backward(combined, 'test')
+
+    summary = optimizer.summarize()
+    symbols = optimizer.optimizer_prompt_symbol_set
+    blocks = optimizer.repr_node_value_compact_as_content_blocks(
+        summary.inputs,
+        node_tag=symbols.node_tag,
+        value_tag=symbols.value_tag,
+        constraint_tag=symbols.constraint_tag,
+    )
+
+    full_text = "".join(block.text for block in blocks if isinstance(block, TextContent))
+    # NOTE(review): the long value belongs to a trainable node, which presumably
+    # lands in summary.variables rather than summary.inputs; if so the length
+    # check alone makes this pass without exercising truncation - confirm.
+    assert "skipped due to length limit" in full_text or len(full_text) < len(long_string)
+
+
+# ==================== Real LLM Call Tests ====================
+
+@pytest.mark.skipif(not HAS_CREDENTIALS, reason=SKIP_REASON)
+def test_optimizer_step_real_llm_call():
+    """Run one real optimization step and check the log entry it produces."""
+    # Create a simple optimization problem
+    greeting = node("Hello", trainable=True, description="A greeting message")
+
+    @bundle()
+    def make_sentence(word):
+        """Create a sentence from a word."""
+        return f"{word}, how are you today?"
+
+    result = make_sentence(greeting)
+
+    optimizer = OptoPrimeV3(
+        [greeting],
+        use_json_object_format=False,
+        ignore_extraction_error=True,
+        include_example=False,
+    )
+
+    # Setup feedback
+    optimizer.zero_feedback()
+    optimizer.backward(result, "The greeting should be more formal and professional")
+
+    # Execute optimization step - this makes a real LLM call
+    update_dict = optimizer.step(verbose=True)
+    print(f"Update dict: {update_dict}")
+
+    # LLM output varies, so only the completion of the step and the shape of
+    # the log entry are checked, not the suggested value.
+    assert optimizer.log is not None
+    assert len(optimizer.log) > 0
+
+    last_log = optimizer.log[-1]
+    assert "system_prompt" in last_log
+    assert "user_prompt" in last_log
+    assert "response" in last_log
+
+    # The logged response is the AssistantTurn returned by call_llm, so render
+    # it to text before slicing out the preview.
+    print(f"LLM Response: {last_log['response'].to_text()[:500]}...")
+
+
+@pytest.mark.skipif(not HAS_CREDENTIALS, reason=SKIP_REASON)
+def test_optimizer_step_with_content_blocks():
+    """Live test: build a content-block prompt, then run one optimization step."""
+    from opto.utils.backbone import ContentBlockList
+
+    first = node(5, trainable=True, description="A number to optimize")
+    second = node(3, trainable=True, description="Another number")
+    total = first + second
+
+    optimizer = OptoPrimeV3(
+        [first, second],
+        use_json_object_format=False,
+        ignore_extraction_error=True,
+        include_example=False,
+    )
+    optimizer.zero_feedback()
+    optimizer.backward(total, "The sum should be exactly 100")
+
+    # The user prompt must come back as a non-empty ContentBlockList.
+    _, user_prompt = optimizer.construct_prompt(optimizer.summarize())
+    assert isinstance(user_prompt, ContentBlockList)
+    assert len(user_prompt) > 0
+
+    # Reported for inspection only; the count itself is not asserted.
+    text_block_count = sum(1 for block in user_prompt if isinstance(block, TextContent))
+    print(f"Number of text blocks after merging: {text_block_count}")
+
+    # This makes a real LLM call.
+    update_dict = optimizer.step(verbose=True)
+    print(f"Update dict: {update_dict}")
+
+    assert optimizer.log is not None
+    assert len(optimizer.log) > 0
+
+@pytest.mark.skipif(not HAS_CREDENTIALS, reason=SKIP_REASON)
+def test_optimizer_multimodal_parameter_update():
+ # Placeholder: no assertions yet. Skipped unless RUN_LIVE_LLM_TESTS=1.
+ pass
\ No newline at end of file