Source code for sherpa_ai.prompts.prompt_template_loader

"""Prompt template loading and formatting module for Sherpa AI.

This module provides functionality for loading prompt templates and formatting
them with variables. It extends the base PromptLoader to add variable
substitution capabilities for different prompt types.
"""

from typing import Dict, List, Optional, Union, Any, Type
from sherpa_ai.prompts.Base import ChatPromptVersion, TextPromptVersion, JsonPromptVersion
from sherpa_ai.prompts.prompt_loader import PromptLoader
import copy
from pydantic import BaseModel, create_model
from langchain_core.language_models import BaseChatModel

[docs] class PromptTemplate(PromptLoader): """Template loader and formatter for prompts. This class extends PromptLoader to add variable substitution capabilities. It can format text, chat, and JSON prompts by replacing placeholders with actual values. Example: >>> template = PromptTemplate("prompts.json") >>> formatted = template.format_prompt( ... prompt_parent_id="chat", ... prompt_id="greeting", ... version="1.0", ... variables={"name": "Alice"} ... ) >>> print(formatted[0]["content"]) 'Hello Alice!' """ def __init__(self, json_file_path: str): """Initialize the prompt template loader. Args: json_file_path (str): Path to JSON file containing prompt templates. """ super().__init__(json_file_path) def _detect_provider(self, llm: Optional[BaseChatModel]) -> Optional[str]: """Detect the LLM provider from an LLM instance. This method identifies the provider by checking the _llm_type attribute and class name as fallback. Args: llm (Optional[BaseChatModel]): The LLM instance to detect. Returns: Optional[str]: Provider name ('openai', 'anthropic', 'google', 'cohere') or None if provider cannot be detected. Example: >>> template = PromptTemplate("prompts.json") >>> llm = ChatOpenAI() >>> provider = template._detect_provider(llm) >>> print(provider) 'openai' """ if llm is None: return None # Check for wrapped models (e.g., ChatModelWithLogging) actual_llm = llm if hasattr(llm, 'llm'): actual_llm = llm.llm # Try to get provider from _llm_type attribute if hasattr(actual_llm, '_llm_type'): llm_type = actual_llm._llm_type.lower() if 'openai' in llm_type: return 'openai' elif 'anthropic' in llm_type or 'claude' in llm_type or 'bedrock' in llm_type: return 'anthropic' elif 'google' in llm_type or 'gemini' in llm_type or 'vertexai' in llm_type: return 'google' elif 'cohere' in llm_type: return 'cohere' # Fallback to class name inspection class_name = actual_llm.__class__.__name__.lower() if 'openai' in class_name: return 'openai' elif 'anthropic' in class_name or 'claude' in class_name: return 'anthropic' elif 'google' in class_name or 'gemini' in class_name or 'vertex' in class_name: return 'google' elif 'cohere' in class_name: return 'cohere' return None def _pydantic_to_json_schema(self, pydantic_model: Type[BaseModel]) -> Dict[str, Any]: """Convert a Pydantic model to JSON Schema. This method uses Pydantic's built-in model_json_schema() method to convert a Pydantic model to JSON Schema format. Args: pydantic_model (Type[BaseModel]): The Pydantic model class to convert. Returns: Dict[str, Any]: JSON Schema representation of the Pydantic model. Example: >>> from pydantic import BaseModel >>> class User(BaseModel): ... name: str ... age: int >>> template = PromptTemplate("prompts.json") >>> schema = template._pydantic_to_json_schema(User) >>> print(schema['properties']['name']['type']) 'string' """ if not issubclass(pydantic_model, BaseModel): raise ValueError(f"Expected a Pydantic BaseModel, got {type(pydantic_model)}") return pydantic_model.model_json_schema() def _json_schema_to_pydantic(self, json_schema: Dict[str, Any], model_name: str = "DynamicModel") -> Type[BaseModel]: """Convert a JSON Schema to a Pydantic model. This method dynamically creates a Pydantic model from a JSON Schema. Note: This is a simplified implementation and may not handle all JSON Schema features. Args: json_schema (Dict[str, Any]): The JSON Schema to convert. model_name (str): Name for the generated Pydantic model class. Returns: Type[BaseModel]: A dynamically created Pydantic model class. Example: >>> template = PromptTemplate("prompts.json") >>> schema = { ... "type": "object", ... "properties": { ... "name": {"type": "string"}, ... "age": {"type": "integer"} ... }, ... "required": ["name"] ... } >>> Model = template._json_schema_to_pydantic(schema, "User") >>> instance = Model(name="Alice", age=30) """ if json_schema.get("type") != "object": raise ValueError("JSON Schema must be of type 'object' to convert to Pydantic model") properties = json_schema.get("properties", {}) required = set(json_schema.get("required", [])) # Map JSON Schema types to Python types type_mapping = { "string": str, "integer": int, "number": float, "boolean": bool, "array": list, "object": dict, } field_definitions = {} for field_name, field_schema in properties.items(): field_type = field_schema.get("type", "string") python_type = type_mapping.get(field_type, str) # Handle Optional fields if field_name not in required: field_definitions[field_name] = (Optional[python_type], None) else: field_definitions[field_name] = (python_type, ...) return create_model(model_name, **field_definitions)
[docs] def format_prompt( self, prompt_parent_id: str, prompt_id: str, # Changed from name to prompt_id version: str, variables: Optional[Dict[str, Union[str, int, float]]] = None ) -> Optional[Union[str, List[Dict[str, str]], Dict]]: """Format a prompt by replacing variables with values. This method loads a prompt template and replaces any variable placeholders with provided values. It handles different prompt types (text, chat, JSON) appropriately. Args: prompt_parent_id (str): Name of the wrapper containing the prompt. prompt_id (str): ID of the prompt to format. version (str): Version of the prompt to format. variables (Optional[Dict[str, Union[str, int, float]]]): Values to substitute in the prompt. If None, uses defaults from JSON. Returns: Optional[Union[str, List[Dict[str, str]], Dict]]: Formatted prompt content if found and successfully formatted, None otherwise. Example: >>> template = PromptTemplate("prompts.json") >>> formatted = template.format_prompt( ... prompt_parent_id="text", ... prompt_id="search", ... version="1.0", ... variables={"query": "python programming"} ... ) >>> print(formatted) 'Search for: python programming' """ # We need to get the specific prompt version object prompt_version_obj = self.get_prompt_version(prompt_parent_id, prompt_id, version) if not prompt_version_obj: return None prompt_variables = prompt_version_obj.variables or {} final_variables = prompt_variables.copy() if variables: final_variables.update(variables) # Format the prompt content if isinstance(prompt_version_obj, ChatPromptVersion): formatted_prompt = [] for message in prompt_version_obj.content: role = message.get("role") text = message.get("content", "") # Replace placeholders with variables for var_name, var_value in final_variables.items(): placeholder = f"{{{var_name}}}" if placeholder in text: text = text.replace(placeholder, str(var_value)) formatted_prompt.append({"role": role, "content": text}) return formatted_prompt elif isinstance(prompt_version_obj, TextPromptVersion): content = prompt_version_obj.content # Handle array content if isinstance(content, list): content = "\n".join(str(item) for item in content) if content else "" for var_name, var_value in final_variables.items(): placeholder = f"{{{var_name}}}" if placeholder in content: content = content.replace(placeholder, str(var_value)) return content elif isinstance(prompt_version_obj, JsonPromptVersion): import copy formatted_prompt = copy.deepcopy(prompt_version_obj.content) def replace_in_dict(data: Dict) -> Dict: """Recursively replace variables in dictionary values. Args: data (Dict): Dictionary to process. Returns: Dict: Dictionary with variables replaced. """ for key, value in data.items(): if isinstance(value, str): for var_name, var_value in final_variables.items(): placeholder = f"{{{var_name}}}" if placeholder in value: data[key] = value.replace(placeholder, str(var_value)) elif isinstance(value, dict): data[key] = replace_in_dict(value) return data return replace_in_dict(formatted_prompt) else: raise ValueError(f"Unknown prompt version type: {type(prompt_version_obj)}")
def _format_for_openai(self, json_schema: Dict[str, Any], schema_name: str = "ResponseModel") -> Type[BaseModel]: """Format response format for OpenAI/Azure OpenAI. OpenAI accepts Pydantic models directly. This method converts JSON Schema to a Pydantic model. Args: json_schema (Dict[str, Any]): JSON Schema to convert. schema_name (str): Name for the generated Pydantic model. Returns: Type[BaseModel]: Pydantic model class for OpenAI. """ # Extract schema if nested in json_schema structure if "json_schema" in json_schema and "schema" in json_schema["json_schema"]: schema = json_schema["json_schema"]["schema"] if "name" in json_schema["json_schema"]: schema_name = json_schema["json_schema"]["name"] elif "schema" in json_schema: schema = json_schema["schema"] else: schema = json_schema return self._json_schema_to_pydantic(schema, schema_name) def _format_for_anthropic(self, json_schema: Dict[str, Any], schema_name: str = "response") -> Dict[str, Any]: """Format response format for Anthropic Claude. Anthropic uses JSON Schema in tool definitions with tool_choice. Args: json_schema (Dict[str, Any]): JSON Schema to format. schema_name (str): Name for the tool function. Returns: Dict[str, Any]: Tool definition format for Anthropic. """ # Extract schema if nested if "json_schema" in json_schema and "schema" in json_schema["json_schema"]: schema = json_schema["json_schema"]["schema"] if "name" in json_schema["json_schema"]: schema_name = json_schema["json_schema"]["name"] elif "schema" in json_schema: schema = json_schema["schema"] else: schema = json_schema return { "tools": [ { "type": "function", "name": schema_name, "input_schema": schema } ], "tool_choice": { "type": "tool", "name": schema_name } } def _format_for_google(self, json_schema: Dict[str, Any]) -> Dict[str, Any]: """Format response format for Google Gemini. Google Gemini uses JSON Schema via responseSchema parameter. Uses OpenAPI 3.0 Schema subset. Args: json_schema (Dict[str, Any]): JSON Schema to format. Returns: Dict[str, Any]: responseSchema format for Google Gemini. """ # Extract schema if nested if "json_schema" in json_schema and "schema" in json_schema["json_schema"]: schema = json_schema["json_schema"]["schema"] elif "schema" in json_schema: schema = json_schema["schema"] else: schema = json_schema # Google Gemini expects OpenAPI 3.0 Schema format # For most cases, the JSON Schema is compatible return { "responseSchema": schema } def _format_for_cohere(self, json_schema: Dict[str, Any]) -> Dict[str, Any]: """Format response format for Cohere. Cohere uses JSON Schema via response_format parameter. Args: json_schema (Dict[str, Any]): JSON Schema to format. Returns: Dict[str, Any]: response_format format for Cohere. """ # Extract schema if nested if "json_schema" in json_schema and "schema" in json_schema["json_schema"]: schema = json_schema["json_schema"]["schema"] elif "schema" in json_schema: schema = json_schema["schema"] else: schema = json_schema return { "response_format": schema }
[docs] def format_response_format_for_provider( self, prompt_parent_id: str, prompt_id: str, version: str, llm: Optional[BaseChatModel], variables: Optional[Dict[str, Union[str, int, float, List]]] = None ) -> Optional[Union[Dict[str, Any], Type[BaseModel]]]: """Format response format schema for a specific LLM provider. This method detects the provider from the LLM instance and returns the response format in the appropriate format for that provider. Args: prompt_parent_id (str): Name of the wrapper containing the prompt. prompt_id (str): ID of the prompt to format. version (str): Version of the prompt to format. llm (Optional[BaseChatModel]): LLM instance to detect provider from. variables (Optional[Dict[str, Union[str, int, float, List]]]): Values to substitute in the schema. If None, uses defaults from JSON. Returns: Optional[Union[Dict[str, Any], Type[BaseModel]]]: Provider-specific format: - OpenAI: Pydantic model class - Anthropic: Tool definition dict - Google: responseSchema dict - Cohere: response_format dict - None/Unknown: JSON Schema dict (fallback) Example: >>> template = PromptTemplate("prompts.json") >>> llm = ChatOpenAI() >>> formatted = template.format_response_format_for_provider( ... prompt_parent_id="addition_prompts", ... prompt_id="add_numbers_text", ... version="1.0", ... llm=llm ... ) >>> # Returns Pydantic model for OpenAI """ # First, get the formatted JSON Schema json_schema = self.format_response_format(prompt_parent_id, prompt_id, version, variables) if json_schema is None: return None # Detect provider provider = self._detect_provider(llm) if provider is None: # Fallback to JSON Schema if provider cannot be detected return json_schema # Get schema name if available schema_name = "ResponseModel" if "json_schema" in json_schema and "name" in json_schema["json_schema"]: schema_name = json_schema["json_schema"]["name"] # Format based on provider try: if provider == "openai": return self._format_for_openai(json_schema, schema_name) elif provider == "anthropic": return self._format_for_anthropic(json_schema, schema_name) elif provider == "google": return self._format_for_google(json_schema) elif provider == "cohere": return self._format_for_cohere(json_schema) else: # Unknown provider, return JSON Schema as fallback return json_schema except (ValueError, KeyError) as e: # If conversion fails, return original JSON Schema # This maintains backward compatibility return json_schema
[docs] def format_response_format( self, prompt_parent_id: str, prompt_id: str, version: str, variables: Optional[Dict[str, Union[str, int, float, List]]] = None ) -> Optional[Dict]: """Format the response format schema by replacing variables with values. This method specifically handles variable substitution in response format schemas, including enum values in JSON schemas. Args: prompt_parent_id (str): Name of the wrapper containing the prompt. prompt_id (str): ID of the prompt to format. version (str): Version of the prompt to format. variables (Optional[Dict[str, Union[str, int, float, List]]]): Values to substitute in the schema. If None, uses defaults from JSON. Returns: Optional[Dict]: Formatted response format schema if found and successfully formatted, None otherwise. Example: >>> template = PromptTemplate("prompts.json") >>> formatted_schema = template.format_response_format( ... prompt_parent_id="supplement", ... prompt_id="recommendation", ... version="1.0", ... variables={"available_skus": ["SKU-001", "SKU-002", "SKU-003"]} ... ) """ prompt_version_obj = self.get_prompt_version(prompt_parent_id, prompt_id, version) if not prompt_version_obj: return None prompt_variables = prompt_version_obj.variables or {} final_variables = prompt_variables.copy() if variables: final_variables.update(variables) if not prompt_version_obj.response_format: return None formatted_response_format = copy.deepcopy(prompt_version_obj.response_format) def replace_in_schema(data: Any) -> Any: """Recursively replace variables in schema data. Args: data (Any): Data to process (dict, list, or primitive). Returns: Any: Data with variables replaced. """ if isinstance(data, dict): for key, value in data.items(): if isinstance(value, str): for var_name, var_value in final_variables.items(): placeholder = f"{{{var_name}}}" if placeholder in value: data[key] = value.replace(placeholder, str(var_value)) elif isinstance(value, list): data[key] = replace_in_schema(value) elif isinstance(value, dict): data[key] = replace_in_schema(value) elif isinstance(data, list): for i, item in enumerate(data): if isinstance(item, str): for var_name, var_value in final_variables.items(): if item == f"{{{var_name}}}": if isinstance(var_value, list): data[i:i+1] = var_value return data else: data[i] = var_value else: data[i] = replace_in_schema(item) return data return replace_in_schema(formatted_response_format)
[docs] def get_full_formatted_prompt( self, prompt_parent_id: str, prompt_id: str, version: str, variables: Optional[Dict[str, Union[str, int, float, List]]] = None, llm: Optional[BaseChatModel] = None ) -> Optional[Dict[str, Union[str, List[Dict[str, str]], Dict, Type[BaseModel]]]]: """Get a formatted prompt with metadata. This method formats a prompt and returns it along with its description and output schema. It's useful when you need the complete prompt context, not just the formatted content. Args: prompt_parent_id (str): Name of the wrapper containing the prompt. prompt_id (str): ID of the prompt to format. version (str): Version of the prompt to format. variables (Optional[Dict[str, Union[str, int, float, List]]]): Values to substitute in the prompt. If None, uses defaults from JSON. llm (Optional[BaseChatModel]): Optional LLM instance for provider-specific schema formatting. If provided, output_schema will be formatted for the detected provider. Returns: Optional[Dict[str, Union[str, List[Dict[str, str]], Dict, Type[BaseModel]]]]: Dictionary containing formatted content, description, and schema, or None if prompt not found. The output_schema will be provider-specific if llm is provided, otherwise it will be JSON Schema. Example: >>> template = PromptTemplate("prompts.json") >>> result = template.get_full_formatted_prompt( ... prompt_parent_id="text", ... prompt_id="search", ... version="1.0", ... variables={"query": "python"} ... ) >>> print(result["description"]) 'Search query template' >>> # With provider-specific formatting: >>> llm = ChatOpenAI() >>> result = template.get_full_formatted_prompt( ... prompt_parent_id="addition_prompts", ... prompt_id="add_numbers_text", ... version="1.0", ... llm=llm ... ) >>> # output_schema will be a Pydantic model for OpenAI """ target_prompt = None for pg in self.prompts: if pg.prompt_parent_id == prompt_parent_id: for p in pg.prompts: if p.prompt_id == prompt_id: target_prompt = p break if target_prompt: break if not target_prompt: return None prompt_version_obj = self.get_prompt_version(prompt_parent_id, prompt_id, version) if not prompt_version_obj: return None formatted_content = self.format_prompt(prompt_parent_id, prompt_id, version, variables) if not formatted_content: return None # Format the response format schema # Use provider-specific formatting if llm is provided if llm is not None: formatted_response_format = self.format_response_format_for_provider( prompt_parent_id, prompt_id, version, llm, variables ) else: formatted_response_format = self.format_response_format( prompt_parent_id, prompt_id, version, variables ) return { "description": target_prompt.description, "content": formatted_content, "output_schema": formatted_response_format }