The Model Orchestrator is the central component that manages GitHub models via Azure AI Inference.
The orchestrator provides intelligent model routing, response caching, automatic fallback, and rate limiting behind a single entry point:
from aurelis.models import get_model_orchestrator
orchestrator = get_model_orchestrator()
from aurelis.models import ModelRequest, ModelType, TaskType
request = ModelRequest(
    prompt="Generate a Python function to calculate fibonacci numbers",
    model_type=ModelType.CODESTRAL_2501,
    task_type=TaskType.CODE_GENERATION,
    system_prompt="You are an expert Python developer.",
    temperature=0.1,
    max_tokens=1000
)
response = await orchestrator.process_request(request)
print(response.content)
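`process_request` is a coroutine, so it must be awaited from an async context. A minimal, self-contained entry point might look like this sketch, which assumes only the public API shown above:

import asyncio

from aurelis.models import ModelRequest, ModelType, TaskType, get_model_orchestrator

async def main() -> None:
    orchestrator = get_model_orchestrator()
    request = ModelRequest(
        prompt="Generate a Python function to calculate fibonacci numbers",
        model_type=ModelType.CODESTRAL_2501,
        task_type=TaskType.CODE_GENERATION,
    )
    response = await orchestrator.process_request(request)
    print(response.content)

asyncio.run(main())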
| Model | Provider | Best For | Context |
|---|---|---|---|
| CODESTRAL_2501 | Mistral | Code generation & optimization | 4K |
| GPT_4O | OpenAI | Complex reasoning & multimodal | 4K |
| GPT_4O_MINI | OpenAI | Fast responses & documentation | 4K |
| COHERE_COMMAND_R | Cohere | Documentation & explanations | 4K |
| COHERE_COMMAND_R_PLUS | Cohere | Advanced reasoning | 4K |
| META_LLAMA_70B | Meta | Balanced performance | 4K |
| META_LLAMA_405B | Meta | Maximum capability | 4K |
| MISTRAL_LARGE | Mistral | Enterprise applications | 4K |
| MISTRAL_NEMO | Mistral | Fast inference | 4K |
The orchestrator automatically selects the best model based on task type:
# Code generation tasks -> Codestral-2501
request = ModelRequest(
    prompt="Create a REST API",
    task_type=TaskType.CODE_GENERATION
)

# Documentation tasks -> Cohere Command-R
request = ModelRequest(
    prompt="Document this function",
    task_type=TaskType.DOCUMENTATION
)

# Complex reasoning -> GPT-4o
request = ModelRequest(
    prompt="Analyze this architecture",
    task_type=TaskType.ANALYSIS
)
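Conceptually, this routing is a lookup from task type to a preferred model, with an explicit `model_type` always taking precedence. The mapping below is an illustrative sketch derived from the table above, not the orchestrator's actual selection logic; the general-purpose default shown is an assumption:

# Illustrative task-type routing; the real orchestrator may weigh additional signals.
TASK_MODEL_PREFERENCES = {
    TaskType.CODE_GENERATION: ModelType.CODESTRAL_2501,
    TaskType.CODE_OPTIMIZATION: ModelType.CODESTRAL_2501,
    TaskType.DOCUMENTATION: ModelType.COHERE_COMMAND_R,
    TaskType.EXPLANATION: ModelType.COHERE_COMMAND_R,
    TaskType.ANALYSIS: ModelType.GPT_4O,
}

def pick_model(request: ModelRequest) -> ModelType:
    # An explicit model_type always wins; otherwise fall back to a task-based default.
    if request.model_type is not None:
        return request.model_type
    return TASK_MODEL_PREFERENCES.get(request.task_type, ModelType.GPT_4O_MINI)  # assumed default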
@dataclass
class ModelRequest:
    prompt: str
    model_type: Optional[ModelType] = None
    task_type: TaskType = TaskType.GENERAL
    system_prompt: Optional[str] = None
    temperature: float = 0.1
    max_tokens: Optional[int] = None
    context: Optional[Dict[str, Any]] = None
    metadata: Optional[Dict[str, Any]] = None
class TaskType(Enum):
    CODE_GENERATION = "code_generation"
    CODE_COMPLETION = "code_completion"
    CODE_OPTIMIZATION = "code_optimization"
    DOCUMENTATION = "documentation"
    EXPLANATION = "explanation"
    REFACTORING = "refactoring"
    TESTING = "testing"
    ANALYSIS = "analysis"
    GENERAL = "general"
@dataclass
class ModelResponse:
    content: str
    model_used: ModelType
    tokens_used: int
    processing_time: float
    cached: bool = False
    metadata: Optional[Dict[str, Any]] = None
request = ModelRequest(
    prompt="Optimize this function",
    system_prompt="""You are a senior Python performance engineer.
    Focus on algorithmic efficiency and memory optimization.
    Provide detailed explanations for your optimizations."""
)
request = ModelRequest(
    prompt="Add error handling to this function",
    context={
        "existing_code": open("function.py").read(),
        "project_structure": ["models/", "views/", "utils/"],
        "error_patterns": ["ConnectionError", "ValidationError"]
    }
)
request = ModelRequest(
    prompt="Generate unit tests",
    metadata={
        "user_id": "developer_123",
        "project_id": "aurelis",
        "session_id": "session_456"
    }
)
Responses are automatically cached based on the contents of the request, so repeating an identical request is served from the cache:
# First call - hits the model
response1 = await orchestrator.process_request(request)
print(f"Cached: {response1.cached}") # False
# Second identical call - hits cache
response2 = await orchestrator.process_request(request)
print(f"Cached: {response2.cached}") # True
from aurelis.core.config import get_config
config = get_config()
config.cache_enabled = True
config.cache_ttl = 3600 # 1 hour
config.cache_max_size = 1000
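Conceptually, the cache key is derived from the request fields that influence the model's output, so two identical requests resolve to the same cache entry. The sketch below shows one plausible keying scheme; it is an assumption about how such a key could be built, not the actual implementation:

import hashlib
import json

def cache_key(request: ModelRequest) -> str:
    # Hash only the fields that affect the model output; metadata is deliberately excluded.
    payload = {
        "prompt": request.prompt,
        "model_type": request.model_type.value if request.model_type else None,
        "task_type": request.task_type.value,
        "system_prompt": request.system_prompt,
        "temperature": request.temperature,
        "max_tokens": request.max_tokens,
    }
    return hashlib.sha256(json.dumps(payload, sort_keys=True).encode()).hexdigest()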
from aurelis.core.exceptions import ModelError, AuthenticationError
try:
    response = await orchestrator.process_request(request)
except AuthenticationError as e:
    print(f"GitHub token invalid: {e}")
except ModelError as e:
    print(f"Model processing failed: {e}")
    # Automatic fallback will be attempted
The orchestrator includes a circuit breaker pattern with automatic fallback between models:
# Automatic fallback on model failures
request = ModelRequest(
    prompt="Generate code",
    model_type=ModelType.CODESTRAL_2501  # Primary
)
# If Codestral fails, automatically tries GPT-4o-mini
response = await orchestrator.process_request(request)
print(f"Used model: {response.model_used}")
response = await orchestrator.process_request(request)
print(f"Tokens used: {response.tokens_used}")
print(f"Processing time: {response.processing_time:.2f}s")
requests = [
    ModelRequest(prompt="Generate function A"),
    ModelRequest(prompt="Generate function B"),
    ModelRequest(prompt="Generate function C")
]

responses = await orchestrator.process_batch(requests)

for response in responses:
    print(f"Response: {response.content[:100]}...")
GitHub model access is automatically rate-limited:
# Automatic rate limiting and retry
response = await orchestrator.process_request(request)
# Rate limit information in metadata
if response.metadata:
    print(f"Rate limit remaining: {response.metadata.get('rate_limit_remaining')}")
    print(f"Reset time: {response.metadata.get('rate_limit_reset')}")
# For code generation
request = ModelRequest(
    prompt="Create a web scraper",
    model_type=ModelType.CODESTRAL_2501
)

# For documentation
request = ModelRequest(
    prompt="Document this API",
    model_type=ModelType.COHERE_COMMAND_R
)
# Good: Specific and clear
request = ModelRequest(
    prompt="Create a Python function that validates email addresses using regex"
)

# Better: Include context and requirements
request = ModelRequest(
    prompt="Create a Python function that validates email addresses using regex",
    system_prompt="Write production-ready code with proper error handling",
    context={"framework": "FastAPI", "testing": "pytest"}
)
async def generate_code(prompt: str) -> str:
    try:
        request = ModelRequest(prompt=prompt)
        response = await orchestrator.process_request(request)
        return response.content
    except Exception as e:
        logger.error(f"Code generation failed: {e}")
        return "# Code generation failed, please try again"
# Enable caching for repetitive tasks
request = ModelRequest(
    prompt="Explain Python decorators",
    task_type=TaskType.EXPLANATION
)

# Disable caching for unique generations
request = ModelRequest(
    prompt=f"Generate unique code for user {user_id}",
    metadata={"cache_disabled": True}
)
# Used in CLI commands
async def cli_generate(prompt: str, model: Optional[str] = None):
    orchestrator = get_model_orchestrator()
    request = ModelRequest(
        prompt=prompt,
        model_type=ModelType(model) if model else None
    )
    response = await orchestrator.process_request(request)
    return response.content
# Used in interactive shell
class AurelisShell:
    def __init__(self):
        self.orchestrator = get_model_orchestrator()

    async def process_command(self, command: str):
        request = ModelRequest(
            prompt=command,
            task_type=self._detect_task_type(command)
        )
        return await self.orchestrator.process_request(request)
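The shell's `_detect_task_type` helper is not shown here. A plausible keyword-based version is sketched below as a standalone function; this is an assumption about its behavior, not the actual implementation:

def detect_task_type(command: str) -> TaskType:
    # Naive keyword matching; the real _detect_task_type may be more sophisticated.
    lowered = command.lower()
    if any(word in lowered for word in ("document", "docstring")):
        return TaskType.DOCUMENTATION
    if any(word in lowered for word in ("explain", "why", "how")):
        return TaskType.EXPLANATION
    if any(word in lowered for word in ("test", "pytest")):
        return TaskType.TESTING
    if any(word in lowered for word in ("refactor", "clean up")):
        return TaskType.REFACTORING
    if any(word in lowered for word in ("generate", "create", "write")):
        return TaskType.CODE_GENERATION
    return TaskType.GENERAL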