Managing connectivity to multiple AI models from different providers can be complex and error-prone. A unified AI model management system provides a centralized approach to handle multiple providers, abstract away differences, and provide a consistent interface for your applications.
## Managed Unified Management

Tetrate Agent Router Service provides intelligent routing for GenAI developers with unified model access, automatic fallback, cost management, and cross-provider monitoring. This managed service can significantly reduce the complexity of building and maintaining your own unified AI management system. It offers:
- Unified model access across providers
- Automatic fallback and failover
- Cost optimization and monitoring
- Centralized API management
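
For teams that would rather consume this as a managed service than build it, the integration surface is typically a single OpenAI-compatible endpoint. The sketch below is illustrative only: the `base_url`, API key variable, and model name are placeholders, so consult the service documentation for real values.

```python
# Hypothetical sketch of calling a managed router through an
# OpenAI-compatible endpoint. The base_url and model name are
# placeholders, not confirmed values for any specific service.
import os
from openai import AsyncOpenAI

client = AsyncOpenAI(
    base_url="https://router.example.com/v1",  # placeholder endpoint
    api_key=os.environ["ROUTER_API_KEY"],      # placeholder key variable
)

async def ask(prompt: str) -> str:
    response = await client.chat.completions.create(
        model="gpt-4o",  # the router resolves the model name to a provider
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
```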
## The Challenge of Multi-Provider AI

Modern AI applications often need to work with multiple providers:
- **Provider Diversity**: OpenAI, Anthropic, Google, Azure, AWS, and more
- **API Differences**: Each provider has unique endpoints, authentication, and response formats (see the request comparison below)
- **Cost Optimization**: Different providers offer different pricing and capabilities
- **Reliability**: Need fallback options when providers are unavailable
- **Complexity**: Managing multiple API keys, rate limits, and error handling

:::tip Unified Management Benefits

A unified AI model management system can reduce development time by 60% and improve reliability by providing automatic fallbacks and load balancing.

:::
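
To make the API-differences point concrete, here is a simplified side-by-side of the same request against the two raw HTTP APIs. Field names follow each provider's public API, but the payloads are abbreviated, so treat this as a sketch rather than a full reference.

```python
# Simplified request shapes for the same prompt against two providers.
# Note the different auth headers and the extra required fields.
openai_request = {
    "url": "https://api.openai.com/v1/chat/completions",
    "headers": {"Authorization": "Bearer <OPENAI_API_KEY>"},
    "json": {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Hello"}],
    },
}

anthropic_request = {
    "url": "https://api.anthropic.com/v1/messages",
    "headers": {
        "x-api-key": "<ANTHROPIC_API_KEY>",   # different auth header
        "anthropic-version": "2023-06-01",    # required version header
    },
    "json": {
        "model": "claude-3-haiku-20240307",
        "max_tokens": 1024,                   # required by this API
        "messages": [{"role": "user", "content": "Hello"}],
    },
}
```

Responses differ just as much: one returns `choices[0].message.content`, the other `content[0].text`, which is exactly what the abstraction layer below hides.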
## Core Architecture Patterns

### 1. Provider Abstraction Layer

The foundation of unified management is abstracting provider differences behind a consistent interface.

```python
import asyncio
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional

import anthropic
import openai


class AIProviderError(Exception):
    """Raised when a provider request fails."""


class AIProvider(ABC):
    """Abstract base class for AI providers"""

    @abstractmethod
    async def generate_text(self, prompt: str, model: str, **kwargs) -> str:
        """Generate text using the provider's API"""
        pass

    @abstractmethod
    async def get_models(self) -> list:
        """Get available models from this provider"""
        pass

    @abstractmethod
    async def get_cost_estimate(self, prompt: str, model: str) -> float:
        """Estimate cost for a request"""
        pass


class OpenAIProvider(AIProvider):
    """OpenAI provider implementation"""

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.client = openai.AsyncOpenAI(api_key=api_key)

    async def generate_text(self, prompt: str, model: str, **kwargs) -> str:
        try:
            response = await self.client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                **kwargs
            )
            return response.choices[0].message.content
        except Exception as e:
            raise AIProviderError(f"OpenAI error: {str(e)}")

    async def get_models(self) -> list:
        models = await self.client.models.list()
        return [model.id for model in models.data]

    async def get_cost_estimate(self, prompt: str, model: str) -> float:
        # Illustrative input pricing per 1K tokens; check current rates
        pricing = {
            "gpt-4": 0.03,
            "gpt-3.5-turbo": 0.002,
            "gpt-4-turbo": 0.01
        }
        # Rough heuristic: ~1.3 tokens per whitespace-separated word
        estimated_tokens = len(prompt.split()) * 1.3
        return (estimated_tokens / 1000) * pricing.get(model, 0.01)


class AnthropicProvider(AIProvider):
    """Anthropic provider implementation"""

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.client = anthropic.AsyncAnthropic(api_key=api_key)

    async def generate_text(self, prompt: str, model: str, **kwargs) -> str:
        try:
            response = await self.client.messages.create(
                model=model,
                max_tokens=kwargs.get('max_tokens', 1000),
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text
        except Exception as e:
            raise AIProviderError(f"Anthropic error: {str(e)}")

    async def get_models(self) -> list:
        # A static list is used here rather than querying the API
        return ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"]

    async def get_cost_estimate(self, prompt: str, model: str) -> float:
        # Illustrative input pricing per 1K tokens; check current rates
        pricing = {
            "claude-3-opus-20240229": 0.015,
            "claude-3-sonnet-20240229": 0.003,
            "claude-3-haiku-20240307": 0.00025
        }
        estimated_tokens = len(prompt.split()) * 1.3
        return (estimated_tokens / 1000) * pricing.get(model, 0.003)
```
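
The `len(prompt.split()) * 1.3` heuristic above is deliberately rough. If you need tighter input estimates for OpenAI models, a tokenizer such as `tiktoken` counts the actual tokens; this is an optional refinement, under the assumption that exact input counts matter more to you than output-length uncertainty.

```python
# Optional refinement: exact input-token counting with tiktoken
# (pip install tiktoken). Output tokens still have to be estimated.
import tiktoken

def count_tokens(prompt: str, model: str = "gpt-4") -> int:
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to a general-purpose encoding
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(prompt))
```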
### 2. Unified Model Manager

The unified manager coordinates multiple providers and provides a single interface.

```python
import time
from typing import Dict, Optional


class UnifiedAIManager:
    """Unified AI model management system"""

    def __init__(self):
        self.providers: Dict[str, AIProvider] = {}
        self.model_mapping: Dict[str, str] = {}
        self.health_status: Dict[str, bool] = {}
        self.usage_metrics: Dict[str, Dict] = {}

    def register_provider(self, name: str, provider: AIProvider):
        """Register a new AI provider"""
        self.providers[name] = provider
        self.health_status[name] = True
        self.usage_metrics[name] = {
            'requests': 0,
            'errors': 0,
            'total_cost': 0.0,
            'last_used': None
        }

    def map_model_to_provider(self, model_name: str, provider_name: str):
        """Map a model name to a specific provider"""
        self.model_mapping[model_name] = provider_name

    async def generate_text(self, prompt: str, model: str, **kwargs) -> str:
        """Generate text using the best available provider"""
        # Determine which provider to use
        provider_name = self.model_mapping.get(model) or self._select_best_provider(model)
        if not provider_name or provider_name not in self.providers:
            raise ValueError(f"No provider available for model: {model}")
        provider = self.providers[provider_name]
        try:
            # Attempt the request
            result = await provider.generate_text(prompt, model, **kwargs)
            # Update metrics
            self._update_metrics(provider_name, success=True)
            return result
        except Exception as e:
            # Update error metrics
            self._update_metrics(provider_name, success=False)
            # Try a fallback provider before giving up
            fallback_provider = self._get_fallback_provider(provider_name)
            if fallback_provider:
                return await fallback_provider.generate_text(prompt, model, **kwargs)
            raise AIProviderError(f"All providers failed for model {model}: {str(e)}")

    def _select_best_provider(self, model: str) -> Optional[str]:
        """Select the best provider based on health, cost, and availability"""
        available_providers = [
            name for name, is_healthy in self.health_status.items()
            if is_healthy and name in self.providers
        ]
        if not available_providers:
            return None
        # Simple selection: prefer providers with fewer errors
        best_provider = min(
            available_providers,
            key=lambda p: self.usage_metrics[p]['errors']
        )
        return best_provider

    def _get_fallback_provider(self, failed_provider: str) -> Optional[AIProvider]:
        """Get a fallback provider when the primary one fails"""
        available_providers = [
            (name, provider) for name, provider in self.providers.items()
            if name != failed_provider and self.health_status[name]
        ]
        if available_providers:
            return available_providers[0][1]
        return None

    def _update_metrics(self, provider_name: str, success: bool):
        """Update usage metrics for a provider"""
        if provider_name in self.usage_metrics:
            self.usage_metrics[provider_name]['requests'] += 1
            if not success:
                self.usage_metrics[provider_name]['errors'] += 1
            self.usage_metrics[provider_name]['last_used'] = time.time()

    async def get_available_models(self) -> Dict[str, list]:
        """Get all available models from all providers"""
        models = {}
        for name, provider in self.providers.items():
            try:
                models[name] = await provider.get_models()
            except Exception:
                models[name] = []
        return models

    def get_health_status(self) -> Dict[str, bool]:
        """Get health status of all providers"""
        return self.health_status.copy()

    def get_usage_metrics(self) -> Dict[str, Dict]:
        """Get usage metrics for all providers"""
        return self.usage_metrics.copy()
```
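
One gap worth noting: the manager tracks `health_status`, but nothing above ever flips a provider between healthy and unhealthy. A minimal sketch, assuming a successful `get_models()` call is an acceptable liveness probe, is a background task like this:

```python
import asyncio

# Minimal background health checker. Assumes that listing models is a
# cheap, representative liveness probe for each provider.
async def run_health_checks(manager: UnifiedAIManager, interval: int = 60):
    while True:
        for name, provider in manager.providers.items():
            try:
                await provider.get_models()
                manager.health_status[name] = True
            except Exception:
                manager.health_status[name] = False
        await asyncio.sleep(interval)

# Typically started alongside the application, e.g.:
# asyncio.create_task(run_health_checks(ai_manager, interval=60))
```

The `interval` parameter corresponds to the `health_check_interval` setting in the configuration section below.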
### 3. Configuration Management

Centralized configuration for managing multiple providers.

```python
import os
from dataclasses import dataclass
from typing import Dict, Any, Optional

import yaml


@dataclass
class ProviderConfig:
    name: str
    api_key: str
    base_url: Optional[str] = None
    timeout: int = 30
    max_retries: int = 3
    rate_limit: Optional[int] = None


class ConfigurationManager:
    """Manage configuration for multiple AI providers"""

    def __init__(self, config_path: str = "ai_config.yaml"):
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from YAML file"""
        try:
            with open(self.config_path, 'r') as file:
                return yaml.safe_load(file)
        except FileNotFoundError:
            return self._get_default_config()

    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration template"""
        return {
            'providers': {
                'openai': {
                    'api_key': '${OPENAI_API_KEY}',
                    'base_url': 'https://api.openai.com/v1',
                    'timeout': 30,
                    'max_retries': 3
                },
                'anthropic': {
                    'api_key': '${ANTHROPIC_API_KEY}',
                    'base_url': 'https://api.anthropic.com',
                    'timeout': 30,
                    'max_retries': 3
                }
            },
            'model_mapping': {
                'gpt-4': 'openai',
                'gpt-3.5-turbo': 'openai',
                'claude-3-opus': 'anthropic',
                'claude-3-sonnet': 'anthropic',
                'claude-3-haiku': 'anthropic'
            },
            'fallback_strategy': 'round_robin',
            'health_check_interval': 60,
            'cost_tracking': True
        }

    def get_provider_config(self, provider_name: str) -> ProviderConfig:
        """Get configuration for a specific provider"""
        if provider_name not in self.config['providers']:
            raise ValueError(f"Provider {provider_name} not found in configuration")
        provider_config = self.config['providers'][provider_name]
        # Resolve environment variables
        api_key = self._resolve_env_var(provider_config['api_key'])
        return ProviderConfig(
            name=provider_name,
            api_key=api_key,
            base_url=provider_config.get('base_url'),
            timeout=provider_config.get('timeout', 30),
            max_retries=provider_config.get('max_retries', 3),
            rate_limit=provider_config.get('rate_limit')
        )

    def _resolve_env_var(self, value: str) -> str:
        """Resolve ${VAR} references to environment variables"""
        if value.startswith('${') and value.endswith('}'):
            env_var = value[2:-1]
            return os.getenv(env_var, '')
        return value

    def get_model_mapping(self) -> Dict[str, str]:
        """Get model to provider mapping"""
        return self.config.get('model_mapping', {})

    def save_config(self):
        """Save current configuration to file"""
        with open(self.config_path, 'w') as file:
            yaml.dump(self.config, file, default_flow_style=False)
```
## Implementation Example

Here's a complete example of setting up and using the unified AI management system:

```python
import asyncio


async def setup_unified_ai_manager():
    """Set up a complete unified AI management system"""
    # Initialize configuration
    config_manager = ConfigurationManager()
    # Initialize the unified manager
    ai_manager = UnifiedAIManager()
    # Register providers
    for provider_name in ['openai', 'anthropic']:
        try:
            provider_config = config_manager.get_provider_config(provider_name)
            if provider_name == 'openai':
                provider = OpenAIProvider(provider_config.api_key)
            elif provider_name == 'anthropic':
                provider = AnthropicProvider(provider_config.api_key)
            ai_manager.register_provider(provider_name, provider)
            print(f"✅ Registered provider: {provider_name}")
        except Exception as e:
            print(f"❌ Failed to register {provider_name}: {e}")
    # Set up model mapping
    model_mapping = config_manager.get_model_mapping()
    for model, provider in model_mapping.items():
        ai_manager.map_model_to_provider(model, provider)
    return ai_manager


async def example_usage():
    """Example of using the unified AI manager"""
    # Set up the manager
    ai_manager = await setup_unified_ai_manager()
    # Example prompts
    prompts = [
        "Explain quantum computing in simple terms",
        "Write a Python function to calculate fibonacci numbers",
        "Summarize the benefits of unified AI management"
    ]
    # Generate responses using different models
    models = ['gpt-3.5-turbo', 'claude-3-haiku', 'gpt-4']
    for i, prompt in enumerate(prompts):
        model = models[i % len(models)]
        try:
            print(f"\n🤖 Using model: {model}")
            print(f"📝 Prompt: {prompt[:50]}...")
            response = await ai_manager.generate_text(prompt, model, max_tokens=150)
            print(f"✅ Response: {response[:100]}...")
        except Exception as e:
            print(f"❌ Error: {e}")
    # Show metrics
    print("\n📊 Usage Metrics:")
    metrics = ai_manager.get_usage_metrics()
    for provider, data in metrics.items():
        print(f"  {provider}: {data['requests']} requests, {data['errors']} errors")
```
## Run the example

```python
if __name__ == "__main__":
    asyncio.run(example_usage())
```
## Configuration File Example

Create an `ai_config.yaml` file for your configuration:

```yaml
providers:
  openai:
    api_key: ${OPENAI_API_KEY}
    base_url: https://api.openai.com/v1
    timeout: 30
    max_retries: 3
    rate_limit: 3500  # requests per minute
  anthropic:
    api_key: ${ANTHROPIC_API_KEY}
    base_url: https://api.anthropic.com
    timeout: 30
    max_retries: 3
    rate_limit: 500
  google:
    api_key: ${GOOGLE_AI_API_KEY}
    base_url: https://generativelanguage.googleapis.com
    timeout: 30
    max_retries: 3

model_mapping:
  gpt-4: openai
  gpt-3.5-turbo: openai
  gpt-4-turbo: openai
  claude-3-opus: anthropic
  claude-3-sonnet: anthropic
  claude-3-haiku: anthropic
  gemini-pro: google

fallback_strategy: round_robin
health_check_interval: 60
cost_tracking: true
```
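
Note that the `rate_limit` values above are declarative: nothing in the code so far enforces them. A minimal client-side sketch, assuming a simple per-provider sliding window over request counts (real provider limits are usually token-based as well), could look like this:

```python
import asyncio
import time

class SlidingWindowRateLimiter:
    """Allow at most max_requests calls per `window` seconds.
    A simplification: real provider limits also count tokens."""

    def __init__(self, max_requests: int, window: float = 60.0):
        self.max_requests = max_requests
        self.window = window
        self.timestamps: list = []
        self._lock = asyncio.Lock()

    async def acquire(self):
        async with self._lock:
            now = time.monotonic()
            # Keep only timestamps inside the current window
            self.timestamps = [t for t in self.timestamps if now - t < self.window]
            if len(self.timestamps) >= self.max_requests:
                # Sleep until the oldest request ages out of the window
                await asyncio.sleep(self.window - (now - self.timestamps[0]))
            self.timestamps.append(time.monotonic())

# Usage sketch: await limiter.acquire() before each provider call
```

Holding the lock while sleeping serializes waiting callers, which is acceptable for a sketch but worth replacing with a token bucket in production.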
## Best Practices

### 1. Provider Selection Strategy

```python
from typing import Dict


class ProviderSelectionStrategy:
    """Different strategies for selecting providers"""

    @staticmethod
    async def cost_based(providers: Dict[str, AIProvider], prompt: str, model: str) -> str:
        """Select provider based on estimated cost (async because the
        cost estimate is an async provider call)"""
        costs = {}
        for name, provider in providers.items():
            try:
                costs[name] = await provider.get_cost_estimate(prompt, model)
            except Exception:
                costs[name] = float('inf')
        return min(costs, key=costs.get)

    @staticmethod
    def performance_based(providers: Dict[str, AIProvider], metrics: Dict) -> str:
        """Select provider based on performance metrics"""
        performance_scores = {}
        for name in providers:
            provider_metrics = metrics.get(name, {})
            error_rate = provider_metrics.get('errors', 0) / max(provider_metrics.get('requests', 1), 1)
            performance_scores[name] = 1 - error_rate
        return max(performance_scores, key=performance_scores.get)

    @staticmethod
    def round_robin(providers: Dict[str, AIProvider], last_used: Dict[str, float]) -> str:
        """Select the provider that was used least recently"""
        # Providers that have never been used go first
        unused = [name for name in providers if name not in last_used]
        if unused:
            return unused[0]
        return min(last_used, key=last_used.get)
```
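
These strategies are interchangeable. One way to wire one in, assuming you subclass the manager rather than modify it, is to override `_select_best_provider`:

```python
from typing import Optional

# Hypothetical wiring: delegate selection to the performance-based
# strategy instead of the built-in error-count heuristic.
class StrategyAIManager(UnifiedAIManager):
    def _select_best_provider(self, model: str) -> Optional[str]:
        healthy = {
            name: provider
            for name, provider in self.providers.items()
            if self.health_status.get(name)
        }
        if not healthy:
            return None
        return ProviderSelectionStrategy.performance_based(
            healthy, self.usage_metrics
        )
```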
### 2. Error Handling and Retry Logic

```python
import asyncio
from functools import wraps


def retry_with_backoff(max_retries: int = 3, base_delay: float = 1.0):
    """Decorator for retry logic with exponential backoff"""
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries):
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    last_exception = e
                    if attempt < max_retries - 1:
                        delay = base_delay * (2 ** attempt)
                        await asyncio.sleep(delay)
            raise last_exception
        return wrapper
    return decorator


class RobustAIManager(UnifiedAIManager):
    """Enhanced AI manager with robust error handling"""

    @retry_with_backoff(max_retries=3)
    async def generate_text(self, prompt: str, model: str, **kwargs) -> str:
        """Generate text with retry logic"""
        return await super().generate_text(prompt, model, **kwargs)
```
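
Because the decorator wraps the whole `generate_text` call, each retry re-runs the provider selection and fallback logic as well, so a transient failure can end up retried against a different provider. If you stack both mechanisms, keep the per-provider `max_retries` in `ProviderConfig` low, or worst-case latency multiplies.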
## Monitoring and Observability

```python
import logging
from datetime import datetime
from typing import Any, Dict


class AIConnectivityMonitor:
    """Monitor AI connectivity and performance"""

    def __init__(self):
        self.logger = logging.getLogger('ai_connectivity')
        self.metrics = {
            'requests': 0,
            'errors': 0,
            'latency': [],
            'costs': []
        }

    def log_request(self, provider: str, model: str, latency: float, cost: float, success: bool):
        """Log a request for monitoring"""
        self.metrics['requests'] += 1
        self.metrics['latency'].append(latency)
        self.metrics['costs'].append(cost)
        if not success:
            self.metrics['errors'] += 1
        self.logger.info(
            f"Request to {provider}/{model}: "
            f"latency={latency:.2f}s, cost=${cost:.4f}, success={success}"
        )

    def get_health_report(self) -> Dict[str, Any]:
        """Generate health report"""
        total_requests = self.metrics['requests']
        error_rate = self.metrics['errors'] / max(total_requests, 1)
        avg_latency = sum(self.metrics['latency']) / max(len(self.metrics['latency']), 1)
        total_cost = sum(self.metrics['costs'])
        return {
            'total_requests': total_requests,
            'error_rate': error_rate,
            'avg_latency': avg_latency,
            'total_cost': total_cost,
            'timestamp': datetime.now().isoformat()
        }
```
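
To connect the monitor to the manager, one approach is a thin wrapper that times each call. This is a sketch: the cost figure reuses the provider's own estimate, so treat it as approximate.

```python
import time

# Sketch: time each request and feed the result to the monitor.
async def monitored_generate(manager: UnifiedAIManager,
                             monitor: AIConnectivityMonitor,
                             prompt: str, model: str, **kwargs) -> str:
    provider_name = manager.model_mapping.get(model, "unknown")
    provider = manager.providers.get(provider_name)
    cost = await provider.get_cost_estimate(prompt, model) if provider else 0.0
    start = time.perf_counter()
    try:
        result = await manager.generate_text(prompt, model, **kwargs)
        monitor.log_request(provider_name, model,
                            time.perf_counter() - start, cost, success=True)
        return result
    except Exception:
        monitor.log_request(provider_name, model,
                            time.perf_counter() - start, cost, success=False)
        raise
```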
## Conclusion

A unified AI model management system provides significant benefits:
- **Simplified Development**: Single interface for multiple providers
- **Improved Reliability**: Automatic fallbacks and error handling
- **Cost Optimization**: Intelligent provider selection based on cost and performance
- **Better Monitoring**: Centralized metrics and health monitoring
- **Scalability**: Easy to add new providers and models

:::tip Implementation Priority

Start with a simple provider abstraction layer, then gradually add advanced features like load balancing and monitoring.

:::
The key to success is starting with a solid foundation and gradually building more sophisticated features as your needs grow. Remember to monitor performance and costs to ensure your unified system is providing the expected benefits.