A Modern Swift AI SDK that makes AI integration feel natural
Named after the spider-tank AI from Ghost in the Shell, Tachikoma provides an intelligent, adaptable interface to AI services with a modern, Swift-native API.
import Tachikoma // Single unified module
// Simple text generation
let answer = try await generate("What is 2+2?", using: .openai(.gpt4o))
print(answer) // "4"
// With different models
let response1 = try await generate("Hello", using: .anthropic(.opus4))
let response2 = try await generate("Hello", using: .grok(.grok4))
let response3 = try await generate("Hello", using: .ollama(.llama33))
// Multi-turn conversations
let conversation = Conversation()
conversation.addUserMessage("Hello!")
let response = try await conversation.continue(using: .claude)
print(response) // Assistant's response
conversation.addUserMessage("Tell me about Swift")
let nextResponse = try await conversation.continue()
// Uses same model as previous call
COMPLETE: Full implementation of OpenAI's Realtime API for ultra-low-latency voice conversations!
The Realtime API enables natural, real-time voice conversations with GPT-4o, featuring:
- ~500ms latency - Near-instantaneous voice responses
- WebSocket streaming - Persistent bidirectional connection
- Native voice I/O - No separate TTS/STT needed
- Function calling - Voice-triggered tool execution
// Basic voice conversation
let conversation = try RealtimeConversation(configuration: config)
try await conversation.start(
model: .gpt4oRealtime,
voice: .nova,
instructions: "You are a helpful voice assistant"
)
// Manual turn control
try await conversation.startListening()
// User speaks...
try await conversation.stopListening()
// Handle responses
for await transcript in conversation.transcriptUpdates {
print("Assistant: \(transcript)")
}
// End conversation
await conversation.end()
// Full-featured configuration
let config = EnhancedSessionConfiguration(
model: "gpt-4o-realtime-preview",
voice: .nova,
instructions: "You are an expert assistant",
inputAudioFormat: .pcm16,
outputAudioFormat: .pcm16,
inputAudioTranscription: .whisper, // Transcribe user audio
turnDetection: RealtimeTurnDetection(
type: .serverVad,
threshold: 0.5,
silenceDurationMs: 200,
prefixPaddingMs: 300
),
modalities: .all, // Text and audio
temperature: 0.8,
maxResponseOutputTokens: 4096
)
// Production-ready settings
let settings = ConversationSettings(
autoReconnect: true,
maxReconnectAttempts: 3,
reconnectDelay: 2.0,
bufferWhileDisconnected: true, // Buffer audio during disconnection
enableEchoCancellation: true,
enableNoiseSuppression: true
)
// Create advanced conversation manager
let conversation = try AdvancedRealtimeConversation(
apiKey: apiKey,
configuration: config,
settings: settings
)
// Dynamic modality switching
try await conversation.updateModalities(.text) // Text-only mode
try await conversation.updateModalities(.audio) // Audio-only mode
try await conversation.updateModalities(.all) // Both modalities
// Update VAD settings on the fly
try await conversation.updateTurnDetection(
RealtimeTurnDetection.serverVAD
)
// Register tools for voice interaction
await conversation.registerTools([
createTool(
name: "get_weather",
description: "Get current weather for a location",
parameters: [
AgentToolParameterProperty(
name: "location",
type: .string,
description: "City and state/country"
)
]
) { args in
let location = try args.stringValue("location")
// Fetch weather data...
return .string("Sunny, 22°C in \(location)")
}
])
// Built-in tools available
await conversation.registerBuiltInTools()
// Includes: Weather, Time, Calculator, WebSearch, Translation
// Tools are automatically invoked during voice conversation
// User: "What's the weather in Tokyo?"
// Assistant calls get_weather("Tokyo") and responds with result
// Custom audio pipeline configuration
let pipeline = try AudioStreamPipeline()
pipeline.delegate = self
// Handle audio events
extension MyController: AudioStreamPipelineDelegate {
func audioStreamPipeline(_ pipeline: AudioStreamPipeline,
didCaptureAudio data: Data) {
// Process captured audio (24kHz PCM16)
}
func audioStreamPipeline(_ pipeline: AudioStreamPipeline,
didUpdateAudioLevel level: Float) {
// Update UI with audio levels
}
func audioStreamPipeline(_ pipeline: AudioStreamPipeline,
didDetectSpeechStart: Bool) {
// Handle speech detection
}
}
// Observable conversation for SwiftUI
@StateObject private var conversation = AdvancedRealtimeConversation(
apiKey: apiKey,
configuration: .voiceConversation()
)
// Pre-built SwiftUI view
RealtimeConversationView(
apiKey: apiKey,
configuration: .voiceConversation(),
onError: { error in
print("Conversation error: \(error)")
}
)
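Beyond the pre-built view, the documented transcript stream and listening controls can drive a custom layout. The sketch below is illustrative only: it assumes the conversation instance is created and started elsewhere and injected into the view, and the local isListening flag stands in for whatever published state the conversation actually exposes.
import SwiftUI
import Tachikoma
// Minimal custom voice UI sketch (assumptions noted above)
struct VoiceTranscriptView: View {
    let conversation: RealtimeConversation // assumed to be created and started by the caller
    @State private var transcript = ""
    @State private var isListening = false
    var body: some View {
        VStack(spacing: 16) {
            ScrollView { Text(transcript) }
            Button(isListening ? "Stop" : "Talk") {
                Task {
                    if isListening {
                        try await conversation.stopListening()
                    } else {
                        try await conversation.startListening()
                    }
                    isListening.toggle()
                }
            }
        }
        .task {
            // Mirror assistant transcripts into local state as they arrive
            for await text in conversation.transcriptUpdates {
                transcript = text
            }
        }
        .padding()
    }
}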
All 37 Realtime API events are fully supported:
// Client events (9 types)
case sessionUpdate(SessionUpdateEvent)
case inputAudioBufferAppend(InputAudioBufferAppendEvent)
case inputAudioBufferCommit
case inputAudioBufferClear
case conversationItemCreate(ConversationItemCreateEvent)
case conversationItemTruncate(ConversationItemTruncateEvent)
case conversationItemDelete(ConversationItemDeleteEvent)
case responseCreate(ResponseCreateEvent)
case responseCancel
// Server events (28 types)
case sessionCreated(SessionCreatedEvent)
case conversationItemCreated(ConversationItemCreatedEvent)
case inputAudioBufferSpeechStarted
case responseAudioDelta(ResponseAudioDeltaEvent)
case responseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent)
case rateLimitsUpdated(RateLimitsUpdatedEvent)
case error(RealtimeErrorEvent)
// ... and more
- ✅ Phase 1: Core WebSocket infrastructure
- ✅ Phase 2: Event system (37 events)
- ✅ Phase 3: Audio pipeline (24kHz PCM16)
- ✅ Phase 4: Function calling
- ✅ Phase 5: Advanced features (VAD, modalities, auto-reconnect)
- ✅ Phase 6: Testing & documentation
See docs/openai-harmony.md for complete implementation details.
Comprehensive audio capabilities in a dedicated module:
import TachikomaAudio
// Transcribe audio files
let text = try await transcribe(contentsOf: audioURL)
// With specific model and language
let result = try await transcribe(
audioData,
using: .openai(.whisper1),
language: "en",
timestampGranularities: [.word, .segment]
)
// Generate speech from text
let audioData = try await generateSpeech(
"Hello world",
using: .openai(.tts1HD),
voice: .nova,
speed: 1.2
)
// Record audio from microphone
let recorder = AudioRecorder()
try await recorder.startRecording()
// ... user speaks ...
let recording = try await recorder.stopRecording()
let transcription = try await transcribe(recording)
- Transcription: OpenAI Whisper, Groq, Deepgram, ElevenLabs
- Speech Synthesis: OpenAI TTS, ElevenLabs
- Recording: Cross-platform AVFoundation support
Tachikoma provides a type-safe tool system that catches mismatched tool inputs and outputs at compile time rather than at runtime:
// Type-safe tool using AgentToolProtocol
struct WeatherTool: AgentToolProtocol {
struct Input: AgentToolValue {
let location: String
let units: String
static var agentValueType: AgentValueType { .object }
func toJSON() throws -> Any {
["location": location, "units": units]
}
static func fromJSON(_ json: Any) throws -> Input {
guard let dict = json as? [String: Any],
let location = dict["location"] as? String,
let units = dict["units"] as? String else {
throw TachikomaError.invalidInput("Invalid weather input")
}
return Input(location: location, units: units)
}
}
struct Output: AgentToolValue {
let temperature: Double
let conditions: String
static var agentValueType: AgentValueType { .object }
func toJSON() throws -> Any {
["temperature": temperature, "conditions": conditions]
}
static func fromJSON(_ json: Any) throws -> Output {
guard let dict = json as? [String: Any],
let temperature = dict["temperature"] as? Double,
let conditions = dict["conditions"] as? String else {
throw TachikomaError.invalidInput("Invalid weather output")
}
return Output(temperature: temperature, conditions: conditions)
}
}
var name: String { "get_weather" }
var description: String { "Get current weather for a location" }
var schema: AgentToolSchema {
AgentToolSchema(
properties: [
"location": AgentPropertySchema(type: .string, description: "City name"),
"units": AgentPropertySchema(type: .string, description: "Temperature units", enumValues: ["celsius", "fahrenheit"])
],
required: ["location", "units"]
)
}
func execute(_ input: Input, context: ToolExecutionContext) async throws -> Output {
// Your weather API integration here
return Output(temperature: 22.5, conditions: "Sunny")
}
}
// Use with type-erased wrapper for dynamic scenarios
let weatherTool = WeatherTool()
let anyTool = AnyAgentTool(weatherTool)
// Or use the simpler AgentTool for backwards compatibility
let simpleTool = AgentTool(
name: "get_weather",
description: "Get current weather",
parameters: AgentToolParameters(
properties: [
"location": AgentToolParameterProperty(
name: "location",
type: .string,
description: "City name"
)
],
required: ["location"]
)
) { args in
let location = try args.stringValue("location")
return AnyAgentToolValue(string: "Sunny, 22°C in \(location)")
}
// Use tools with AI models
let result = try await generateText(
model: .claude,
messages: [.user("What's the weather in Tokyo?")],
tools: [simpleTool]
)
print(result.text)
The new type-safe tool system is built on the AgentToolValue protocol:
// All standard types conform to AgentToolValue
let stringValue = AnyAgentToolValue(string: "hello")
let intValue = AnyAgentToolValue(int: 42)
let doubleValue = AnyAgentToolValue(double: 3.14)
let boolValue = AnyAgentToolValue(bool: true)
let nullValue = AnyAgentToolValue(null: ())
// Complex types are supported
let arrayValue = AnyAgentToolValue(array: [
AnyAgentToolValue(string: "item1"),
AnyAgentToolValue(int: 2)
])
let objectValue = AnyAgentToolValue(object: [
"name": AnyAgentToolValue(string: "Alice"),
"age": AnyAgentToolValue(int: 30)
])
// Easy conversion from JSON
let jsonValue = try AnyAgentToolValue.fromJSON(["key": "value", "count": 123])
// Type-safe access
if let str = jsonValue.objectValue?["key"]?.stringValue {
print(str) // "value"
}
Build stateful, autonomous AI agents with memory and tool access:
import TachikomaAgent
// Define your agent with context
class MyContext {
var database: Database
var api: APIClient
}
let context = MyContext()
let agent = Agent(
name: "DataAnalyst",
instructions: "You are a helpful data analyst. You can query databases and call APIs to answer questions.",
model: .openai(.gpt4o),
tools: [databaseTool, apiTool],
context: context
)
// Execute tasks
let response = try await agent.execute("What were our top products last month?")
print(response.text)
// Stream responses
let stream = try await agent.stream("Generate a detailed sales report")
for try await delta in stream {
print(delta.content ?? "", terminator: "")
}
// Conversation persists across calls
let followUp = try await agent.execute("Can you break that down by region?")
// Session management
let sessionManager = AgentSessionManager.shared
sessionManager.createSession(sessionId: "analyst-001", agent: agent)
// Save/load sessions
let session = try await sessionManager.loadSession(id: "analyst-001")
Compile-time safety with provider-specific enums and full autocomplete support:
// Provider-specific models with compile-time checking
.openai(.gpt4o, .gpt41, .o3, .o3Mini, .custom("ft:gpt-4o:org:abc"))
.anthropic(.opus4, .sonnet4, .haiku35, .opus4Thinking)
.grok(.grok4, .grok40709, .grok2Vision1212)
.ollama(.llama33, .llama32, .llava, .codellama)
.google(.gemini2Flash, .gemini15Pro)
.mistral(.large2, .nemo)
.groq(.llama3170b, .mixtral8x7b)
// Third-party aggregators
.openRouter(modelId: "anthropic/claude-3.5-sonnet")
.together(modelId: "meta-llama/Llama-2-70b-chat-hf")
.replicate(modelId: "meta/llama-2-70b-chat")
// Custom endpoints
.openaiCompatible(modelId: "gpt-4", baseURL: "https://api.azure.com")
.anthropicCompatible(modelId: "claude-3-opus", baseURL: "https://api.custom.ai")
Simple, async-first API following modern Swift patterns:
// Generate text from a prompt
func generate(
_ prompt: String,
using model: LanguageModel = .default,
system: String? = nil,
maxTokens: Int? = nil,
temperature: Double? = nil
) async throws -> String
// Stream responses in real-time
func stream(
_ prompt: String,
using model: LanguageModel = .default,
system: String? = nil,
maxTokens: Int? = nil,
temperature: Double? = nil
) async throws -> AsyncThrowingStream<TextStreamDelta, Error>
// Analyze images with vision models
func analyze(
image: ImageInput,
prompt: String,
using model: Model? = nil
) async throws -> String
// Advanced generation with full control
func generateText(
model: LanguageModel,
messages: [ModelMessage],
tools: [AgentTool]? = nil,
settings: GenerationSettings = .default,
maxSteps: Int = 1
) async throws -> GenerateTextResult
// Generate embeddings (NEW)
func generateEmbedding(
model: EmbeddingModel,
input: EmbeddingInput,
settings: EmbeddingSettings = .default
) async throws -> EmbeddingResult
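Since the embeddings entry point is new, here is a minimal usage sketch. The .openai(.textEmbedding3Small) case, the .text input case, and the embeddings property on the result are assumptions about the API surface rather than confirmed names.
// Sketch: embed a single string (names marked above are assumptions)
let embeddingResult = try await generateEmbedding(
    model: .openai(.textEmbedding3Small),          // assumed EmbeddingModel case
    input: .text("Tachikoma is a Swift AI SDK"),   // assumed EmbeddingInput case
    settings: .default
)
print(embeddingResult.embeddings.first?.count ?? 0) // vector dimensionality (assumed property)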
Enhanced capabilities inspired by OpenAI Harmony patterns:
- 🎭 Multi-Channel Responses - Separate thinking, analysis, and final answers
- 🧠 Reasoning Effort Levels - Control depth of reasoning (low/medium/high)
- 🔄 Automatic Retry Handler - Exponential backoff with smart rate limit handling
- 🔧 Enhanced Tool System - Namespace and recipient support for tool routing
- 📊 Embeddings API - Unified interface for OpenAI, Cohere, Voyage embeddings
- 💾 Response Caching - Intelligent caching to reduce API calls
// Multi-channel responses
let result = try await generateText(
model: .openai(.o3),
messages: messages,
settings: GenerationSettings(reasoningEffort: .high)
)
for message in result.messages {
switch message.channel {
case .thinking: print("[Reasoning] \(message.content)")
case .final: print("[Answer] \(message.content)")
default: break
}
}
// Automatic retry with exponential backoff
let retryHandler = RetryHandler(policy: .aggressive)
let response = try await retryHandler.execute {
try await generateText(model: .openai(.gpt4o), messages: messages)
}
// Enhanced tools with namespaces
let tool = AgentTool(
name: "search",
description: "Search the web",
parameters: params,
namespace: "web",
recipient: "search-engine",
execute: { /* ... */ }
)
- 🎙️ WebSocket Streaming - Persistent connection for low-latency (~500ms) voice conversations
- 🔊 Bidirectional Audio - Real-time audio input and output with PCM16 format
- 🎯 Server VAD - Automatic Voice Activity Detection for natural turn-taking
- 🔄 Dynamic Modalities - Switch between text/audio/both during conversation
- 🛠️ Voice-Triggered Tools - Execute functions via voice commands
- 🔁 Auto-Reconnect - Automatic reconnection with exponential backoff
- 💾 Audio Buffering - Buffer audio during disconnection for seamless recovery
- 📱 SwiftUI Ready - Observable objects with @Published properties
See docs/openai-harmony.md for complete Realtime API documentation.
Fluent conversation API with SwiftUI integration:
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public final class Conversation: ObservableObject {
@Published public private(set) var messages: [ConversationMessage] = []
public func addUserMessage(_ content: String)
public func addAssistantMessage(_ content: String)
public func `continue`(using model: Model? = nil, tools: (any ToolKit)? = nil) async throws -> String
public func clear()
}
// Usage in SwiftUI
struct ChatView: View {
@StateObject private var conversation = Conversation()
var body: some View {
// Your chat UI here
}
}
Configure provider-specific settings while keeping universal parameters separate:
// GPT-5 with verbosity control (enables preamble messages)
let result = try await generateText(
model: .openai(.gpt5),
messages: messages,
settings: GenerationSettings(
maxTokens: 1000,
providerOptions: .init(
openai: .init(
verbosity: .high // Shows GPT-5's preamble messages
)
)
)
)
// O3/O4 reasoning models with effort levels
let reasoning = try await generateText(
model: .openai(.o3),
messages: messages,
settings: GenerationSettings(
providerOptions: .init(
openai: .init(
reasoningEffort: .high, // Control reasoning depth
parallelToolCalls: false
)
)
)
)
// Claude with thinking mode
let claude = try await generateText(
model: .anthropic(.opus4),
messages: messages,
settings: GenerationSettings(
temperature: 0.7,
providerOptions: .init(
anthropic: .init(
thinking: .enabled(budgetTokens: 5000),
cacheControl: .persistent
)
)
)
)
// Gemini with thinking configuration
let gemini = try await generateText(
model: .google(.gemini25),
messages: messages,
settings: GenerationSettings(
topK: 40, // Google supports topK
providerOptions: .init(
google: .init(
thinkingConfig: .init(
budgetTokens: 3000,
includeThoughts: true
),
safetySettings: .moderate
)
)
)
)
OpenAI Options:
- verbosity: Control output detail (low/medium/high), GPT-5 only
- reasoningEffort: Reasoning depth (minimal/low/medium/high), O3/O4 only
- parallelToolCalls: Enable parallel tool execution
- responseFormat: JSON mode (.json) or schema validation
- previousResponseId: Chain responses for conversation persistence
- logprobs: Include log probabilities in the response

Anthropic Options:
- thinking: Enable reasoning mode with a token budget
- cacheControl: Conversation caching (ephemeral/persistent)

Google Options:
- thinkingConfig: Control reasoning with a token budget
- safetySettings: Content filtering (strict/moderate/relaxed)

Mistral Options:
- safeMode: Enable safe content generation

Groq Options:
- speed: Inference speed (normal/fast/ultraFast)

Grok Options:
- funMode: Enable creative responses
- includeCurrentEvents: Access to current events
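The remaining providers use the same providerOptions pattern shown earlier. The sketch below assumes their option initializers mirror the OpenAI and Anthropic ones (the mistral:/grok: labels are assumptions; the option names come from the lists above).
// Mistral with safe mode (sketch; initializer shape assumed)
let safeMistral = try await generateText(
    model: .mistral(.large2),
    messages: messages,
    settings: GenerationSettings(
        providerOptions: .init(
            mistral: .init(safeMode: true) // safe content generation
        )
    )
)
// Grok with creative mode and current-events access (sketch)
let playfulGrok = try await generateText(
    model: .grok(.grok4),
    messages: messages,
    settings: GenerationSettings(
        providerOptions: .init(
            grok: .init(
                funMode: true,
                includeCurrentEvents: true
            )
        )
    )
)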
Type-safe function calling with structured tool definitions:
// Tool parameters use strongly-typed enums (no strings)
public enum ParameterType: String, Sendable, Codable {
case string, number, integer, boolean, array, object, null
}
// Define tools using the protocol
struct MathToolKit: ToolKit {
var tools: [Tool<MathToolKit>] {
[
createTool(
name: "calculate",
description: "Perform mathematical calculations"
) { input, context in
let expression = try input.stringValue("expression")
return try context.calculate(expression)
}
]
}
func calculate(_ expression: String) throws -> String {
// Your calculation logic
let expr = NSExpression(format: expression)
let result = expr.expressionValue(with: nil, context: nil) as! NSNumber
return "\(result.doubleValue)"
}
}
// Use with AI models
let toolkit = MathToolKit()
let result = try await generateText(
model: .claude,
messages: [.user("What is 15 * 23?")],
tools: toolkit.tools.map { $0.toAgentTool() }
)
- Tool Type Extraction: All tool-related types have been centralized in Core/ToolTypes.swift to eliminate circular dependencies and improve build times
- Type-Safe Parameters: Removed string-based parameter types in favor of the strongly-typed ParameterType enum
- Module Isolation: TachikomaMCP can now use tool types without depending on agent functionality
- Build Time Optimization: Estimated 40-50% improvement in incremental build times through better modularization
Tachikoma Swift AI SDK
|
┌─────────────────────────────────────────────────────────────────┐
│ Public API Layer │
│ generate() • stream() • analyze() • Conversation • ToolKit │
└─────────────────────────────────────────────────────────────────┘
|
┌─────────────────────────────────────────────────────────────────┐
│ Model Selection System │
│ LanguageModel.openai(.gpt4o) • .anthropic(.opus4) │
│ .grok(.grok4) • .ollama(.llama33) • .google(.gemini2Flash) │
└─────────────────────────────────────────────────────────────────┘
|
┌─────────────────────────────────────────────────────────────────┐
│ Provider Implementations │
│ OpenAI • Anthropic • Grok (xAI) • Ollama • Google • Mistral │
│ Groq • OpenRouter • Together • Replicate • Custom Endpoints │
└─────────────────────────────────────────────────────────────────┘
Tachikoma provides multiple modules for different functionality:
Tachikoma - Core AI SDK with generation, models, tools, and conversations
- All tool types are now in Core/ToolTypes.swift for better modularization
- Type-safe ParameterType enum for tool parameters (no string-based types)
- Unified tool system used by all other modules

TachikomaAgent - Agent system module for building autonomous AI agents
- Agent<Context> class for stateful AI agents with conversation memory
- Session management and persistence
- Tool integration with context passing
- Built on top of core Tachikoma functionality

TachikomaMCP - Model Context Protocol (MCP) client and tool adapters
- Uses shared tool types from the core module
- Provides dynamic tool discovery and execution
- Bridges MCP tools to Tachikoma's AgentTool format

TachikomaAudio - Comprehensive audio processing module
- Audio transcription (OpenAI Whisper, Groq, Deepgram)
- Speech synthesis (OpenAI TTS, ElevenLabs)
- Audio recording with AVFoundation
- Batch processing and convenience functions
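Each module is a separate Swift Package Manager product, so a target only imports what it uses:
import Tachikoma       // core generation, models, tools, conversations
import TachikomaAgent  // autonomous agents with memory
import TachikomaAudio  // transcription, speech synthesis, recording
import TachikomaMCP    // MCP client and tool adapters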
The LanguageModel enum provides compile-time safety and autocomplete for all supported AI providers:
public enum LanguageModel: Sendable, CustomStringConvertible {
// Major providers with sub-enums
case openai(OpenAI) // .gpt4o, .gpt41, .o3, .o3Mini, .o4Mini
case anthropic(Anthropic) // .opus4, .sonnet4, .haiku35, .opus4Thinking
case grok(Grok) // .grok4, .grok40709, .grok2Vision1212
case ollama(Ollama) // .llama33, .llama32, .llava, .codellama
case google(Google) // .gemini2Flash, .gemini15Pro
case mistral(Mistral) // .large2, .nemo, .codestral
case groq(Groq) // .llama3170b, .mixtral8x7b
// Third-party aggregators
case openRouter(modelId: String)
case together(modelId: String)
case replicate(modelId: String)
// Custom endpoints
case openaiCompatible(modelId: String, baseURL: String)
case anthropicCompatible(modelId: String, baseURL: String)
case custom(provider: any ModelProvider)
public static let `default`: LanguageModel = .anthropic(.opus4)
public static let claude: LanguageModel = .anthropic(.opus4)
public static let gpt4o: LanguageModel = .openai(.gpt4o)
public static let llama: LanguageModel = .ollama(.llama33)
}
Each model includes capabilities metadata:
// Automatic capability detection
let model = LanguageModel.openai(.gpt4o)
print(model.supportsVision) // true
print(model.supportsTools) // true
print(model.supportsAudioInput) // true
print(model.contextLength) // 128,000
Core async functions following Vercel AI SDK patterns:
// Simple convenience functions
public func generate(_ prompt: String, using model: LanguageModel = .default) async throws -> String
public func stream(_ prompt: String, using model: LanguageModel = .default) async throws -> AsyncThrowingStream<TextStreamDelta, Error>
public func analyze(image: ImageInput, prompt: String, using model: Model? = nil) async throws -> String
// Advanced functions with full control
public func generateText(
model: LanguageModel,
messages: [ModelMessage],
tools: [AgentTool]? = nil,
settings: GenerationSettings = .default,
maxSteps: Int = 1
) async throws -> GenerateTextResult
public func streamText(
model: LanguageModel,
messages: [ModelMessage],
tools: [AgentTool]? = nil,
settings: GenerationSettings = .default,
maxSteps: Int = 1
) async throws -> StreamTextResult
public func generateObject<T: Codable & Sendable>(
model: LanguageModel,
messages: [ModelMessage],
schema: T.Type,
settings: GenerationSettings = .default
) async throws -> GenerateObjectResult<T>
Type-safe function calling with protocol-based tool definitions:
// Protocol for type-safe tools
public protocol AgentToolProtocol: Sendable {
associatedtype Input: AgentToolValue
associatedtype Output: AgentToolValue
var name: String { get }
var description: String { get }
var schema: AgentToolSchema { get }
func execute(_ input: Input, context: ToolExecutionContext) async throws -> Output
}
// Protocol for tool values with JSON conversion
public protocol AgentToolValue: Sendable, Codable {
static var agentValueType: AgentValueType { get }
func toJSON() throws -> Any
static func fromJSON(_ json: Any) throws -> Self
}
// Type-erased wrapper for dynamic scenarios
public struct AnyAgentToolValue: AgentToolValue {
// Wraps any AgentToolValue for runtime flexibility
}
// Backwards-compatible tool definition
public struct AgentTool: Sendable {
public let name: String
public let description: String
public let parameters: AgentToolParameters
public func execute(_ arguments: AgentToolArguments,
context: ToolExecutionContext? = nil) async throws -> AnyAgentToolValue
}
// Protocol for tool collections
public protocol ToolKit: Sendable {
associatedtype Context = Self
var tools: [Tool<Context>] { get }
}
Multi-turn conversations with SwiftUI support:
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public final class Conversation: ObservableObject, @unchecked Sendable {
@Published public private(set) var messages: [ConversationMessage] = []
public func addUserMessage(_ content: String)
public func addAssistantMessage(_ content: String)
public func `continue`(using model: Model? = nil, tools: (any ToolKit)? = nil) async throws -> String
public func clear()
}
Extensible provider architecture supporting multiple AI services:
// Provider protocol for extensibility
public protocol ModelProvider: Sendable {
var modelId: String { get }
var baseURL: String? { get }
var apiKey: String? { get }
var capabilities: ModelCapabilities { get }
func generateText(request: ProviderRequest) async throws -> ProviderResponse
func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error>
}
// Factory for creating providers
public struct ProviderFactory {
public static func createProvider(for model: LanguageModel) throws -> any ModelProvider
}
| Component | File Location | Lines | Purpose |
|---|---|---|---|
| Model System | Sources/Tachikoma/Model.swift | 1-875 | Type-safe model enums, capabilities, provider selection |
| Generation | Sources/Tachikoma/Generation.swift | 18-569 | Core async generation functions, streaming, image analysis |
| Tool System | Sources/Tachikoma/Tool.swift | - | Tool protocol, execution, input/output handling |
| ToolKit Builders | Sources/Tachikoma/ToolKit.swift | 1-285 | ToolKit protocol, result builders, example implementations |
| Conversation | Sources/Tachikoma/Conversation.swift | - | Multi-turn conversation management, SwiftUI integration |
| Provider Factory | Sources/Tachikoma/ProviderFactory.swift | - | Provider instantiation, capability mapping |
| Usage Tracking | Sources/Tachikoma/UsageTracking.swift | - | Token usage, cost tracking, session management |
| Model Selection | Sources/Tachikoma/ModelSelection.swift | - | Command-line model parsing, string matching |
Add Tachikoma to your project using Swift Package Manager:
// Package.swift
dependencies: [
.package(url: "https://github.com/steipete/Tachikoma.git", from: "1.0.0")
],
targets: [
.target(
name: "YourTarget",
dependencies: [
// Core AI functionality
.product(name: "Tachikoma", package: "Tachikoma"),
// Agent system (optional)
.product(name: "TachikomaAgent", package: "Tachikoma"),
// Audio processing (optional)
.product(name: "TachikomaAudio", package: "Tachikoma"),
// MCP support (optional)
.product(name: "TachikomaMCP", package: "Tachikoma"),
]
)
]
Set up API keys for the providers you want to use:
# OpenAI
export OPENAI_API_KEY="sk-..."
# Anthropic
export ANTHROPIC_API_KEY="sk-ant-..."
# Grok (xAI)
export X_AI_API_KEY="xai-..."
# or
export XAI_API_KEY="xai-..."
# Groq
export GROQ_API_KEY="gsk_..."
# Mistral
export MISTRAL_API_KEY="..."
# Google AI
export GOOGLE_API_KEY="AIza..."
# Ollama (runs locally)
export OLLAMA_API_KEY="optional-token" # Usually not needed
# Custom base URLs (optional)
export OPENAI_BASE_URL="https://api.custom.com/v1"
export ANTHROPIC_BASE_URL="https://api.custom.com"
export OLLAMA_BASE_URL="http://localhost:11434"
Tachikoma automatically loads API keys from environment variables when the SDK initializes. The configuration system uses a hierarchical priority:
1. Explicitly configured keys (via configuration.setAPIKey())
2. Environment variables (loaded automatically on startup)
3. Credentials file (~/.tachikoma/credentials)
import Tachikoma
// Keys are loaded automatically from environment variables
// No manual configuration needed if environment variables are set
// Check what's available
let config = TachikomaConfiguration() // Loads from environment by default
// These will return environment keys if available
print("OpenAI available: \(config.hasAPIKey(for: .openai))")
print("Anthropic available: \(config.hasAPIKey(for: .anthropic))")
print("Grok available: \(config.hasAPIKey(for: .grok))")
// Check specifically for environment vs configured keys
print("OpenAI from env: \(config.hasEnvironmentAPIKey(for: .openai))")
print("OpenAI configured: \(config.hasConfiguredAPIKey(for: .openai))")
The SDK now uses a type-safe Provider enum instead of strings:
// ✅ Type-safe provider API
let config = TachikomaConfiguration()
config.setAPIKey("sk-...", for: .openai)
config.setAPIKey("sk-ant-...", for: .anthropic)
config.setAPIKey("xai-...", for: .grok)
// ✅ All standard providers supported
let providers: [Provider] = [
.openai, // OPENAI_API_KEY
.anthropic, // ANTHROPIC_API_KEY
.grok, // X_AI_API_KEY or XAI_API_KEY
.groq, // GROQ_API_KEY
.mistral, // MISTRAL_API_KEY
.google, // GOOGLE_API_KEY
.ollama, // OLLAMA_API_KEY (optional)
.custom("my-provider") // Custom provider ID
]
Some providers support multiple environment variable names:
# Grok supports both naming conventions
export X_AI_API_KEY="xai-..." # Primary
export XAI_API_KEY="xai-..." # Alternative
// The SDK automatically checks both
let provider = Provider.grok
print(provider.environmentVariable) // "X_AI_API_KEY"
print(provider.alternativeEnvironmentVariables) // ["XAI_API_KEY"]
import Tachikoma
// Simple generation with default model (Claude Opus 4)
let response = try await generate("Explain Swift async/await")
// With specific model
let gptResponse = try await generate(
"Write a haiku about programming",
using: .openai(.gpt4o)
)
// With system prompt and parameters
let response = try await generate(
"Tell me a joke",
using: .anthropic(.sonnet4),
system: "You are a friendly comedian",
maxTokens: 100,
temperature: 0.9
)
let stream = try await stream("Write a long story", using: .claude)
for try await delta in stream {
switch delta.type {
case .textDelta:
if let content = delta.content {
print(content, terminator: "")
}
case .done:
print("\n[Stream complete]")
break
default:
break
}
}
// Analyze image from file path
let analysis = try await analyze(
image: .filePath("/path/to/image.png"),
prompt: "What do you see in this image?",
using: .openai(.gpt4o) // Vision-capable model required
)
// Analyze image from base64 data
let analysis = try await analyze(
image: .base64(base64String),
prompt: "Describe the contents of this screenshot"
)
let conversation = Conversation()
// Add messages
conversation.addUserMessage("Hello! I'm learning Swift.")
let response1 = try await conversation.continue(using: .claude)
print(response1)
conversation.addUserMessage("Can you explain optionals?")
let response2 = try await conversation.continue() // Uses same model
print(response2)
// In SwiftUI
struct ChatView: View {
@StateObject private var conversation = Conversation()
@State private var inputText = ""
var body: some View {
VStack {
ScrollView {
ForEach(conversation.messages) { message in
MessageView(message: message)
}
}
HStack {
TextField("Type a message...", text: $inputText)
Button("Send") {
Task {
conversation.addUserMessage(inputText)
inputText = ""
let response = try await conversation.continue()
// Response automatically added to conversation
}
}
}
}
}
}
// Define a custom tool kit
struct CalculatorToolKit: ToolKit {
var tools: [Tool<CalculatorToolKit>] {
[
createTool(
name: "calculate",
description: "Perform mathematical calculations"
) { input, context in
let expression = try input.stringValue("expression")
return try context.evaluate(expression: expression)
},
createTool(
name: "convert_currency",
description: "Convert between currencies"
) { input, context in
let amount = try input.doubleValue("amount")
let from = try input.stringValue("from_currency")
let to = try input.stringValue("to_currency")
return try await context.convertCurrency(amount: amount, from: from, to: to)
}
]
}
func evaluate(expression: String) throws -> String {
let expr = NSExpression(format: expression)
let result = expr.expressionValue(with: nil, context: nil) as! NSNumber
return "Result: \(result.doubleValue)"
}
func convertCurrency(amount: Double, from: String, to: String) async throws -> String {
// Your currency conversion logic here
return "\(amount) \(from) = \(amount * 1.1) \(to) (example rate)"
}
}
// Use tools with AI
let calculator = CalculatorToolKit()
let result = try await generateText(
model: .claude,
messages: [.user("What is 15 * 23 + 100? Also convert 50 USD to EUR.")],
tools: calculator.tools.map { $0.toAgentTool() }
)
print(result.text) // AI will use the tools to calculate and convert
# Build the package
swift build
# Run tests
swift test
# Build in release mode
swift build -c release
Control when generation should stop with flexible stop conditions:
// Stop on specific strings
let response = try await generateText(
model: .openai(.gpt4o),
messages: [.user("Count to 10 then say END")],
settings: GenerationSettings(
stopConditions: StringStopCondition("END")
)
)
// Response will stop at "END"
// Stop after token limit
let limited = try await generateText(
model: .claude,
messages: [.user("Write a long story")],
settings: GenerationSettings(
stopConditions: TokenCountStopCondition(maxTokens: 100)
)
)
// Stop on regex patterns
let patternStop = try await generateText(
model: .openai(.gpt4o),
messages: [.user("Generate code")],
settings: GenerationSettings(
stopConditions: RegexStopCondition(pattern: "// END OF CODE")
)
)
// Combine multiple conditions
let settings = GenerationSettings.withStopConditions(
StringStopCondition("STOP", caseSensitive: false),
TokenCountStopCondition(maxTokens: 500),
TimeoutStopCondition(timeout: 30.0),
maxTokens: 1000,
temperature: 0.7
)
// With streaming
let stream = try await streamText(
model: .claude,
messages: messages,
settings: GenerationSettings(
stopConditions: StringStopCondition("END")
)
)
// Or apply stop conditions to existing streams
let stoppedStream = stream
.stopOnString("STOP")
.stopAfterTokens(500)
.stopAfterTime(30.0)
// Custom stop conditions
let customStop = PredicateStopCondition { text, delta in
// Stop when text contains both keywords
text.contains("COMPLETE") && text.contains("DONE")
}
// Builder pattern for complex conditions
let complexCondition = StopConditionBuilder()
.whenContains("END")
.whenMatches("\\[DONE\\]")
.afterTokens(1000)
.afterTime(60.0)
.build()
Stop conditions work with both generateText and streamText, and are automatically passed to providers that support native stop sequences (like OpenAI's stop parameter).
Tachikoma automatically tracks token usage and costs across all operations:
// Usage is tracked automatically during generation
let result = try await generateText(
model: .openai(.gpt4o),
messages: [.user("Long prompt here...")]
)
if let usage = result.usage {
print("Input tokens: \(usage.inputTokens)")
print("Output tokens: \(usage.outputTokens)")
if let cost = usage.cost {
print("Estimated cost: $\(cost)")
}
}
// Access global usage statistics
let tracker = UsageTracker.shared
let todayUsage = tracker.getUsage(for: Date())
print("Today's total tokens: \(todayUsage.totalTokens)")
Handle complex workflows with multiple tool calls:
let result = try await generateText(
model: .claude,
messages: [.user("Research Swift 6 features, then write documentation")],
tools: researchTools.tools.map { $0.toAgentTool() },
maxSteps: 5 // Allow multiple rounds of tool calling
)
// Access all execution steps
for (index, step) in result.steps.enumerated() {
print("Step \(index + 1):")
print(" Text: \(step.text)")
print(" Tool calls: \(step.toolCalls.count)")
print(" Tool results: \(step.toolResults.count)")
}
Generate type-safe structured data:
struct PersonInfo: Codable {
let name: String
let age: Int
let occupation: String
let skills: [String]
}
let result = try await generateObject(
model: .claude,
messages: [.user("Create a person profile for a software engineer")],
schema: PersonInfo.self
)
let person = result.object
print("Name: \(person.name)")
print("Skills: \(person.skills.joined(separator: ", "))")
Extend Tachikoma with custom AI providers:
struct CustomProvider: ModelProvider {
let modelId = "custom-model-v1"
let baseURL: String? = "https://api.custom.ai"
let apiKey: String? = ProcessInfo.processInfo.environment["CUSTOM_API_KEY"]
let capabilities = ModelCapabilities(
supportsVision: true,
supportsTools: true,
contextLength: 200_000
)
func generateText(request: ProviderRequest) async throws -> ProviderResponse {
// Your custom implementation
}
func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
// Your custom streaming implementation
}
}
// Use custom provider
let customModel = LanguageModel.custom(provider: CustomProvider())
let response = try await generate("Hello", using: customModel)
See the following files for complete working examples:
- Tests/TachikomaTests/MinimalModernAPITests.swift - Model construction, tool creation, conversation management
- Sources/Tachikoma/ToolKit.swift - WeatherToolKit and MathToolKit implementations (lines 125-224)
- Examples/TachikomaExamples.swift - Advanced usage patterns and real-world scenarios
- Examples/DemoScript.swift - Interactive demo script
- Architecture Guide - Deep dive into system design and components
- Modern API Design - Implementation plan and design decisions
- API Reference - Complete API documentation in code
- Migration Guide - Moving from other AI SDKs
- Provider Guide - Setting up different AI providers
- Tool Development - Creating custom tools and toolkits
- Swift 6.0+ with strict concurrency enabled
- Platform Support:
- macOS 13.0+ (Ventura)
- iOS 16.0+
- watchOS 9.0+
- tvOS 16.0+
- Linux (Ubuntu 20.04+)
- Windows 10+
- Concurrency: Full @Sendable compliance throughout
- Dependencies: swift-log for logging
Tachikoma includes comprehensive test coverage using Swift Testing framework (Xcode 16+).
# Run all tests
swift test
# Run specific test suite
swift test --filter "ProviderOptionsTests"
# Run multiple test suites
swift test --filter "ProviderOptionsTests|ModelCapabilitiesTests"
# Run with verbose output
swift test --verbose
Tests are organized by feature area:
- Core Tests: Model capabilities, provider options, configuration
- Provider Tests: Individual provider implementations
- Tool Tests: Tool system and agent functionality
- Audio Tests: Audio processing and realtime features
- Integration Tests: End-to-end workflows
Tests for provider-specific configuration options:
@Test("OpenAI options with verbosity")
func testOpenAIVerbosity() {
let options = OpenAIOptions(
verbosity: .high,
reasoningEffort: .medium
)
#expect(options.verbosity == .high)
}
Tests for model-specific parameter validation:
@Test("GPT-5 excludes temperature")
func testGPT5Capabilities() {
let caps = ModelCapabilityRegistry.shared
.capabilities(for: .openai(.gpt5))
#expect(!caps.supportsTemperature)
#expect(caps.supportsVerbosity)
}
Tests for automatic parameter filtering:
@Test("Validate settings for model")
func testSettingsValidation() {
let settings = GenerationSettings(
temperature: 0.7, // Will be removed for GPT-5
providerOptions: .init(
openai: .init(verbosity: .high)
)
)
let validated = settings.validated(for: .openai(.gpt5))
#expect(validated.temperature == nil)
#expect(validated.providerOptions.openai?.verbosity == .high)
}
Use Swift Testing's modern syntax:
import Testing
@testable import Tachikoma
@Suite("My Feature Tests")
struct MyFeatureTests {
@Test("Test specific behavior")
func testBehavior() async throws {
// Arrange
let model = LanguageModel.openai(.gpt4o)
// Act
let result = try await generateText(
model: model,
messages: [.user("Hello")]
)
// Assert
#expect(!result.text.isEmpty)
}
}
- ✅ Provider Options: All provider-specific options
- ✅ Model Capabilities: Parameter support per model
- ✅ Settings Validation: Automatic filtering and validation
- ✅ Thread Safety: Concurrent access patterns
- ✅ Codable Conformance: Serialization/deserialization
- ✅ Tool System: Tool registration and execution
- ✅ Streaming: Async stream handling
- ✅ Error Handling: Proper error propagation
We welcome contributions! Please see our Contributing Guide for details.
git clone https://github.com/steipete/Tachikoma.git
cd Tachikoma
# Build and test
swift build
swift test
# Generate documentation
swift package generate-documentation
Tachikoma is available under the MIT License. See LICENSE for details.
Built with ❤️ for the Swift AI community
Intelligent • Adaptable • Reliable