ChromaDB Integration
Overview
ChromaDB is an open-source vector database that integrates seamlessly with LlmTornado. It provides efficient storage and retrieval of embeddings for semantic search and RAG applications.
Quick Start
csharp
// Install packages:
// dotnet add package LlmTornado.VectorDatabases.ChromaDB
using LlmTornado;
using LlmTornado.VectorDatabases;
using ChromaDB;
// Initialize the LlmTornado API client ("your-api-key" is a placeholder).
TornadoApi api = new TornadoApi("your-api-key");
// Create a ChromaDB client pointed at a server on localhost, port 8000.
ChromaClient chromaClient = new ChromaClient("http://localhost:8000");
// Create the collection that will hold the documents.
Collection collection = await chromaClient.CreateCollection("my_documents");
// Sample documents to index (three entries).
List<string> documents = new List<string>
{
"LlmTornado is a C# library",
"Vector databases store embeddings",
"RAG improves AI responses"
};
// Generate embeddings for all documents in a single call.
// NOTE(review): presumably one vector per document, in input order — confirm against the API.
List<float[]> embeddings = await api.Embeddings.CreateEmbedding(
documents,
ChatModel.OpenAi.Embedding.Ada002
);
// Store ids, embeddings and document text together; each argument has three entries here.
await collection.Add(
ids: new[] { "doc1", "doc2", "doc3" },
embeddings: embeddings,
documents: documents
);
// Embed the query text with the same model that was used for the documents.
string query = "What is LlmTornado?";
float[] queryEmbedding = await api.Embeddings.CreateEmbedding(
query,
ChatModel.OpenAi.Embedding.Ada002
);
QueryResponse results = await collection.Query(
queryEmbeddings: new[] { queryEmbedding },
nResults: 3
);
Setup
Installation
bash
# Install ChromaDB package
dotnet add package LlmTornado.VectorDatabases.ChromaDB
# Run ChromaDB server (Docker)
docker run -p 8000:8000 chromadb/chroma
Configuration
csharp
// Option A: local ChromaDB server (no auth token passed).
ChromaClient client = new ChromaClient("http://localhost:8000");
// Option B: remote ChromaDB instance, authenticated with a token.
// A distinct variable name is used so both options can sit in the same scope;
// declaring `client` twice is a compile error (CS0128).
ChromaClient remoteClient = new ChromaClient(
    "https://your-chroma-instance.com",
    authToken: "your-token"
);
Working with Collections
Create Collection
csharp
Collection collection = await chromaClient.CreateCollection(
name: "my_collection",
metadata: new Dictionary<string, object>
{
["description"] = "Product descriptions"
}
);
Add Documents
csharp
await collection.Add(
ids: documentIds,
embeddings: embeddings,
documents: documents,
metadatas: metadata // Optional
);
Query Collection
csharp
QueryResponse results = await collection.Query(
queryEmbeddings: new[] { queryEmbedding },
nResults: 10,
where: new Dictionary<string, object> // Optional filters
{
["category"] = "technology"
}
);
Update and Delete
csharp
// Update
await collection.Update(
ids: new[] { "doc1" },
embeddings: new[] { newEmbedding },
documents: new[] { newDocument }
);
// Delete
await collection.Delete(ids: new[] { "doc1" });
RAG Implementation
csharp
// Answers userQuery with retrieval-augmented generation: embeds the query,
// retrieves the closest stored documents from the ChromaDB collection, and
// passes them to the chat model as context.
// Relies on `api` and `collection` being in scope (see the Quick Start snippet).
// Returns the value produced by chat.GetResponse().
async Task<string> RAGQuery(string userQuery)
{
    // 1. Embed query
    float[] queryEmbedding = await api.Embeddings.CreateEmbedding(
        userQuery,
        ChatModel.OpenAi.Embedding.Ada002
    );

    // 2. Retrieve relevant documents (top 3 matches)
    QueryResponse results = await collection.Query(
        queryEmbeddings: new[] { queryEmbedding },
        nResults: 3
    );

    // 3. Build context: one retrieved document per line.
    // Only one query embedding was sent, so only index 0 is read.
    // NOTE(review): presumably Documents holds one result list per query embedding — confirm.
    string context = string.Join("\n", results.Documents[0]);

    // 4. Generate response with context
    Conversation chat = api.Chat.CreateConversation(ChatModel.OpenAi.Gpt4.O);
    chat.AppendSystemMessage($"Use this context to answer: {context}");
    chat.AppendUserInput(userQuery);
    return await chat.GetResponse();
}
Best Practices
- Use meaningful collection names
- Include metadata for filtering
- Batch operations when possible
- Implement proper error handling
- Monitor collection size and performance
- Clean up old/unused documents