Skip to content

ChromaDB Integration

Overview

ChromaDB is an open-source vector database that integrates seamlessly with LlmTornado. It provides efficient storage and retrieval of embeddings for semantic search and RAG applications.

Quick Start

csharp
// Install the package:
// dotnet add package LlmTornado.VectorDatabases.ChromaDB

using LlmTornado;
using LlmTornado.VectorDatabases;
using ChromaDB;

// Set up the LlmTornado API client
TornadoApi api = new("your-api-key");

// Connect to the ChromaDB server
ChromaClient chromaClient = new("http://localhost:8000");

// Create the collection that will hold the documents
Collection collection = await chromaClient.CreateCollection("my_documents");

// Sample documents and the IDs they are stored under
string[] documentIds = { "doc1", "doc2", "doc3" };
List<string> documents = new()
{
    "LlmTornado is a C# library",
    "Vector databases store embeddings",
    "RAG improves AI responses"
};

// Generate one embedding per document
// NOTE(review): confirm the model path below; embedding models may be exposed
// under a separate EmbeddingModel type rather than ChatModel.
List<float[]> embeddings = await api.Embeddings.CreateEmbedding(
    documents,
    ChatModel.OpenAi.Embedding.Ada002
);

// Store IDs, embeddings, and raw text in ChromaDB
await collection.Add(
    ids: documentIds,
    embeddings: embeddings,
    documents: documents
);

// Embed the question with the same model used for the documents
string query = "What is LlmTornado?";
float[] queryEmbedding = await api.Embeddings.CreateEmbedding(
    query,
    ChatModel.OpenAi.Embedding.Ada002
);

// Query the collection, asking for up to three results
float[][] queryEmbeddings = { queryEmbedding };
QueryResponse results = await collection.Query(
    queryEmbeddings: queryEmbeddings,
    nResults: 3
);

Setup

Installation

bash
# Add the ChromaDB integration package to the project
dotnet add package LlmTornado.VectorDatabases.ChromaDB

# Start a ChromaDB server in Docker, publishing container port 8000 on host port 8000
docker run --publish 8000:8000 chromadb/chroma

Configuration

csharp
// Local ChromaDB
ChromaClient localClient = new ChromaClient("http://localhost:8000");

// Remote ChromaDB with authentication.
// The two clients use distinct variable names so this snippet compiles when
// pasted as-is; declaring the same local twice in one scope is error CS0128.
// In real code, load the token from configuration or an environment variable
// instead of hard-coding it.
ChromaClient remoteClient = new ChromaClient(
    "https://your-chroma-instance.com",
    authToken: "your-token"
);

Working with Collections

Create Collection

csharp
// Metadata passed along when the collection is created
Dictionary<string, object> collectionMetadata = new()
{
    ["description"] = "Product descriptions"
};

Collection collection = await chromaClient.CreateCollection(
    name: "my_collection",
    metadata: collectionMetadata
);

Add Documents

csharp
// Add IDs, embeddings, and document text in a single call.
// The metadatas argument is optional.
await collection.Add(
    ids: documentIds,
    embeddings: embeddings,
    documents: documents,
    metadatas: metadata
);

Query Collection

csharp
// Optional filter passed as the `where` argument
Dictionary<string, object> categoryFilter = new()
{
    ["category"] = "technology"
};

QueryResponse results = await collection.Query(
    queryEmbeddings: new[] { queryEmbedding },
    nResults: 10,
    where: categoryFilter
);

Update and Delete

csharp
// Update the entry stored under ID "doc1"
string[] idsToUpdate = { "doc1" };
await collection.Update(
    ids: idsToUpdate,
    embeddings: new[] { newEmbedding },
    documents: new[] { newDocument }
);

// Delete the entry stored under ID "doc1"
string[] idsToDelete = { "doc1" };
await collection.Delete(ids: idsToDelete);

RAG Implementation

csharp
// Answers userQuery with retrieval-augmented generation: embeds the query,
// queries the collection for up to three results, and passes the first
// result set's documents to the chat model as system-message context.
// Relies on the `api` and `collection` instances defined outside this function.
async Task<string> RAGQuery(string userQuery)
{
    // Step 1: turn the user's question into an embedding
    float[] queryVector = await api.Embeddings.CreateEmbedding(
        userQuery,
        ChatModel.OpenAi.Embedding.Ada002
    );

    // Step 2: query the collection with that embedding
    float[][] queryVectors = { queryVector };
    QueryResponse matches = await collection.Query(
        queryEmbeddings: queryVectors,
        nResults: 3
    );

    // Step 3: join the documents returned for the (single) query, one per line
    string retrievedContext = string.Join("\n", matches.Documents[0]);
    string systemPrompt = $"Use this context to answer: {retrievedContext}";

    // Step 4: ask the chat model, supplying the context as a system message
    Conversation conversation = api.Chat.CreateConversation(ChatModel.OpenAi.Gpt4.O);
    conversation.AppendSystemMessage(systemPrompt);
    conversation.AppendUserInput(userQuery);

    return await conversation.GetResponse();
}

Best Practices

  • Use meaningful collection names
  • Include metadata for filtering
  • Batch operations when possible
  • Implement proper error handling
  • Monitor collection size and performance
  • Clean up old/unused documents