Skip to content

LlamaIndex Integration

The LlamaIndex integration for VouchedTruth provides built-in fact-checking at three key points in your RAG pipeline: node retrieval, response evaluation, and query execution.

Installation

Terminal window
pip install llama-index-vouchedtruth

Components

VouchedTruthResponseEvaluator

Evaluates a RAG response string for factual accuracy:

from vouchedtruth_llamaindex import VouchedTruthResponseEvaluator
evaluator = VouchedTruthResponseEvaluator(threshold=0.8)
result = await evaluator.evaluate("The Eiffel Tower is in Paris, France.")
print(result.passing) # True
print(result.score) # e.g. 0.96
print(result.feedback) # "Fact-checked and verified"

VouchedTruthNodePostprocessor

Filters retrieved nodes by trust score before they reach the LLM:

from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import VouchedTruthNodePostprocessor

# Drop any retrieved node whose trust score falls below 0.75.
trust_filter = VouchedTruthNodePostprocessor(
    threshold=0.75,
    mode="standard",
)

index = VectorStoreIndex(nodes)
query_engine = index.as_query_engine(node_postprocessors=[trust_filter])

response = query_engine.query("What is the capital of France?")
# Only high-trust nodes were used to generate response

VouchedTruthQueryEngine

A wrapper query engine that auto-verifies every response:

from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import VouchedTruthQueryEngine

index = VectorStoreIndex(nodes)
base_engine = index.as_query_engine()

# Wrap the plain query engine so every answer is fact-checked automatically.
verified_engine = VouchedTruthQueryEngine(
    inner_query_engine=base_engine,
    threshold=0.8,
    api_key="tv_live_...",
)

response = verified_engine.query("Tell me about the Eiffel Tower.")
print(response.metadata["trust_score"])  # 0.92

TrustApiClient

Low-level async/sync client for manual Trust API calls:

from vouchedtruth_llamaindex import TrustApiClient

# Direct, low-level access to the Trust API — no LlamaIndex pipeline needed.
trust_client = TrustApiClient(api_key="tv_live_...")

verification = trust_client.verify_sync(
    "The Eiffel Tower is in Paris.",
    mode="standard",
)
print(verification.trust_score)  # 0.97

Common Patterns

Verify Retrieved Documents

Filter search results by trust score before RAG:

from llama_index.core import VectorStoreIndex, Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from vouchedtruth_llamaindex import VouchedTruthNodePostprocessor

# Configure models, then build the index.
Settings.embed_model = OpenAIEmbedding()
Settings.llm = OpenAI(model="gpt-4o")
index = VectorStoreIndex(nodes)

# Fact-check nodes as they leave retrieval, before the LLM ever sees them.
trust_filter = VouchedTruthNodePostprocessor(threshold=0.75)
query_engine = index.as_query_engine(node_postprocessors=[trust_filter])

response = query_engine.query("What is quantum computing?")
# Only accurate nodes were used

Evaluate Final Response

Verify the LLM’s generated answer:

from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import VouchedTruthResponseEvaluator
index = VectorStoreIndex(nodes)
query_engine = index.as_query_engine()
evaluator = VouchedTruthResponseEvaluator(threshold=0.8)
response = query_engine.query("What is Paris known for?")
eval_result = await evaluator.evaluate(str(response))
if not eval_result.passing:
print(f"Low trust score: {eval_result.score}")
print(f"Feedback: {eval_result.feedback}")

Multi-Stage Verification

Verify both retrieved documents and final response:

from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import (
VouchedTruthNodePostprocessor,
VouchedTruthResponseEvaluator,
)
# Stage 1: Filter nodes by accuracy
postprocessor = VouchedTruthNodePostprocessor(threshold=0.75)
query_engine = index.as_query_engine(
node_postprocessors=[postprocessor]
)
# Stage 2: Evaluate final response
evaluator = VouchedTruthResponseEvaluator(threshold=0.85)
response = query_engine.query("What is the capital of France?")
eval_result = await evaluator.evaluate(str(response))
print(f"Response: {response}")
print(f"Final trust score: {eval_result.score}")

Streaming with Fact-Check

Stream response chunks while verifying:

from vouchedtruth_llamaindex import VouchedTruthResponseEvaluator
evaluator = VouchedTruthResponseEvaluator(threshold=0.8)
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Explain photosynthesis...")
# Accumulate streamed text
full_response = ""
async for chunk in response:
print(chunk, end="", flush=True)
full_response += str(chunk)
# Verify after streaming completes
eval_result = await evaluator.evaluate(full_response)
print(f"\nTrust score: {eval_result.score}")

Configuration

Verification Modes

# Choose how thoroughly each piece of text is verified.

# "spot_check": fast sampling — cheapest, least thorough.
postprocessor = VouchedTruthNodePostprocessor(mode="spot_check")

# "standard": balanced cost versus coverage.
postprocessor = VouchedTruthNodePostprocessor(mode="standard")

# "deep": exhaustive fact-checking — most thorough.
postprocessor = VouchedTruthNodePostprocessor(mode="deep")

Trust Threshold

Adjust for your use case:

# Pick a minimum trust score to suit the stakes of the application.

# Strict: require 90% confidence before a node/response passes.
postprocessor = VouchedTruthNodePostprocessor(threshold=0.9)

# Standard: 75% confidence.
postprocessor = VouchedTruthNodePostprocessor(threshold=0.75)

# Relaxed: 50% confidence.
postprocessor = VouchedTruthNodePostprocessor(threshold=0.5)

Custom Metadata

VouchedTruth adds trust_score to node metadata:

from llama_index.core.schema import NodeWithScore

# Retrieve via the public retriever API rather than the private
# `query_engine._retrieve` the original snippet relied on.
retriever = index.as_retriever()
nodes = retriever.retrieve(query)

for node in nodes:
    # (The original `for` body had lost its indentation.)
    print(f"Content: {node.get_content()}")
    print(f"Trust Score: {node.metadata.get('trust_score', 'N/A')}")

Error Handling

from vouchedtruth_llamaindex import (
VouchedTruthResponseEvaluator,
TrustApiError,
)
evaluator = VouchedTruthResponseEvaluator(threshold=0.8)
try:
result = await evaluator.evaluate("Some text")
except TrustApiError as e:
print(f"Verification failed: {e}")
# Fallback behavior

Complete Example

import asyncio

from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from vouchedtruth_llamaindex import (
    VouchedTruthNodePostprocessor,
    VouchedTruthResponseEvaluator,
)


async def main():
    """Run an end-to-end RAG query with fact-checking at both the
    retrieval stage (node filtering) and the evaluation stage (final answer).

    (The original snippet's function body had lost all indentation and was
    not valid Python; the structure is restored here.)
    """
    # Load documents
    documents = SimpleDirectoryReader("./data").load_data()

    # Configure LlamaIndex
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
    Settings.llm = OpenAI(model="gpt-4o")

    # Create vector store and index
    index = VectorStoreIndex.from_documents(documents)

    # Add fact-checking at retrieval stage
    postprocessor = VouchedTruthNodePostprocessor(
        api_key="tv_live_...",
        threshold=0.75,
        mode="standard",
    )
    query_engine = index.as_query_engine(
        node_postprocessors=[postprocessor],
        similarity_top_k=5,
    )

    # Add fact-checking at evaluation stage
    evaluator = VouchedTruthResponseEvaluator(
        api_key="tv_live_...",
        threshold=0.85,
    )

    # Run query
    response = query_engine.query(
        "What are the main causes of climate change?"
    )
    print(f"Response: {response}\n")

    # Evaluate final answer
    eval_result = await evaluator.evaluate(str(response))
    print(f"Trust Score: {eval_result.score}")
    print(f"Feedback: {eval_result.feedback}")
    if not eval_result.passing:
        print("Warning: Low confidence in response — consider refining query")


if __name__ == "__main__":
    asyncio.run(main())

Performance Tips

  1. Use spot_check mode for high-volume queries to reduce costs
  2. Cache trust scores in Redis to avoid re-checking identical text
  3. Filter at retrieval stage to reduce hallucinations before LLM processes them
  4. Adjust threshold based on use case (stricter for medical/financial, looser for exploratory)
  5. Batch verify multiple documents in parallel for faster processing

Next Steps