LlamaIndex Integration
The LlamaIndex integration for VouchedTruth provides built-in fact-checking at three key points in your RAG pipeline: node retrieval, response evaluation, and query execution.
Installation
pip install llama-index-vouchedtruth

Components
VouchedTruthResponseEvaluator
Evaluates a RAG response string for factual accuracy:
from vouchedtruth_llamaindex import VouchedTruthResponseEvaluator
evaluator = VouchedTruthResponseEvaluator(threshold=0.8)
result = await evaluator.evaluate("The Eiffel Tower is in Paris, France.")

print(result.passing)   # True
print(result.score)     # e.g. 0.96
print(result.feedback)  # "Fact-checked and verified"

VouchedTruthNodePostprocessor
Filters retrieved nodes by trust score before they reach the LLM:
from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import VouchedTruthNodePostprocessor

postprocessor = VouchedTruthNodePostprocessor(
    threshold=0.75,
    mode="standard",
)

index = VectorStoreIndex(nodes)
query_engine = index.as_query_engine(
    node_postprocessors=[postprocessor],
)

response = query_engine.query("What is the capital of France?")
# Only high-trust nodes were used to generate response

VouchedTruthQueryEngine
A wrapper query engine that auto-verifies every response:
from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import VouchedTruthQueryEngine

index = VectorStoreIndex(nodes)
base_engine = index.as_query_engine()

engine = VouchedTruthQueryEngine(
    inner_query_engine=base_engine,
    threshold=0.8,
    api_key="tv_live_...",
)

response = engine.query("Tell me about the Eiffel Tower.")
print(response.metadata["trust_score"])  # 0.92

TrustApiClient
Low-level async/sync client for manual Trust API calls:
from vouchedtruth_llamaindex import TrustApiClient
client = TrustApiClient(api_key="tv_live_...")
result = client.verify_sync(
    "The Eiffel Tower is in Paris.",
    mode="standard",
)

print(result.trust_score)  # 0.97

Common Patterns
Verify Retrieved Documents
Filter search results by trust score before RAG:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from vouchedtruth_llamaindex import VouchedTruthNodePostprocessor

# Set up index
Settings.embed_model = OpenAIEmbedding()
Settings.llm = OpenAI(model="gpt-4o")

index = VectorStoreIndex(nodes)

# Add fact-checking to retrieval
postprocessor = VouchedTruthNodePostprocessor(threshold=0.75)
query_engine = index.as_query_engine(
    node_postprocessors=[postprocessor],
)

response = query_engine.query("What is quantum computing?")
# Only accurate nodes were used

Evaluate Final Response
Verify the LLM’s generated answer:
from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import VouchedTruthResponseEvaluator

index = VectorStoreIndex(nodes)
query_engine = index.as_query_engine()

evaluator = VouchedTruthResponseEvaluator(threshold=0.8)

response = query_engine.query("What is Paris known for?")
eval_result = await evaluator.evaluate(str(response))

if not eval_result.passing:
    print(f"Low trust score: {eval_result.score}")
    print(f"Feedback: {eval_result.feedback}")

Multi-Stage Verification
Verify both retrieved documents and final response:
from llama_index.core import VectorStoreIndex
from vouchedtruth_llamaindex import (
    VouchedTruthNodePostprocessor,
    VouchedTruthResponseEvaluator,
)

# Stage 1: Filter nodes by accuracy
postprocessor = VouchedTruthNodePostprocessor(threshold=0.75)
query_engine = index.as_query_engine(
    node_postprocessors=[postprocessor],
)

# Stage 2: Evaluate final response
evaluator = VouchedTruthResponseEvaluator(threshold=0.85)

response = query_engine.query("What is the capital of France?")
eval_result = await evaluator.evaluate(str(response))

print(f"Response: {response}")
print(f"Final trust score: {eval_result.score}")

Streaming with Fact-Check
Stream response chunks while verifying:
from vouchedtruth_llamaindex import VouchedTruthResponseEvaluator
evaluator = VouchedTruthResponseEvaluator(threshold=0.8)
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Explain photosynthesis...")

# Accumulate streamed text (LlamaIndex streaming responses are iterated
# synchronously via response_gen)
full_response = ""
for chunk in response.response_gen:
    print(chunk, end="", flush=True)
    full_response += str(chunk)

# Verify after streaming completes
eval_result = await evaluator.evaluate(full_response)
print(f"\nTrust score: {eval_result.score}")

Configuration
Verification Modes
# Fast sampling
postprocessor = VouchedTruthNodePostprocessor(mode="spot_check")

# Balanced verification
postprocessor = VouchedTruthNodePostprocessor(mode="standard")

# Deep fact-checking
postprocessor = VouchedTruthNodePostprocessor(mode="deep")

Trust Threshold
Adjust for your use case:
# Strict mode (90% confidence required)
postprocessor = VouchedTruthNodePostprocessor(threshold=0.9)

# Standard mode (75% confidence)
postprocessor = VouchedTruthNodePostprocessor(threshold=0.75)

# Relaxed mode (50% confidence)
postprocessor = VouchedTruthNodePostprocessor(threshold=0.5)

Custom Metadata
VouchedTruth adds trust_score to node metadata:
from llama_index.core.schema import NodeWithScore
nodes = query_engine.retrieve(query)  # use the public retrieve() API
for node in nodes:
    print(f"Content: {node.get_content()}")
    print(f"Trust Score: {node.metadata.get('trust_score', 'N/A')}")

Error Handling
from vouchedtruth_llamaindex import (
    VouchedTruthResponseEvaluator,
    TrustApiError,
)

evaluator = VouchedTruthResponseEvaluator(threshold=0.8)

try:
    result = await evaluator.evaluate("Some text")
except TrustApiError as e:
    print(f"Verification failed: {e}")
    # Fallback behavior

Complete Example
import asyncio

from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from vouchedtruth_llamaindex import (
    VouchedTruthNodePostprocessor,
    VouchedTruthResponseEvaluator,
)


async def main():
    # Load documents
    documents = SimpleDirectoryReader("./data").load_data()

    # Configure LlamaIndex
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
    Settings.llm = OpenAI(model="gpt-4o")

    # Create vector store and index
    index = VectorStoreIndex.from_documents(documents)

    # Add fact-checking at retrieval stage
    postprocessor = VouchedTruthNodePostprocessor(
        api_key="tv_live_...",
        threshold=0.75,
        mode="standard",
    )

    query_engine = index.as_query_engine(
        node_postprocessors=[postprocessor],
        similarity_top_k=5,
    )

    # Add fact-checking at evaluation stage
    evaluator = VouchedTruthResponseEvaluator(
        api_key="tv_live_...",
        threshold=0.85,
    )

    # Run query
    response = query_engine.query(
        "What are the main causes of climate change?"
    )

    print(f"Response: {response}\n")

    # Evaluate final answer
    eval_result = await evaluator.evaluate(str(response))
    print(f"Trust Score: {eval_result.score}")
    print(f"Feedback: {eval_result.feedback}")

    if not eval_result.passing:
        print("Warning: Low confidence in response — consider refining query")


if __name__ == "__main__":
    asyncio.run(main())

Performance Tips
- Use spot_check mode for high-volume queries to reduce costs
- Cache trust scores in Redis to avoid re-checking identical text
- Filter at retrieval stage to reduce hallucinations before LLM processes them
- Adjust threshold based on use case (stricter for medical/financial, looser for exploratory)
- Batch verify multiple documents in parallel for faster processing