# 生產級 RAG 管線
#
# 分類：AI/ML
# 7 個節點 · 6 條連接
# 來源檔名：ex-ai-ml-rag-pipeline.osop.yaml
# Production RAG Pipeline
# Embed documents, retrieve context, generate grounded answers, validate faithfulness
---
# Spec header: format version, machine id, and display name of this pipeline.
osop_version: "2.0"
id: ai-ml-rag-pipeline
name: "生產級 RAG 管線"

nodes:
  # Ingestion-path entry point: clients POST a document batch here.
  - id: ingest_documents
    type: api
    purpose: Accept document batch via upload API and register in document store
    runtime:
      endpoint: /api/v1/ingest
      method: POST
      url: https://rag-service.internal
    # document_ids feeds chunk_and_embed (see its inputs below).
    outputs: [document_ids, total_pages]
    # Longest API-node timeout in this file — batch uploads can be large.
    timeout_sec: 120

  # Chunking + embedding stage; consumes the batch registered by ingest_documents.
  - id: chunk_and_embed
    type: agent
    purpose: Split documents into semantic chunks and generate vector embeddings
    runtime:
      provider: openai
      model: text-embedding-3-large
      config:
        chunk_size: 512
        chunk_overlap: 64  # 64/512 = 12.5% overlap between adjacent chunks
        # NOTE(review): text-embedding-3-large natively outputs 3072 dims;
        # 1536 here presumably relies on the API's `dimensions` parameter —
        # confirm the runtime forwards it, or vectors will not match the store.
        embedding_dimensions: 1536
    inputs: [document_ids]
    outputs: [embedding_count, vector_store_id]
    # Longest timeout of any node: embedding a whole batch is the slow step.
    timeout_sec: 300
    retry_policy:
      max_retries: 3
      backoff_sec: 5
    explain: |
      Uses recursive character splitting with sentence boundary awareness.
      Each chunk is embedded independently and stored with source metadata.

  # Query-path entry point: user questions arrive here, separate from ingestion.
  - id: receive_query
    type: api
    purpose: Accept user query through the search endpoint
    runtime:
      endpoint: /api/v1/query
      method: POST
      url: https://rag-service.internal
    # NOTE(review): no timeout_sec here, unlike the other api nodes — confirm
    # whether the osop runtime applies a default or waits indefinitely.
    outputs: [user_query, session_id]

  # Hybrid retrieval against the pgvector store populated by chunk_and_embed.
  - id: retrieve_context
    type: db
    purpose: Perform hybrid search combining vector similarity and BM25 keyword matching
    runtime:
      engine: pgvector
      # NOTE(review): hardcoded connection string; consider quoting it and
      # sourcing host/credentials from the environment rather than the spec.
      connection: postgresql://rag:5432/vectors
    # vector_store_id is produced on the ingestion path (chunk_and_embed), yet
    # no edge links that path to this node — presumably resolved via shared
    # state by the osop runtime; confirm cross-path inputs work without edges.
    inputs: [user_query, vector_store_id]
    outputs: [retrieved_chunks, relevance_scores]
    # Tight budget, and no retry_policy unlike the agent nodes in this file.
    timeout_sec: 10
    explain: |
      Reciprocal rank fusion combines dense and sparse retrieval results.
      Returns top-8 chunks with relevance scores above 0.7 threshold.

  # Answer synthesis: grounded generation over the retrieved chunks.
  - id: generate_answer
    type: agent
    purpose: Generate grounded answer from retrieved context using Claude
    runtime:
      provider: anthropic
      model: claude-sonnet-4-20250514
      config:
        max_tokens: 2048
        # Low temperature keeps output close to the source material.
        temperature: 0.1
        system_prompt: "Answer based only on the provided context. Cite sources."
    inputs: [user_query, retrieved_chunks]
    # citations flow to deliver_response alongside the answer text.
    outputs: [answer, citations]
    timeout_sec: 30
    retry_policy:
      max_retries: 2
      backoff_sec: 2

  # LLM-as-judge gate: scores the generated answer against its source chunks.
  # Its faithfulness_score drives the conditional/fallback edges below.
  - id: validate_faithfulness
    type: agent
    purpose: Score answer faithfulness and hallucination rate against source chunks
    runtime:
      provider: anthropic
      # Smaller/cheaper model than the generator — evaluation, not synthesis.
      model: claude-haiku-4-20250414
      config:
        evaluation_metrics: [faithfulness, relevance, completeness]
    inputs: [answer, retrieved_chunks, user_query]
    # NOTE(review): flagged_claims is emitted but deliver_response does not
    # list it as an input — confirm whether it is intentionally dropped.
    outputs: [faithfulness_score, relevance_score, flagged_claims]
    timeout_sec: 15

  # Terminal node: ships the validated answer plus quality metadata to the client.
  - id: deliver_response
    type: api
    purpose: Return validated answer with citations and confidence metadata to client
    runtime:
      endpoint: /api/v1/response
      method: POST
      url: https://rag-service.internal
    inputs: [answer, citations, faithfulness_score, relevance_score]
    outputs: [response_id]

# Two disjoint paths: ingestion (2 nodes) and query (5 nodes). They meet only
# implicitly through vector_store_id — no edge crosses between them.
edges:
  # Ingestion path
  - from: ingest_documents
    to: chunk_and_embed
    mode: sequential

  # Query path
  - from: receive_query
    to: retrieve_context
    mode: sequential

  - from: retrieve_context
    to: generate_answer
    mode: sequential

  - from: generate_answer
    to: validate_faithfulness
    mode: sequential

  # Quality gate: only answers scoring >= 0.8 reach the client.
  - from: validate_faithfulness
    to: deliver_response
    mode: conditional
    condition: "faithfulness_score >= 0.8"

  # NOTE(review): this fallback forms a validate -> generate loop with no
  # visible iteration cap; confirm the osop runtime bounds fallback cycles,
  # otherwise a persistently low score could regenerate indefinitely.
  - from: validate_faithfulness
    to: generate_answer
    mode: fallback
    label: "Faithfulness below threshold, regenerate with stricter constraints"