Settings

Settings is the single configuration object for a Medha instance. It is a Pydantic Settings (BaseSettings) model, so every field can be overridden via an environment variable named with the MEDHA_ prefix (for example, MEDHA_BACKEND_TYPE=qdrant) or via an entry in a .env file.
See the Configuration guide for field descriptions and usage examples.
Settings

Bases: BaseSettings
Central configuration for a Medha instance.
Source code in src/medha/config.py
class Settings(BaseSettings):
    """Central configuration for a Medha instance.

    Values are read from constructor arguments, from environment variables
    carrying the ``MEDHA_`` prefix, and from a UTF-8 ``.env`` file; unknown
    keys are ignored (see ``model_config``).

    Field order matters: the cross-field validators at the bottom of the
    class read previously validated fields through ``info.data``, so a field
    must be declared after every field its validator depends on.
    """

    model_config = SettingsConfigDict(
        env_prefix="MEDHA_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Backend selection ---
    backend_type: Literal[
        "qdrant", "memory", "pgvector", "elasticsearch",
        "vectorchord", "chroma", "weaviate", "redis", "azure-search", "lancedb"
    ] = Field(
        default="memory",
        description=(
            "Vector storage backend to use. "
            "'memory' uses pure Python in-process storage, zero external deps (default). "
            "'qdrant' requires qdrant-client (pip install medha-archai[qdrant]). "
            "'pgvector' requires asyncpg and pgvector (pip install medha-archai[pgvector]). "
            "'elasticsearch' requires elasticsearch[async]>=8.12 (pip install medha-archai[elasticsearch]). "
            "'vectorchord' requires asyncpg (pip install medha-archai[vectorchord]). "
            "'chroma' requires chromadb>=0.5 (pip install medha-archai[chroma]). "
            "'weaviate' requires weaviate-client>=4.6 (pip install medha-archai[weaviate]). "
            "'redis' requires redis[hiredis]>=4.6 (pip install medha-archai[redis]). "
            "'azure-search' requires azure-search-documents>=11.4 (pip install medha-archai[azure-search]). "
            "'lancedb' requires lancedb>=0.6 (pip install medha-archai[lancedb])."
        ),
    )

    # --- Qdrant connection ---
    qdrant_mode: Literal["memory", "docker", "cloud"] = Field(
        default="memory",
        description="Qdrant connection mode",
    )
    qdrant_host: str = Field(default="localhost", description="Qdrant host for docker/cloud mode")
    # NOTE(review): 6333 is Qdrant's default REST/HTTP port (gRPC defaults to
    # 6334) — confirm how the backend uses this value and whether the
    # description should say "REST" instead of "gRPC".
    qdrant_port: int = Field(default=6333, ge=1, le=65535, description="Qdrant gRPC port")
    qdrant_url: str | None = Field(default=None, description="Full Qdrant URL (overrides host:port)")
    qdrant_api_key: SecretStr | None = Field(default=None, description="Qdrant Cloud API key")

    # --- Query language ---
    query_language: Literal["sql", "cypher", "graphql", "generic"] = Field(
        default="generic",
        description="Target query language (informational, no behavioral change)",
    )

    # --- Search thresholds ---
    # score_threshold_exact must stay declared before score_threshold_semantic:
    # semantic_below_exact() reads the already-validated exact value.
    score_threshold_exact: float = Field(default=0.99, ge=0.0, le=1.0)
    score_threshold_semantic: float = Field(default=0.85, ge=0.0, le=1.0)
    score_threshold_template: float = Field(
        default=0.70,
        ge=0.0,
        le=1.0,
        description=(
            "Minimum score for a template match to be returned. "
            "The maximum achievable score is ~0.88 "
            "(keyword_bonus=1.0 × 0.5 + param_completeness=1.0 × 0.3 + priority_1 × 0.08). "
            "Values above 0.88 will never match any template."
        ),
    )
    # Unlike the other thresholds, the fuzzy score lives on a 0-100 scale.
    score_threshold_fuzzy: float = Field(default=85.0, ge=0.0, le=100.0, description="Fuzzy match threshold (0-100)")
    score_threshold_fuzzy_prefilter: float = Field(
        default=0.65,
        ge=0.0,
        le=1.0,
        description=(
            "Minimum cosine similarity for the vector pre-filter used in fuzzy search. "
            "Candidates below this threshold are excluded before Levenshtein scoring, "
            "reducing fuzzy search from O(n) to O(top_k). Lower values increase recall "
            "at the cost of more fuzzy comparisons."
        ),
    )
    fuzzy_prefilter_top_k: int = Field(
        default=50,
        ge=1,
        le=1000,
        description=(
            "Maximum number of vector-similar candidates to retrieve for fuzzy pre-filtering. "
            "Fuzzy scoring is applied only to these candidates instead of the full collection."
        ),
    )

    # --- L1 Cache ---
    l1_cache_max_size: int = Field(default=1000, ge=0, description="Max entries in L1 in-memory cache (0=disabled)")

    # --- Qdrant tuning ---
    hnsw_m: int = Field(default=16, ge=4, le=64, description="HNSW edges per node")
    hnsw_ef_construct: int = Field(default=100, ge=50, le=500, description="HNSW construction search depth")
    enable_quantization: bool = Field(default=True, description="Enable vector quantization")
    quantization_type: Literal["scalar", "binary"] = Field(
        default="scalar",
        description="Quantization method. Binary only for dim >= 512",
    )
    on_disk: bool = Field(default=False, description="Store vectors on disk (large datasets)")

    # --- PostgreSQL / pgvector ---
    pg_dsn: str | None = Field(
        default=None,
        description=(
            "Full asyncpg DSN for PostgreSQL connection "
            "(e.g. 'postgresql://user:pass@localhost:5432/dbname'). "
            "When set, overrides pg_host, pg_port, pg_database, pg_user, pg_password."
        ),
    )
    pg_host: str = Field(default="localhost", description="PostgreSQL host")
    pg_port: int = Field(default=5432, ge=1, le=65535, description="PostgreSQL port")
    pg_database: str = Field(default="medha", description="PostgreSQL database name")
    pg_user: str = Field(default="postgres", description="PostgreSQL user")
    pg_password: SecretStr = Field(default=SecretStr(""), description="PostgreSQL password")
    pg_schema: str = Field(default="public", description="PostgreSQL schema for Medha tables")
    pg_table_prefix: str = Field(
        default="medha",
        description="Prefix for Medha table names (e.g. 'medha' → table 'medha_my_cache')",
    )
    # pg_pool_min_size must stay declared before pg_pool_max_size:
    # pool_max_gte_min() reads the already-validated minimum.
    pg_pool_min_size: int = Field(default=2, ge=1, description="Min connections in asyncpg pool")
    pg_pool_max_size: int = Field(
        default=10,
        ge=1,
        # Field validators are skipped for default values unless
        # validate_default is set. Without it, raising only pg_pool_min_size
        # above 10 would bypass pool_max_gte_min() and yield min > max.
        validate_default=True,
        description="Max connections in asyncpg pool",
    )

    # --- VectorChord ---
    vc_lists: list[int] = Field(
        default_factory=lambda: [1000],
        description="Number of centroids per level for the vchordrq index.",
    )
    vc_residual_quantization: bool = Field(
        default=True,
        description="Enable residual quantization in the vchordrq index.",
    )

    # --- Weaviate ---
    weaviate_mode: Literal["local", "cloud"] = Field(
        default="local",
        description="Weaviate connection mode: 'local' (self-hosted) or 'cloud' (Weaviate Cloud).",
    )
    weaviate_host: str = Field(default="localhost", description="Weaviate host (local mode)")
    weaviate_http_port: int = Field(default=8080, ge=1, le=65535, description="Weaviate HTTP port (local mode)")
    weaviate_grpc_port: int = Field(default=50051, ge=1, le=65535, description="Weaviate gRPC port (local mode)")
    weaviate_http_secure: bool = Field(default=False, description="Use HTTPS for Weaviate HTTP connection")
    weaviate_grpc_secure: bool = Field(default=False, description="Use TLS for Weaviate gRPC connection")
    weaviate_cloud_url: str | None = Field(default=None, description="Weaviate Cloud cluster URL (cloud mode)")
    weaviate_api_key: SecretStr | None = Field(default=None, description="Weaviate API key")
    weaviate_collection_prefix: str = Field(
        default="Medha",
        description="Prefix for Weaviate collection names in PascalCase (e.g. 'Medha' → 'MedhaMyCache')",
    )

    # --- Redis Stack ---
    redis_mode: Literal["standalone", "sentinel"] = Field(
        default="standalone",
        description="Redis connection mode: 'standalone' or 'sentinel'.",
    )
    redis_url: str | None = Field(default=None, description="Full Redis URL (overrides host/port/db)")
    redis_host: str = Field(default="localhost", description="Redis host (standalone mode)")
    redis_port: int = Field(default=6379, ge=1, le=65535, description="Redis port")
    redis_db: int = Field(default=0, ge=0, description="Redis database index")
    redis_username: str | None = Field(default=None, description="Redis ACL username")
    redis_password: SecretStr | None = Field(default=None, description="Redis password")
    redis_ssl: bool = Field(default=False, description="Enable TLS for Redis connection")
    redis_ssl_certfile: str | None = Field(default=None, description="Path to client TLS certificate")
    redis_ssl_keyfile: str | None = Field(default=None, description="Path to client TLS key")
    redis_ssl_ca_certs: str | None = Field(default=None, description="Path to CA certificate bundle")
    redis_sentinel_hosts: list[str] = Field(
        default_factory=lambda: ["localhost:26379"],
        description="Sentinel host:port list (sentinel mode)",
    )
    redis_sentinel_master: str = Field(default="mymaster", description="Sentinel master name")
    redis_key_prefix: str = Field(default="medha", description="Prefix for all Redis keys and index names")
    redis_index_algorithm: Literal["HNSW", "FLAT"] = Field(
        default="HNSW",
        description="RediSearch vector index algorithm: 'HNSW' (approx) or 'FLAT' (exact brute-force)",
    )
    redis_hnsw_m: int = Field(default=16, ge=4, le=64, description="HNSW: edges per node")
    redis_hnsw_ef_construction: int = Field(default=200, ge=50, le=500, description="HNSW: build search depth")
    redis_hnsw_ef_runtime: int = Field(default=10, ge=10, le=500, description="HNSW: query search depth")
    redis_socket_timeout: float = Field(default=5.0, gt=0.0, description="Redis socket read timeout (s)")
    redis_socket_connect_timeout: float = Field(default=5.0, gt=0.0, description="Redis socket connect timeout (s)")

    # --- Chroma ---
    chroma_mode: Literal["ephemeral", "persistent", "http"] = Field(
        default="ephemeral",
        description="Chroma connection mode: 'ephemeral' (in-memory), 'persistent' (local disk), 'http' (remote server).",
    )
    chroma_host: str = Field(default="localhost", description="Chroma server host (http mode)")
    chroma_port: int = Field(default=8000, ge=1, le=65535, description="Chroma server port (http mode)")
    chroma_persist_path: str | None = Field(default=None, description="Directory for Chroma persistent storage")
    chroma_ssl: bool = Field(default=False, description="Use SSL for Chroma http connection")
    chroma_auth_token: SecretStr | None = Field(default=None, description="Bearer token for Chroma http authentication")

    # --- LanceDB ---
    lancedb_uri: str = Field(
        default="./lancedb_data",
        description=(
            "LanceDB storage URI. Use a local path (e.g. './lancedb_data') for embedded mode, "
            "or a cloud URI (s3://, gs://, az://) for cloud storage."
        ),
    )
    lancedb_table_prefix: str = Field(
        default="medha",
        description="Prefix for LanceDB table names (e.g. 'medha' → 'medha_my_cache').",
    )
    lancedb_metric: Literal["cosine", "l2", "dot"] = Field(
        default="cosine",
        description="Distance metric for LanceDB vector search: 'cosine' (default), 'l2', or 'dot'.",
    )

    # --- Azure AI Search ---
    # The endpoint defaults to an empty string rather than None, unlike the
    # other optional connection settings in this class.
    azure_search_endpoint: str = Field(
        default="",
        description="Azure AI Search service endpoint (e.g. https://my-service.search.windows.net).",
    )
    azure_search_api_key: SecretStr | None = Field(
        default=None,
        description="Azure AI Search API key. If None, uses DefaultAzureCredential (requires azure-identity).",
    )
    azure_search_api_version: str = Field(
        default="2024-05-01-preview",
        description="Azure AI Search REST API version.",
    )
    azure_search_index_name: str = Field(
        default="medha",
        description=(
            "Prefix for Azure Search index names. "
            "Final index = '{azure_search_index_name}-{collection_name}' "
            "(e.g. 'medha' + 'my_cache' → 'medha-my-cache'). "
            "Corresponds to the env var MEDHA_AZURE_SEARCH_INDEX_NAME."
        ),
    )
    azure_search_top_k_candidates: int = Field(
        default=50,
        ge=1,
        le=10000,
        description=(
            "Extra candidates retrieved by HNSW before score filtering. "
            "Added to limit in VectorizedQuery to improve recall without increasing returned results."
        ),
    )

    # --- Elasticsearch ---
    es_hosts: list[str] = Field(
        default_factory=lambda: ["http://localhost:9200"],
        description="Elasticsearch node URLs",
    )
    es_api_key: SecretStr | None = Field(default=None, description="Elasticsearch API key")
    es_username: str | None = Field(default=None, description="Elasticsearch basic-auth username")
    es_password: SecretStr | None = Field(default=None, description="Elasticsearch basic-auth password")
    es_index_prefix: str = Field(default="medha", description="Prefix for Elasticsearch index names")
    es_num_candidates: int = Field(
        default=100, ge=1, le=10000, description="num_candidates for kNN search"
    )
    es_timeout: float = Field(default=30.0, gt=0.0, description="Request timeout in seconds")

    # --- Quantization search ---
    quantization_rescore: bool = Field(
        default=True,
        description="Re-score top results using original vectors after quantized search",
    )
    quantization_oversampling: float | None = Field(
        default=None,
        ge=1.0,
        description=(
            "Oversampling factor for quantized search "
            "(e.g. 2.0 fetches 2x candidates before re-scoring). None = Qdrant default"
        ),
    )
    quantization_ignore: bool = Field(
        default=False,
        description="Bypass quantized vectors and search only original vectors",
    )
    quantization_always_ram: bool = Field(
        default=True,
        description=(
            "Keep quantized vectors in RAM. Combined with on_disk=True enables "
            "hybrid storage (original on disk, quantized in RAM)"
        ),
    )

    # --- Template loading ---
    template_file: str | None = Field(default=None, description="Path to JSON template file")

    # --- Persistent embedding cache ---
    embedding_cache_path: str | None = Field(
        default=None,
        description=(
            "Path to a JSON file used to persist the embedding cache across restarts. "
            "When set, embeddings are loaded from disk on start() and saved to disk on close(). "
            "Speeds up warm-start scenarios where the same questions recur across sessions."
        ),
    )

    # --- File operations ---
    allowed_file_dir: str | None = Field(
        default=None,
        description=(
            "If set, warm_from_file() and load_templates_from_file() will reject paths "
            "outside this directory. Useful when the caller is not trusted. "
            "Example: '/app/data'. Default: None (no restriction)."
        ),
    )
    max_file_size_mb: int = Field(
        default=100,
        ge=1,
        le=10_000,
        description=(
            "Maximum file size in MB for warm_from_file() and load_templates_from_file(). "
            "Files exceeding this limit are rejected before reading."
        ),
    )

    # --- Input validation ---
    max_question_length: int = Field(
        default=8192,
        ge=64,
        le=1_000_000,
        description=(
            "Maximum allowed length (characters) for a question string. "
            "Questions exceeding this limit are rejected with SearchStrategy.ERROR "
            "to prevent DoS via oversized inputs. Default: 8192 chars (~8KB)."
        ),
    )

    # --- Cache lifecycle ---
    # NOTE: the description strings in this section are written in Italian and
    # are left untouched because they are runtime values (they surface in the
    # generated schema). English equivalents are given in the comments.
    #
    # default_ttl_seconds: default TTL in seconds for new entries. None means
    # entries never expire (the current behaviour). Can be overridden per
    # entry through the ttl parameter of store().
    default_ttl_seconds: int | None = Field(
        default=None,
        ge=1,
        description=(
            "TTL di default in secondi per le nuove entry. "
            "None = entry immortali (comportamento attuale). "
            "Può essere sovrascritto entry per entry tramite il parametro ttl di store()."
        ),
    )
    # cleanup_interval_seconds: interval in seconds for automatic cleanup of
    # expired entries. None means no automatic cleanup. When set,
    # Medha.start() launches an asyncio task that calls expire() periodically.
    cleanup_interval_seconds: int | None = Field(
        default=None,
        ge=60,
        description=(
            "Intervallo in secondi per il cleanup automatico delle entry scadute. "
            "None = nessun cleanup automatico. "
            "Se impostato, Medha.start() avvia un task asyncio che chiama expire() periodicamente."
        ),
    )

    # --- Batch operations ---
    batch_size: int = Field(default=100, ge=1, le=10000, description="Batch size for bulk upsert")
    # batch_embed_concurrency (description in Italian): number of embedding
    # chunks processed concurrently in store_many().
    batch_embed_concurrency: int = Field(
        default=1,
        ge=1,
        le=10,
        description="Chunk di embedding processati concorrentemente in store_many().",
    )

    # --- Observability ---
    collect_stats: bool = Field(
        default=True,
        description="Enable collection of cache performance statistics.",
    )
    stats_max_latency_samples: int = Field(
        default=10_000,
        ge=100,
        le=1_000_000,
        description=(
            "Maximum number of per-request latency samples retained for percentile calculation. "
            "Older samples are evicted when the buffer is full (FIFO)."
        ),
    )

    # --- Timeouts ---
    embedding_timeout: float | None = Field(
        default=None,
        gt=0.0,
        description=(
            "Timeout in seconds for embedding calls (aembed and aembed_batch). "
            "None disables the timeout. Increase for large batches or slow networks."
        ),
    )

    # --- Validators ---
    @field_validator("pg_schema", "pg_table_prefix")
    @classmethod
    def validate_pg_identifier(cls, v: str) -> str:
        """Reject schema/prefix values that are not plain PostgreSQL identifiers.

        The value must match the module-level ``_SAFE_IDENTIFIER_RE`` pattern.
        Presumably this guards against SQL injection where these names are
        interpolated into statements — confirm against the pgvector backend.

        Raises:
            ValueError: If ``v`` does not match the pattern.
        """
        if not _SAFE_IDENTIFIER_RE.match(v):
            raise ValueError(
                f"Invalid PostgreSQL identifier '{v}': must match ^[a-zA-Z_][a-zA-Z0-9_]{{0,62}}$"
            )
        return v

    @field_validator("pg_pool_max_size")
    @classmethod
    def pool_max_gte_min(cls, v: int, info: ValidationInfo) -> int:
        """Ensure the pool's maximum size is not below its minimum size.

        Runs for explicit values and, because the field sets
        ``validate_default=True``, for the default as well.

        Raises:
            ValueError: If ``v`` is smaller than ``pg_pool_min_size``.
        """
        # pg_pool_min_size is absent from info.data when its own validation
        # failed; fall back to its declared default so this check still runs.
        min_size = info.data.get("pg_pool_min_size", 2)
        if v < min_size:
            raise ValueError(
                f"pg_pool_max_size ({v}) must be >= pg_pool_min_size ({min_size})"
            )
        return v

    @field_validator("score_threshold_exact")
    @classmethod
    def exact_must_be_high(cls, v: float) -> float:
        """Require an explicitly supplied exact-match threshold of at least 0.90.

        Raises:
            ValueError: If ``v`` is below 0.90.
        """
        if v < 0.90:
            raise ValueError("Exact threshold should be >= 0.90 to avoid false positives")
        return v

    @field_validator("score_threshold_semantic")
    @classmethod
    def semantic_below_exact(cls, v: float, info: ValidationInfo) -> float:
        """Require the semantic threshold to be strictly below the exact one.

        Raises:
            ValueError: If ``v`` is greater than or equal to
                ``score_threshold_exact``.
        """
        # Falls back to the declared default (0.99) when the exact threshold
        # itself failed validation and is therefore missing from info.data.
        exact = info.data.get("score_threshold_exact", 0.99)
        if v >= exact:
            raise ValueError("Semantic threshold must be lower than exact threshold")
        return v