Skip to content

Settings

Settings is the single configuration object for a Medha instance. It is a Pydantic Settings model, so all fields can be overridden via environment variables with the MEDHA_ prefix.

See the Configuration guide for field descriptions and usage examples.


Settings

Bases: BaseSettings

Central configuration for a Medha instance.

Source code in src/medha/config.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
class Settings(BaseSettings):
    """Central configuration for a Medha instance.

    Every field can be overridden through an environment variable carrying the
    ``MEDHA_`` prefix (e.g. ``MEDHA_BACKEND_TYPE=qdrant``) or through a local
    ``.env`` file; unrecognized environment keys are ignored
    (``extra="ignore"`` below).
    """

    # Pydantic-settings source configuration: env vars win, then .env file.
    model_config = SettingsConfigDict(
        env_prefix="MEDHA_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Backend selection ---
    # Which vector store implementation to use; each non-'memory' choice
    # requires the matching optional dependency (see description below).
    backend_type: Literal[
        "qdrant", "memory", "pgvector", "elasticsearch",
        "vectorchord", "chroma", "weaviate", "redis", "azure-search", "lancedb"
    ] = Field(
        default="memory",
        description=(
            "Vector storage backend to use. "
            "'memory' uses pure Python in-process storage, zero external deps (default). "
            "'qdrant' requires qdrant-client (pip install medha-archai[qdrant]). "
            "'pgvector' requires asyncpg and pgvector (pip install medha-archai[pgvector]). "
            "'elasticsearch' requires elasticsearch[async]>=8.12 (pip install medha-archai[elasticsearch]). "
            "'vectorchord' requires asyncpg (pip install medha-archai[vectorchord]). "
            "'chroma' requires chromadb>=0.5 (pip install medha-archai[chroma]). "
            "'weaviate' requires weaviate-client>=4.6 (pip install medha-archai[weaviate]). "
            "'redis' requires redis[hiredis]>=4.6 (pip install medha-archai[redis]). "
            "'azure-search' requires azure-search-documents>=11.4 (pip install medha-archai[azure-search]). "
            "'lancedb' requires lancedb>=0.6 (pip install medha-archai[lancedb])."
        ),
    )

    # --- Backend ---
    # Qdrant connection settings (used only when backend_type == "qdrant").
    qdrant_mode: Literal["memory", "docker", "cloud"] = Field(
        default="memory",
        description="Qdrant connection mode",
    )
    qdrant_host: str = Field(default="localhost", description="Qdrant host for docker/cloud mode")
    qdrant_port: int = Field(default=6333, ge=1, le=65535, description="Qdrant gRPC port")
    # qdrant_url, when set, takes precedence over qdrant_host/qdrant_port.
    qdrant_url: str | None = Field(default=None, description="Full Qdrant URL (overrides host:port)")
    qdrant_api_key: SecretStr | None = Field(default=None, description="Qdrant Cloud API key")

    # --- Query language ---
    query_language: Literal["sql", "cypher", "graphql", "generic"] = Field(
        default="generic",
        description="Target query language (informational, no behavioral change)",
    )

    # --- Search thresholds ---
    # Minimum similarity scores for each search tier (exact > semantic > template).
    # exact/semantic/template use a 0.0-1.0 cosine-similarity scale.
    score_threshold_exact: float = Field(default=0.99, ge=0.0, le=1.0)
    score_threshold_semantic: float = Field(default=0.85, ge=0.0, le=1.0)
    score_threshold_template: float = Field(
        default=0.70,
        ge=0.0,
        le=1.0,
        description=(
            "Minimum score for a template match to be returned. "
            "The maximum achievable score is ~0.88 "
            "(keyword_bonus=1.0 × 0.5 + param_completeness=1.0 × 0.3 + priority_1 × 0.08). "
            "Values above 0.88 will never match any template."
        ),
    )
    # NOTE: unlike the thresholds above, the fuzzy threshold is on a 0-100 scale.
    score_threshold_fuzzy: float = Field(default=85.0, ge=0.0, le=100.0, description="Fuzzy match threshold (0-100)")
    score_threshold_fuzzy_prefilter: float = Field(
        default=0.65,
        ge=0.0,
        le=1.0,
        description=(
            "Minimum cosine similarity for the vector pre-filter used in fuzzy search. "
            "Candidates below this threshold are excluded before Levenshtein scoring, "
            "reducing fuzzy search from O(n) to O(top_k). Lower values increase recall "
            "at the cost of more fuzzy comparisons."
        ),
    )
    fuzzy_prefilter_top_k: int = Field(
        default=50,
        ge=1,
        le=1000,
        description=(
            "Maximum number of vector-similar candidates to retrieve for fuzzy pre-filtering. "
            "Fuzzy scoring is applied only to these candidates instead of the full collection."
        ),
    )

    # --- L1 Cache ---
    l1_cache_max_size: int = Field(default=1000, ge=0, description="Max entries in L1 in-memory cache (0=disabled)")

    # --- Qdrant tuning ---
    # HNSW index and quantization parameters (Qdrant backend only).
    hnsw_m: int = Field(default=16, ge=4, le=64, description="HNSW edges per node")
    hnsw_ef_construct: int = Field(default=100, ge=50, le=500, description="HNSW construction search depth")
    enable_quantization: bool = Field(default=True, description="Enable vector quantization")
    quantization_type: Literal["scalar", "binary"] = Field(
        default="scalar",
        description="Quantization method. Binary only for dim >= 512",
    )
    on_disk: bool = Field(default=False, description="Store vectors on disk (large datasets)")

    # --- PostgreSQL / pgvector ---
    # pg_dsn, when set, takes precedence over the individual pg_* fields below.
    pg_dsn: str | None = Field(
        default=None,
        description=(
            "Full asyncpg DSN for PostgreSQL connection "
            "(e.g. 'postgresql://user:pass@localhost:5432/dbname'). "
            "When set, overrides pg_host, pg_port, pg_database, pg_user, pg_password."
        ),
    )
    pg_host: str = Field(default="localhost", description="PostgreSQL host")
    pg_port: int = Field(default=5432, ge=1, le=65535, description="PostgreSQL port")
    pg_database: str = Field(default="medha", description="PostgreSQL database name")
    pg_user: str = Field(default="postgres", description="PostgreSQL user")
    pg_password: SecretStr = Field(default=SecretStr(""), description="PostgreSQL password")
    # pg_schema and pg_table_prefix are interpolated into SQL identifiers,
    # so they are validated by validate_pg_identifier() below.
    pg_schema: str = Field(default="public", description="PostgreSQL schema for Medha tables")
    pg_table_prefix: str = Field(
        default="medha",
        description="Prefix for Medha table names (e.g. 'medha' → table 'medha_my_cache')",
    )
    # Pool bounds; max >= min is enforced by pool_max_gte_min() below.
    pg_pool_min_size: int = Field(default=2, ge=1, description="Min connections in asyncpg pool")
    pg_pool_max_size: int = Field(default=10, ge=1, description="Max connections in asyncpg pool")

    # --- VectorChord ---
    vc_lists: list[int] = Field(
        default_factory=lambda: [1000],
        description="Number of centroids per level for the vchordrq index.",
    )
    vc_residual_quantization: bool = Field(
        default=True,
        description="Enable residual quantization in the vchordrq index.",
    )

    # --- Weaviate ---
    weaviate_mode: Literal["local", "cloud"] = Field(
        default="local",
        description="Weaviate connection mode: 'local' (self-hosted) or 'cloud' (Weaviate Cloud).",
    )
    weaviate_host: str = Field(default="localhost", description="Weaviate host (local mode)")
    weaviate_http_port: int = Field(default=8080, ge=1, le=65535, description="Weaviate HTTP port (local mode)")
    weaviate_grpc_port: int = Field(default=50051, ge=1, le=65535, description="Weaviate gRPC port (local mode)")
    weaviate_http_secure: bool = Field(default=False, description="Use HTTPS for Weaviate HTTP connection")
    weaviate_grpc_secure: bool = Field(default=False, description="Use TLS for Weaviate gRPC connection")
    weaviate_cloud_url: str | None = Field(default=None, description="Weaviate Cloud cluster URL (cloud mode)")
    weaviate_api_key: SecretStr | None = Field(default=None, description="Weaviate API key")
    weaviate_collection_prefix: str = Field(
        default="Medha",
        description="Prefix for Weaviate collection names in PascalCase (e.g. 'Medha' → 'MedhaMyCache')",
    )

    # --- Redis Stack ---
    redis_mode: Literal["standalone", "sentinel"] = Field(
        default="standalone",
        description="Redis connection mode: 'standalone' or 'sentinel'.",
    )
    # redis_url, when set, takes precedence over host/port/db below.
    redis_url: str | None = Field(default=None, description="Full Redis URL (overrides host/port/db)")
    redis_host: str = Field(default="localhost", description="Redis host (standalone mode)")
    redis_port: int = Field(default=6379, ge=1, le=65535, description="Redis port")
    redis_db: int = Field(default=0, ge=0, description="Redis database index")
    redis_username: str | None = Field(default=None, description="Redis ACL username")
    redis_password: SecretStr | None = Field(default=None, description="Redis password")
    redis_ssl: bool = Field(default=False, description="Enable TLS for Redis connection")
    redis_ssl_certfile: str | None = Field(default=None, description="Path to client TLS certificate")
    redis_ssl_keyfile: str | None = Field(default=None, description="Path to client TLS key")
    redis_ssl_ca_certs: str | None = Field(default=None, description="Path to CA certificate bundle")
    redis_sentinel_hosts: list[str] = Field(
        default_factory=lambda: ["localhost:26379"],
        description="Sentinel host:port list (sentinel mode)",
    )
    redis_sentinel_master: str = Field(default="mymaster", description="Sentinel master name")
    redis_key_prefix: str = Field(default="medha", description="Prefix for all Redis keys and index names")
    redis_index_algorithm: Literal["HNSW", "FLAT"] = Field(
        default="HNSW",
        description="RediSearch vector index algorithm: 'HNSW' (approx) or 'FLAT' (exact brute-force)",
    )
    redis_hnsw_m: int = Field(default=16, ge=4, le=64, description="HNSW: edges per node")
    redis_hnsw_ef_construction: int = Field(default=200, ge=50, le=500, description="HNSW: build search depth")
    redis_hnsw_ef_runtime: int = Field(default=10, ge=10, le=500, description="HNSW: query search depth")
    redis_socket_timeout: float = Field(default=5.0, gt=0.0, description="Redis socket read timeout (s)")
    redis_socket_connect_timeout: float = Field(default=5.0, gt=0.0, description="Redis socket connect timeout (s)")

    # --- Chroma ---
    chroma_mode: Literal["ephemeral", "persistent", "http"] = Field(
        default="ephemeral",
        description="Chroma connection mode: 'ephemeral' (in-memory), 'persistent' (local disk), 'http' (remote server).",
    )
    chroma_host: str = Field(default="localhost", description="Chroma server host (http mode)")
    chroma_port: int = Field(default=8000, ge=1, le=65535, description="Chroma server port (http mode)")
    chroma_persist_path: str | None = Field(default=None, description="Directory for Chroma persistent storage")
    chroma_ssl: bool = Field(default=False, description="Use SSL for Chroma http connection")
    chroma_auth_token: SecretStr | None = Field(default=None, description="Bearer token for Chroma http authentication")

    # --- LanceDB ---
    lancedb_uri: str = Field(
        default="./lancedb_data",
        description=(
            "LanceDB storage URI. Use a local path (e.g. './lancedb_data') for embedded mode, "
            "or a cloud URI (s3://, gs://, az://) for cloud storage."
        ),
    )
    lancedb_table_prefix: str = Field(
        default="medha",
        description="Prefix for LanceDB table names (e.g. 'medha' → 'medha_my_cache').",
    )
    lancedb_metric: Literal["cosine", "l2", "dot"] = Field(
        default="cosine",
        description="Distance metric for LanceDB vector search: 'cosine' (default), 'l2', or 'dot'.",
    )

    # --- Azure AI Search ---
    azure_search_endpoint: str = Field(
        default="",
        description="Azure AI Search service endpoint (e.g. https://my-service.search.windows.net).",
    )
    azure_search_api_key: SecretStr | None = Field(
        default=None,
        description="Azure AI Search API key. If None, uses DefaultAzureCredential (requires azure-identity).",
    )
    azure_search_api_version: str = Field(
        default="2024-05-01-preview",
        description="Azure AI Search REST API version.",
    )
    azure_search_index_name: str = Field(
        default="medha",
        description=(
            "Prefix for Azure Search index names. "
            "Final index = '{azure_search_index_name}-{collection_name}' "
            "(e.g. 'medha' + 'my_cache' → 'medha-my-cache'). "
            "Corresponds to the env var MEDHA_AZURE_SEARCH_INDEX_NAME."
        ),
    )
    azure_search_top_k_candidates: int = Field(
        default=50,
        ge=1,
        le=10000,
        description=(
            "Extra candidates retrieved by HNSW before score filtering. "
            "Added to limit in VectorizedQuery to improve recall without increasing returned results."
        ),
    )

    # --- Elasticsearch ---
    es_hosts: list[str] = Field(
        default_factory=lambda: ["http://localhost:9200"],
        description="Elasticsearch node URLs",
    )
    es_api_key: SecretStr | None = Field(default=None, description="Elasticsearch API key")
    es_username: str | None = Field(default=None, description="Elasticsearch basic-auth username")
    es_password: SecretStr | None = Field(default=None, description="Elasticsearch basic-auth password")
    es_index_prefix: str = Field(default="medha", description="Prefix for Elasticsearch index names")
    es_num_candidates: int = Field(
        default=100, ge=1, le=10000, description="num_candidates for kNN search"
    )
    es_timeout: float = Field(default=30.0, gt=0.0, description="Request timeout in seconds")

    # --- Quantization search ---
    # Query-time behavior when quantization is enabled (see enable_quantization).
    quantization_rescore: bool = Field(
        default=True,
        description="Re-score top results using original vectors after quantized search",
    )
    quantization_oversampling: float | None = Field(
        default=None,
        ge=1.0,
        description=(
            "Oversampling factor for quantized search "
            "(e.g. 2.0 fetches 2x candidates before re-scoring). None = Qdrant default"
        ),
    )
    quantization_ignore: bool = Field(
        default=False,
        description="Bypass quantized vectors and search only original vectors",
    )
    quantization_always_ram: bool = Field(
        default=True,
        description=(
            "Keep quantized vectors in RAM. Combined with on_disk=True enables "
            "hybrid storage (original on disk, quantized in RAM)"
        ),
    )

    # --- Template loading ---
    template_file: str | None = Field(default=None, description="Path to JSON template file")

    # --- Persistent embedding cache ---
    embedding_cache_path: str | None = Field(
        default=None,
        description=(
            "Path to a JSON file used to persist the embedding cache across restarts. "
            "When set, embeddings are loaded from disk on start() and saved to disk on close(). "
            "Speeds up warm-start scenarios where the same questions recur across sessions."
        ),
    )

    # --- File operations ---
    allowed_file_dir: str | None = Field(
        default=None,
        description=(
            "If set, warm_from_file() and load_templates_from_file() will reject paths "
            "outside this directory. Useful when the caller is not trusted. "
            "Example: '/app/data'. Default: None (no restriction)."
        ),
    )
    max_file_size_mb: int = Field(
        default=100,
        ge=1,
        le=10_000,
        description=(
            "Maximum file size in MB for warm_from_file() and load_templates_from_file(). "
            "Files exceeding this limit are rejected before reading."
        ),
    )

    # --- Input validation ---
    max_question_length: int = Field(
        default=8192,
        ge=64,
        le=1_000_000,
        description=(
            "Maximum allowed length (characters) for a question string. "
            "Questions exceeding this limit are rejected with SearchStrategy.ERROR "
            "to prevent DoS via oversized inputs. Default: 8192 chars (~8KB)."
        ),
    )

    # --- Cache lifecycle ---
    # NOTE: the two descriptions below are in Italian. English translation:
    # "Default TTL in seconds for new entries. None = immortal entries
    # (current behavior). Can be overridden per entry via the ttl parameter
    # of store()."
    default_ttl_seconds: int | None = Field(
        default=None,
        ge=1,
        description=(
            "TTL di default in secondi per le nuove entry. "
            "None = entry immortali (comportamento attuale). "
            "Può essere sovrascritto entry per entry tramite il parametro ttl di store()."
        ),
    )
    # English translation: "Interval in seconds for the automatic cleanup of
    # expired entries. None = no automatic cleanup. If set, Medha.start()
    # launches an asyncio task that calls expire() periodically."
    cleanup_interval_seconds: int | None = Field(
        default=None,
        ge=60,
        description=(
            "Intervallo in secondi per il cleanup automatico delle entry scadute. "
            "None = nessun cleanup automatico. "
            "Se impostato, Medha.start() avvia un task asyncio che chiama expire() periodicamente."
        ),
    )

    # --- Batch operations ---
    batch_size: int = Field(default=100, ge=1, le=10000, description="Batch size for bulk upsert")
    # English translation of the Italian description below:
    # "Embedding chunks processed concurrently in store_many()."
    batch_embed_concurrency: int = Field(default=1, ge=1, le=10, description="Chunk di embedding processati concorrentemente in store_many().")

    # --- Observability ---
    collect_stats: bool = Field(
        default=True,
        description="Enable collection of cache performance statistics.",
    )
    stats_max_latency_samples: int = Field(
        default=10_000,
        ge=100,
        le=1_000_000,
        description=(
            "Maximum number of per-request latency samples retained for percentile calculation. "
            "Older samples are evicted when the buffer is full (FIFO)."
        ),
    )

    # --- Timeouts ---
    embedding_timeout: float | None = Field(
        default=None,
        gt=0.0,
        description=(
            "Timeout in seconds for embedding calls (aembed and aembed_batch). "
            "None disables the timeout. Increase for large batches or slow networks."
        ),
    )

    # --- Validators ---
    @field_validator("pg_schema", "pg_table_prefix")
    @classmethod
    def validate_pg_identifier(cls, v: str) -> str:
        """Reject values that are not safe PostgreSQL identifiers.

        Accepts only strings matching ^[a-zA-Z_][a-zA-Z0-9_]{0,62}$ (per the
        error message / _SAFE_IDENTIFIER_RE), since these values are used to
        build table and schema names.
        """
        if not _SAFE_IDENTIFIER_RE.match(v):
            raise ValueError(
                f"Invalid PostgreSQL identifier '{v}': must match ^[a-zA-Z_][a-zA-Z0-9_]{{0,62}}$"
            )
        return v

    @field_validator("pg_pool_max_size")
    @classmethod
    def pool_max_gte_min(cls, v: int, info: ValidationInfo) -> int:
        """Ensure pg_pool_max_size >= pg_pool_min_size.

        Reads pg_pool_min_size from info.data, which only contains fields
        declared (and validated) before this one; falls back to 2 — the
        field's default — if it is absent.
        """
        min_size = info.data.get("pg_pool_min_size", 2)
        if v < min_size:
            raise ValueError(
                f"pg_pool_max_size ({v}) must be >= pg_pool_min_size ({min_size})"
            )
        return v

    @field_validator("score_threshold_exact")
    @classmethod
    def exact_must_be_high(cls, v: float) -> float:
        """Tighten the ge=0.0 field constraint: exact matching requires >= 0.90."""
        if v < 0.90:
            raise ValueError("Exact threshold should be >= 0.90 to avoid false positives")
        return v

    @field_validator("score_threshold_semantic")
    @classmethod
    def semantic_below_exact(cls, v: float, info: ValidationInfo) -> float:
        """Ensure the semantic threshold stays strictly below the exact threshold.

        Falls back to 0.99 (the exact threshold's default) when
        score_threshold_exact is not present in info.data.
        """
        exact = info.data.get("score_threshold_exact", 0.99)
        if v >= exact:
            raise ValueError("Semantic threshold must be lower than exact threshold")
        return v