# example_2.py — vCache usage example (79 lines / 62 loc, 2.26 KB)
# (GitHub page chrome and line-number gutter removed from this scrape.)
import time
from pathlib import Path
import pandas as pd
from tqdm import tqdm
from vcache import (
HNSWLibVectorDB,
InMemoryEmbeddingMetadataStorage,
LLMComparisonSimilarityEvaluator,
MRUEvictionPolicy,
OpenAIEmbeddingEngine,
OpenAIInferenceEngine,
VCache,
VCacheConfig,
VCachePolicy,
VerifiedDecisionPolicy,
)
# NOTE: this usage note was previously a bare triple-quoted string placed
# AFTER the imports, so it was not the module docstring (`__doc__`) — just a
# discarded expression. Converted to comments; behavior is unchanged.
#
# Run
#     export OPENAI_API_KEY="<your-api-key>"
# before running the script with
#     poetry run python example_2.py
def __get_vcache() -> VCache:
    """Build and return a fully configured VCache instance.

    Wires together the OpenAI inference and embedding engines, an HNSWLib
    vector database, in-memory embedding-metadata storage, an LLM-based
    similarity evaluator, and an MRU eviction policy, then pairs that
    configuration with a verified decision policy (delta=0.03).
    """
    print("Initializing vCache configuration...")

    # Two inference engines: the main model produces answers; a smaller,
    # cheaper model is used only to judge response similarity.
    answer_engine = OpenAIInferenceEngine(model_name="gpt-4.1-2025-04-14")
    judge_engine = OpenAIInferenceEngine(model_name="gpt-4.1-nano-2025-04-14")

    # Assemble the component configuration for vCache.
    configuration: VCacheConfig = VCacheConfig(
        inference_engine=answer_engine,
        embedding_engine=OpenAIEmbeddingEngine(model_name="text-embedding-3-small"),
        vector_db=HNSWLibVectorDB(),
        embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
        similarity_evaluator=LLMComparisonSimilarityEvaluator(
            inference_engine=judge_engine
        ),
        eviction_policy=MRUEvictionPolicy(max_size=4096),
    )

    # Caching policy: VerifiedDecisionPolicy with the given delta
    # (presumably an error-rate bound — see vCache docs to confirm).
    decision_policy: VCachePolicy = VerifiedDecisionPolicy(delta=0.03)

    cache: VCache = VCache(configuration, decision_policy)
    print("vCache initialized successfully.\n")
    return cache
def main():
    """Load the bundled parquet sample data and run every row through vCache.

    For each row, the "text" column is used as the prompt and the "task"
    column as the system prompt; cache-hit status and per-request latency
    are printed.
    """
    cache: VCache = __get_vcache()

    print("Loading sample data from parquet file...")
    # The sample file is expected to sit next to this script.
    data_path: Path = Path(__file__).parent / "sample_data.parquet"
    frame: pd.DataFrame = pd.read_parquet(data_path)
    print(f"Loaded {len(frame)} rows of data\n")

    print("Processing data with vCache...")
    # tqdm is kept (disabled) so a progress bar can be re-enabled easily.
    progress = tqdm(
        frame.iterrows(), total=len(frame), desc="Processing rows", disable=True
    )
    for index, record in progress:
        prompt: str = record["text"]
        system_prompt: str = record["task"]

        started: float = time.time()
        is_hit, response, _, _ = cache.infer_with_cache_info(prompt, system_prompt)
        elapsed: float = time.time() - started

        print(
            f"Response for request {index}: {response}. Is hit: {is_hit}. Time taken: {elapsed:.3f} seconds"
        )
    print("Data processing completed.")
# Standard entry guard: run main() only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()