1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import logging
import uuid
# Module-level logger registered under the fixed name "llm_monitor";
# monitored_llm_call emits its "llm_call" / "llm_call_failed" records through it.
logger = logging.getLogger("llm_monitor")
def monitored_llm_call(
    messages: list,
    model: str = "claude-opus-4-5",
    system: str = None,
    max_tokens: int = 1024,
    user_id: str = None,
    feature: str = None,
) -> tuple[str, LLMCallMetrics]:
    """Call ``client.messages.create`` once and record metrics for the call.

    Exactly one log record is emitted per invocation on the ``logger``
    defined in this module: ``"llm_call"`` (INFO) when a result is returned,
    or ``"llm_call_failed"`` (ERROR) when anything inside the monitored
    section raises.

    Args:
        messages: Passed through unchanged as the ``messages`` argument.
        model: Passed through as ``model`` and also given to
            ``calculate_cost``.
        system: Optional system prompt. Only forwarded when truthy, so both
            ``None`` and ``""`` omit the ``system`` argument entirely.
        max_tokens: Passed through as ``max_tokens``.
        user_id: Stored on the metrics object; not sent to the client.
        feature: Stored on the metrics object; not sent to the client.

    Returns:
        A ``(text, metrics)`` tuple where ``text`` is the ``.text`` attribute
        of the first element of ``response.content`` and ``metrics`` is the
        ``LLMCallMetrics`` describing the successful call.

    Raises:
        Exception: Any exception raised while building the request, calling
            the client, computing cost, or extracting the text is logged and
            then re-raised unchanged.

    Note:
        Failure metrics always report zero tokens and zero cost, even when
        the failure happened after the client had already returned a
        response (for example in ``calculate_cost`` or text extraction).
    """
    request_id = str(uuid.uuid4())
    # perf_counter is monotonic: unlike time.time(), the measured latency
    # cannot go negative or jump when the system wall clock is adjusted.
    started = time.perf_counter()

    def elapsed_ms() -> int:
        """Return whole milliseconds elapsed since the call started."""
        return int((time.perf_counter() - started) * 1000)

    try:
        kwargs = {
            "model": model,
            "max_tokens": max_tokens,
            "messages": messages,
        }
        if system:
            kwargs["system"] = system

        response = client.messages.create(**kwargs)
        # Latency covers only the client call, not the bookkeeping below.
        latency_ms = elapsed_ms()

        usage = response.usage
        cost = calculate_cost(model, usage.input_tokens, usage.output_tokens)
        metrics = LLMCallMetrics(
            request_id=request_id,
            # NOTE(review): datetime.now() is called without a tz argument,
            # so this is a naive timestamp - confirm consumers expect that.
            timestamp=datetime.now(),
            model=model,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            latency_ms=latency_ms,
            cost_usd=cost,
            success=True,
            user_id=user_id,
            feature=feature,
        )
        # Extract the text *before* logging success. If the content list is
        # empty or its first element has no ``.text``, this raises and the
        # call is reported once as a failure, rather than being logged as
        # both a success and a failure under the same request_id.
        text = response.content[0].text
    except Exception as e:
        metrics = LLMCallMetrics(
            request_id=request_id,
            timestamp=datetime.now(),
            model=model,
            input_tokens=0,
            output_tokens=0,
            latency_ms=elapsed_ms(),
            cost_usd=0,
            success=False,
            error_type=type(e).__name__,
            user_id=user_id,
            feature=feature,
        )
        logger.error(
            "llm_call_failed",
            extra={"metrics": metrics.__dict__, "error": str(e)},
        )
        # Bare raise keeps the original exception type and traceback so
        # callers' existing except clauses continue to work.
        raise

    logger.info("llm_call", extra={"metrics": metrics.__dict__})
    return text, metrics