Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions crates/openfang-api/src/middleware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,14 +234,15 @@ pub async fn security_headers(request: Request<Body>, next: Next) -> Response<Bo
let mut response = next.run(request).await;
let headers = response.headers_mut();
headers.insert("x-content-type-options", "nosniff".parse().unwrap());
headers.insert("x-frame-options", "DENY".parse().unwrap());
// Allow embedding in Command Center (localhost:3000)
headers.insert("x-frame-options", "SAMEORIGIN".parse().unwrap());
headers.insert("x-xss-protection", "1; mode=block".parse().unwrap());
// The dashboard handler (webchat_page) sets its own nonce-based CSP.
// For all other responses (API endpoints), apply a strict default.
if !headers.contains_key("content-security-policy") {
headers.insert(
"content-security-policy",
"default-src 'none'; frame-ancestors 'none'"
"default-src 'none'; frame-ancestors 'self' http://localhost:3000"
.parse()
.unwrap(),
);
Expand Down
6 changes: 6 additions & 0 deletions crates/openfang-kernel/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2101,12 +2101,14 @@ impl OpenFangKernel {

// Persist usage to database (same as non-streaming path)
let model = &manifest.model.model;
let provider = &manifest.model.provider;
let cost = MeteringEngine::estimate_cost_with_catalog(
&kernel_clone
.model_catalog
.read()
.unwrap_or_else(|e| e.into_inner()),
model,
provider,
result.total_usage.input_tokens,
result.total_usage.output_tokens,
);
Expand Down Expand Up @@ -2658,9 +2660,11 @@ impl OpenFangKernel {

// Record usage in the metering engine (uses catalog pricing as single source of truth)
let model = &manifest.model.model;
let provider = &manifest.model.provider;
let cost = MeteringEngine::estimate_cost_with_catalog(
&self.model_catalog.read().unwrap_or_else(|e| e.into_inner()),
model,
provider,
result.total_usage.input_tokens,
result.total_usage.output_tokens,
);
Expand Down Expand Up @@ -3148,9 +3152,11 @@ impl OpenFangKernel {
.unwrap_or((0, 0));

let model = &entry.manifest.model.model;
let provider = &entry.manifest.model.provider;
let cost = MeteringEngine::estimate_cost_with_catalog(
&self.model_catalog.read().unwrap_or_else(|e| e.into_inner()),
model,
provider,
input_tokens,
output_tokens,
);
Expand Down
114 changes: 107 additions & 7 deletions crates/openfang-kernel/src/metering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,28 @@ impl MeteringEngine {

/// Estimate cost using the model catalog as the pricing source.
///
/// Falls back to the default rate ($1/$3 per million) if the model is not
/// found in the catalog.
/// If the model is in the catalog, its pricing is used verbatim.
/// Otherwise:
/// - Local providers (ollama, vllm, lmstudio, lemonade, llamacpp, local)
/// default to $0/$0 — inference is running on the user's own hardware
/// and has no per-token cost. This covers custom Ollama Modelfiles and
/// any other locally-served model that isn't explicitly cataloged.
/// - Everything else (cloud providers) falls back to $1/$3 per million,
/// a conservative placeholder that surfaces the cost of an unknown
/// cloud model rather than silently hiding it.
pub fn estimate_cost_with_catalog(
catalog: &openfang_runtime::model_catalog::ModelCatalog,
model: &str,
provider: &str,
input_tokens: u64,
output_tokens: u64,
) -> f64 {
let (input_per_m, output_per_m) = catalog.pricing(model).unwrap_or((1.0, 3.0));
let fallback = if is_local_provider(provider) {
(0.0, 0.0)
} else {
(1.0, 3.0)
};
let (input_per_m, output_per_m) = catalog.pricing(model).unwrap_or(fallback);
let input_cost = (input_tokens as f64 / 1_000_000.0) * input_per_m;
let output_cost = (output_tokens as f64 / 1_000_000.0) * output_per_m;
input_cost + output_cost
Expand All @@ -212,6 +225,26 @@ impl MeteringEngine {
}
}

/// Returns `true` for providers whose inference runs on the user's own
/// hardware and therefore carries no per-token dollar cost.
///
/// `estimate_cost_with_catalog` consults this to choose a $0/$0 pricing
/// fallback for models missing from the catalog — e.g. a custom Ollama
/// Modelfile like `my-model:latest` runs for free and must not
/// erroneously trip budget quotas.
fn is_local_provider(provider: &str) -> bool {
    // Known local-inference provider spellings; matched case-insensitively.
    const LOCAL_PROVIDERS: [&str; 8] = [
        "ollama",
        "vllm",
        "lmstudio",
        "lm-studio",
        "lemonade",
        "llamacpp",
        "llama.cpp",
        "local",
    ];
    LOCAL_PROVIDERS.contains(&provider.to_lowercase().as_str())
}

/// Budget status snapshot — current spend vs limits for all time windows.
#[derive(Debug, Clone, serde::Serialize)]
pub struct BudgetStatus {
Expand Down Expand Up @@ -758,6 +791,7 @@ mod tests {
let cost = MeteringEngine::estimate_cost_with_catalog(
&catalog,
"claude-sonnet-4-20250514",
"anthropic",
1_000_000,
1_000_000,
);
Expand All @@ -768,24 +802,90 @@ mod tests {
fn test_estimate_cost_with_catalog_alias() {
    let catalog = openfang_runtime::model_catalog::ModelCatalog::new();
    // The "sonnet" alias must resolve to the same catalog pricing as the
    // full model id: $18 total for 1M input + 1M output tokens.
    // NOTE: this span previously contained both the pre-change 4-argument
    // call and the post-change 5-argument call (leftover diff residue);
    // only the current 5-argument form is kept.
    let cost = MeteringEngine::estimate_cost_with_catalog(
        &catalog,
        "sonnet",
        "anthropic",
        1_000_000,
        1_000_000,
    );
    assert!((cost - 18.0).abs() < 0.01);
}

#[test]
fn test_estimate_cost_with_catalog_unknown_cloud_uses_default() {
    let catalog = openfang_runtime::model_catalog::ModelCatalog::new();
    // Unknown cloud model falls back to $1/$3 per million — surfaces the
    // cost of an uncataloged cloud model rather than silently hiding it.
    // NOTE: this span previously contained both the old fn name
    // (test_estimate_cost_with_catalog_unknown_uses_default) and the new
    // one (leftover diff residue); only the current definition is kept.
    let cost = MeteringEngine::estimate_cost_with_catalog(
        &catalog,
        "totally-unknown-model",
        "openai",
        1_000_000,
        1_000_000,
    );
    assert!((cost - 4.0).abs() < 0.01);
}

#[test]
fn test_estimate_cost_with_catalog_unknown_local_is_free() {
    let catalog = openfang_runtime::model_catalog::ModelCatalog::new();
    // Every local-provider spelling — in any letter case — must price an
    // uncataloged model (e.g. a custom Ollama Modelfile) at $0, so that
    // zero-cost local inference never trips budget quotas.
    let local_spellings = [
        "ollama",
        "Ollama",
        "OLLAMA",
        "vllm",
        "lmstudio",
        "lm-studio",
        "lemonade",
        "llamacpp",
        "llama.cpp",
        "local",
    ];
    for provider in local_spellings {
        let cost = MeteringEngine::estimate_cost_with_catalog(
            &catalog,
            "gemma4-agent",
            provider,
            1_000_000,
            1_000_000,
        );
        assert_eq!(cost, 0.0, "provider {provider} must default to $0");
    }
}

#[test]
fn test_estimate_cost_with_catalog_known_model_ignores_provider_hint() {
    // A model that IS in the catalog keeps its catalog pricing no matter
    // what provider tag the caller attaches — mislabeling a known cloud
    // model as "local" must not zero out its cost.
    let catalog = openfang_runtime::model_catalog::ModelCatalog::new();
    let cost = MeteringEngine::estimate_cost_with_catalog(
        &catalog,
        "claude-sonnet-4-20250514",
        "ollama",
        1_000_000,
        1_000_000,
    );
    let expected = 18.0;
    assert!((cost - expected).abs() < 0.01);
}

#[test]
fn test_is_local_provider() {
    // All recognized local-provider spellings, matched case-insensitively.
    let locals = [
        "ollama",
        "OLLAMA",
        "vllm",
        "lmstudio",
        "lm-studio",
        "lemonade",
        "llamacpp",
        "llama.cpp",
        "local",
    ];
    for name in locals {
        assert!(super::is_local_provider(name), "{name} should be local");
    }
    // Cloud providers and the empty string are never local.
    for name in ["anthropic", "openai", ""] {
        assert!(!super::is_local_provider(name), "{name:?} should not be local");
    }
}

#[test]
fn test_get_summary() {
let engine = setup();
Expand Down
2 changes: 2 additions & 0 deletions crates/openfang-runtime/src/drivers/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ struct OaiResponseMessage {
tool_calls: Option<Vec<OaiToolCall>>,
/// Reasoning/thinking content returned by some models (DeepSeek-R1, Qwen3, etc.)
/// via LM Studio, Ollama, and other local inference servers.
/// Ollama uses "reasoning" for Gemma 4; others use "reasoning_content".
#[serde(alias = "reasoning")]
reasoning_content: Option<String>,
}

Expand Down
Loading