{
  "schema_version": "1.0.0",
  "bot_id": "1.12",
  "bot_name": "ModelDriftMonitor",
  "slug": "modeldriftmonitor",
  "layer": "Risk",
  "layer_key": "risk",
  "bot_class": "Guardrail",
  "authority": [
    "Veto"
  ],
  "status": "planned",
  "readiness": "Planned",
  "flagship": true,
  "is_reference": false,
  "public_export": false,
  "identity": {
    "layer": "Risk",
    "bot_class": "Guardrail",
    "authority": "Veto",
    "runs_before": "ExecutionPlan emit",
    "runs_after": "Strategy OrderIntent",
    "applies_to": "Every OrderIntent from a model-driven strategy \u2014 detects when live strategy behaviour has drifted from the backtest distribution",
    "default_mode": "planned",
    "user_visible": "summary-only",
    "developer_owner": "Polytraders core \u2014 Risk pod"
  },
  "purpose": "ModelDriftMonitor flags strategies whose live behaviour has decoupled from their backtest distribution. It computes a drift score by comparing the rolling distribution of live signals or fill prices against the expected backtest baseline and hard-rejects new orders when drift exceeds the configured ceiling, preventing a degraded model from continuing to place orders.",
  "why_it_matters": [
    {
      "failure": "Model degradation undetected",
      "consequence": "A strategy whose signal quality has degraded continues placing orders, accumulating losses without any automated circuit-breaker."
    },
    {
      "failure": "Market regime change",
      "consequence": "A model trained on one market regime may produce pathological signals in a new regime; model drift detection provides an early-warning gate before significant capital is deployed."
    }
  ],
  "polymarket_inputs": [
    {
      "input": "Recent fill prices and strategy signal values",
      "source": "clob_auth",
      "required": true,
      "use": "Compute rolling live distribution for comparison against backtest baseline."
    },
    {
      "input": "Market metadata (category, volume, resolution type)",
      "source": "gamma",
      "required": false,
      "use": "Provide context for regime classification to distinguish genuine drift from expected regime variation."
    }
  ],
  "internal_inputs": [
    {
      "input": "Strategy backtest distribution summary (mean, std, percentiles)",
      "source": "internal",
      "required": true,
      "use": "Define the expected baseline distribution against which live behaviour is compared."
    },
    {
      "input": "KillSwitch active flag",
      "source": "KillSwitch",
      "required": true,
      "use": "If active, reject immediately."
    }
  ],
  "raw_params": [
    "max_drift_score \u00b7 float",
    "drift_lookback_n \u00b7 int",
    "drift_metric \u00b7 enum"
  ],
  "parameters": [
    {
      "name": "max_drift_score",
      "default": 0.25,
      "warning": 0.15,
      "hard": 0.25,
      "controls": "Maximum allowed drift score (KS statistic or similar) before new orders are blocked.",
      "why_default_matters": "A KS statistic of 0.25 indicates the live distribution has shifted significantly from the backtest; above this threshold the model is likely operating outside its trained regime.",
      "threshold_logic": [
        {
          "condition": "drift_score <= 0.15",
          "action": "APPROVE"
        },
        {
          "condition": "0.15 < drift_score <= 0.25",
          "action": "WARN \u2014 MODEL_DRIFT_WARN"
        },
        {
          "condition": "drift_score > 0.25",
          "action": "HARD_REJECT \u2014 MODEL_DRIFT_EXCEEDED"
        }
      ],
      "dev_check": "if (driftScore > params.max_drift_score) return reject('MODEL_DRIFT_EXCEEDED');",
      "user_facing": "This strategy's live behaviour has diverged from its design parameters. New orders are paused."
    },
    {
      "name": "drift_lookback_n",
      "default": 50,
      "warning": null,
      "hard": null,
      "controls": "Number of recent observations used to compute the live distribution.",
      "why_default_matters": "50 observations provides a stable sample while remaining responsive to recent regime shifts.",
      "threshold_logic": [
        {
          "condition": "observations < drift_lookback_n",
          "action": "Skip drift check \u2014 insufficient data"
        },
        {
          "condition": "observations >= drift_lookback_n",
          "action": "Compute drift score"
        }
      ],
      "dev_check": "if (liveObs.length < params.drift_lookback_n) return approve('MODEL_DRIFT_SKIPPED');",
      "user_facing": ""
    },
    {
      "name": "drift_metric",
      "default": "ks_statistic",
      "warning": null,
      "hard": null,
      "controls": "Statistical metric used to measure distribution drift. Supported: ks_statistic (Kolmogorov\u2013Smirnov), psi (Population Stability Index).",
      "why_default_matters": "KS statistic is distribution-free and computationally cheap, making it suitable for real-time evaluation.",
      "threshold_logic": [
        {
          "condition": "metric=ks_statistic",
          "action": "Compare CDF of live vs backtest"
        },
        {
          "condition": "metric=psi",
          "action": "Compute PSI bins"
        }
      ],
      "dev_check": "const driftScore = computeDrift(liveObs, backtestBaseline, params.drift_metric);",
      "user_facing": ""
    }
  ],
  "default_config": {
    "bot_id": "risk.model_drift_monitor",
    "version": "0.1.0",
    "mode": "hard_guard",
    "defaults": {
      "max_drift_score": 0.25,
      "drift_lookback_n": 50,
      "drift_metric": "ks_statistic"
    },
    "locked": {
      "max_drift_score": {
        "max": 0.5
      }
    }
  },
  "implementation_flow": [
    "Receive OrderIntent from a model-driven strategy with strategy_id.",
    "Check KillSwitch; if active, HARD_REJECT(KILL_SWITCH_ACTIVE).",
    "Load backtest baseline distribution for strategy_id from internal store.",
    "If baseline unavailable, HARD_REJECT(MODEL_DRIFT_DATA_UNAVAILABLE).",
    "Fetch recent fill prices and signal values for strategy_id.",
    "If fewer than drift_lookback_n observations, APPROVE (skip \u2014 insufficient data).",
    "Compute drift_score using configured drift_metric (KS or PSI).",
    "If drift_score > max_drift_score, HARD_REJECT(MODEL_DRIFT_EXCEEDED).",
    "If drift_score > warning threshold, attach WARN annotation; APPROVE.",
    "APPROVE with drift_score attached to the RiskVote."
  ],
  "decision_logic": {
    "approve": "Drift score is at or below the hard ceiling (a WARN annotation is attached when it exceeds the warning threshold), or fewer than drift_lookback_n observations are available.",
    "reshape_required": "Not used; model drift is a strategy-level condition that cannot be addressed by resizing a single order.",
    "reject": "Drift score exceeds the hard ceiling, or backtest baseline is unavailable (fail-closed)."
  },
  "decision_output_schema": "RiskVote",
  "decision_output_example": {
    "guard_id": "risk.model_drift_monitor",
    "decision": "HARD_REJECT",
    "severity": "HARD",
    "reason_code": "MODEL_DRIFT_EXCEEDED",
    "message": "Strategy strat_002 drift score 0.32 exceeds ceiling 0.25. New orders blocked.",
    "constraints": {},
    "inputs_used": [
      "internal.backtest_baseline",
      "clob_auth.fills"
    ],
    "checked_at": "2026-05-10T11:00:00Z"
  },
  "developer_log": {
    "bot_id": "risk.model_drift_monitor",
    "decision": "HARD_REJECT",
    "reason_code": "MODEL_DRIFT_EXCEEDED",
    "inputs_used": [
      "internal.backtest_baseline",
      "clob_auth.fills"
    ],
    "metrics": {
      "strategy_id": "strat_002",
      "drift_score": 0.32,
      "drift_metric": "ks_statistic",
      "lookback_n": 50,
      "ceiling": 0.25
    },
    "checked_at": "2026-05-10T11:00:00Z"
  },
  "user_explanations": [
    {
      "situation": "Order blocked \u2014 model drift",
      "message": "This strategy's behaviour has diverged from its design baseline. New orders are paused while the strategy is reviewed."
    },
    {
      "situation": "Warning \u2014 drift approaching limit",
      "message": "This strategy is showing signs of behavioural drift. Consider reviewing recent fills before increasing position size."
    }
  ],
  "failure_modes": {
    "main_failure_mode": "Failing to detect drift because the backtest baseline is outdated and the live distribution matches a new, equally invalid pattern.",
    "false_positive_risk": "Blocking a legitimately adapting strategy because the baseline has not been updated to reflect an intentional strategy improvement.",
    "false_negative_risk": "Approving orders during early-stage drift if the lookback window is too long to detect a fast regime change.",
    "safe_fallback": "If backtest baseline data is unavailable, HARD_REJECT with MODEL_DRIFT_DATA_UNAVAILABLE. Never approve when the baseline cannot be loaded.",
    "required_dependencies": [
      "Strategy backtest baseline store",
      "CLOB fill history",
      "KillSwitch active flag"
    ]
  },
  "acceptance_tests": {
    "unit": [
      {
        "test": "Approve when drift score within warning threshold",
        "setup": "drift_score=0.10, warning=0.15, ceiling=0.25",
        "expected": "APPROVE"
      },
      {
        "test": "Warn when drift between warning and hard",
        "setup": "drift_score=0.20, warning=0.15, hard=0.25",
        "expected": "APPROVE with WARN annotation"
      },
      {
        "test": "Reject when drift exceeds ceiling",
        "setup": "drift_score=0.32, ceiling=0.25",
        "expected": "HARD_REJECT(MODEL_DRIFT_EXCEEDED)"
      },
      {
        "test": "Skip when insufficient observations",
        "setup": "observations=30, lookback_n=50",
        "expected": "APPROVE (check skipped)"
      }
    ],
    "integration": [
      {
        "test": "Drift detected after regime change in live fills",
        "expected": "HARD_REJECT(MODEL_DRIFT_EXCEEDED) within one evaluation cycle of drift_score exceeding ceiling"
      },
      {
        "test": "Active KillSwitch short-circuits the drift check",
        "expected": "HARD_REJECT(KILL_SWITCH_ACTIVE) without computing drift score"
      }
    ],
    "property": [
      {
        "property": "Drift score above hard ceiling never results in APPROVE",
        "required": "Always true"
      },
      {
        "property": "Missing baseline always results in HARD_REJECT",
        "required": "Always true \u2014 fail-closed on missing baseline"
      }
    ]
  },
  "checklist_overrides": {},
  "legacy_goal": "Flag strategies whose live behaviour has decoupled from their backtest distribution.",
  "legacy_pm_signals": [
    "Live fill-rate, slippage, and edge realised vs. backtest priors",
    "Feature-distribution drift on each strategy's input vector",
    "Rolling KL / population-stability index per feature",
    "Time-since-last-shadow-rerun"
  ],
  "legacy_external_feeds": [],
  "reporting_groups": [
    "risk_compliance"
  ],
  "reason_codes": [
    {
      "code": "KILL_SWITCH_ACTIVE",
      "severity": "HARD_REJECT",
      "meaning": "Global kill switch active.",
      "action": "Immediate HARD_REJECT.",
      "user_message": "Trading is paused. Please try again later."
    },
    {
      "code": "MODEL_DRIFT_EXCEEDED",
      "severity": "HARD_REJECT",
      "meaning": "Strategy drift score exceeds the hard ceiling.",
      "action": "HARD_REJECT; log drift_score, strategy_id, and metric.",
      "user_message": "This strategy's behaviour has diverged from its design parameters."
    },
    {
      "code": "MODEL_DRIFT_WARN",
      "severity": "WARN",
      "meaning": "Drift score is between warning and hard thresholds.",
      "action": "Attach WARN annotation; APPROVE.",
      "user_message": ""
    },
    {
      "code": "MODEL_DRIFT_DATA_UNAVAILABLE",
      "severity": "HARD_REJECT",
      "meaning": "Backtest baseline is missing, or the fill-history source could not be reached, for this strategy.",
      "action": "HARD_REJECT (fail-closed).",
      "user_message": "Strategy baseline data is unavailable. Please try again."
    }
  ],
  "metrics": {
    "emitted": [
      {
        "name": "polytraders_risk_modeldriftmonitor_decisions_total",
        "type": "counter",
        "unit": "count",
        "labels": [
          "decision",
          "reason_code",
          "strategy_id"
        ],
        "meaning": "Total decisions by type, reason, and strategy."
      },
      {
        "name": "polytraders_risk_modeldriftmonitor_drift_score",
        "type": "gauge",
        "unit": "ratio",
        "labels": [
          "strategy_id",
          "metric"
        ],
        "meaning": "Current drift score per strategy at last evaluation."
      },
      {
        "name": "polytraders_risk_modeldriftmonitor_eval_latency_ms",
        "type": "histogram",
        "unit": "milliseconds",
        "labels": [],
        "meaning": "Latency from intent to RiskVote emit."
      }
    ],
    "alerts": [
      {
        "name": "ModelDriftMonitorDriftDetected",
        "condition": "rate(polytraders_risk_modeldriftmonitor_decisions_total{reason_code='MODEL_DRIFT_EXCEEDED'}[5m]) > 0",
        "severity": "P2",
        "runbook": "#runbook-modeldrift-detected"
      },
      {
        "name": "ModelDriftMonitorDataUnavailable",
        "condition": "rate(polytraders_risk_modeldriftmonitor_decisions_total{reason_code='MODEL_DRIFT_DATA_UNAVAILABLE'}[5m]) > 0",
        "severity": "P1",
        "runbook": "#runbook-modeldrift-data"
      }
    ]
  },
  "state": {
    "store": "in-memory + redis",
    "shape": "Rolling observation buffer (drift_lookback_n entries) per strategy_id; backtest baseline cached from Redis.",
    "ttl": "Baseline cache TTL: 300s. Rolling buffer: evicts oldest on new entry.",
    "recovery": "Baseline reloaded from Redis on cold start. If unavailable, HARD_REJECT until restored.",
    "size_estimate": "~4 KB per strategy for 50-observation buffer"
  },
  "concurrency": {
    "execution_model": "single-threaded event loop",
    "max_in_flight": 100,
    "idempotency_key": "intent_id",
    "timeout_ms": 150,
    "backpressure": "drop newest",
    "locking": "per-strategy_id mutex during drift score computation"
  },
  "dependencies": {
    "depends_on": [
      {
        "bot_id": "risk.kill_switch",
        "why": "Global brake checked first.",
        "contract": "HARD_REJECT(KILL_SWITCH_ACTIVE) short-circuits all evaluation."
      }
    ],
    "emits_to": [
      {
        "bot_id": "exec.smart_router",
        "why": "Approved RiskVote passes to SmartRouter.",
        "contract": "APPROVE passes; HARD_REJECT discards intent."
      }
    ],
    "sibling": [],
    "external": [
      {
        "service": "Data API (fill history)",
        "endpoint": "https://data-api.polymarket.com",
        "sla": "99.9% / 500ms p99",
        "failure_mode": "HARD_REJECT(MODEL_DRIFT_DATA_UNAVAILABLE) if fill history unavailable."
      }
    ]
  },
  "security_surfaces": {
    "signs_orders": false,
    "private_key_access": "none",
    "abuse_vectors": [
      "Injecting synthetic fill data to deflate the drift score",
      "Bypassing drift check by submitting from a strategy_id with no backtest baseline"
    ],
    "mitigations": [
      "Fill data sourced exclusively from CLOB authenticated endpoint with provenance timestamp",
      "Missing baseline triggers HARD_REJECT (fail-closed); unknown strategy_id is treated as missing baseline"
    ]
  },
  "polymarket_v2_compat": {
    "clob_version": "v2",
    "collateral": "pUSD",
    "eip712_domain_version": "2",
    "builder_code_aware": false,
    "negrisk_aware": false,
    "multichain_ready": false,
    "sdk_used": "py-clob-client-v2",
    "settlement_contract": "CTFExchangeV2",
    "notes": "Fill prices and signal values are denominated in pUSD. Uses CLOB V2 authenticated fill history endpoint."
  },
  "version": {
    "spec": "2.0.0",
    "implementation": "0.1.0",
    "schema": "2",
    "released": null,
    "planned_release": "Q4-2026"
  },
  "migration_history": [
    {
      "date": "2026-04-28",
      "from": "n/a",
      "to": "v2-spec",
      "reason": "Spec drafted post-CLOB-V2 cutover; bot not yet implemented",
      "action_taken": "Designed against V2 schema (pUSD, builder codes, V2 EIP-712 domain)"
    }
  ],
  "reference_implementation": {
    "pseudocode": "FUNCTION evaluateModelDrift(intent):\n  killSwitch = FETCH internal.killswitch.status\n  IF killSwitch.active: EMIT RiskVote(HARD_REJECT, KILL_SWITCH_ACTIVE); RETURN\n\n  baseline = FETCH internal.backtest_baseline(intent.strategy_id)\n  IF baseline IS NULL:\n    EMIT RiskVote(HARD_REJECT, MODEL_DRIFT_DATA_UNAVAILABLE); RETURN\n\n  liveObs = FETCH clob_auth.fill_prices(intent.strategy_id, n=params.drift_lookback_n)\n  IF len(liveObs) < params.drift_lookback_n:\n    EMIT RiskVote(APPROVE); RETURN  // insufficient data \u2014 skip\n\n  driftScore = compute_drift(liveObs, baseline.distribution, params.drift_metric)\n\n  IF driftScore > params.max_drift_score:\n    EMIT RiskVote(HARD_REJECT, MODEL_DRIFT_EXCEEDED,\n                  drift_score=driftScore); RETURN\n  IF driftScore > params.max_drift_score * 0.6:\n    annotations.append(WARN(MODEL_DRIFT_WARN, drift_score=driftScore))\n\n  EMIT RiskVote(APPROVE, drift_score=driftScore)",
    "sdk_calls": [
      "clob_auth.fill_prices(strategy_id, n)",
      "internal.backtest_baseline(strategy_id)",
      "internal.killswitch.status()"
    ],
    "complexity": "O(N log N) for KS statistic where N = drift_lookback_n (default 50)"
  },
  "wire_examples": {
    "input": [
      {
        "label": "OrderIntent \u2014 drift exceeded",
        "source": "internal",
        "payload": {
          "intent_id": "int_d4e5f6a7b8c90004",
          "strategy_id": "strat_002",
          "size_usd": 200,
          "generated_at_ms": 1746800000000
        }
      }
    ],
    "output": [
      {
        "label": "RiskVote \u2014 HARD_REJECT",
        "payload": {
          "guard_id": "risk.model_drift_monitor",
          "decision": "HARD_REJECT",
          "severity": "HARD",
          "reason_code": "MODEL_DRIFT_EXCEEDED",
          "message": "Drift score 0.32 exceeds ceiling 0.25 for strategy strat_002.",
          "constraints": {},
          "checked_at": "2026-05-10T11:00:00Z"
        }
      }
    ]
  },
  "failure_injection": [
    {
      "scenario": "BASELINE_UNAVAILABLE",
      "how_to_inject": "Delete backtest baseline from Redis for strategy_id",
      "expected_behaviour": "HARD_REJECT(MODEL_DRIFT_DATA_UNAVAILABLE)",
      "recovery": "Returns to normal within one baseline cache refresh after baseline is restored."
    },
    {
      "scenario": "DRIFT_SPIKE",
      "how_to_inject": "Inject fill prices with distribution KS distance 0.40 from baseline",
      "expected_behaviour": "HARD_REJECT(MODEL_DRIFT_EXCEEDED) with drift_score=0.40",
      "recovery": "Returns to APPROVE once live fills converge back toward the baseline distribution, or once the baseline has been recalibrated after a confirmed intentional strategy change."
    },
    {
      "scenario": "INSUFFICIENT_OBSERVATIONS",
      "how_to_inject": "Clear fill history for strategy_id, set lookback_n=50",
      "expected_behaviour": "APPROVE (check skipped \u2014 insufficient observations)",
      "recovery": "Check activates after drift_lookback_n fills have been accumulated."
    }
  ],
  "runbook": {
    "summary": "ModelDriftMonitor incidents typically indicate a genuine strategy degradation or a regime change. Verify whether the drift is expected before unlocking the strategy.",
    "oncall_actions": [
      {
        "alert": "ModelDriftMonitorDriftDetected",
        "first_step": "Inspect drift_score gauge per strategy_id; compare live fills to the backtest baseline to determine if drift is genuine or a data artefact.",
        "escalation": "Risk pod lead; strategy may need baseline recalibration before re-enabling.",
        "diagnosis": "",
        "mitigation": ""
      },
      {
        "alert": "ModelDriftMonitorDataUnavailable",
        "first_step": "Check Redis baseline store and CLOB fill history endpoint.",
        "escalation": "Risk pod lead if sustained > 2 minutes.",
        "diagnosis": "",
        "mitigation": ""
      }
    ],
    "manual_overrides": [
      {
        "command": "polytraders risk update-baseline --strategy-id <id> --source recent_fills",
        "effect": "Recalibrates the strategy's baseline distribution from its recent fills. Use only after a confirmed intentional strategy update that changes the expected fill distribution."
      }
    ],
    "healthcheck": "GET /internal/health/modeldriftmonitor \u2192 green: Baselines loaded for all active strategies, fill history accessible, p99 eval latency < 150ms; red: Any strategy baseline missing, fill history unavailable, or HARD_REJECT rate > 0.1"
  },
  "promotion_gates": {
    "to_shadow": [
      {
        "gate": "Unit tests pass for drift spike and skip scenarios",
        "how_measured": "CI test run",
        "threshold": "100% pass"
      }
    ],
    "to_limited_live": [
      {
        "gate": "Shadow drift scores align with expected baseline for active strategies over 48h",
        "how_measured": "Grafana drift_score gauge comparison",
        "threshold": "No spurious HARD_REJECTs in shadow run"
      }
    ],
    "to_general_live": [
      {
        "gate": "Baseline update workflow tested and documented",
        "how_measured": "Manual E2E test of the `polytraders risk update-baseline` command",
        "threshold": "Pass"
      }
    ]
  },
  "reporting": {
    "emits_kinds": [
      "RiskVote"
    ],
    "topics": [
      "polytraders.reports.risk"
    ],
    "partition_by": "trace_id",
    "cadence": "every-event",
    "retention_class": "2y",
    "sampling_rule": "emit-every",
    "bus_failure_action": "fail-closed",
    "user_visible": "summary-only",
    "consumes_kinds": [
      "ObservationReport"
    ]
  },
  "capital_impact": "Direct",
  "mode_support": [
    "quarantine"
  ],
  "v3_status": {
    "phase": 4,
    "phase_name": "Core risk",
    "docs": {
      "done": 27,
      "total": 27,
      "state": "done"
    },
    "impl": {
      "done": 0,
      "total": 15,
      "state": "pending"
    },
    "runtime": {
      "done": 0,
      "total": 8,
      "state": "pending"
    },
    "overall": "pending"
  }
}