{
  "schema_version": "1.0.0",
  "bot_id": "2.10",
  "bot_name": "LatencyProfiler",
  "slug": "latencyprofiler",
  "layer": "Execution",
  "layer_key": "exec",
  "bot_class": "Execution Utility",
  "authority": [
    "Reshape"
  ],
  "status": "planned",
  "readiness": "Spec started",
  "flagship": false,
  "is_reference": false,
  "public_export": false,
  "identity": {
    "layer": "Execution",
    "bot_class": "Execution Utility",
    "authority": "Reshape",
    "runs_before": "Any exec bot that uses latency data for routing decisions",
    "runs_after": "Order submission and fill events from ws_user",
    "applies_to": "All CLOB V2 order submission and ws feed routes continuously",
    "default_mode": "shadow_only",
    "user_visible": "summary-only",
    "developer_owner": "Polytraders core \u2014 Execution pod"
  },
  "purpose": "LatencyProfiler continuously measures round-trip order submission latency by route and surfaces regressions. It probes each configured route at probe_interval_s and emits ObservationReports when p95 or p99 thresholds are breached.",
  "why_it_matters": [
    {
      "failure": "Latency regression undetected",
      "consequence": "Strategy signals age past their TTL in transit, causing stale-signal discards and missed opportunities without a clear root cause."
    },
    {
      "failure": "Route not profiled per endpoint",
      "consequence": "A degraded CLOB endpoint continues to receive orders because the routing layer lacks per-route latency data."
    },
    {
      "failure": "WebSocket lag not tracked",
      "consequence": "ws_user fill events arrive late, causing order lifecycle state to be updated with significant delay."
    }
  ],
  "polymarket_inputs": [
    {
      "input": "CLOB V2 order submission endpoint (lightweight probe requests; no real orders are submitted)",
      "source": "clob_auth",
      "required": true,
      "use": "Measure submit-to-ack latency per endpoint."
    },
    {
      "input": "WebSocket user feed heartbeat",
      "source": "ws_user",
      "required": true,
      "use": "Measure ws feed lag by comparing heartbeat timestamp to local clock."
    }
  ],
  "internal_inputs": [
    {
      "input": "Probe trigger from scheduler",
      "source": "internal scheduler",
      "required": true,
      "use": "Trigger a latency probe on each configured route every probe_interval_s."
    }
  ],
  "raw_params": [
    "warn_p95_ms \u00b7 int",
    "fail_p99_ms \u00b7 int",
    "routes_to_probe \u00b7 list",
    "probe_interval_s \u00b7 int"
  ],
  "parameters": [
    {
      "name": "warn_p95_ms",
      "default": 150,
      "warning": 200,
      "hard": 500,
      "controls": "p95 round-trip latency in milliseconds above which a WARN ObservationReport is emitted.",
      "why_default_matters": "150ms p95 is the target for acceptable order routing latency; above 200ms strategies begin experiencing signal-age issues.",
      "threshold_logic": [
        {
          "condition": "p95_ms <= 150",
          "action": "No alert"
        },
        {
          "condition": "150 < p95_ms <= 200",
          "action": "WARN \u2014 LATENCY_WARN emitted"
        },
        {
          "condition": "p95_ms > 500 (hard)",
          "action": "HARD_REJECT \u2014 LATENCY_HARD_BREACH; alert fired"
        }
      ],
      "dev_check": "if p95 > params.warn_p95_ms: emit(LATENCY_WARN)",
      "user_facing": "Exchange connection speed is being monitored."
    },
    {
      "name": "fail_p99_ms",
      "default": 500,
      "warning": 750,
      "hard": 1000,
      "controls": "p99 round-trip latency in milliseconds above which HARD_REJECT is raised and the route is flagged as degraded.",
      "why_default_matters": "500ms p99 is the threshold at which GTD signal TTLs begin expiring in transit; above this, order submission must be suspended on the degraded route.",
      "threshold_logic": [
        {
          "condition": "p99_ms <= 500",
          "action": "Healthy"
        },
        {
          "condition": "500 < p99_ms <= 750",
          "action": "WARN \u2014 LATENCY_P99_ELEVATED"
        },
        {
          "condition": "p99_ms > 1000 (hard)",
          "action": "HARD_REJECT \u2014 flag route degraded; notify exec bots"
        }
      ],
      "dev_check": "if p99 > params.fail_p99_ms: flagRoute(route, 'degraded')",
      "user_facing": ""
    },
    {
      "name": "probe_interval_s",
      "default": 30,
      "warning": 60,
      "hard": 120,
      "controls": "How often to send a probe request to each configured route to measure latency.",
      "why_default_matters": "30s provides frequent enough sampling to detect latency regressions within one minute while consuming minimal rate-limit budget.",
      "threshold_logic": [
        {
          "condition": "interval <= 30s",
          "action": "Normal probe cadence"
        },
        {
          "condition": "interval > 60s",
          "action": "WARN \u2014 latency regressions may go undetected for > 1 minute"
        },
        {
          "condition": "interval > 120s (hard)",
          "action": "Reject config"
        }
      ],
      "dev_check": "assert params.probe_interval_s <= locked.probe_interval_s.max",
      "user_facing": ""
    },
    {
      "name": "routes_to_probe",
      "default": [
        "clob_auth",
        "ws_user"
      ],
      "warning": "\u2014",
      "hard": "\u2014",
      "controls": "List of route identifiers to probe. Each entry corresponds to a configured CLOB V2 endpoint or WebSocket feed.",
      "why_default_matters": "Probing both REST auth and WebSocket feeds captures the two most latency-sensitive paths for order execution.",
      "threshold_logic": [
        {
          "condition": "includes both clob_auth and ws_user",
          "action": "Full coverage"
        },
        {
          "condition": "missing ws_user",
          "action": "WARN \u2014 WebSocket lag not monitored"
        }
      ],
      "dev_check": "if 'ws_user' not in params.routes_to_probe: emit(WARN)",
      "user_facing": ""
    }
  ],
  "default_config": {
    "bot_id": "exec.latencyprofiler",
    "version": "0.1.0",
    "mode": "shadow_only",
    "defaults": {
      "warn_p95_ms": 150,
      "fail_p99_ms": 500,
      "probe_interval_s": 30,
      "routes_to_probe": [
        "clob_auth",
        "ws_user"
      ]
    },
    "locked": {
      "warn_p95_ms": {
        "max": 500
      },
      "fail_p99_ms": {
        "max": 1000
      },
      "probe_interval_s": {
        "max": 120
      }
    }
  },
  "implementation_flow": [
    "Every probe_interval_s, for each route in routes_to_probe: send a probe request and record send_ms.",
    "For clob_auth: issue a lightweight GET /time or authenticated OPTIONS; record ack_ms.",
    "For ws_user: compare heartbeat ts_ms to local now_ms; record feed_lag_ms.",
    "Maintain a rolling window of the last 100 probe round-trip times per route.",
    "Compute p50, p95, p99 from the rolling window.",
    "If p95 > warn_p95_ms: emit ObservationReport(LATENCY_WARN) for the route.",
    "If p99 > fail_p99_ms: emit ObservationReport(LATENCY_HARD_BREACH); flag route as degraded in internal state store.",
    "Publish per-route latency histogram metrics every probe cycle."
  ],
  "decision_logic": {
    "approve": "p95 and p99 within thresholds; route healthy; no ObservationReport emitted.",
    "reshape_required": "Not applicable \u2014 LatencyProfiler is observation-only; it does not reshape orders.",
    "reject": "p99 exceeds fail_p99_ms; route flagged degraded; LATENCY_HARD_BREACH emitted.",
    "warning_only": "p95 exceeds warn_p95_ms but p99 within threshold; LATENCY_WARN emitted."
  },
  "decision_output_schema": "ObservationReport",
  "decision_output_example": {
    "report_id": "rep_5e6f7a8b9c0d1e2f",
    "trace_id": "trc_4d5e6f7a8b9c0d1e",
    "bot_id": "exec.latencyprofiler",
    "route": "clob_auth",
    "p50_ms": 45,
    "p95_ms": 160,
    "p99_ms": 280,
    "verdict": "LATENCY_WARN",
    "window_size": 100,
    "measured_at_ms": 1746770300000
  },
  "developer_log": {
    "route": "clob_auth",
    "p50_ms": 45,
    "p95_ms": 160,
    "p99_ms": 280,
    "warn_p95_ms": 150,
    "fail_p99_ms": 500,
    "samples": 100,
    "route_degraded": false
  },
  "user_explanations": [
    {
      "situation": "Latency warning on submission route",
      "message": "The connection to the exchange is slightly slower than normal. Orders may take a moment longer to be processed."
    },
    {
      "situation": "Route flagged degraded",
      "message": "The exchange connection speed has degraded significantly. Order submission may be suspended until conditions improve."
    }
  ],
  "failure_modes": {
    "main_failure_mode": "Probe requests consume rate-limit budget on a congested connection, making actual order submission slower.",
    "false_positive_risk": "A single slow probe response inflates p99, triggering LATENCY_HARD_BREACH when the route is actually healthy.",
    "false_negative_risk": "Rolling window too large (100 samples over 30s intervals) means a sudden latency spike takes up to 50 minutes to fully propagate through the p99 estimate.",
    "safe_fallback": "If probe itself times out, record as max latency (1000ms) in the rolling window; emit LATENCY_HARD_BREACH after 3 consecutive timeouts.",
    "required_dependencies": [
      "clob_auth endpoint",
      "ws_user heartbeat",
      "internal scheduler for probe triggers"
    ]
  },
  "acceptance_tests": {
    "unit": [
      {
        "test": "p95 computation from rolling window",
        "setup": "Inject 100 samples with 95th sample = 180ms",
        "expected": "p95_ms=180 > warn_p95_ms=150; LATENCY_WARN emitted"
      },
      {
        "test": "Route flagged degraded when p99 > fail_p99_ms",
        "setup": "p99=600ms, fail_p99_ms=500",
        "expected": "route_degraded=true; LATENCY_HARD_BREACH emitted"
      },
      {
        "test": "No alert when both p95 and p99 within thresholds",
        "setup": "p95=100ms, p99=200ms",
        "expected": "No ObservationReport emitted"
      }
    ],
    "integration": [
      {
        "test": "Probe cycle: send probe \u2192 receive ack \u2192 compute latency \u2192 update rolling window",
        "expected": "Rolling window updated; metrics emitted; alert fired only if threshold breached"
      },
      {
        "test": "ws_user lag detection via heartbeat comparison",
        "expected": "feed_lag_ms computed and added to the ws_user rolling window; LATENCY_WARN if the window p95 > warn_p95_ms"
      }
    ],
    "property": [
      {
        "property": "Rolling window always contains <= 100 samples per route",
        "required": "Always true \u2014 oldest sample evicted on overflow"
      },
      {
        "property": "p99 >= p95 >= p50 always holds",
        "required": "Always true"
      }
    ]
  },
  "checklist_overrides": {},
  "legacy_goal": "Continuously measure round-trip latency by route and surface regressions.",
  "legacy_pm_signals": [
    "Submit-to-ack latency per endpoint and order type",
    "WebSocket-feed lag vs. REST snapshot",
    "p50 / p95 / p99 by region and time of day"
  ],
  "legacy_external_feeds": [],
  "reporting_groups": [
    "execution"
  ],
  "network": [
    "polygon"
  ],
  "api_surface": [
    "clob_auth",
    "ws_user",
    "internal"
  ],
  "version": {
    "spec": "2.0.0",
    "implementation": "0.1.0",
    "schema": "2",
    "released": null,
    "planned_release": "Q4-2026"
  },
  "migration_history": [
    {
      "date": "2026-04-28",
      "from": "n/a",
      "to": "v2-spec",
      "reason": "Spec drafted post-CLOB-V2 cutover; bot not yet implemented",
      "action_taken": "Designed against V2 schema (pUSD, builder codes, V2 EIP-712 domain)"
    }
  ],
  "polymarket_v2_compat": {
    "clob_version": "v2",
    "collateral": "pUSD",
    "eip712_domain_version": "2",
    "builder_code_aware": false,
    "negrisk_aware": false,
    "multichain_ready": false,
    "sdk_used": "py-clob-client-v2",
    "settlement_contract": "CTFExchangeV2",
    "notes": "LatencyProfiler only measures CLOB V2 auth endpoint latency and ws_user heartbeat lag; it does not sign or submit real orders. All measurements are in milliseconds from the local system clock."
  },
  "reference_implementation": {
    "pseudocode": "FUNCTION probeRoute(route):\n  sendMs = now_ms()\n  IF route == 'clob_auth':\n    result = clob_auth.GET('/time')  // lightweight probe\n    ackMs = now_ms()\n    rtt = ackMs - sendMs\n    IF result IS NULL OR result.error:\n      rtt = 1000  // count as max latency\n  ELIF route == 'ws_user':\n    hb = ws_user.lastHeartbeat()\n    rtt = now_ms() - hb.ts_ms\n\n  // Update rolling window\n  windows[route].append(rtt)\n  IF len(windows[route]) > 100:\n    windows[route].pop(0)\n\n  // Compute percentiles\n  sorted_w = sorted(windows[route])\n  p50 = sorted_w[int(0.50 * len(sorted_w))]\n  p95 = sorted_w[int(0.95 * len(sorted_w))]\n  p99 = sorted_w[int(0.99 * len(sorted_w))]\n\n  // Threshold checks\n  IF p99 > params.fail_p99_ms:\n    routeState[route] = 'degraded'\n    EMIT ObservationReport(route, p50, p95, p99, LATENCY_HARD_BREACH)\n  ELIF p95 > params.warn_p95_ms:\n    EMIT ObservationReport(route, p50, p95, p99, LATENCY_WARN)\n\nSCHEDULE probeRoute FOR EACH route IN params.routes_to_probe\n         EVERY params.probe_interval_s",
    "sdk_calls": [
      "clob_auth.GET('/time')",
      "ws_user.lastHeartbeat()"
    ],
    "complexity": "O(W log W) where W = rolling window size (100)"
  },
  "wire_examples": {
    "input": [
      {
        "label": "Probe trigger (internal scheduler)",
        "source": "internal",
        "payload": {
          "route": "clob_auth",
          "trigger_ts_ms": 1746770300000
        }
      }
    ],
    "output": [
      {
        "label": "ObservationReport \u2014 LATENCY_WARN",
        "payload": {
          "report_id": "rep_5e6f7a8b9c0d1e2f",
          "bot_id": "exec.latencyprofiler",
          "route": "clob_auth",
          "p50_ms": 45,
          "p95_ms": 160,
          "p99_ms": 280,
          "verdict": "LATENCY_WARN",
          "measured_at_ms": 1746770300000
        }
      }
    ]
  },
  "reason_codes": [
    {
      "code": "LATENCY_OK",
      "severity": "INFO",
      "meaning": "All probed routes within p95 and p99 thresholds.",
      "action": "No alert; emit metrics only.",
      "user_message": ""
    },
    {
      "code": "LATENCY_WARN",
      "severity": "WARN",
      "meaning": "p95 latency exceeded warn_p95_ms on a probed route.",
      "action": "Emit ObservationReport with WARN; do not block orders.",
      "user_message": "Exchange connection is slightly slower than normal."
    },
    {
      "code": "LATENCY_HARD_BREACH",
      "severity": "HARD_REJECT",
      "meaning": "p99 latency exceeded fail_p99_ms; route flagged as degraded.",
      "action": "Flag route degraded; notify exec bots; alert ops.",
      "user_message": "Exchange connection has degraded. Order submission may be affected."
    },
    {
      "code": "PROBE_TIMEOUT",
      "severity": "WARN",
      "meaning": "Probe request timed out; recorded as max latency (1000ms) in rolling window.",
      "action": "Record max latency; check for 3 consecutive timeouts before HARD_REJECT.",
      "user_message": ""
    }
  ],
  "metrics": {
    "emitted": [
      {
        "name": "polytraders_exec_latencyprofiler_rtt_ms",
        "type": "histogram",
        "unit": "ms",
        "labels": [
          "route"
        ],
        "meaning": "Round-trip latency histogram per probed route."
      },
      {
        "name": "polytraders_exec_latencyprofiler_degraded_routes",
        "type": "gauge",
        "unit": "count",
        "labels": [],
        "meaning": "Number of routes currently flagged as degraded."
      },
      {
        "name": "polytraders_exec_latencyprofiler_probe_errors_total",
        "type": "counter",
        "unit": "count",
        "labels": [
          "route"
        ],
        "meaning": "Total probe timeouts or errors per route."
      }
    ],
    "alerts": [
      {
        "name": "LatencyProfilerRoutesDegraded",
        "condition": "polytraders_exec_latencyprofiler_degraded_routes > 0",
        "severity": "P1",
        "runbook": "#runbook-latencyprofiler-degraded"
      },
      {
        "name": "LatencyProfilerHighP99",
        "condition": "histogram_quantile(0.99, rate(polytraders_exec_latencyprofiler_rtt_ms_bucket[5m])) > 500",
        "severity": "P2",
        "runbook": "#runbook-latencyprofiler-p99"
      }
    ]
  },
  "state": {
    "store": "in-memory rolling window per route",
    "shape": "Dict keyed by route_id; value = list of last 100 RTT samples (ms)",
    "ttl": "Window entries expire after 1h without new probes",
    "recovery": "Window cleared on restart; first probe cycle rebuilds estimates from scratch.",
    "size_estimate": "~800 bytes per route (100 \u00d7 8-byte floats)"
  },
  "concurrency": {
    "execution_model": "scheduled coroutine per route",
    "max_in_flight": 10,
    "idempotency_key": "route + probe_trigger_ts_ms",
    "timeout_ms": 1000,
    "backpressure": "Drop probe if previous probe for same route still in flight",
    "locking": "per-route mutex for rolling window writes"
  },
  "dependencies": {
    "depends_on": [
      {
        "bot_id": "internal.scheduler",
        "why": "Provides probe triggers every probe_interval_s.",
        "contract": "Probe fires within \u00b15s of scheduled interval."
      }
    ],
    "emits_to": [
      {
        "bot_id": "exec.orderlifecyclemanager",
        "why": "Degraded route flags inform lifecycle manager to escalate stuck-order thresholds.",
        "contract": "ObservationReport with route_degraded=true consumed by exec bots."
      }
    ],
    "sibling": [],
    "external": [
      {
        "service": "CLOB V2 auth API",
        "endpoint": "https://clob.polymarket.com",
        "sla": "99.95% / 200ms p99",
        "failure_mode": "Probe timeout counted as 1000ms in rolling window."
      },
      {
        "service": "WS user feed",
        "endpoint": "wss://ws-subscriptions-clob.polymarket.com/ws/user",
        "sla": "best-effort",
        "failure_mode": "If heartbeat absent > 5s, feed_lag recorded as 5000ms."
      }
    ]
  },
  "security_surfaces": {
    "signs_orders": false,
    "private_key_access": "none",
    "abuse_vectors": [
      "Flooding probe scheduler to exhaust rate-limit budget with unnecessary latency checks",
      "Injecting fake degraded-route state to suppress order submission on healthy routes"
    ],
    "mitigations": [
      "Probe rate capped at 1/probe_interval_s per route; scheduler enforces minimum interval",
      "Route degraded state writable only by LatencyProfiler process; read by other exec bots via internal read-only API"
    ]
  },
  "failure_injection": [
    {
      "scenario": "CLOB_AUTH_HIGH_LATENCY",
      "how_to_inject": "Add 600ms artificial delay to clob_auth GET /time responses",
      "expected_behaviour": "p99 > fail_p99_ms after enough samples; route flagged degraded; LATENCY_HARD_BREACH emitted",
      "recovery": "Delay removed; next probe cycle shows improved p99; route unflagged after 3 healthy probes"
    },
    {
      "scenario": "WS_USER_HEARTBEAT_STALE",
      "how_to_inject": "Stop ws_user heartbeat for 10s",
      "expected_behaviour": "feed_lag = 10000ms; LATENCY_HARD_BREACH emitted for ws_user route",
      "recovery": "Heartbeat resumes; lag drops; route unflagged"
    },
    {
      "scenario": "PROBE_RATE_LIMIT_EXHAUSTION",
      "how_to_inject": "Reduce probe_interval_s to 1s and increase routes_to_probe to 10 entries",
      "expected_behaviour": "Rate limit budget exhausted; WARN emitted; probe interval auto-clamped to minimum",
      "recovery": "Config corrected; probes resume at safe interval"
    }
  ],
  "runbook": {
    "summary": "LatencyProfiler incidents are always route degradations. Check CLOB status page and ws_user heartbeat freshness first.",
    "oncall_actions": [
      {
        "alert": "LatencyProfilerRoutesDegraded",
        "first_step": "Check Polymarket status page; check CLOB auth endpoint health. If degraded, pause order submission until route recovers.",
        "diagnosis": "",
        "mitigation": "",
        "escalation": "Infra on-call if CLOB unreachable > 2 min"
      },
      {
        "alert": "LatencyProfilerHighP99",
        "first_step": "Check p99 histogram by route; identify which route is degraded. Cross-reference with ExchangeStatusMonitor.",
        "diagnosis": "",
        "mitigation": "",
        "escalation": "Exec pod lead if p99 > 750ms sustained"
      }
    ],
    "manual_overrides": [
      {
        "name": "unflag_route",
        "how": "polytraders bot unflag-route exec.latencyprofiler --route clob_auth",
        "when": "Route was incorrectly flagged degraded due to a probe anomaly; confirm route is healthy first.",
        "command": "polytraders bot unflag-route exec.latencyprofiler --route clob_auth",
        "effect": "Clears the degraded flag for the specified route."
      }
    ],
    "healthcheck": "GET /internal/health/latencyprofiler -> 200 if all probed routes are healthy (degraded_routes=0 and p99 <= fail_p99_ms on all routes). Red: degraded_routes > 0, probe_errors_total spiking, or scheduler not firing."
  },
  "promotion_gates": {
    "to_shadow": [
      {
        "gate": "p95/p99 computation unit tests pass with known input windows",
        "how_measured": "CI test run",
        "threshold": "100% pass"
      }
    ],
    "to_limited_live": [
      {
        "gate": "No false-positive route-degraded flags over 48h shadow run",
        "how_measured": "degraded_routes gauge cross-referenced with CLOB status page",
        "threshold": "Zero false positives"
      }
    ],
    "to_general_live": [
      {
        "gate": "Latency breach detected within 2 probe cycles of actual CLOB degradation over 7-day limited-live",
        "how_measured": "Correlation of LATENCY_HARD_BREACH events with CLOB incident log",
        "threshold": "Detection within 2 \u00d7 probe_interval_s"
      }
    ]
  },
  "reporting": {
    "emits_kinds": [
      "ObservationReport"
    ],
    "topics": [
      "polytraders.reports.observation"
    ],
    "partition_key": "trace_id",
    "cadence": "every-event",
    "retention_class": "30d",
    "sampling_rule": "emit-every",
    "bus_failure_action": "drop-after-buffer",
    "user_visible": "summary-only",
    "consumes_kinds": []
  },
  "capital_impact": "Direct",
  "mode_support": [
    "quarantine"
  ],
  "v3_status": {
    "phase": 5,
    "phase_name": "Execution rails",
    "docs": {
      "done": 27,
      "total": 27,
      "state": "done"
    },
    "impl": {
      "done": 0,
      "total": 15,
      "state": "pending"
    },
    "runtime": {
      "done": 0,
      "total": 8,
      "state": "pending"
    },
    "overall": "pending"
  }
}