Law/scripts/ops/perf_long_chat_workspace.sh
2026-03-17 00:49:39 +03:00

266 lines
8.6 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Seeds one request with a long chat history through the backend container,
# times the admin workspace and messages-window endpoints against it, and
# writes a Markdown report.
#
# Usage: perf_long_chat_workspace.sh [BASE_URL]
#   BASE_URL  API origin to measure (default: http://localhost:8081)
#
# Environment:
#   REPORT_DIR               report output directory (default: reports/perf)
#   PERF_ITERATIONS          requests per endpoint (default: 5)
#   PERF_ADMIN_EMAIL         admin login e-mail (default: admin@example.com)
#   PERF_ADMIN_PASSWORD      admin login password (default: admin123)
#   PERF_LONG_CHAT_MESSAGES  number of messages to seed (default: 2000)
#   PERF_CHAT_WINDOW_LIMIT   page size for the messages-window call (default: 50)
set -euo pipefail

# Relative paths (compose files, REPORT_DIR) are resolved from the directory
# two levels above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
cd "$ROOT_DIR"

BASE_URL="${1:-http://localhost:8081}"
REPORT_DIR="${REPORT_DIR:-reports/perf}"
ITERATIONS="${PERF_ITERATIONS:-5}"
ADMIN_EMAIL="${PERF_ADMIN_EMAIL:-admin@example.com}"
ADMIN_PASSWORD="${PERF_ADMIN_PASSWORD:-admin123}"
MESSAGE_COUNT="${PERF_LONG_CHAT_MESSAGES:-2000}"
WINDOW_LIMIT="${PERF_CHAT_WINDOW_LIMIT:-50}"

# Reject malformed numeric settings before any side effect happens (database
# seeding, HTTP calls), then normalise them to plain decimal so a value such
# as "08" is never read as octal by later arithmetic.
uint_re='^[0-9]+$'
for knob in ITERATIONS MESSAGE_COUNT WINDOW_LIMIT; do
  if [[ ! "${!knob}" =~ $uint_re ]]; then
    echo "invalid ${knob}: expected a non-negative integer, got '${!knob}'" >&2
    exit 1
  fi
  printf -v "$knob" '%d' "$((10#${!knob}))"
done
unset knob uint_re

TS_HUMAN="$(date -u +"%Y-%m-%d %H:%M:%S UTC")"
TS_FILE="$(date -u +"%Y%m%d-%H%M%S")"
REPORT_FILE="${REPORT_DIR}/perf-long-chat-workspace-${TS_FILE}.md"
mkdir -p "$REPORT_DIR"

# Scratch space for intermediate files; removed on every exit path.
# ${TMP_DIR:?} makes the cleanup refuse to run with an empty path.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf -- "${TMP_DIR:?}"' EXIT
#######################################
# Stop the script unless the named command can be resolved by `command -v`.
# Arguments: $1 - command name to look up
# Outputs:   "missing command: <name>" on stderr when the lookup fails
# Returns:   0 when the command is found; exits with status 1 otherwise
#######################################
require_cmd() {
  if command -v "$1" >/dev/null 2>&1; then
    return 0
  fi
  printf '%s\n' "missing command: $1" >&2
  exit 1
}
# Check every external tool this script invokes before doing any work.
for required_tool in curl python3 docker; do
  require_cmd "$required_tool"
done
unset required_tool
#######################################
# Encode a string as a JSON string literal, surrounding quotes included.
# Arguments: $1 - raw text to encode
# Outputs:   the json.dumps() encoding of $1 on stdout
#######################################
json_escape() {
  python3 -c 'import json, sys; print(json.dumps(sys.argv[1]))' "$1"
}
# Seed one request plus MESSAGE_COUNT chat messages by running a Python snippet
# inside the backend container. The snippet prints a single JSON object
# (request_id, track_number, message_count) which is captured in SEED_JSON_FILE.
#
# `docker compose exec` does not forward the caller's environment into the
# container, so the message count is passed explicitly with `-e`; without it
# the snippet would always fall back to its built-in default of 2000. The
# host-side assignment is kept so the docker CLI itself still sees the variable
# exactly as before.
SEED_JSON_FILE="$TMP_DIR/seed.json"
PERF_LONG_CHAT_MESSAGES="$MESSAGE_COUNT" docker compose -f docker-compose.yml -f docker-compose.local.yml exec -T \
  -e PERF_LONG_CHAT_MESSAGES="$MESSAGE_COUNT" \
  backend python - <<'PY' >"$SEED_JSON_FILE"
import json
import os
from datetime import datetime, timedelta, timezone

from app.db.session import SessionLocal
from app.models.request import Request
from app.models.message import Message

# At least one message is always seeded; an empty/unset variable means 2000.
message_count = max(1, int(os.environ.get("PERF_LONG_CHAT_MESSAGES") or "2000"))
now = datetime.now(timezone.utc)
track = f"TRK-PERF-CHAT-{now.strftime('%Y%m%d%H%M%S')}"
db = SessionLocal()
try:
    req = Request(
        track_number=track,
        client_name="Perf Chat Client",
        client_phone="+79990009999",
        topic_code="consulting",
        status_code="IN_PROGRESS",
        description=f"Perf long chat seed ({message_count})",
        extra_fields={},
    )
    db.add(req)
    # Flush so req.id is available for the messages below.
    db.flush()
    # Messages are spaced one minute apart, the last one a minute before now.
    started_at = now - timedelta(minutes=message_count)
    batch = []
    for index in range(message_count):
        created_at = started_at + timedelta(minutes=index)
        batch.append(
            Message(
                request_id=req.id,
                author_type="CLIENT" if index % 2 == 0 else "LAWYER",
                author_name="Клиент" if index % 2 == 0 else "Юрист",
                body=f"perf message {index}",
                created_at=created_at,
                updated_at=created_at,
            )
        )
        # Hand messages to the session in chunks of 500.
        if len(batch) >= 500:
            db.add_all(batch)
            db.flush()
            batch.clear()
    if batch:
        db.add_all(batch)
        db.flush()
    db.commit()
    print(json.dumps({"request_id": str(req.id), "track_number": req.track_number, "message_count": message_count}))
finally:
    db.close()
PY
#######################################
# Print one top-level field of the JSON document stored in SEED_JSON_FILE.
# Globals:   SEED_JSON_FILE (read)
# Arguments: $1 - key to look up
# Outputs:   the field value, converted with str(), on stdout
#######################################
seed_field() {
  python3 - "$SEED_JSON_FILE" "$1" <<'PY'
import json
import sys

seed_path, field = sys.argv[1:3]
with open(seed_path, "r", encoding="utf-8") as fh:
    seed = json.load(fh)
print(str(seed[field]))
PY
}

REQUEST_ID="$(seed_field request_id)"
TRACK_NUMBER="$(seed_field track_number)"
# Log in as the admin user and keep the bearer token for the measured calls.
# json_escape quotes both values, so special characters in the credentials
# cannot break the JSON document.
LOGIN_BODY="$(printf '{"email":%s,"password":%s}' "$(json_escape "$ADMIN_EMAIL")" "$(json_escape "$ADMIN_PASSWORD")")"
LOGIN_RESPONSE_FILE="$TMP_DIR/login.json"
# The body is fed on stdin (-d @-) instead of as an argument so the password
# is not part of curl's command line in process listings. printf is a shell
# builtin, so it adds no argv exposure of its own.
# NOTE(review): json_escape still receives the credentials as arguments.
printf '%s' "$LOGIN_BODY" \
  | curl -fsS \
    -H "Content-Type: application/json" \
    -X POST \
    -d @- \
    "$BASE_URL/api/admin/auth/login" >"$LOGIN_RESPONSE_FILE"
# Extract access_token; the snippet exits non-zero (aborting the script under
# `set -e`) when the response carries no usable token.
AUTH_TOKEN="$(python3 - "$LOGIN_RESPONSE_FILE" <<'PY'
import json
import sys

with open(sys.argv[1], "r", encoding="utf-8") as fh:
    data = json.load(fh)
token = str(data.get("access_token") or "").strip()
if not token:
    raise SystemExit("login did not return access_token")
print(token)
PY
)"
#######################################
# Request one endpoint ITERATIONS times and append one TSV row per run to
# "$TMP_DIR/raw.tsv".
#
# Row columns: name, run, total ms (curl time_total * 1000), X-Perf-Label
# header, X-Perf-Duration-Ms header, number of items in "rows"/"messages",
# "total"/"messages_total", "has_more"/"messages_has_more".
#
# Globals:   ITERATIONS, TMP_DIR, AUTH_TOKEN, BASE_URL (read)
# Arguments: $1 - label used in the TSV and in scratch file names
#            $2 - request path appended to BASE_URL
# Outputs:   on a non-200 response, the status and response body on stderr
# Returns:   0 on success; exits the script with status 1 on a non-200 response
#######################################
measure_endpoint() {
  local name="$1"
  local path="$2"
  # `run` is local too so the loop counter cannot clobber a caller's variable.
  local run headers_file body_file curl_meta status_code elapsed_s total_ms

  # Arithmetic loop instead of `seq`: no extra process, and an ITERATIONS of 0
  # simply means no runs on every platform.
  for ((run = 1; run <= ITERATIONS; run++)); do
    headers_file="$TMP_DIR/${name}-${run}.headers"
    body_file="$TMP_DIR/${name}-${run}.body"
    curl_meta="$(curl -sS \
      -D "$headers_file" \
      -o "$body_file" \
      -H "Authorization: Bearer $AUTH_TOKEN" \
      -w '%{http_code} %{time_total}' \
      "$BASE_URL$path")"
    # curl_meta is "<http status> <seconds>".
    read -r status_code elapsed_s <<<"$curl_meta"
    # LC_ALL=C keeps the decimal separator a dot for the float parsing below.
    total_ms="$(LC_ALL=C awk -v seconds="$elapsed_s" 'BEGIN { printf "%.2f", seconds * 1000 }')"
    if [[ "$status_code" != "200" ]]; then
      echo "endpoint ${name} failed: HTTP ${status_code}" >&2
      cat "$body_file" >&2 || true
      exit 1
    fi
    python3 - "$headers_file" "$body_file" "$name" "$run" "$total_ms" >>"$TMP_DIR/raw.tsv" <<'PY'
import json
import sys

headers_path, body_path, name, run, total_ms = sys.argv[1:6]

# Collect response headers by lower-cased name; lines without a colon (the
# status line, blank separators) are skipped.
headers = {}
with open(headers_path, "r", encoding="utf-8") as fh:
    for line in fh:
        line = line.strip()
        if not line or ":" not in line:
            continue
        key, value = line.split(":", 1)
        headers[key.strip().lower()] = value.strip()

# Best effort: a body that is not valid JSON is treated as an empty object.
payload = {}
with open(body_path, "r", encoding="utf-8") as fh:
    try:
        payload = json.load(fh)
    except Exception:
        payload = {}
# Valid JSON that is not an object (list, string, number) has no fields to
# read; treat it as empty too instead of failing on .get().
if not isinstance(payload, dict):
    payload = {}

rows = payload.get("rows") or payload.get("messages") or []
print("\t".join([
    name,
    run,
    total_ms,
    headers.get("x-perf-label", ""),
    headers.get("x-perf-duration-ms", ""),
    str(len(rows) if isinstance(rows, list) else 0),
    str(payload.get("total", payload.get("messages_total", 0)) or 0),
    str(payload.get("has_more", payload.get("messages_has_more", False))),
]))
PY
  done
}
# Start from an empty samples file, then time both endpoints for the seeded
# request: the workspace view first, then one page of the messages window.
: >"$TMP_DIR/raw.tsv"
workspace_path="/api/admin/requests/${REQUEST_ID}/workspace"
older_page_path="/api/admin/chat/requests/${REQUEST_ID}/messages-window?before_count=${WINDOW_LIMIT}&limit=${WINDOW_LIMIT}"
measure_endpoint "request_workspace_long_chat" "$workspace_path"
measure_endpoint "messages_window_older_page" "$older_page_path"
# Turn the collected TSV samples into the Markdown report: a per-endpoint
# summary table followed by a table of every individual run.
python3 - "$TMP_DIR/raw.tsv" "$REPORT_FILE" "$TS_HUMAN" "$BASE_URL" "$REQUEST_ID" "$TRACK_NUMBER" "$MESSAGE_COUNT" "$ITERATIONS" <<'PY'
import csv
import statistics
import sys
from collections import defaultdict

(
    raw_path,
    report_path,
    ts_human,
    base_url,
    request_id,
    track_number,
    message_count,
    iterations,
) = sys.argv[1:9]

# Endpoints shown in the summary table, in display order.
SUMMARY_ENDPOINTS = ("request_workspace_long_chat", "messages_window_older_page")
# Placeholder used when an endpoint has no samples at all.
EMPTY_SAMPLE = {"rows_len": 0, "total_items": 0, "has_more": False}


def parse_sample(fields):
    """Convert one 8-column TSV record into (endpoint name, sample dict)."""
    name, run, total_ms, perf_label, perf_duration, rows_len, total_items, has_more = fields
    sample = {
        "run": int(run),
        "total_ms": float(total_ms or 0),
        "perf_label": perf_label or "-",
        "perf_duration_ms": float(perf_duration or 0),
        "rows_len": int(rows_len or 0),
        "total_items": int(total_items or 0),
        "has_more": str(has_more).strip().lower() == "true",
    }
    return name, sample


def percentile(sorted_values, ratio):
    """Pick the value at the rounded position ratio * (n - 1); 0.0 when empty."""
    if len(sorted_values) <= 1:
        return sorted_values[0] if sorted_values else 0.0
    return sorted_values[round((len(sorted_values) - 1) * ratio)]


def mean_or_zero(values):
    """Arithmetic mean, or 0.0 for an empty sequence."""
    return statistics.mean(values) if values else 0.0


samples = defaultdict(list)
with open(raw_path, "r", encoding="utf-8") as fh:
    for fields in csv.reader(fh, delimiter="\t"):
        endpoint, sample = parse_sample(fields)
        samples[endpoint].append(sample)

with open(report_path, "w", encoding="utf-8") as out:
    print("# Perf Long Chat Workspace Report", file=out)
    print(file=out)
    print(f"- Timestamp: `{ts_human}`", file=out)
    print(f"- Base URL: `{base_url}`", file=out)
    print(f"- Request ID: `{request_id}`", file=out)
    print(f"- Track Number: `{track_number}`", file=out)
    print(f"- Seeded Messages: `{message_count}`", file=out)
    print(f"- Iterations per endpoint: `{iterations}`", file=out)
    print(file=out)

    print("| Endpoint | Perf Label | Avg Total ms | P95 Total ms | Avg Server ms | Rows | Total | Has More |", file=out)
    print("|---|---|---:|---:|---:|---:|---:|---|", file=out)
    for endpoint in SUMMARY_ENDPOINTS:
        runs = samples.get(endpoint, [])
        client_times = sorted(entry["total_ms"] for entry in runs)
        # Runs without a positive server-side duration are left out of the average.
        server_times = [entry["perf_duration_ms"] for entry in runs if entry["perf_duration_ms"] > 0]
        first = runs[0] if runs else EMPTY_SAMPLE
        label = runs[0]["perf_label"] if runs else "-"
        print(
            f"| {endpoint} | `{label}` | {mean_or_zero(client_times):.2f} | "
            f"{percentile(client_times, 0.95):.2f} | {mean_or_zero(server_times):.2f} | "
            f"{first['rows_len']} | {first['total_items']} | {first['has_more']} |",
            file=out,
        )

    print(file=out)
    print("## Raw Runs", file=out)
    print(file=out)
    print("| Endpoint | Run | Total ms | Server ms | Rows | Total | Has More |", file=out)
    print("|---|---:|---:|---:|---:|---:|---|", file=out)
    for endpoint, runs in samples.items():
        for entry in sorted(runs, key=lambda value: value["run"]):
            print(
                f"| {endpoint} | {entry['run']} | {entry['total_ms']:.2f} | {entry['perf_duration_ms']:.2f} | "
                f"{entry['rows_len']} | {entry['total_items']} | {entry['has_more']} |",
                file=out,
            )
PY
printf '%s\n' "report: $REPORT_FILE"