backupy-agent/apps/agent/internal/pipeline/hooks.go
TronoSfera 8b0c978337 feat(initial): Backupy agent + backupy-decrypt CLI
Source ports from the TronoSfera/backupy-cloud monorepo:
- apps/agent/        — Go agent (WSS client, persistent queue, Docker
                       discovery, 5 DB drivers: PG/MySQL/Mongo/Redis/SQLite,
                       pre/post hooks, Prometheus metrics)
- apps/backupy-decrypt/ — standalone CLI for client-side decryption
- packages/proto/    — protobuf wire format (generated .pb.go committed
                       so the repo builds without protoc)
- docs/              — agent spec + wire-protocol contract

Apache-2.0 license. Image published to ghcr.io/tronosfera/backupy-agent
on every v* tag via .github/workflows/release.yml (multi-arch amd64+arm64).
2026-05-17 20:22:35 +03:00

283 lines
8.9 KiB
Go

// Package pipeline — pre/post hook execution (D-16).
//
// # Security model
//
// Pre and post hooks are arbitrary shell commands executed by the agent
// on its own host with the agent's filesystem permissions. They are
// inherently dangerous: any code path that can write a hook string into
// backup_jobs can run code on every agent that polls that job.
//
// The agent host owner must trust the user's backup config — a
// compromised server could push hostile hooks. We mitigate this with
// defense-in-depth limits enforced on BOTH sides:
//
// server: validates max command length (HookCommandMaxBytes)
// and hook count (HooksMaxCount) at job-config time.
// agent : enforces a per-hook timeout (DefaultHookTimeout, capped by
// HookTimeoutMax) and a hard total budget per backup run
// (HooksTotalBudget) so a wedged hook cannot keep an agent
// process pinned forever.
//
// Commands are passed verbatim to /bin/sh -c — NO env-var or path
// expansion happens in our code. The shell performs interpolation; we
// never call fmt.Sprintf-style formatting on user-supplied strings.
//
// Hook stdout and stderr are captured into separate 8 KB ring buffers
// (HookOutputBufBytes) so a noisy hook cannot OOM the agent.
package pipeline
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"sync"
	"syscall"
	"time"
)
// Size and count limits. Together with the time limits declared next
// to them, these constants encode the security model described in the
// package comment and are referenced by the server-side validators.
const (
	// HookCommandMaxBytes is the maximum length of a single hook
	// string. 4 KB matches the argv/env headroom on every supported
	// OS and is far longer than any realistic shell pipeline.
	HookCommandMaxBytes = 4 << 10

	// HooksMaxCount is the maximum number of pre/post hooks per job.
	// 16 is generous for a normal workflow (snapshot → quiesce →
	// notify) yet stops a runaway config from spawning dozens of
	// child processes per run.
	HooksMaxCount = 16

	// HookOutputBufBytes is the size of the ring buffer kept for each
	// output stream (stdout and stderr separately). 8 KB is small
	// enough to hold many concurrent runs in memory and large enough
	// for typical hook chatter.
	HookOutputBufBytes = 8 << 10
)

// Time limits applied to hook execution.
const (
	// DefaultHookTimeout is the per-hook timeout used when the job
	// config does not supply one.
	DefaultHookTimeout = 5 * time.Minute

	// HookTimeoutMax is the upper bound for any per-hook timeout, no
	// matter what the job config asks for. It keeps a single hostile
	// hook from hanging the agent.
	HookTimeoutMax = 15 * time.Minute

	// HooksTotalBudget is the hard ceiling on the combined runtime of
	// all pre+post hooks in one backup run. Once it is used up,
	// further hooks return immediately with ErrHooksBudgetExceeded.
	HooksTotalBudget = 30 * time.Minute
)
// Sentinel errors returned by hook execution.
var (
	// ErrHooksBudgetExceeded reports that the total hook runtime
	// budget for a backup run has been used up. Compare with
	// errors.Is.
	ErrHooksBudgetExceeded = errors.New("pipeline: hook budget exceeded for run")
)
// HookResult describes the outcome of one hook invocation. The captured
// output is best-effort: only the last HookOutputBufBytes of each stream
// survive, anything written earlier is discarded.
type HookResult struct {
	// Command is the raw shell string that was run. Informational only.
	Command string

	// ExitCode is the exit code of the process; 0 means success. It is
	// -1 when the hook timed out, was cancelled through its context,
	// or could not be started.
	ExitCode int

	// Stdout and Stderr each hold at most HookOutputBufBytes of the
	// corresponding output stream.
	Stdout, Stderr string

	// Duration is the wall-clock time the hook took.
	Duration time.Duration

	// TimedOut is true when the hook was killed because its per-hook
	// timeout fired, as opposed to being cancelled by the caller or
	// finishing on its own.
	TimedOut bool
}
// RunHook executes command under /bin/sh -c with a bounded runtime and
// captures the last HookOutputBufBytes of stdout and of stderr.
//
// Parameters:
//   - ctx: parent context; cancelling it kills the hook.
//   - command: shell string handed verbatim to /bin/sh -c. An empty
//     string is rejected before anything is spawned.
//   - env: extra KEY=VALUE entries layered on top of the agent's own
//     process environment. nil or empty leaves the environment
//     inherited unchanged. Entries in env come last, so per os/exec
//     they win over an inherited variable with the same key.
//   - timeout: per-hook limit. Values <= 0 select DefaultHookTimeout;
//     values above HookTimeoutMax are clamped to HookTimeoutMax.
//
// The returned error is non-nil when the hook FAILED (non-zero exit,
// timeout, cancellation, or process spawn error). Except for the
// empty-command case, HookResult is always returned with whatever
// fields are known. Context errors are wrapped, so errors.Is(err,
// context.DeadlineExceeded) and errors.Is(err, context.Canceled) work.
//
// NOTE(review): cmd.Run also waits for the stdout/stderr copy to finish.
// A hook that leaves a background process holding those pipes may keep
// RunHook blocked after the shell itself was killed. If the module's Go
// version allows it (Cmd.WaitDelay, Go 1.20+), consider bounding that
// wait — confirm against go.mod.
func RunHook(ctx context.Context, command string, env []string, timeout time.Duration) (HookResult, error) {
	if command == "" {
		return HookResult{}, errors.New("pipeline: empty hook command")
	}
	if timeout <= 0 {
		timeout = DefaultHookTimeout
	}
	if timeout > HookTimeoutMax {
		timeout = HookTimeoutMax
	}

	// Per-hook timeout layered on top of the caller's ctx.
	hookCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	cmd := exec.CommandContext(hookCtx, "/bin/sh", "-c", command)
	if len(env) > 0 {
		// Start from the agent's environment so PATH etc. stay
		// available to the shell. cmd.Env is nil at this point, so
		// appending to it directly would REPLACE the environment with
		// just the caller's entries.
		cmd.Env = append(os.Environ(), env...)
	}

	stdoutBuf := newHookRingBuffer(HookOutputBufBytes)
	stderrBuf := newHookRingBuffer(HookOutputBufBytes)
	cmd.Stdout = stdoutBuf
	cmd.Stderr = stderrBuf

	start := time.Now()
	runErr := cmd.Run()
	result := HookResult{
		Command:  command,
		Duration: time.Since(start),
		Stdout:   stdoutBuf.String(),
		Stderr:   stderrBuf.String(),
	}
	if runErr == nil {
		return result, nil
	}

	// Distinguish: per-hook timeout / caller cancel / non-zero exit.
	if ctxErr := hookCtx.Err(); ctxErr != nil {
		result.ExitCode = -1
		// hookCtx also reports DeadlineExceeded when the CALLER's
		// deadline expires. Only call it a hook timeout while the
		// parent context is still live, so TimedOut and the reported
		// duration really refer to the per-hook limit.
		if ctx.Err() == nil && errors.Is(ctxErr, context.DeadlineExceeded) {
			result.TimedOut = true
			return result, fmt.Errorf("hook timed out after %s: %w", timeout, ctxErr)
		}
		return result, fmt.Errorf("hook cancelled: %w", ctxErr)
	}

	// The process ran and ended unsuccessfully. exec.ExitError carries
	// the exit code; a process terminated by a signal also lands here
	// and reports -1.
	var exitErr *exec.ExitError
	if errors.As(runErr, &exitErr) {
		if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok {
			result.ExitCode = ws.ExitStatus()
		} else {
			result.ExitCode = exitErr.ExitCode()
		}
		return result, fmt.Errorf("hook exited non-zero (%d): %w", result.ExitCode, runErr)
	}

	// Spawn failure or any other error that is not an exit status.
	result.ExitCode = -1
	return result, fmt.Errorf("hook failed: %w", runErr)
}
// HookSet accounts for the hook runtime spent during one backup run so
// that every hook executed through it draws from the same shared
// allowance, HooksTotalBudget. Callers should hold one HookSet per
// backup run and feed it the pre_hooks list, then the post_hooks list.
//
// A HookSet contains a mutex and must not be copied after first use.
type HookSet struct {
	// mu guards consumed.
	mu sync.Mutex
	// consumed is the summed duration of the hooks run so far.
	consumed time.Duration
}

// NewHookSet returns a fresh HookSet with none of its budget consumed.
func NewHookSet() *HookSet {
	return new(HookSet)
}
// Run executes one hook through RunHook and charges the hook's duration
// against the set's budget.
//
// If the budget is already exhausted when Run is called, the hook is
// skipped and ErrHooksBudgetExceeded is returned together with a
// HookResult that carries only Command.
//
// A timeout <= 0 selects DefaultHookTimeout. The effective timeout is
// then capped at the budget still remaining, and RunHook additionally
// clamps it to HookTimeoutMax. The result and error of RunHook are
// returned unchanged.
func (h *HookSet) Run(ctx context.Context, command string, env []string, timeout time.Duration) (HookResult, error) {
	h.mu.Lock()
	used := h.consumed
	h.mu.Unlock()

	remaining := HooksTotalBudget - used
	if remaining <= 0 {
		return HookResult{Command: command}, ErrHooksBudgetExceeded
	}

	// Resolve "no timeout configured" to the documented default BEFORE
	// applying the budget cap. Substituting the remaining budget here
	// would let an unconfigured hook run up to HookTimeoutMax instead
	// of DefaultHookTimeout.
	if timeout <= 0 {
		timeout = DefaultHookTimeout
	}
	// Never let a single hook outlive what is left of the budget.
	if timeout > remaining {
		timeout = remaining
	}

	res, err := RunHook(ctx, command, env, timeout)

	h.mu.Lock()
	h.consumed += res.Duration
	h.mu.Unlock()
	return res, err
}
// hookRingBuffer is a fixed-capacity byte sink that retains only the
// LAST `cap` bytes written to it; once that many bytes are stored, each
// further write overwrites the oldest data. It fulfils the io.Writer
// contract used by exec.Cmd. It is not safe for concurrent writes
// (exec.Cmd writes from one goroutine per stream).
type hookRingBuffer struct {
	buf  []byte // storage; grows up to cap bytes, then is overwritten in place
	cap  int    // maximum number of bytes retained
	full bool   // set once buf has reached cap bytes
	pos  int    // next write position
}

// Compile-time check that *hookRingBuffer satisfies io.Writer.
var _ io.Writer = (*hookRingBuffer)(nil)

// newHookRingBuffer returns an empty ring buffer that retains at most
// size bytes.
func newHookRingBuffer(size int) *hookRingBuffer {
	return &hookRingBuffer{
		cap: size,
		buf: make([]byte, 0, size),
	}
}

// Write stores p, dropping the oldest bytes as needed so that no more
// than cap bytes are kept. It always reports len(p) bytes written and a
// nil error.
func (r *hookRingBuffer) Write(p []byte) (int, error) {
	written := len(p)
	if written == 0 {
		return 0, nil
	}

	// Fill phase: the backing slice has not reached capacity yet.
	if !r.full && len(r.buf) < r.cap {
		free := r.cap - len(r.buf)
		if written <= free {
			// Everything fits without overwriting anything.
			r.buf = append(r.buf, p...)
			r.pos = (r.pos + written) % r.cap
			r.full = len(r.buf) == r.cap
			return written, nil
		}
		// Top the slice up to capacity; the remainder of p is handled
		// by the overwrite phase below, starting at the oldest byte.
		r.buf = append(r.buf, p[:free]...)
		p = p[free:]
		r.full, r.pos = true, 0
	}

	// Overwrite phase: replace the oldest bytes.
	switch rest := len(p); {
	case rest >= r.cap:
		// Only the trailing cap bytes can survive.
		copy(r.buf, p[rest-r.cap:])
		r.pos = 0
	default:
		// Copy up to the end of the slice, then wrap to the front
		// with whatever did not fit (possibly nothing).
		head := copy(r.buf[r.pos:], p)
		copy(r.buf, p[head:])
		r.pos = (r.pos + rest) % r.cap
	}
	return written, nil
}

// String returns the retained bytes in write order (oldest first).
func (r *hookRingBuffer) String() string {
	if !r.full {
		return string(r.buf)
	}
	// Once full, the oldest byte sits at pos: emit the tail, then the
	// head.
	return string(bytes.Join([][]byte{r.buf[r.pos:], r.buf[:r.pos]}, nil))
}