// Package pipeline — pre/post hook execution (D-16).
//
// # Security model
//
// Pre and post hooks are arbitrary shell commands executed by the agent
// on its own host with the agent's filesystem permissions. They are
// inherently dangerous: any code path that can write a hook string into
// backup_jobs can run code on every agent that polls that job.
//
// The agent host owner must trust the user's backup config — a
// compromised server could push hostile hooks. We mitigate this with
// defense-in-depth limits enforced on BOTH sides:
//
//	server: validates max command length (HookCommandMaxBytes)
//	        and hook count (HooksMaxCount) at job-config time.
//	agent : enforces a per-hook timeout (DefaultHookTimeout, capped by
//	        HookTimeoutMax) and a hard total budget per backup run
//	        (HooksTotalBudget) so a wedged hook cannot keep an agent
//	        process pinned forever.
//
// Commands are passed verbatim to /bin/sh -c — NO env-var or path
// expansion happens in our code. The shell performs interpolation; we
// never call fmt.Sprintf-style formatting on user-supplied strings.
//
// Hook stdout and stderr are captured into separate 8 KB ring buffers
// (HookOutputBufBytes) so a noisy hook cannot OOM the agent.
package pipeline

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os/exec"
	"sync"
	"syscall"
	"time"
)

// Limits — these constants encode the security model above and are
// referenced by the server-side validators.
const (
	// HookCommandMaxBytes caps each hook string. 4 KB matches the
	// argv/env headroom on every supported OS and is well above any
	// realistic shell-pipeline length.
	HookCommandMaxBytes = 4 * 1024

	// HooksMaxCount caps the number of pre/post hooks per job. 16 is
	// generous for any normal workflow (snapshot → quiesce → notify),
	// while preventing a runaway config from generating dozens of
	// child processes per run.
	HooksMaxCount = 16

	// HookOutputBufBytes is the per-stream (stdout, stderr) ring-buffer
	// size. 8 KB is small enough to keep many concurrent runs in
	// memory and large enough to capture typical hook chatter.
	HookOutputBufBytes = 8 * 1024

	// DefaultHookTimeout is the per-hook timeout when the job config
	// does not override it.
	DefaultHookTimeout = 5 * time.Minute

	// HookTimeoutMax caps the per-hook timeout regardless of job
	// config. Prevents a single hostile hook from hanging the agent.
	HookTimeoutMax = 15 * time.Minute

	// HooksTotalBudget is the hard ceiling for the combined runtime of
	// every pre+post hook in a single backup run. Once exceeded,
	// further hooks return immediately with ErrHooksBudgetExceeded.
	HooksTotalBudget = 30 * time.Minute
)

// ErrHooksBudgetExceeded indicates the total hook runtime budget for
// a backup run was exhausted.
var ErrHooksBudgetExceeded = errors.New("pipeline: hook budget exceeded for run")

// HookResult is the post-mortem of a single hook invocation. Stdout and
// Stderr are best-effort: the last HookOutputBufBytes of each stream are
// kept, earlier bytes are dropped.
type HookResult struct {
	// Command is the raw shell string that was executed (informational).
	Command string

	// ExitCode is the process exit code. 0 == success. It is -1
	// whenever no real exit code is available: timeouts, context
	// cancellations, failures to start the process, and hooks that
	// were terminated by a signal.
	ExitCode int

	// Stdout / Stderr hold up to HookOutputBufBytes of captured output.
	Stdout string
	Stderr string

	// Duration is the wall-clock time the hook took.
	Duration time.Duration

	// TimedOut indicates the hook was killed because its per-hook
	// timeout fired (vs. caller-cancelled or completed naturally).
	TimedOut bool
}

// RunHook executes command under /bin/sh -c, applying timeout and
// capturing up to HookOutputBufBytes of stdout/stderr each.
//
// The returned error is non-nil when the hook FAILED (non-zero exit,
// timeout, or process spawn error). HookResult is always returned with
// whatever fields are known.
//
// Environment variables in env are added on top of the current process
// environment (caller can pass nil for default).
func RunHook(ctx context.Context, command string, env []string, timeout time.Duration) (HookResult, error) { if command == "" { return HookResult{}, errors.New("pipeline: empty hook command") } if timeout <= 0 { timeout = DefaultHookTimeout } if timeout > HookTimeoutMax { timeout = HookTimeoutMax } // Per-hook timeout layered on top of the caller's ctx. hookCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() cmd := exec.CommandContext(hookCtx, "/bin/sh", "-c", command) if env != nil { // Append (not replace) so the agent's PATH etc. are still // available to the shell. cmd.Env = append(cmd.Env, env...) } stdoutBuf := newHookRingBuffer(HookOutputBufBytes) stderrBuf := newHookRingBuffer(HookOutputBufBytes) cmd.Stdout = stdoutBuf cmd.Stderr = stderrBuf start := time.Now() runErr := cmd.Run() dur := time.Since(start) result := HookResult{ Command: command, Duration: dur, Stdout: stdoutBuf.String(), Stderr: stderrBuf.String(), } if runErr == nil { result.ExitCode = 0 return result, nil } // Distinguish: timeout / parent-cancel / non-zero exit. if hookCtx.Err() != nil { // Either deadline (timeout) or caller cancel. Mark exit -1. result.ExitCode = -1 if errors.Is(hookCtx.Err(), context.DeadlineExceeded) { result.TimedOut = true return result, fmt.Errorf("hook timed out after %s: %w", timeout, hookCtx.Err()) } return result, fmt.Errorf("hook cancelled: %w", hookCtx.Err()) } // Non-zero exit (or process-start failure). exec.ExitError carries // the exit code. var exitErr *exec.ExitError if errors.As(runErr, &exitErr) { if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok { result.ExitCode = ws.ExitStatus() } else { result.ExitCode = exitErr.ExitCode() } return result, fmt.Errorf("hook exited non-zero (%d): %w", result.ExitCode, runErr) } // Spawn failure, signal, or unknown error. result.ExitCode = -1 return result, fmt.Errorf("hook failed: %w", runErr) } // HookSet executes a sequence of hooks under a shared total-budget. 
// It returns the per-hook results in order, and the first error (if // any). The shared budget across the whole set is HooksTotalBudget; // callers should hold one HookSet per backup run and feed it the // pre_hooks list, then the post_hooks list. type HookSet struct { mu sync.Mutex consumed time.Duration } // NewHookSet returns an empty HookSet that has not yet consumed any // budget. func NewHookSet() *HookSet { return &HookSet{} } // Run executes one hook, charging its duration against the set's // budget. If the budget is already exhausted when Run is called, the // hook is skipped and ErrHooksBudgetExceeded is returned with an empty // HookResult. func (h *HookSet) Run(ctx context.Context, command string, env []string, timeout time.Duration) (HookResult, error) { h.mu.Lock() used := h.consumed h.mu.Unlock() if used >= HooksTotalBudget { return HookResult{Command: command}, ErrHooksBudgetExceeded } // Cap the per-hook timeout at the remaining budget. remaining := HooksTotalBudget - used if timeout <= 0 || timeout > remaining { timeout = remaining } res, err := RunHook(ctx, command, env, timeout) h.mu.Lock() h.consumed += res.Duration h.mu.Unlock() return res, err } // hookRingBuffer keeps the LAST `cap` bytes written to it. Writes that // exceed `cap` discard the oldest bytes. Safe for the io.Writer // contract used by exec.Cmd; not safe for concurrent writes (exec.Cmd // writes from one goroutine per stream). type hookRingBuffer struct { buf []byte cap int full bool pos int // next write position } func newHookRingBuffer(cap int) *hookRingBuffer { return &hookRingBuffer{buf: make([]byte, 0, cap), cap: cap} } func (r *hookRingBuffer) Write(p []byte) (int, error) { n := len(p) if n == 0 { return 0, nil } // If we haven't yet wrapped, grow the slice up to cap. if !r.full && len(r.buf) < r.cap { room := r.cap - len(r.buf) if n <= room { r.buf = append(r.buf, p...) 
r.pos = (r.pos + n) % r.cap if len(r.buf) == r.cap { r.full = true } return n, nil } r.buf = append(r.buf, p[:room]...) p = p[room:] r.full = true r.pos = 0 } // We're full; overwrite oldest bytes. if len(p) >= r.cap { // Only the trailing cap bytes matter. copy(r.buf, p[len(p)-r.cap:]) r.pos = 0 return n, nil } // Write may wrap around the end of the slice. end := r.pos + len(p) if end <= r.cap { copy(r.buf[r.pos:], p) } else { first := r.cap - r.pos copy(r.buf[r.pos:], p[:first]) copy(r.buf[:len(p)-first], p[first:]) } r.pos = (r.pos + len(p)) % r.cap return n, nil } // String returns the buffer contents in write order (oldest first). func (r *hookRingBuffer) String() string { if !r.full { return string(r.buf) } out := bytes.NewBuffer(make([]byte, 0, r.cap)) out.Write(r.buf[r.pos:]) out.Write(r.buf[:r.pos]) return out.String() } // Ensure hookRingBuffer satisfies io.Writer. var _ io.Writer = (*hookRingBuffer)(nil)