backupy-agent/apps/agent/internal/pipeline/pg_dump.go
TronoSfera 8b0c978337 feat(initial): Backupy agent + backupy-decrypt CLI
Source ports from the TronoSfera/backupy-cloud monorepo:
- apps/agent/        — Go agent (WSS client, persistent queue, Docker
                       discovery, 5 DB drivers: PG/MySQL/Mongo/Redis/SQLite,
                       pre/post hooks, Prometheus metrics)
- apps/backupy-decrypt/ — standalone CLI for client-side decryption
- packages/proto/    — protobuf wire format (generated .pb.go committed
                       so the repo builds without protoc)
- docs/              — agent spec + wire-protocol contract

Apache-2.0 license. Image published to ghcr.io/tronosfera/backupy-agent
on every v* tag via .github/workflows/release.yml (multi-arch amd64+arm64).
2026-05-17 20:22:35 +03:00

195 lines
6.7 KiB
Go

package pipeline
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"strconv"
"strings"
backupv1 "github.com/backupy/backupy/packages/proto/gen/go/backupv1"
)
// PgDumpMagic is the magic header that pg_dump custom-format archives
// start with: the five ASCII bytes "PGDMP". The smoke validation step
// (D-08) checks the first five bytes of the dump stream against it
// before anything is uploaded.
const PgDumpMagic = "PGDMP"
// pgDump is the PostgreSQL driver implementation. Every external
// command it needs is executed through runner.
type pgDump struct {
// binary is the on-disk pg_dump executable passed to runner as the
// command name. Tests inject a stub.
binary string
// runner abstracts os/exec so the unit tests can mock it.
runner cmdRunner
}
// NewPgDump returns the production PostgreSQL driver: it resolves the
// bundled "pg_dump" binary through $PATH and executes it with the real
// os/exec-backed runner.
func NewPgDump() Driver {
	d := new(pgDump)
	d.binary = "pg_dump"
	d.runner = realRunner{}
	return d
}
// Name implements Driver.Name. It reports the fixed identifier of this
// driver, "pg_dump".
func (p *pgDump) Name() string {
	const driverName = "pg_dump"
	return driverName
}
// Validate confirms that the driver can back up target. It performs two
// probes, both using only the pg_dump binary so the agent does not need
// to bundle the full psql client:
//
//  1. `pg_dump --version` must succeed and mention "pg_dump"; this is a
//     cheap "binary installed" check.
//  2. A schema-only dump must succeed; this actually opens a connection
//     to the target but transfers almost no data.
//
// The schema produced by the second probe is streamed straight to
// io.Discard rather than buffered, so validating a database with a very
// large schema does not hold the whole schema in memory.
//
// It returns an error if target or target.Connection is nil, or if
// either probe fails.
func (p *pgDump) Validate(ctx context.Context, target *backupv1.Target) error {
	if target == nil || target.Connection == nil {
		return errors.New("pipeline: pg_dump: nil target/connection")
	}

	// `pg_dump --version` returns immediately and exits 0 if the binary
	// is present.
	versionOut, err := p.runner.Output(ctx, p.binary, []string{"--version"}, nil)
	if err != nil {
		return fmt.Errorf("pipeline: pg_dump version probe failed: %w", err)
	}
	if !strings.Contains(strings.ToLower(string(versionOut)), "pg_dump") {
		return fmt.Errorf("pipeline: unexpected pg_dump --version output: %q", string(versionOut))
	}

	// Smoke test: only the exit status matters, so the dump output is
	// discarded as it is produced instead of being collected.
	args := append(p.connArgs(target), "--schema-only", "--no-acl", "--no-owner")
	if err := p.runner.RunStream(ctx, p.binary, args, p.env(target), io.Discard); err != nil {
		return fmt.Errorf("pipeline: pg_dump smoke probe failed: %w", err)
	}
	return nil
}
// Dump runs pg_dump against target and streams the resulting
// custom-format archive to `out`.
//
// It returns an error if target or target.Connection is nil, or if the
// pg_dump process fails. After a successful dump it asks the binary for
// its version to fill DumpInfo.EngineVersion; if that probe fails the
// dump still succeeds and the version falls back to plain "PostgreSQL".
func (p *pgDump) Dump(ctx context.Context, target *backupv1.Target, out io.Writer) (DumpInfo, error) {
	var info DumpInfo
	if target == nil || target.Connection == nil {
		return info, errors.New("pipeline: pg_dump: nil target/connection")
	}

	// Flags appended after the shared connection arguments.
	dumpFlags := []string{
		"--format=custom",
		"--no-owner",
		"--no-acl",
		"--serializable-deferrable",
		"--no-comments",
	}
	args := append(p.connArgs(target), dumpFlags...)

	if err := p.runner.RunStream(ctx, p.binary, args, p.env(target), out); err != nil {
		return info, fmt.Errorf("pipeline: pg_dump exec: %w", err)
	}

	// The version probe is best-effort: its failure must not fail a dump
	// that has already been streamed.
	info.EngineVersion = "PostgreSQL"
	if raw, err := p.runner.Output(ctx, p.binary, []string{"--version"}, nil); err == nil {
		info.EngineVersion = parsePgDumpVersion(string(raw))
	}
	return info, nil
}
// connArgs translates the target's connection settings into the
// -h/-p/-U/-d flags shared by Validate and Dump. Settings left at their
// zero value are omitted. The caller must have checked that
// t.Connection is non-nil.
func (p *pgDump) connArgs(t *backupv1.Target) []string {
	conn := t.Connection
	// Room for at most four flag/value pairs.
	flags := make([]string, 0, 8)
	if host := conn.Host; host != "" {
		flags = append(flags, "-h", host)
	}
	if port := conn.Port; port != 0 {
		flags = append(flags, "-p", strconv.FormatUint(uint64(port), 10))
	}
	if user := conn.Username; user != "" {
		flags = append(flags, "-U", user)
	}
	if db := conn.Database; db != "" {
		flags = append(flags, "-d", db)
	}
	return flags
}
// env builds the extra environment for the pg_dump child process. The
// only entry is PGPASSWORD, which keeps the password off the command
// line.
//
// password_secret_ref is the server-side reference; by the time we get
// here the WSS layer has already resolved it to the actual secret, and
// the agent keeps the resolved value in that same connection field for
// the duration of one run. To stay agnostic of that mechanism this
// package simply treats the field's content as the literal password.
//
// It returns nil when there is no connection or no password.
func (p *pgDump) env(t *backupv1.Target) []string {
	conn := t.Connection
	if conn == nil {
		return nil
	}
	secret := conn.PasswordSecretRef
	if secret == "" {
		return nil
	}
	return []string{"PGPASSWORD=" + secret}
}
// parsePgDumpVersion converts the output of `pg_dump --version`, e.g.
// "pg_dump (PostgreSQL) 16.2", to the canonical engine_version string
// "PostgreSQL 16.2".
//
// Surrounding whitespace is ignored. Everything after the "(PostgreSQL)"
// marker is kept verbatim. If nothing follows the marker the result is
// the bare "PostgreSQL" (no trailing space). Input that does not contain
// the marker is returned trimmed but otherwise unchanged.
func parsePgDumpVersion(s string) string {
	const marker = "(PostgreSQL)"

	s = strings.TrimSpace(s)
	i := strings.Index(s, marker)
	if i < 0 {
		return s
	}
	rest := strings.TrimSpace(s[i+len(marker):])
	if rest == "" {
		// No version number present: avoid emitting "PostgreSQL " with a
		// dangling space.
		return "PostgreSQL"
	}
	return "PostgreSQL " + rest
}
// IsPgDumpMagic reports whether `head` begins with PgDumpMagic, the
// signature of a pg_dump custom-format archive. Input shorter than the
// signature never matches. Used by smoke-validation.
func IsPgDumpMagic(head []byte) bool {
	magic := []byte(PgDumpMagic)
	if len(head) < len(magic) {
		return false
	}
	return bytes.Equal(head[:len(magic)], magic)
}
// -----------------------------------------------------------------------------
// cmd runner abstraction (shared with mysqldump.go)
// -----------------------------------------------------------------------------
// cmdRunner is the seam between the drivers and os/exec. It allows
// tests to swap out os/exec with deterministic stubs.
type cmdRunner interface {
// Output runs name with args and the extra environment entries in
// env, and returns the command's captured stdout, or an error. Used
// for short-lived commands like --version probes.
Output(ctx context.Context, name string, args []string, env []string) ([]byte, error)
// RunStream runs name with args and the extra environment entries in
// env, and pipes the command's stdout to `out`. When the command
// fails, its stderr is captured into the returned error.
RunStream(ctx context.Context, name string, args []string, env []string, out io.Writer) error
}
// realRunner is the production cmdRunner backed by os/exec. It carries
// no state, so the zero value is ready to use.
type realRunner struct{}
// Output runs name with args and returns everything the command wrote
// to stdout. The entries in env are overlaid on the inherited process
// environment (see mergeEnv); the command is killed if ctx is done
// before it exits.
//
// When the command starts but does not exit successfully, the returned
// error reports the exit code and the trimmed stderr, and wraps the
// underlying *exec.ExitError so callers can still inspect it with
// errors.As / errors.Is. Any other failure (for example the binary not
// being found) is returned unchanged. Whatever stdout was captured is
// returned alongside the error.
func (realRunner) Output(ctx context.Context, name string, args []string, env []string) ([]byte, error) {
	cmd := exec.CommandContext(ctx, name, args...)
	cmd.Env = mergeEnv(env)

	out, err := cmd.Output()
	if err == nil {
		return out, nil
	}

	var ee *exec.ExitError
	if !errors.As(err, &ee) {
		return out, err
	}

	// cmd.Output populates ee.Stderr because cmd.Stderr was left nil.
	// Wrapping with %w keeps the error chain intact and also surfaces
	// details such as "signal: killed" that a bare exit code of -1 hides.
	stderr := bytes.TrimSpace(ee.Stderr)
	if len(stderr) == 0 {
		return out, fmt.Errorf("%s exited %d: %w", name, ee.ExitCode(), err)
	}
	return out, fmt.Errorf("%s exited %d: %s: %w", name, ee.ExitCode(), stderr, err)
}
// RunStream runs name with args, sending the command's stdout directly
// to `out` while collecting stderr in memory. The entries in env are
// overlaid on the inherited process environment (see mergeEnv); the
// command is killed if ctx is done before it exits.
//
// On failure the returned error wraps the os/exec error and carries the
// trimmed stderr text.
func (realRunner) RunStream(ctx context.Context, name string, args []string, env []string, out io.Writer) error {
	var errBuf bytes.Buffer

	cmd := exec.CommandContext(ctx, name, args...)
	cmd.Env = mergeEnv(env)
	cmd.Stdout, cmd.Stderr = out, &errBuf

	runErr := cmd.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("%s: %w (stderr=%s)", name, runErr, strings.TrimSpace(errBuf.String()))
}
// mergeEnv produces the environment for a child process: the parent
// process environment followed by the entries in extra, so the extras
// come last. With no extras it returns nil, which keeps Go's default
// (inherit everything) when assigned to exec.Cmd.Env.
func mergeEnv(extra []string) []string {
	if len(extra) == 0 {
		return nil
	}
	// os.Environ hands back a fresh copy on every call, so appending to
	// it cannot disturb the real process environment.
	return append(os.Environ(), extra...)
}