backupy-agent/apps/agent/internal/pipeline/encrypt.go
TronoSfera 8b0c978337 feat(initial): Backupy agent + backupy-decrypt CLI
Source ports from the TronoSfera/backupy-cloud monorepo:
- apps/agent/        — Go agent (WSS client, persistent queue, Docker
                       discovery, 5 DB drivers: PG/MySQL/Mongo/Redis/SQLite,
                       pre/post hooks, Prometheus metrics)
- apps/backupy-decrypt/ — standalone CLI for client-side decryption
- packages/proto/    — protobuf wire format (generated .pb.go committed
                       so the repo builds without protoc)
- docs/              — agent spec + wire-protocol contract

Apache-2.0 license. Image published to ghcr.io/tronosfera/backupy-agent
on every v* tag via .github/workflows/release.yml (multi-arch amd64+arm64).
2026-05-17 20:22:35 +03:00

181 lines
6.1 KiB
Go

// Backupy chunk-stream encryption format (v1).
//
// The agent encrypts the (already-compressed) backup as a sequence of
// independent AES-256-GCM frames. The output is appended to the upload
// stream byte-by-byte — no length prefix on the whole blob, no header.
//
// Wire format (all integers big-endian):
//
// chunk := uint32 ciphertext_len // bytes that follow (nonce + ciphertext), EXCLUDING this u32
// 12-byte random nonce // unique per chunk
// ciphertext (≤ ChunkPlainSize + 16-byte GCM tag)
//
// EOF marker: a single chunk with ciphertext_len == 0. The decryptor
// treats this as "stream finished cleanly" — without it, a truncated
// upload would be indistinguishable from a clean end.
//
// Chunk plaintext size is ChunkPlainSize = 1 MiB. Larger frames waste
// memory on the decryptor; smaller frames pay too much per-chunk overhead.
//
// The DEK is 32 bytes (AES-256). It is supplied by the server in
// RunBackup.encrypted_dek, decrypted by the agent runtime (KMS path —
// not covered here), and discarded once the upload completes. The
// envelope ciphertext (KMS-wrapped DEK) is passed THROUGH the agent
// unchanged in BackupCompleted.encrypted_dek so the server can persist
// it alongside the backup row.
//
// The backupy-decrypt CLI inverts this format. Keep the constants below
// in sync with apps/backupy-decrypt — they are the single source of truth.
package pipeline
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"io"
)
// Sizes that define the v1 chunk framing. Byte counts are spelled out from
// their bit widths where that makes the origin of the number obvious.
const (
	// ChunkPlainSize caps the plaintext carried by a single AES-GCM frame
	// at 1 MiB. That bounds the memory a decryptor needs per frame while
	// the fixed per-chunk cost (4-byte header + 12-byte nonce + 16-byte
	// tag) stays negligible.
	ChunkPlainSize = 1024 * 1024

	// dekSize is the required data-encryption-key length in bytes
	// (a 256-bit key, i.e. AES-256).
	dekSize = 256 / 8

	// nonceSize is the per-chunk nonce length in bytes: the standard
	// 96-bit AES-GCM nonce.
	nonceSize = 96 / 8

	// gcmTagSize is the length in bytes of the 128-bit GCM
	// authentication tag appended to every ciphertext.
	gcmTagSize = 128 / 8

	// chunkHeaderSize is the width in bytes of the big-endian uint32
	// length prefix that starts every chunk.
	chunkHeaderSize = 32 / 8
)
// Encryptor encrypts arbitrarily large streams using AES-256-GCM with a
// per-chunk random nonce. Construct one per backup run — reuse across
// runs is allowed but cheap to avoid.
//
// Build instances with NewEncryptor; Stream and Decrypt reject a nil
// receiver or one whose aead field is unset.
type Encryptor struct {
// dek is a private copy of the key passed to NewEncryptor, so the caller
// may overwrite its own slice afterwards. Stream and Decrypt in this file
// do not read it; they go through aead.
dek []byte
// aead is the AES-GCM instance built from the key; it performs the
// per-chunk Seal and Open calls.
aead cipher.AEAD
}
// NewEncryptor validates dek and returns an Encryptor backed by AES-GCM.
//
// dek must be exactly dekSize bytes. The key is copied, so the caller is
// free to overwrite its slice once NewEncryptor returns. An error is
// returned when the key length is wrong, when the AES or GCM construction
// fails, or when the GCM instance does not use a nonceSize-byte nonce.
func NewEncryptor(dek []byte) (*Encryptor, error) {
	if got := len(dek); got != dekSize {
		return nil, fmt.Errorf("pipeline: DEK must be %d bytes, got %d", dekSize, got)
	}

	blk, err := aes.NewCipher(dek)
	if err != nil {
		return nil, fmt.Errorf("pipeline: aes new cipher: %w", err)
	}

	gcm, err := cipher.NewGCM(blk)
	if err != nil {
		return nil, fmt.Errorf("pipeline: gcm new: %w", err)
	}

	// Guard against the framing constant drifting from what the stdlib
	// GCM implementation actually expects.
	if got := gcm.NonceSize(); got != nonceSize {
		return nil, fmt.Errorf("pipeline: gcm nonce size mismatch: %d", got)
	}

	// Detach from the caller's slice so later writes to it cannot reach
	// the stored key.
	enc := &Encryptor{aead: gcm}
	enc.dek = append(make([]byte, 0, len(dek)), dek...)
	return enc, nil
}
// Stream reads plaintext from `in` in ChunkPlainSize chunks, encrypts
// each one with a fresh random nonce, and writes framed ciphertext to
// `out`. After EOF on `in` it writes the zero-length terminator chunk.
//
// Every frame (u32 length || nonce || ciphertext+tag) is assembled in one
// buffer that is reused for the whole stream and handed to `out` in a
// single Write call, so the loop does not allocate per chunk.
//
// Returns the total number of PLAINTEXT bytes consumed from `in` and
// successfully written as frames. On error the count covers only the
// chunks written before the failure, and no terminator is emitted.
func (e *Encryptor) Stream(in io.Reader, out io.Writer) (int64, error) {
	if e == nil || e.aead == nil {
		return 0, errors.New("pipeline: nil Encryptor")
	}

	// prefixSize is the part of a frame that precedes the ciphertext.
	const prefixSize = chunkHeaderSize + nonceSize

	plain := make([]byte, ChunkPlainSize)
	// frame has room for the largest possible chunk, so Seal can append the
	// ciphertext in place instead of allocating a new slice every iteration.
	frame := make([]byte, prefixSize, prefixSize+ChunkPlainSize+gcmTagSize)
	nonce := frame[chunkHeaderSize:prefixSize]

	var total int64
	for {
		n, readErr := io.ReadFull(in, plain)
		if n > 0 {
			if _, err := rand.Read(nonce); err != nil {
				return total, fmt.Errorf("pipeline: read nonce: %w", err)
			}
			// Seal appends ciphertext+tag after the header and nonce bytes.
			// The length prefix is written into the returned slice, so the
			// frame stays correct even if Seal had to reallocate.
			sealed := e.aead.Seal(frame[:prefixSize], nonce, plain[:n], nil)
			binary.BigEndian.PutUint32(sealed[:chunkHeaderSize], uint32(len(sealed)-chunkHeaderSize))
			if _, err := out.Write(sealed); err != nil {
				return total, fmt.Errorf("pipeline: write chunk: %w", err)
			}
			total += int64(n)
		}
		// io.ReadFull signals end of input with these exact sentinels:
		// io.EOF when nothing was read, io.ErrUnexpectedEOF for a short
		// final chunk (which has already been framed above).
		if readErr == io.EOF || readErr == io.ErrUnexpectedEOF {
			break
		}
		if readErr != nil {
			return total, fmt.Errorf("pipeline: read plaintext: %w", readErr)
		}
	}

	// EOF marker: zero-length chunk.
	var eofMarker [chunkHeaderSize]byte
	if _, err := out.Write(eofMarker[:]); err != nil {
		return total, fmt.Errorf("pipeline: write eof marker: %w", err)
	}
	return total, nil
}
// Decrypt is the inverse of Stream — used by tests and the
// backupy-decrypt CLI. Validates GCM tags and the EOF marker.
//
// It reads frames from `in` until the zero-length terminator, writing the
// recovered plaintext to `out`, and returns the number of plaintext bytes
// written. It fails when:
//   - the stream ends before the terminator chunk (truncated upload),
//   - a chunk's declared size is smaller than nonce + tag or larger than
//     nonce + ChunkPlainSize + tag (the largest frame Stream can emit),
//   - a chunk fails GCM authentication, or
//   - reading from `in` or writing to `out` fails.
//
// The upper bound matters because the length prefix comes from untrusted
// input: without it a corrupt or hostile stream could request an allocation
// of up to 4 GiB for a single frame.
func (e *Encryptor) Decrypt(in io.Reader, out io.Writer) (int64, error) {
	if e == nil || e.aead == nil {
		return 0, errors.New("pipeline: nil Encryptor")
	}

	const (
		minFrameSize = nonceSize + gcmTagSize
		maxFrameSize = nonceSize + ChunkPlainSize + gcmTagSize
	)

	header := make([]byte, chunkHeaderSize)
	// frame is grown on demand (never beyond maxFrameSize) and reused
	// across chunks.
	var frame []byte
	var total int64
	for {
		if _, err := io.ReadFull(in, header); err != nil {
			if errors.Is(err, io.EOF) {
				// Stream ended without an explicit terminator — refuse.
				return total, errors.New("pipeline: encrypted stream truncated (no EOF marker)")
			}
			return total, fmt.Errorf("pipeline: read chunk header: %w", err)
		}

		size := binary.BigEndian.Uint32(header)
		switch {
		case size == 0:
			return total, nil // clean EOF
		case size < minFrameSize:
			return total, fmt.Errorf("pipeline: chunk size %d below minimum", size)
		case size > maxFrameSize:
			return total, fmt.Errorf("pipeline: chunk size %d exceeds maximum %d", size, maxFrameSize)
		}

		if cap(frame) < int(size) {
			frame = make([]byte, size)
		}
		frame = frame[:size]
		if _, err := io.ReadFull(in, frame); err != nil {
			return total, fmt.Errorf("pipeline: read chunk body: %w", err)
		}

		nonce := frame[:nonceSize]
		ct := frame[nonceSize:]
		// Decrypt in place: passing ct[:0] as dst reuses the ciphertext's
		// storage for the plaintext, as the cipher.AEAD contract allows.
		pt, err := e.aead.Open(ct[:0], nonce, ct, nil)
		if err != nil {
			return total, fmt.Errorf("pipeline: gcm open: %w", err)
		}
		if _, err := out.Write(pt); err != nil {
			return total, fmt.Errorf("pipeline: write plaintext: %w", err)
		}
		total += int64(len(pt))
	}
}