commit 71a5fdb142a2257204c3b6eed04344a7cc27beb3 Author: tronosfera Date: Mon Jan 19 10:27:20 2026 +0300 Initial commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..652dc84 --- /dev/null +++ b/.env.example @@ -0,0 +1,65 @@ +# Example environment variables for the backup service +# +# Copy this file to `.env` and adjust the values as needed. Both the +# server and client read configuration from environment variables. When +# using Docker Compose you can reference this file via the `env_file` +# directive or set variables in the compose file directly. + +# ----------------------------------------------------------------------------- +# Server configuration +# ----------------------------------------------------------------------------- +SECRET_KEY=mysecretkey + +# Use Postgres instead of SQLite. This DSN is used by SQLAlchemy. If you +# leave it empty or omit it, the server will fall back to a local SQLite +# database stored in `/app/backup.db`. +DATABASE_URL=postgresql+psycopg2://backup:backup@db:5432/backup + +# Configure S3 storage. When S3_BUCKET is defined the server will use +# Amazon S3 (or any S3-compatible service) to store backup files. If +# omitted, files are stored on the local filesystem under `/app/data`. +S3_BUCKET=backup +AWS_ACCESS_KEY_ID=minio +AWS_SECRET_ACCESS_KEY=minio123 +AWS_REGION=us-east-1 +S3_ENDPOINT=http://minio:9000 + +# ----------------------------------------------------------------------------- +# Client configuration +# ----------------------------------------------------------------------------- +# URL of the backup server. For local deployments this will typically be +# `http://server:8000` when using Docker Compose. When running the +# client independently, set this to the public address of your server. +SERVER_URL=http://server:8000 + +# Credentials of a user with permission to register new clients on the server. +USERNAME=admin +PASSWORD=adminpass + +# Optional human‑readable name for this client. Defaults to the hostname. +CLIENT_NAME=example-client + +# Comma‑separated list of directories to back up. The client will walk +# these directories recursively and upload files that have changed since +# the last backup. +MONITORED_PATHS=/data + +# Semicolon‑separated list of shell commands to run before each backup cycle. +# This can be used, for example, to dump a PostgreSQL database. Leave empty +# if not needed. +PRE_COMMANDS= + +# Seconds between ping requests to the server. Pings update the last +# heartbeat timestamp for the client in the server UI. +PING_INTERVAL=300 + +# Seconds between full backup scans. Lower this value to increase the +# frequency of backups. +BACKUP_INTERVAL=3600 + +# Enable or disable the client web interface. When set to `false`, the +# client does not launch the configuration UI and requires all mandatory +# variables (SERVER_URL, USERNAME, PASSWORD and MONITORED_PATHS) to be +# provided. When `true` (default), the client starts a small web server on +# port 8080 to collect configuration interactively. +CLIENT_UI_ENABLED=true \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..73b405d --- /dev/null +++ b/README.md @@ -0,0 +1,160 @@ +# Backup Service + +This repository contains a simple backup system composed of a server and a +client. The service was designed to meet the requirements outlined in the +problem statement: it provides a REST API for storing and retrieving +deduplicated backups for multiple users, a web interface for administrators to +monitor clients, configurable retention policies (by age or version count), +support for both local filesystem storage and Amazon S3, and a Dockerised +client that periodically uploads files and runs pre‑backup commands. + +## Architecture Overview + +The system is split into two components: + +1. **Server** (`./server`) + * Built with [FastAPI](https://fastapi.tiangolo.com/) and SQLAlchemy. + * Stores user accounts, clients, deduplicated file hashes, backup records + and client logs in a relational database. + * Uses SHA‑256 to detect duplicate uploads and stores each unique file only + once. A key–value style table (`file_hashes`) maps the content hash to + the storage path【744670406339295†L270-L339】. + * Supports local filesystem storage or S3. When the `S3_BUCKET` + environment variable is set, files are uploaded to S3; otherwise they are + saved under `./data`. S3 lifecycle rules can be used to automatically + expire old versions of objects【17949889377376†L188-L219】. + * Implements retention policies on a per‑user basis. Administrators can + specify either a maximum number of versions or a maximum age (in days); + when a new backup is uploaded, older versions outside the policy are + pruned, preserving only the latest copy【709290716836410†L142-L159】. + * Provides a minimal HTML dashboard (`/`) displaying clients, their tokens, + and last ping/backup times. Forms for creating users and clients are + included as a starting point. + +2. **Client** (`./client`) + * Written in Python and runs continuously inside a container. + * Authenticates to the server using credentials provided via environment + variables, registers itself to obtain a unique client token, then + periodically sends pings and backups. + * Recursively scans directories listed in `MONITORED_PATHS`, computes + SHA‑256 hashes of each file and uploads only those that have changed + since the previous run. This reduces network and storage overhead while + still allowing the server to deduplicate identical content. + * Supports optional `PRE_COMMANDS` that run before each backup cycle. This + feature can be used to generate database dumps (e.g. running + `pg_dump`) or any other preparatory work. + * Sends log messages to the server when errors occur to aid debugging. + +### Web Interface and Configuration + +The server now includes a simple but more complete web interface built with +Jinja2 templates: + +* `/clients` – lists all registered clients with last ping/backup times and + displays the pre‑backup commands configured for each client. It includes + forms to create new users and new clients. +* `/clients/{id}` – shows details for a specific client. Administrators can + edit the **pre‑backup commands** for that client using a multiline text + area. The page also lists recent backups (with download links) and the + last 50 log messages. + +Behind the scenes, pre‑backup commands are stored in the client record in +the database. Clients call `/api/clients/{token}/config` to retrieve their +commands before each backup cycle. This allows administrators to update +backup behaviour centrally without redeploying clients. + +### Client Web Interface + +In addition to the server dashboard, the backup **client** offers its own +minimal web interface. When the client container starts it opens a small +FastAPI application on port **8080** that presents a form where you can +enter the server URL, username and password for registration, an optional +client name, and the directories to monitor for backups. Once you submit +the form the client stores the configuration, registers itself with the +server, and begins running backup cycles automatically. A confirmation +page provides a direct link to the new client's page on the server. + +This interface is enabled by default to make configuration easy. You can +disable it by setting the environment variable `CLIENT_UI_ENABLED=false`. +When the UI is disabled the client does not launch the HTTP server and will +exit if mandatory environment variables (`SERVER_URL`, `USERNAME`, +`PASSWORD` and `MONITORED_PATHS`) are missing. Use a `.env` file (see +`.env.example`) or set environment variables in your compose file to +configure the client non‑interactively. + +## Deployment with Docker Compose + +Rather than a single all‑in‑one compose file, the repository now provides +three Compose configurations to support a variety of deployment scenarios: + +| Compose file | Description | +|------------------------------|-----------------------------------------------------------------------------------| +| **`docker-compose.yml`** | Launches the server, PostgreSQL, MinIO and a client in one stack. Useful for +| | local testing or demonstration where all components run on the same host. | +| **`docker-compose.server.yml`** | Starts only the server stack (FastAPI app, database and MinIO). Use this when +| | deploying the server to a dedicated host or cloud. | +| **`docker-compose.client.yml`** | Runs just the client container. Use this to deploy the backup agent on a +| | separate machine and point it at your existing server. The client exposes +| | port 8080 for its configuration UI. | + +To run the all‑in‑one configuration: + +```bash +cd backup_service +docker compose up --build +``` + +To run just the server or just the client, specify the appropriate compose file: + +```bash +docker compose -f docker-compose.server.yml up --build +``` + +and, on a different host or in a separate terminal: + +```bash +docker compose -f docker-compose.client.yml up --build +``` + +The default server configuration uses SQLite for simplicity, storing files in +a volume mounted at `/app/data`. The provided compose files demonstrate how +to switch to PostgreSQL and MinIO by setting `DATABASE_URL`, `S3_BUCKET`, +`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION` and `S3_ENDPOINT`. +Consult the comments in the compose files and the included `.env.example` +for guidance. The client container mounts a volume called `client_data` at +`/data`; any files placed in this directory will be backed up. You can +configure the client by editing environment variables in the compose file, +by supplying a `.env` file, or via the built‑in web interface on port 8080. + +## Usage Notes + +* Before starting the client you must create a user on the server. One way + to do this is to run the server, visit `http://localhost:8000` in a + browser, authenticate using a token from `/api/login`, and use the “Create + User” form. Alternatively, you can call the `/api/register_user` endpoint + directly using a bearer token from an existing admin. +* Ensure that the retention policies set on each user reflect your backup + strategy. For example, specifying `retention_versions=5` keeps the five + most recent versions of each file; specifying `retention_days=30` retains + versions from the last 30 days【709290716836410†L142-L159】. +* When using S3, consider configuring lifecycle rules to automatically expire + old objects or transition them to cheaper storage classes. S3 lifecycle + rules can automate the deletion of objects after a specified period to meet + data retention requirements【17949889377376†L188-L219】. + +## Limitations and Future Work + +This sample implementation is intended as a starting point. Some features +that could be improved include: + +* A richer web interface for managing users, clients and retention policies. +* More granular client configuration (e.g. inclusion/exclusion patterns, + incremental or differential backups) and scheduling via cron. +* Support for compressing and encrypting data before upload. +* Streaming large files to the server to avoid loading them entirely into + memory. +* Integration tests and better error handling. + +Despite these limitations, the provided code demonstrates the core +functionality required for a secure, deduplicated backup service and +provides a foundation for further development. \ No newline at end of file diff --git a/client/Dockerfile b/client/Dockerfile new file mode 100644 index 0000000..cafa3be --- /dev/null +++ b/client/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +COPY . . + +# Expose the port used by the client web interface. This allows +# external browsers to connect to the configuration UI when the +# client runs in its own container. See docker‑compose files for +# port mappings. +EXPOSE 8080 + +# The client runs indefinitely, sending backups according to its schedule +CMD ["python", "main.py"] \ No newline at end of file diff --git a/client/main.py b/client/main.py new file mode 100644 index 0000000..f6c1ad7 --- /dev/null +++ b/client/main.py @@ -0,0 +1,688 @@ +"""Client agent for the backup service. + +The client agent runs inside its own Docker container and periodically backs +up files to the backup server. It performs the following operations: + +1. On first run, authenticate to the server using credentials provided via + environment variables and register itself as a new client. The returned + client token is stored locally. +2. Run optional pre‑backup commands specified in ``PRE_COMMANDS``. These + commands are executed in the container shell before each backup cycle; for + example, you can use this feature to dump a PostgreSQL database to a + file. +3. Walk through the directories specified in ``MONITORED_PATHS`` (comma + separated) and compute a SHA256 hash of each file. If the hash has not + changed since the previous backup, the file is skipped to conserve + bandwidth. Otherwise, the file is uploaded to the server via the + ``/api/clients/{client_token}/backup`` endpoint. +4. Periodically send a ``ping`` to the server to update the client's last + heartbeat time. +5. Send log messages to the server when errors occur. + +Configuration is provided entirely through environment variables: + + * ``SERVER_URL`` (required): Base URL of the backup server, e.g. ``http://server:8000``. + * ``USERNAME`` and ``PASSWORD``: Credentials of a user with permission to + register new clients. + * ``CLIENT_NAME``: Human‑readable name for this client. + * ``MONITORED_PATHS``: Comma‑separated list of directory paths to back up. + * ``PRE_COMMANDS``: Semicolon‑separated list of shell commands to run before + each backup cycle (optional). + * ``PING_INTERVAL``: Seconds between ping requests (default: 300). + * ``BACKUP_INTERVAL``: Seconds between backup cycles (default: 3600). + +State such as the client token and previously computed file hashes is saved in +``state.json`` within the working directory. +""" + +from __future__ import annotations + +import hashlib +import json +import os +import sys +import time +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Dict, Optional, List + +import requests +import yaml # type: ignore +import subprocess +from urllib.parse import urlparse +import datetime + +# Imports for the web interface +from fastapi import FastAPI, Request, Form +from fastapi.responses import HTMLResponse, RedirectResponse +from fastapi.templating import Jinja2Templates +import threading +import uvicorn + + +STATE_FILE = "state.json" + + +@dataclass +class ClientState: + token: Optional[str] = None # client token assigned by server + client_id: Optional[int] = None # numeric ID assigned by server + access_token: Optional[str] = None # bearer token for API authentication + file_hashes: Dict[str, str] = None # maps file paths to last known hash + + def to_json(self) -> str: + return json.dumps(asdict(self)) + + @classmethod + def from_json(cls, data: str) -> "ClientState": + obj = json.loads(data) + return cls( + token=obj.get("token"), + client_id=obj.get("client_id"), + access_token=obj.get("access_token"), + file_hashes=obj.get("file_hashes", {}), + ) + + +class BackupClient: + def __init__(self) -> None: + # Read initial configuration from environment variables. These may be empty + # when the client is first started; in that case the client will run + # exclusively as a web UI until the user supplies configuration. + self.server_url = os.environ.get("SERVER_URL") or "" + self.username = os.environ.get("USERNAME") or "" + self.password = os.environ.get("PASSWORD") or "" + self.client_name = os.environ.get("CLIENT_NAME", os.uname().nodename) + self.monitored_paths = [p.strip() for p in os.environ.get("MONITORED_PATHS", "").split(",") if p.strip()] + self.pre_commands: List[str] = [c.strip() for c in os.environ.get("PRE_COMMANDS", "").split(";") if c.strip()] + self.ping_interval = int(os.environ.get("PING_INTERVAL", "300")) + self.backup_interval = int(os.environ.get("BACKUP_INTERVAL", "3600")) + + # A flag indicating whether the web UI should be enabled. By default + # the UI runs so that the client can be configured interactively when + # environment variables are not provided. Set the environment + # variable ``CLIENT_UI_ENABLED`` to ``false`` or ``0`` to disable the + # UI and run solely based on environment configuration. When the UI + # is disabled and mandatory settings are missing, the client will + # terminate with an error. + ui_env = os.environ.get("CLIENT_UI_ENABLED", "true").lower() + self.ui_enabled = ui_env not in ("false", "0", "no") + + # Determine whether the client is configured enough to start backing up. + self.configured = bool(self.server_url and self.username and self.password and self.monitored_paths) + + # Load saved state (token, access token, hashes). + self.state = self._load_state() + + # Remote pre‑backup commands retrieved from the server; overrides local commands + self.remote_pre_commands: List[str] = [] + + # Track last run timestamps for each task ID so we don't run tasks too frequently + # This dict maps task IDs to the last time we attempted to run them. + self.task_last_run: Dict[int, float] = {} + + def _load_state(self) -> ClientState: + if os.path.exists(STATE_FILE): + with open(STATE_FILE, "r") as f: + try: + return ClientState.from_json(f.read()) + except Exception: + pass + return ClientState(token=None, client_id=None, access_token=None, file_hashes={}) + + def _save_state(self) -> None: + with open(STATE_FILE, "w") as f: + f.write(self.state.to_json()) + + def _login(self) -> None: + """Authenticate using username/password and obtain an access token.""" + url = f"{self.server_url}/api/login" + data = {"username": self.username, "password": self.password} + # Use form-encoded data as required by OAuth2PasswordRequestForm + response = requests.post(url, data=data) + if response.status_code != 200: + raise RuntimeError(f"Failed to login: {response.text}") + token = response.json()["access_token"] + self.state.access_token = token + self._save_state() + + def _register_client(self) -> None: + """Register this client and obtain a client token and ID.""" + url = f"{self.server_url}/api/clients/register" + headers = {"Authorization": f"Bearer {self.state.access_token}"} + payload = {"name": self.client_name} + response = requests.post(url, json=payload, headers=headers) + if response.status_code != 200: + raise RuntimeError(f"Failed to register client: {response.text}") + data = response.json() + # The server returns the new client object, which includes id and token + self.state.token = data.get("token") + self.state.client_id = data.get("id") + self._save_state() + + def ensure_authenticated(self) -> None: + """Ensure we have valid tokens; login and register if necessary.""" + if not self.state.access_token: + self._login() + if not self.state.token: + self._register_client() + + def run_pre_commands(self) -> None: + # Use remote commands if available, otherwise fall back to local pre_commands + commands = self.remote_pre_commands if self.remote_pre_commands else self.pre_commands + for cmd in commands: + if not cmd: + continue + try: + subprocess.run(cmd, shell=True, check=True) + except subprocess.CalledProcessError as e: + self.send_log(level="ERROR", message=f"Pre‑command '{cmd}' failed: {e}") + + def compute_file_hash(self, path: Path) -> str: + """Compute SHA256 hash of a file.""" + sha256 = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + sha256.update(chunk) + return sha256.hexdigest() + + def scan_and_backup(self) -> None: + """Walk monitored directories and upload changed files.""" + headers = { + "Authorization": f"Bearer {self.state.access_token}", + } + for dir_path in self.monitored_paths: + p = Path(dir_path) + if not p.exists(): + self.send_log(level="ERROR", message=f"Monitored path does not exist: {dir_path}") + continue + for file_path in p.rglob("*"): + if file_path.is_file(): + try: + current_hash = self.compute_file_hash(file_path) + except Exception as e: + self.send_log(level="ERROR", message=f"Failed to hash {file_path}: {e}") + continue + last_hash = self.state.file_hashes.get(str(file_path)) + if last_hash == current_hash: + continue # unchanged + # Upload file + url = f"{self.server_url}/api/clients/{self.state.token}/backup" + try: + with file_path.open("rb") as f: + files = {"file": (file_path.name, f, "application/octet-stream")} + data = {"path": str(file_path)} + resp = requests.post(url, headers=headers, files=files, data=data) + if resp.status_code == 200: + self.state.file_hashes[str(file_path)] = current_hash + self._save_state() + else: + self.send_log(level="ERROR", message=f"Failed to upload {file_path}: {resp.text}") + except Exception as e: + self.send_log(level="ERROR", message=f"Exception uploading {file_path}: {e}") + + # After completing the backup cycle, update the server with the current + # file structure. This allows administrators to browse the client's + # filesystem in the web UI. Only do this if we are authenticated. + if self.state.token: + try: + structure = self.get_file_structure() + url = f"{self.server_url}/api/clients/{self.state.token}/files" + # Send JSON body (list of {path, is_dir}) + requests.post(url, json=structure) + except Exception as e: + self.send_log(level="ERROR", message=f"Failed to send file structure: {e}") + + def send_ping(self) -> None: + url = f"{self.server_url}/api/clients/{self.state.token}/ping" + try: + requests.post(url) + except Exception as e: + self.send_log(level="ERROR", message=f"Ping failed: {e}") + + def send_log(self, level: str, message: str) -> None: + if not self.state.token: + return + url = f"{self.server_url}/api/clients/{self.state.token}/log" + try: + data = {"level": level, "message": message} + requests.post(url, data=data) + except Exception: + pass + + # ======== Extended functionality for tasks and file structure ======== + + def get_file_structure(self) -> List[dict]: + """ + Collect a list of files and directories under the monitored paths. + + Returns a list of dictionaries with keys ``path`` and ``is_dir``. Paths + are absolute as seen by the client container. Duplicate entries are + removed. + """ + items: List[dict] = [] + seen = set() + for root in self.monitored_paths: + root_path = Path(root) + if not root_path.exists(): + continue + # Walk directory tree + for dirpath, dirnames, filenames in os.walk(root_path): + # Record directory itself + if dirpath not in seen: + seen.add(dirpath) + items.append({"path": str(dirpath), "is_dir": True}) + for fname in filenames: + fpath = os.path.join(dirpath, fname) + if fpath not in seen: + seen.add(fpath) + items.append({"path": fpath, "is_dir": False}) + return items + + def fetch_tasks(self) -> List[dict]: + """Retrieve the list of tasks for this client from the server.""" + if not self.state.token: + return [] + url = f"{self.server_url}/api/clients/{self.state.token}/tasks" + try: + resp = requests.get(url) + if resp.status_code == 200: + return resp.json() + except Exception as e: + self.send_log(level="ERROR", message=f"Failed to fetch tasks: {e}") + return [] + + def run_task(self, task: dict) -> None: + """ + Execute a single backup task according to its specification. + + This method handles running any specified pre‑commands, compressing + files if requested, uploading the file to the server with + retention overrides and reporting the result back to the server. + """ + task_id = task.get("id") + path = task.get("path") + frequency = task.get("frequency_minutes") or 0 + pre_commands = task.get("pre_commands", []) or [] + retention_days = task.get("retention_days") + retention_versions = task.get("retention_versions") + compress = task.get("compress", False) + next_run = task.get("next_run") + pending_run_id = task.get("pending_run_id") + + # Parse next_run into a timestamp if provided + due = True + if next_run: + try: + dt = datetime.datetime.fromisoformat(next_run) + # Compare as naive UTC + due = datetime.datetime.utcnow() >= dt.replace(tzinfo=None) + except Exception: + pass + # Check local rate limiting: avoid running tasks too frequently in this loop + last = self.task_last_run.get(task_id) + if last and (time.time() - last) < (frequency * 60 if frequency else 0): + due = False + if not due: + return + + # Record start time + self.task_last_run[task_id] = time.time() + # Combine commands: first client's remote pre_commands, then task commands + commands = self.remote_pre_commands + pre_commands + run_status = "SUCCESS" + run_message = "" + # Determine run_id: if there is a pending run id from server, use it + run_id = pending_run_id or 0 + try: + # Execute pre‑commands + for cmd in commands: + if not cmd: + continue + subprocess.run(cmd, shell=True, check=True) + # Determine file to upload + file_path = Path(path) + if not file_path.exists(): + raise FileNotFoundError(f"Task path does not exist: {path}") + upload_path = file_path + temp_path: Optional[Path] = None + if compress and file_path.is_file(): + # Create a gzipped archive of the file in memory + import tarfile + import tempfile + temp_fd, temp_name = tempfile.mkstemp(suffix=".tar.gz") + os.close(temp_fd) + temp_path = Path(temp_name) + with tarfile.open(temp_path, "w:gz") as tar: + tar.add(file_path, arcname=file_path.name) + upload_path = temp_path + # Upload the file; pass retention overrides if provided + url = f"{self.server_url}/api/clients/{self.state.token}/backup" + with open(upload_path, "rb") as f: + files = {"file": (upload_path.name, f, "application/octet-stream")} + data = {"path": str(path)} + if retention_days is not None: + data["retention_days"] = str(retention_days) + if retention_versions is not None: + data["retention_versions"] = str(retention_versions) + resp = requests.post(url, headers={"Authorization": f"Bearer {self.state.access_token}"}, files=files, data=data) + # Clean up temporary file + if compress and temp_path and temp_path.exists(): + try: + temp_path.unlink() + except Exception: + pass + if resp.status_code != 200: + raise RuntimeError(f"Failed to upload task file: {resp.text}") + except Exception as e: + run_status = "FAILED" + run_message = str(e) + self.send_log(level="ERROR", message=f"Task {task_id} failed: {e}") + finally: + # Report status back to the server + status_url = f"{self.server_url}/api/clients/{self.state.token}/tasks/{task_id}/status" + try: + requests.post( + status_url, + data={ + "run_id": str(run_id), + "status": run_status, + "message": run_message, + }, + ) + except Exception: + # Avoid raising exceptions if status reporting fails + pass + + # ===== Configuration helpers for the web UI ===== + def apply_config( + self, + server_url: str, + username: str, + password: str, + client_name: str, + monitored_paths: List[str], + ) -> Optional[int]: + """ + Apply a new configuration provided by the user via the web UI. + + This method updates the client's connection settings, resets any stored + authentication tokens and file hashes, and registers the client with + the server. It returns the newly assigned client ID on success. + + Args: + server_url: Base URL of the backup server (e.g. ``http://localhost:8000``). + username: Username of a server user with permission to register clients. + password: Password for the user. + client_name: Human‑readable name for this client. + monitored_paths: List of directory paths to back up. + Returns: + The numeric client ID assigned by the server, or ``None`` if + registration fails. + """ + # Update attributes + self.server_url = server_url.strip() + self.username = username.strip() + self.password = password.strip() + self.client_name = client_name.strip() or os.uname().nodename + self.monitored_paths = monitored_paths or [] + # Mark the client as configured only if essential fields are provided + self.configured = bool(self.server_url and self.username and self.password and self.monitored_paths) + # Reset state + self.state.access_token = None + self.state.token = None + self.state.client_id = None + self.state.file_hashes = {} + try: + # Authenticate and register + self._login() + self._register_client() + # Save state to disk + self._save_state() + return self.state.client_id + except Exception as e: + # Log any error; the backup loop will skip operations until configured + self.send_log(level="ERROR", message=str(e)) + return None + + def run(self) -> None: + """ + Main loop for the backup client. + + If the client is not yet configured (no server URL, credentials or + monitored paths), this loop simply waits until configuration is + provided via the web interface. Once configured, it ensures the + client is authenticated and then periodically sends pings and + performs backups according to the configured intervals. + """ + last_backup = 0.0 + last_ping = 0.0 + last_task_check = 0.0 + while True: + # If configuration is incomplete, skip any activity + if not self.configured: + time.sleep(1) + continue + # Ensure authentication; if tokens are missing the client will + # attempt to login/register. Errors are logged but do not + # terminate the loop. + try: + self.ensure_authenticated() + except Exception as e: + self.send_log(level="ERROR", message=f"Authentication failed: {e}") + time.sleep(5) + continue + + now = time.time() + if now - last_ping >= self.ping_interval: + self.send_ping() + last_ping = now + if now - last_backup >= self.backup_interval: + # Retrieve remote commands before each backup cycle + try: + if self.state.token: + config_url = f"{self.server_url}/api/clients/{self.state.token}/config" + resp = requests.get(config_url) + if resp.status_code == 200: + data = resp.json() + self.remote_pre_commands = data.get("pre_commands", []) + except Exception as e: + self.send_log(level="ERROR", message=f"Failed to fetch config: {e}") + + self.run_pre_commands() + self.scan_and_backup() + last_backup = now + # Periodically check for scheduled tasks. Tasks are fetched + # independently of the backup interval so that "run now" requests + # are handled promptly. Adjust the frequency as needed. + if now - last_task_check >= 30: + tasks = self.fetch_tasks() + for t in tasks: + try: + self.run_task(t) + except Exception as e: + # Catch any unexpected exception and log it + self.send_log(level="ERROR", message=f"Task execution error: {e}") + last_task_check = now + time.sleep(1) + + +def create_app(client: BackupClient) -> FastAPI: + """Instantiate and return a FastAPI application for configuring the client. + + The web interface exposes a simple form at the root URL which asks for + the server address, user credentials, client name and monitored paths. On + submission the client is configured and registered with the server, and + the user is redirected to the server's web UI for the newly created client. + + Args: + client: The BackupClient instance to be configured via the UI. + + Returns: + A FastAPI application ready to be served by Uvicorn or another ASGI server. + """ + app = FastAPI() + # Determine template directory relative to this file + templates_dir = Path(__file__).parent / "templates" + templates = Jinja2Templates(directory=str(templates_dir)) + + @app.get("/", response_class=HTMLResponse) + async def index(request: Request): + # Render the configuration form. Prepopulate fields with current + # settings where available. If an error message exists in the query + # parameters it will be displayed on the page. + error = request.query_params.get("error") + return templates.TemplateResponse( + "login.html", + { + "request": request, + "server_url": client.server_url or "", + "username": client.username or "", + "client_name": client.client_name or "", + "monitored_paths": ",".join(client.monitored_paths) if client.monitored_paths else "", + "error": error, + }, + ) + + @app.post("/configure") + async def configure( + request: Request, + server_url: str = Form(...), + username: str = Form(...), + password: str = Form(...), + client_name: str = Form(""), + monitored_paths: str = Form(""), + ): + """ + Handle submission of the configuration form. Before registering the + client, validate the inputs to catch common mistakes such as + malformed URLs or missing credentials. If validation fails, the form + is re-rendered with an error message. On success the client is + registered and a success page with a link to the server UI is + displayed. + """ + # Trim whitespace + server_url = server_url.strip() + username = username.strip() + password = password.strip() + client_name = client_name.strip() + # Parse monitored paths from comma-separated input + paths = [p.strip() for p in monitored_paths.split(",") if p.strip()] + + # Validate the server URL + parsed = urlparse(server_url) + if parsed.scheme not in ("http", "https") or not parsed.netloc: + return templates.TemplateResponse( + "login.html", + { + "request": request, + "server_url": server_url, + "username": username, + "client_name": client_name, + "monitored_paths": monitored_paths, + "error": "Invalid server URL. Please enter a valid http or https URL.", + }, + ) + + # Validate required fields + if not username or not password: + return templates.TemplateResponse( + "login.html", + { + "request": request, + "server_url": server_url, + "username": username, + "client_name": client_name, + "monitored_paths": monitored_paths, + "error": "Username and password are required.", + }, + ) + if not paths: + return templates.TemplateResponse( + "login.html", + { + "request": request, + "server_url": server_url, + "username": username, + "client_name": client_name, + "monitored_paths": monitored_paths, + "error": "Please specify at least one directory to back up.", + }, + ) + + # Apply configuration and attempt to register the client. apply_config + # returns the client_id if registration succeeds. + client_id = client.apply_config( + server_url=server_url, + username=username, + password=password, + client_name=client_name, + monitored_paths=paths, + ) + if client_id: + # On success show a confirmation page with a link to the server UI. + return templates.TemplateResponse( + "success.html", + { + "request": request, + "server_url": server_url.rstrip("/"), + "client_id": client_id, + }, + ) + # If registration failed, re-render the form with a generic error message + return templates.TemplateResponse( + "login.html", + { + "request": request, + "server_url": server_url, + "username": username, + "client_name": client_name, + "monitored_paths": monitored_paths, + "error": "Failed to authenticate or register. Please check your credentials and server address.", + }, + ) + + return app + + +if __name__ == "__main__": + """ + Entry point for the client container. The behaviour depends on the + ``CLIENT_UI_ENABLED`` environment variable: + + * When ``CLIENT_UI_ENABLED`` is true (default), the client runs a small + web server on port 8080 to collect configuration from the user. The + backup loop will start automatically once the user submits the form. + * When ``CLIENT_UI_ENABLED`` is false (e.g. ``CLIENT_UI_ENABLED=0``), the + client skips launching the web UI. In this mode all required + configuration must be provided via environment variables. If any + required setting is missing the program will exit with an error. + """ + client = BackupClient() + + if client.ui_enabled: + # Launch the web interface in a background thread. The UI allows + # interactive configuration when environment variables are absent. + def start_web() -> None: + app = create_app(client) + uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info") + + web_thread = threading.Thread(target=start_web, daemon=True) + web_thread.start() + # In UI mode always run the backup loop; it will no‑op until + # configuration is applied by the user. + client.run() + else: + # UI disabled. Ensure the client has enough configuration to run. + if not client.configured: + # Emit an error and terminate. Without configuration and without + # the UI there is no way for the user to provide settings. + sys.stderr.write( + "Error: CLIENT_UI_ENABLED is false but mandatory configuration\n" + "(SERVER_URL, USERNAME, PASSWORD and MONITORED_PATHS) was not provided.\n" + "Either set these environment variables or enable the UI.\n" + ) + sys.exit(1) + # Configuration is provided via environment variables. Start the + # backup loop immediately. + client.run() \ No newline at end of file diff --git a/client/requirements.txt b/client/requirements.txt new file mode 100644 index 0000000..6a33273 --- /dev/null +++ b/client/requirements.txt @@ -0,0 +1,6 @@ +requests +PyYAML +fastapi +uvicorn +jinja2 +python-multipart \ No newline at end of file diff --git a/client/templates/login.html b/client/templates/login.html new file mode 100644 index 0000000..c4ad909 --- /dev/null +++ b/client/templates/login.html @@ -0,0 +1,44 @@ + + + + + Configure Backup Client + + + + +
+

Configure Backup Client

+ {% if error %} +
+ {{ error }} +
+ {% endif %} +
+
+ + +
+
+ + +
+
+ + +
+
+ + +

If left blank, the container hostname will be used.

+
+
+ + +

Comma‑separated list of directories to back up.

+
+ +
+
+ + \ No newline at end of file diff --git a/client/templates/success.html b/client/templates/success.html new file mode 100644 index 0000000..3c4e397 --- /dev/null +++ b/client/templates/success.html @@ -0,0 +1,18 @@ + + + + + Registration Successful + + + + +
+

Registration Successful

+

Your backup client has been registered successfully. You can now manage this client from the server interface.

+ + Go to Client Dashboard + +
+ + \ No newline at end of file diff --git a/docker-compose.client.yml b/docker-compose.client.yml new file mode 100644 index 0000000..02a3a51 --- /dev/null +++ b/docker-compose.client.yml @@ -0,0 +1,39 @@ +version: '3.8' + +services: + client: + build: ./client + container_name: backup_client + # Expose the client's web interface on port 8080 so that it can be + # configured via a browser on the host. + ports: + - "8080:8080" + environment: + # Provide the address of the remote backup server. This should be + # updated to the actual server URL when deploying the client + # independently. Leaving it empty allows the user to set it via + # the web interface. + SERVER_URL: "" + # Credentials of a user with permission to register clients. These + # can also be provided via the web interface. If left blank the + # client will wait for user input. + USERNAME: "" + PASSWORD: "" + # Name of this client (will default to hostname if omitted). This + # value can be overridden in the web UI. + CLIENT_NAME: "" + # Directories inside the client container to back up. These can + # also be specified via the web UI as a comma‑separated list. + MONITORED_PATHS: "/data" + # Pre‑backup commands (e.g. dumping a PostgreSQL database). These + # can be left empty and configured later on the server. + PRE_COMMANDS: "" + # Ping and backup intervals. Modify as needed. + PING_INTERVAL: "300" + BACKUP_INTERVAL: "3600" + volumes: + # Mount host data to back up; adjust to your needs + - client_data:/data + +volumes: + client_data: \ No newline at end of file diff --git a/docker-compose.server.yml b/docker-compose.server.yml new file mode 100644 index 0000000..51d2981 --- /dev/null +++ b/docker-compose.server.yml @@ -0,0 +1,70 @@ +version: '3.8' + +services: + server: + build: ./server + container_name: backup_server + ports: + - "8000:8000" + environment: + # Change SECRET_KEY in production + SECRET_KEY: "mysecretkey" + # Use Postgres instead of SQLite. The DATABASE_URL uses the same + # credentials defined in the db service below. + DATABASE_URL: "postgresql+psycopg2://backup:backup@db:5432/backup" + # Configure S3 to point at the local MinIO service. These values + # correspond to the settings of the minio container defined below. + S3_BUCKET: "backup" + AWS_ACCESS_KEY_ID: "minio" + AWS_SECRET_ACCESS_KEY: "minio123" + AWS_REGION: "us-east-1" + # Endpoint for the local S3 service + S3_ENDPOINT: "http://minio:9000" + volumes: + # Persist local file storage + - server_data:/app/data + # Persist SQLite or Postgres database + - server_db:/app/backup.db + + db: + image: postgres:15 + container_name: backup_db + environment: + POSTGRES_USER: backup + POSTGRES_PASSWORD: backup + POSTGRES_DB: backup + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "pg_isready", "-U", "backup"] + interval: 10s + timeout: 5s + retries: 5 + + # Lightweight S3-compatible storage using MinIO. This service + # provides an object store accessible at :9000 and a web console at + # :9001. Credentials and bucket configuration are set to match the + # server environment variables above. + minio: + image: quay.io/minio/minio + container_name: backup_minio + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: minio123 + command: server /data --console-address ":9001" + ports: + - "9000:9000" + - "9001:9001" + volumes: + - minio_data:/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + server_data: + server_db: + postgres_data: + minio_data: \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..fe072e0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,100 @@ +version: '3.8' + +services: + server: + build: ./server + container_name: backup_server + ports: + - "8000:8000" + environment: + # Change SECRET_KEY in production + SECRET_KEY: "mysecretkey" + # Use Postgres instead of SQLite. The DATABASE_URL uses the same + # credentials defined in the db service below. + DATABASE_URL: "postgresql+psycopg2://backup:backup@db:5432/backup" + # Configure S3 to point at the local MinIO service. These values + # correspond to the settings of the minio container defined below. + S3_BUCKET: "backup" + AWS_ACCESS_KEY_ID: "minio" + AWS_SECRET_ACCESS_KEY: "minio123" + AWS_REGION: "us-east-1" + # Endpoint for the local S3 service + S3_ENDPOINT: "http://minio:9000" + volumes: + # Persist local file storage + - server_data:/app/data + # Persist SQLite database + - server_db:/app/backup.db + + # Example Postgres service (optional) + db: + image: postgres:15 + container_name: backup_db + environment: + POSTGRES_USER: backup + POSTGRES_PASSWORD: backup + POSTGRES_DB: backup + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "pg_isready", "-U", "backup"] + interval: 10s + timeout: 5s + retries: 5 + + # Lightweight S3-compatible storage using MinIO. This service + # provides an object store accessible at :9000 and a web console at + # :9001. Credentials and bucket configuration are set to match the + # server environment variables above. + minio: + image: quay.io/minio/minio + container_name: backup_minio + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: minio123 + command: server /data --console-address ":9001" + ports: + - "9000:9000" + - "9001:9001" + volumes: + - minio_data:/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 10s + timeout: 5s + retries: 5 + + client: + build: ./client + container_name: backup_client + depends_on: + - server + environment: + # Address of the server container; uses Docker's internal DNS + SERVER_URL: "http://server:8000" + # Provide credentials of a user with permission to register clients + USERNAME: "admin" + PASSWORD: "adminpass" + # Name of this client (will default to hostname if omitted) + CLIENT_NAME: "example-client" + # Directories inside the client container to back up + MONITORED_PATHS: "/data" + # Pre‑backup commands (e.g. dumping a PostgreSQL database) + PRE_COMMANDS: "" + # Ping interval in seconds + PING_INTERVAL: "300" + # Backup interval in seconds + BACKUP_INTERVAL: "3600" + volumes: + # Mount host data to back up; adjust to your needs + - client_data:/data + ports: + # Expose the client's web interface on port 8080 + - "8080:8080" + +volumes: + server_data: + server_db: + client_data: + postgres_data: + minio_data: \ No newline at end of file diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..c4c0dcc --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 + +# Use gunicorn or uvicorn for FastAPI +# Run the FastAPI application as a module so that relative imports work +CMD ["uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/server/__init__.py b/server/__init__.py new file mode 100644 index 0000000..f31e5c3 --- /dev/null +++ b/server/__init__.py @@ -0,0 +1 @@ +"""Server package initializer.""" diff --git a/server/auth.py b/server/auth.py new file mode 100644 index 0000000..0ddd3c7 --- /dev/null +++ b/server/auth.py @@ -0,0 +1,96 @@ +"""Authentication utilities for the backup service. + +This module implements password hashing, JSON Web Token (JWT) generation and +verification, and FastAPI dependencies for authenticating API calls. Users +authenticate via the ``/api/login`` endpoint by providing their username and +password; upon successful authentication a signed JWT is returned. The JWT +must be included in the ``Authorization: Bearer `` header for +subsequent requests. + +The JWT contains the user's ID and role (admin flag). Token expiration is +configurable via the ``ACCESS_TOKEN_EXPIRE_MINUTES`` environment variable. +""" + +from __future__ import annotations + +import os +import datetime +from typing import Optional + +from jose import JWTError, jwt +from passlib.context import CryptContext +from fastapi import Depends, HTTPException, status +from fastapi.security import OAuth2PasswordBearer +from sqlalchemy.orm import Session + +from . import models +from .database import get_db + + +# Configuration +SECRET_KEY = os.getenv("SECRET_KEY", "CHANGE_ME") +ALGORITHM = "HS256" +ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "60")) + + +# Password hashing context +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/login") + + +def hash_password(password: str) -> str: + return pwd_context.hash(password) + + +def verify_password(plain_password: str, hashed_password: str) -> bool: + return pwd_context.verify(plain_password, hashed_password) + + +def create_access_token(data: dict, expires_delta: Optional[datetime.timedelta] = None) -> str: + to_encode = data.copy() + expire = datetime.datetime.utcnow() + ( + expires_delta or datetime.timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) + ) + to_encode.update({"exp": expire}) + encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) + return encoded_jwt + + +async def get_current_user( + token: str = Depends(oauth2_scheme), db: Session = Depends(get_db) +) -> models.User: + """Retrieve the current user from a JWT token.""" + credentials_exception = HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Could not validate credentials", + headers={"WWW-Authenticate": "Bearer"}, + ) + try: + payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + user_id: int | None = payload.get("sub") + if user_id is None: + raise credentials_exception + except JWTError as e: + raise credentials_exception from e + user = db.query(models.User).filter(models.User.id == user_id).first() + if user is None: + raise credentials_exception + return user + + +async def get_current_active_user( + current_user: models.User = Depends(get_current_user), +) -> models.User: + return current_user + + +async def get_current_admin( + current_user: models.User = Depends(get_current_user), +) -> models.User: + if not current_user.is_admin: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Not enough privileges", + ) + return current_user \ No newline at end of file diff --git a/server/database.py b/server/database.py new file mode 100644 index 0000000..7a7ecd9 --- /dev/null +++ b/server/database.py @@ -0,0 +1,37 @@ +"""Database configuration for the backup service. + +This module defines the SQLAlchemy engine and session factory used to access +the relational database. The database URL can be configured via the +``DATABASE_URL`` environment variable. When using SQLite the database will +be stored on disk in the working directory. For production deployments a +server like PostgreSQL should be used. +""" + +from __future__ import annotations + +import os +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, Session + + +DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./backup.db") + + +# Determine whether to use check_same_thread for SQLite +if DATABASE_URL.startswith("sqlite"): + engine = create_engine( + DATABASE_URL, connect_args={"check_same_thread": False} + ) +else: + engine = create_engine(DATABASE_URL) + +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +def get_db() -> Session: + """Yield a new database session and ensure it is closed afterwards.""" + db = SessionLocal() + try: + yield db + finally: + db.close() \ No newline at end of file diff --git a/server/main.py b/server/main.py new file mode 100644 index 0000000..31b2337 --- /dev/null +++ b/server/main.py @@ -0,0 +1,748 @@ +"""Main application entry point for the backup server. + +This FastAPI application implements both a REST API for clients to register, +upload backups and send health checks, as well as a minimal web interface +for administrators to monitor clients and manage retention policies. The +design follows the requirements: + +* Clients register themselves with the server and obtain a unique token used + for subsequent API calls. +* Files are uploaded as multipart form data; the server computes the file's + hash and uses a hash‑to‑storage lookup to avoid reuploading duplicates + 【744670406339295†L270-L339】. +* Retention policies can be configured per user via maximum age (days) or + maximum number of versions, reflecting best practices for balancing version + depth against storage consumption【709290716836410†L142-L159】. +* The server can be run under Docker and stores data in a relational database + and optionally S3 for the backing file store. A simple admin interface + displays client statuses and last backup times. +""" + +from __future__ import annotations + +import asyncio +import datetime +import hashlib +import os +from typing import List, Optional + +from fastapi import ( + FastAPI, + Depends, + HTTPException, + status, + File, + UploadFile, + Form, + Request, + Response, +) +from fastapi.responses import HTMLResponse, FileResponse +from fastapi.templating import Jinja2Templates +from fastapi.security import OAuth2PasswordRequestForm +from sqlalchemy.orm import Session + +from . import models, schemas, auth, storage as storage_module, database + +app = FastAPI(title="Backup Service") +templates = Jinja2Templates(directory=os.path.join(os.path.dirname(__file__), "templates")) + +storage = storage_module.get_storage() + + +@app.on_event("startup") +async def on_startup() -> None: + # Create database tables if they do not exist + models.Base.metadata.create_all(bind=database.engine) + + # Ensure the `pre_commands` column exists on the clients table. SQLite will + # ignore the ALTER TABLE if the column already exists. For other + # databases this may fail gracefully if the column exists. + try: + with database.engine.connect() as conn: + conn.execute("""ALTER TABLE clients ADD COLUMN pre_commands TEXT""") + except Exception: + # Column already exists or migration failed; ignore + pass + + +@app.post("/api/register_user", response_model=schemas.UserOut) +async def register_user( + user: schemas.UserCreate, + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> schemas.UserOut: + """Create a new user. + + Only administrators may create users. The user's password is hashed + before storage. Retention policies can be optionally provided. + """ + existing = db.query(models.User).filter(models.User.username == user.username).first() + if existing: + raise HTTPException(status_code=400, detail="Username already exists") + hashed_password = auth.hash_password(user.password) + db_user = models.User( + username=user.username, + hashed_password=hashed_password, + is_admin=user.is_admin, + retention_days=user.retention_days, + retention_versions=user.retention_versions, + ) + db.add(db_user) + db.commit() + db.refresh(db_user) + return db_user + + +@app.post("/api/login", response_model=schemas.Token) +async def login_for_access_token( + form_data: OAuth2PasswordRequestForm = Depends(), + db: Session = Depends(database.get_db), +) -> schemas.Token: + """Authenticate a user and return a JWT token.""" + user = db.query(models.User).filter(models.User.username == form_data.username).first() + if not user or not auth.verify_password(form_data.password, user.hashed_password): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + access_token = auth.create_access_token(data={"sub": user.id, "is_admin": user.is_admin}) + return schemas.Token(access_token=access_token) + + +@app.post("/api/clients/register", response_model=schemas.ClientOut) +async def register_client( + req: schemas.ClientRegisterRequest, + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> schemas.ClientOut: + """Register a new client and return its unique token. + + Only administrators may call this endpoint. A client belongs to a specific + user (owner). If ``owner_id`` is omitted the current admin becomes the + owner. + """ + owner_id = req.owner_id or current_admin.id + owner = db.query(models.User).filter(models.User.id == owner_id).first() + if owner is None: + raise HTTPException(status_code=404, detail="Owner not found") + # Generate a random token; collisions are extremely unlikely + token = hashlib.sha256(os.urandom(32)).hexdigest() + client = models.Client(name=req.name, token=token, owner_id=owner.id) + db.add(client) + db.commit() + db.refresh(client) + return client + + +def get_client_by_token(token: str, db: Session) -> models.Client: + client = db.query(models.Client).filter(models.Client.token == token).first() + if not client: + raise HTTPException(status_code=404, detail="Client not found") + return client + + +@app.post("/api/clients/{client_token}/ping") +async def client_ping( + client_token: str, + db: Session = Depends(database.get_db), +) -> dict[str, str]: + """Update the last ping time for a client. + + Clients should call this endpoint periodically to indicate they are alive. + """ + client = get_client_by_token(client_token, db) + client.last_ping = datetime.datetime.utcnow() + db.commit() + return {"status": "pong"} + + +async def prune_old_versions( + db: Session, + client: models.Client, + original_path: str, + retention_days: Optional[int], + retention_versions: Optional[int], +) -> None: + """Prune old backup versions according to retention policies. + + This helper deletes the oldest entries that fall outside the specified + retention age or count. Both policies can be combined; whichever removes + more versions takes effect. The current (most recent) version is always + retained. + """ + query = ( + db.query(models.BackupFile) + .join(models.FileHash) + .filter(models.BackupFile.client_id == client.id) + .filter(models.BackupFile.original_path == original_path) + .order_by(models.BackupFile.version_time.desc()) + ) + backups: List[models.BackupFile] = query.all() + if not backups: + return + # Keep the newest backup always + backups_to_consider = backups[1:] + to_delete: List[models.BackupFile] = [] + now = datetime.datetime.utcnow() + # Age‑based retention + if retention_days is not None: + cutoff = now - datetime.timedelta(days=retention_days) + for bf in backups_to_consider: + if bf.version_time < cutoff: + to_delete.append(bf) + # Count‑based retention + if retention_versions is not None and len(backups) > retention_versions: + to_delete.extend(backups[retention_versions:]) + # Remove duplicates in case both policies selected the same backup + unique_to_delete = {b.id: b for b in to_delete}.values() + for bf in unique_to_delete: + # Delete database record + db.delete(bf) + # Determine if file hash is still referenced by any backup + if len(bf.file_hash.backups) == 1: + # Last reference; remove physical file + asyncio.create_task(storage.delete_file(bf.file_hash.storage_path)) # schedule deletion + db.delete(bf.file_hash) + db.commit() + + +@app.post("/api/clients/{client_token}/backup", response_model=schemas.BackupEntryOut) +async def upload_backup( + client_token: str, + file: UploadFile = File(...), + path: str = Form(..., description="Original file path on the client"), + retention_days: Optional[int] = Form(None, description="Override retention days for this file"), + retention_versions: Optional[int] = Form(None, description="Override retention versions for this file"), + db: Session = Depends(database.get_db), +) -> schemas.BackupEntryOut: + """Upload a file from a client and create a backup entry. + + The client is identified by its token. The server computes a SHA256 hash of + the uploaded content; if a file with the same hash already exists in the + deduplicated store, the new backup entry simply references the existing + storage path【744670406339295†L270-L339】. Retention policies defined on the + owning user (either age or version count) are applied. + """ + client = get_client_by_token(client_token, db) + data = await file.read() + hash_value = hashlib.sha256(data).hexdigest() + # Check if file already exists + file_hash = db.query(models.FileHash).filter(models.FileHash.hash_value == hash_value).first() + storage_path: str + if file_hash: + storage_path = file_hash.storage_path + else: + # Save file to storage backend + storage_path = await storage.save_file(data, filename=file.filename) + file_hash = models.FileHash(hash_value=hash_value, storage_path=storage_path) + db.add(file_hash) + # Create backup record + backup = models.BackupFile( + client_id=client.id, + file_hash=file_hash, + original_path=path, + size=len(data), + version_time=datetime.datetime.utcnow(), + ) + db.add(backup) + # Update last backup time + client.last_backup = backup.version_time + db.commit() + db.refresh(backup) + # Apply retention policy. Use overrides from the request if provided; + # otherwise fall back to the owner's defaults. This allows per‑file + # policies configured via backup tasks. + owner = client.owner + effective_retention_days = retention_days if retention_days is not None else owner.retention_days + effective_retention_versions = ( + retention_versions if retention_versions is not None else owner.retention_versions + ) + await prune_old_versions( + db=db, + client=client, + original_path=path, + retention_days=effective_retention_days, + retention_versions=effective_retention_versions, + ) + return schemas.BackupEntryOut( + id=backup.id, + original_path=backup.original_path, + version_time=backup.version_time, + size=backup.size, + file_hash=hash_value, + ) + + +@app.get("/api/clients/{client_token}/backups", response_model=List[schemas.BackupEntryOut]) +async def list_backups( + client_token: str, + db: Session = Depends(database.get_db), +) -> List[schemas.BackupEntryOut]: + """List backups for a client, ordered by newest first.""" + client = get_client_by_token(client_token, db) + backups = ( + db.query(models.BackupFile) + .filter(models.BackupFile.client_id == client.id) + .order_by(models.BackupFile.version_time.desc()) + .all() + ) + return [ + schemas.BackupEntryOut( + id=b.id, + original_path=b.original_path, + version_time=b.version_time, + size=b.size, + file_hash=b.file_hash.hash_value, + ) + for b in backups + ] + + +@app.get("/api/clients/{client_token}/download/{backup_id}") +async def download_backup( + client_token: str, + backup_id: int, + db: Session = Depends(database.get_db), +) -> Response: + """Download the contents of a backup file. + + This endpoint streams the content of the stored file back to the client. For + S3 storage backends the content is downloaded from S3 on demand. For local + storage backends the file is read from disk. + """ + client = get_client_by_token(client_token, db) + backup = ( + db.query(models.BackupFile) + .filter(models.BackupFile.id == backup_id) + .filter(models.BackupFile.client_id == client.id) + .first() + ) + if not backup: + raise HTTPException(status_code=404, detail="Backup not found") + file_hash = backup.file_hash + # Determine if storage is local + if isinstance(storage, storage_module.LocalStorage): + file_path = storage.root_dir / file_hash.storage_path + return FileResponse(path=file_path, filename=os.path.basename(backup.original_path)) + else: + # For S3 return the object contents + s3 = storage.s3 + obj = s3.get_object(Bucket=storage.bucket_name, Key=file_hash.storage_path) + content = obj["Body"].read() + return Response(content=content, media_type="application/octet-stream") + + +@app.post("/api/clients/{client_token}/log") +async def log_from_client( + client_token: str, + level: str = Form(...), + message: str = Form(...), + db: Session = Depends(database.get_db), +) -> dict[str, str]: + """Receive a log message from a client.""" + client = get_client_by_token(client_token, db) + log_entry = models.ClientLog( + client_id=client.id, level=level.upper(), message=message + ) + db.add(log_entry) + db.commit() + return {"status": "logged"} + + +# ======== File structure and task management endpoints ======== + +@app.post("/api/clients/{client_token}/files") +async def update_client_files( + client_token: str, + files: List[dict], + db: Session = Depends(database.get_db), +) -> dict[str, str]: + """Receive a full file listing from a client and replace existing entries. + + The payload should be a list of objects with keys ``path`` and ``is_dir``. + All existing ``ClientFile`` records for the client are removed and + recreated from the provided list. This endpoint allows the server to + present a file tree in the web interface. + """ + client = get_client_by_token(client_token, db) + # Delete existing file entries + db.query(models.ClientFile).filter(models.ClientFile.client_id == client.id).delete() + # Insert new entries + for item in files: + p = item.get("path") + is_dir = bool(item.get("is_dir")) + if not p: + continue + cf = models.ClientFile(client_id=client.id, path=p, is_dir=is_dir) + db.add(cf) + db.commit() + return {"status": "updated"} + + +@app.get("/api/clients/{client_id}/files", response_model=List[schemas.ClientFileOut]) +async def list_client_files( + client_id: int, + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> List[schemas.ClientFileOut]: + """Return the file structure for a given client. + + Administrators can query this endpoint to display a file tree in the web + interface. Files are returned unsorted; the UI may organise them into + a hierarchical view. + """ + client = db.query(models.Client).filter(models.Client.id == client_id).first() + if not client: + raise HTTPException(status_code=404, detail="Client not found") + entries = ( + db.query(models.ClientFile) + .filter(models.ClientFile.client_id == client.id) + .all() + ) + return [schemas.ClientFileOut.from_orm(e) for e in entries] + + +@app.get("/api/clients/{client_token}/tasks", response_model=List[schemas.TaskOut]) +async def get_tasks_for_client( + client_token: str, + db: Session = Depends(database.get_db), +) -> List[schemas.TaskOut]: + """Return backup tasks for a client. + + This endpoint is called by the client to retrieve its scheduled tasks. The + `pre_commands` field is returned as a list of strings for easier + consumption by the client. + """ + client = get_client_by_token(client_token, db) + tasks = ( + db.query(models.BackupTask) + .filter(models.BackupTask.client_id == client.id) + .all() + ) + result: List[schemas.TaskOut] = [] + for t in tasks: + commands: List[str] = [] + if t.pre_commands: + commands = [cmd for cmd in t.pre_commands.splitlines() if cmd.strip()] + # Determine pending run ID if there is a run with status PENDING + pending_run = ( + db.query(models.TaskRun) + .filter(models.TaskRun.task_id == t.id, models.TaskRun.status == "PENDING") + .order_by(models.TaskRun.start_time.desc()) + .first() + ) + pending_id = pending_run.id if pending_run else None + result.append( + schemas.TaskOut( + id=t.id, + path=t.path, + frequency_minutes=t.frequency_minutes, + pre_commands=commands, + retention_days=t.retention_days, + retention_versions=t.retention_versions, + compress=t.compress, + last_run=t.last_run, + next_run=t.next_run, + pending_run_id=pending_id, + ) + ) + return result + + +@app.get("/api/clients/{client_id}/tasks", response_model=List[schemas.TaskOut]) +async def list_tasks_for_admin( + client_id: int, + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> List[schemas.TaskOut]: + """List tasks associated with a client (admin view).""" + client = db.query(models.Client).filter(models.Client.id == client_id).first() + if not client: + raise HTTPException(status_code=404, detail="Client not found") + tasks = ( + db.query(models.BackupTask) + .filter(models.BackupTask.client_id == client.id) + .all() + ) + out: List[schemas.TaskOut] = [] + for t in tasks: + cmds = [] + if t.pre_commands: + cmds = [c for c in t.pre_commands.splitlines() if c.strip()] + pending_run = ( + db.query(models.TaskRun) + .filter(models.TaskRun.task_id == t.id, models.TaskRun.status == "PENDING") + .order_by(models.TaskRun.start_time.desc()) + .first() + ) + pending_id = pending_run.id if pending_run else None + out.append( + schemas.TaskOut( + id=t.id, + path=t.path, + frequency_minutes=t.frequency_minutes, + pre_commands=cmds, + retention_days=t.retention_days, + retention_versions=t.retention_versions, + compress=t.compress, + last_run=t.last_run, + next_run=t.next_run, + pending_run_id=pending_id, + ) + ) + return out + + +@app.post("/api/clients/{client_id}/tasks") +async def create_task( + client_id: int, + path: str = Form(...), + frequency_minutes: int = Form(..., gt=0, description="Run frequency in minutes"), + pre_commands: str = Form("", description="One command per line", max_length=4000), + retention_days: Optional[int] = Form(None), + retention_versions: Optional[int] = Form(None), + compress: bool = Form(False), + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> Response: + """Create a new backup task for the specified client. + + Administrators use this endpoint (via the web UI) to schedule backups for + specific files or directories. The client will execute the task at the + configured frequency. + """ + client = db.query(models.Client).filter(models.Client.id == client_id).first() + if not client: + raise HTTPException(status_code=404, detail="Client not found") + task = models.BackupTask( + client_id=client.id, + path=path, + frequency_minutes=frequency_minutes, + pre_commands=pre_commands.strip() if pre_commands else None, + retention_days=retention_days, + retention_versions=retention_versions, + compress=compress, + ) + # Next run time initialised to now so that the client will pick up the task + task.next_run = datetime.datetime.utcnow() + db.add(task) + db.commit() + db.refresh(task) + # Redirect back to the client detail page + return Response(status_code=303, headers={"Location": f"/clients/{client_id}"}) + + +@app.post("/api/clients/{client_id}/tasks/{task_id}/run") +async def run_task_now( + client_id: int, + task_id: int, + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> dict[str, str]: + """Request immediate execution of a task. + + This sets the task's ``next_run`` to the current time so the client will + execute it on its next polling cycle. A new ``TaskRun`` entry is + created with status ``PENDING``. + """ + task = ( + db.query(models.BackupTask) + .filter(models.BackupTask.id == task_id, models.BackupTask.client_id == client_id) + .first() + ) + if not task: + raise HTTPException(status_code=404, detail="Task not found") + now = datetime.datetime.utcnow() + task.next_run = now + # Create a TaskRun entry with pending status; it will be updated when + # the client reports completion + run = models.TaskRun(task_id=task.id, start_time=now, status="PENDING") + db.add(run) + db.commit() + return {"status": "scheduled", "run_id": run.id} + + +@app.post("/api/clients/{client_token}/tasks/{task_id}/status") +async def report_task_status( + client_token: str, + task_id: int, + run_id: int = Form(...), + status: str = Form(...), + message: str = Form(""), + db: Session = Depends(database.get_db), +) -> dict[str, str]: + """Receive status updates for a task run from the client. + + The client should call this endpoint after executing a task, providing + the run ID (created by ``run_task_now``) along with its status and any + message. The server records the end time and updates the task's + ``last_run`` and ``next_run`` values. + """ + client = get_client_by_token(client_token, db) + task = ( + db.query(models.BackupTask) + .filter(models.BackupTask.id == task_id, models.BackupTask.client_id == client.id) + .first() + ) + if not task: + raise HTTPException(status_code=404, detail="Task not found for this client") + run = db.query(models.TaskRun).filter(models.TaskRun.id == run_id, models.TaskRun.task_id == task.id).first() + now = datetime.datetime.utcnow() + if not run: + # If the run does not exist (e.g. scheduled automatically), create it + run = models.TaskRun(task_id=task.id, start_time=now) + db.add(run) + run.end_time = now + run.status = status + run.message = message + # Update task last_run and next_run times + task.last_run = run.end_time + if task.frequency_minutes: + task.next_run = run.end_time + datetime.timedelta(minutes=task.frequency_minutes) + db.commit() + return {"status": "recorded", "run_id": run.id} + + +# ======== Client configuration endpoints ========= + +@app.get("/api/clients/{client_token}/config", response_model=schemas.ClientConfigOut) +async def get_client_config( + client_token: str, + db: Session = Depends(database.get_db), +) -> schemas.ClientConfigOut: + """Return configuration information for a client. + + Currently this includes the list of pre‑backup commands. The client + requests this endpoint to retrieve any commands set by administrators. + """ + client = get_client_by_token(client_token, db) + commands = [] + if client.pre_commands: + commands = [cmd for cmd in client.pre_commands.splitlines() if cmd.strip()] + return schemas.ClientConfigOut(pre_commands=commands) + + +@app.post("/api/clients/{client_id}/config") +async def update_client_config( + client_id: int, + pre_commands: str = Form(..., description="One command per line"), + db: Session = Depends(database.get_db), + current_admin: models.User = Depends(auth.get_current_admin), +) -> Response: + """Update configuration for a client. + + Administrators can set shell commands to be executed by the client before + each backup. These commands are stored on the server and delivered to + clients via the `/api/clients/{client_token}/config` endpoint. Commands + should be separated by newlines. + """ + client = db.query(models.Client).filter(models.Client.id == client_id).first() + if not client: + raise HTTPException(status_code=404, detail="Client not found") + client.pre_commands = pre_commands.strip() if pre_commands else None + db.commit() + # Redirect back to the client details page + return Response(status_code=303, headers={"Location": f"/clients/{client_id}"}) + + +# ======== Web interface routes ========= + +@app.get("/clients", response_class=HTMLResponse) +async def list_clients_page( + request: Request, + current_user: models.User = Depends(auth.get_current_admin), + db: Session = Depends(database.get_db), +) -> Response: + """Render a page that lists all clients with management actions.""" + clients = db.query(models.Client).all() + return templates.TemplateResponse( + "clients.html", + { + "request": request, + "user": current_user, + "clients": clients, + }, + ) + + +@app.get("/clients/{client_id}", response_class=HTMLResponse) +async def client_detail_page( + client_id: int, + request: Request, + current_user: models.User = Depends(auth.get_current_admin), + db: Session = Depends(database.get_db), +) -> Response: + """Render details for a specific client, including backups and configuration.""" + client = db.query(models.Client).filter(models.Client.id == client_id).first() + if not client: + raise HTTPException(status_code=404, detail="Client not found") + # fetch backups ordered by latest + backups = ( + db.query(models.BackupFile) + .filter(models.BackupFile.client_id == client.id) + .order_by(models.BackupFile.version_time.desc()) + .all() + ) + files = ( + db.query(models.ClientFile) + .filter(models.ClientFile.client_id == client.id) + .order_by(models.ClientFile.path) + .all() + ) + tasks = ( + db.query(models.BackupTask) + .filter(models.BackupTask.client_id == client.id) + .all() + ) + logs = ( + db.query(models.ClientLog) + .filter(models.ClientLog.client_id == client.id) + .order_by(models.ClientLog.timestamp.desc()) + .limit(50) + .all() + ) + # Prepare mappings for task history. For each task, collect its run history and + # the backup entries that correspond to the task's path. This allows the + # template to display run status and available versions per task. + runs_map: dict[int, list] = {} + backups_map: dict[str, list] = {} + # Precompute runs for each task + for t in tasks: + runs = ( + db.query(models.TaskRun) + .filter(models.TaskRun.task_id == t.id) + .order_by(models.TaskRun.start_time.desc()) + .all() + ) + runs_map[t.id] = runs + # Group backups by original path + for b in backups: + backups_map.setdefault(b.original_path, []).append(b) + + return templates.TemplateResponse( + "client_detail.html", + { + "request": request, + "user": current_user, + "client": client, + "backups": backups, + "files": files, + "tasks": tasks, + "logs": logs, + "runs_map": runs_map, + "backups_map": backups_map, + }, + ) + + +@app.get("/", response_class=HTMLResponse) +async def root_redirect(request: Request) -> Response: + """Redirect the root URL to the clients list. + + The main administration interface is available at ``/clients``. This + redirect keeps the root endpoint simple and ensures there is no + ambiguity with multiple handlers for ``/``. + """ + return Response(status_code=303, headers={"Location": "/clients"}) \ No newline at end of file diff --git a/server/models.py b/server/models.py new file mode 100644 index 0000000..dc6e4b7 --- /dev/null +++ b/server/models.py @@ -0,0 +1,273 @@ +"""Database models for the backup service. + +This module defines SQLAlchemy ORM models used by the backup service. The schema +supports user accounts with optional administrator privileges, client machines +that connect to the server, deduplicated file storage keyed by a content +hash, individual backup entries referencing those hashes, and logs from +clients. Users can also specify retention policies either by limiting the +number of versions retained or by specifying an age after which old versions +should expire. + +References: + * Balancing versioning depth against storage consumption is an important + consideration when designing backup systems【709290716836410†L142-L159】. The schema + includes fields for both a maximum number of versions and a maximum + retention age so administrators can adjust these policies according to + their needs. + * Using a key-value store to map file hashes to storage locations makes it + efficient to check whether a file already exists and avoid uploading + duplicates【744670406339295†L270-L284】. +""" + +from __future__ import annotations + +import datetime +from typing import Optional, List + +from sqlalchemy import ( + Column, + Integer, + String, + Boolean, + DateTime, + ForeignKey, + UniqueConstraint, + Text, + func, +) +from sqlalchemy.orm import declarative_base, relationship + + +Base = declarative_base() + + +class User(Base): + """Represents an authenticated user. + + Users may be administrators (``is_admin=True``) and are allowed to create + other users and clients. Non‑admin users are intended to authenticate + against the API to view and download their backups. + """ + + __tablename__ = "users" + id: int = Column(Integer, primary_key=True) + username: str = Column(String(50), unique=True, nullable=False) + hashed_password: str = Column(String(128), nullable=False) + is_admin: bool = Column(Boolean, default=False) + # Optional retention policies set per user + retention_days: Optional[int] = Column(Integer, nullable=True) + retention_versions: Optional[int] = Column(Integer, nullable=True) + created_at: datetime.datetime = Column( + DateTime, nullable=False, server_default=func.now() + ) + updated_at: datetime.datetime = Column( + DateTime, + nullable=False, + server_default=func.now(), + onupdate=func.now(), + ) + + clients: List[Client] = relationship("Client", back_populates="owner") + + +class Client(Base): + """Represents a client machine that sends backups to the server. + + Each client holds a unique token used for authenticating API requests + originating from that machine. The server tracks the last ping and + backup times to display client health in the web interface. + """ + + __tablename__ = "clients" + id: int = Column(Integer, primary_key=True) + name: str = Column(String(128), nullable=False) + token: str = Column(String(64), unique=True, nullable=False) + owner_id: int = Column(Integer, ForeignKey("users.id"), nullable=False) + # Timestamp of the most recent ping request + last_ping: Optional[datetime.datetime] = Column(DateTime, nullable=True) + # Timestamp of the most recent backup + last_backup: Optional[datetime.datetime] = Column(DateTime, nullable=True) + created_at: datetime.datetime = Column( + DateTime, nullable=False, server_default=func.now() + ) + updated_at: datetime.datetime = Column( + DateTime, + nullable=False, + server_default=func.now(), + onupdate=func.now(), + ) + + owner: User = relationship("User", back_populates="clients") + backups: List[BackupFile] = relationship( + "BackupFile", back_populates="client", cascade="all, delete-orphan" + ) + logs: List[ClientLog] = relationship( + "ClientLog", back_populates="client", cascade="all, delete-orphan" + ) + + # Relationship to file structure entries. Each client maintains a list + # of files and directories that are present on the monitored machine. + # The list is updated whenever the client sends its file structure via + # the `/api/clients/{token}/files` endpoint. Entries are deleted + # automatically when the client is removed. + files: List[ClientFile] = relationship( + "ClientFile", back_populates="client", cascade="all, delete-orphan" + ) + + # Backup tasks configured for this client. Each task defines a path to + # backup, a frequency and optional pre‑commands and retention settings. + tasks: List[BackupTask] = relationship( + "BackupTask", back_populates="client", cascade="all, delete-orphan" + ) + + # Optional list of shell commands to run on the client before each backup. + # The commands are stored as a newline‑separated string so they can be + # edited in the admin UI. When the client requests its configuration the + # server returns these commands as a list. + pre_commands: Optional[str] = Column(Text, nullable=True) + + +class FileHash(Base): + """Represents a deduplicated file stored in the backend storage. + + The `hash_value` uniquely identifies the file contents. The `storage_path` + field stores the path or key used by the storage backend (local filesystem + or S3). Multiple backup records may reference the same FileHash if clients + upload identical files【744670406339295†L270-L339】. + """ + + __tablename__ = "file_hashes" + id: int = Column(Integer, primary_key=True) + hash_value: str = Column(String(128), unique=True, nullable=False) + storage_path: str = Column(String(512), nullable=False) + created_at: datetime.datetime = Column( + DateTime, nullable=False, server_default=func.now() + ) + + backups: List[BackupFile] = relationship( + "BackupFile", back_populates="file_hash", cascade="all, delete-orphan" + ) + + +class BackupFile(Base): + """Represents a single backup entry for a file on a client machine. + + Each backup entry references a deduplicated file via the `file_hash_id` + foreign key. The `original_path` records the path of the file on the + client. The `version_time` stores when the backup was taken. + """ + + __tablename__ = "backup_files" + id: int = Column(Integer, primary_key=True) + client_id: int = Column(Integer, ForeignKey("clients.id"), nullable=False) + file_hash_id: int = Column(Integer, ForeignKey("file_hashes.id"), nullable=False) + original_path: str = Column(String(1024), nullable=False) + version_time: datetime.datetime = Column( + DateTime, nullable=False, server_default=func.now() + ) + # Additional metadata such as file size could be stored here + size: Optional[int] = Column(Integer, nullable=True) + + client: Client = relationship("Client", back_populates="backups") + file_hash: FileHash = relationship("FileHash", back_populates="backups") + + __table_args__ = ( + # Unique constraint ensures that the same client cannot record two + # backups for the same path at the exact same time; this prevents + # accidentally creating duplicate entries if a client retries a request. + UniqueConstraint("client_id", "original_path", "version_time"), + ) + + +class ClientLog(Base): + """Represents log entries sent by clients. + + Log messages are stored with a timestamp and arbitrary text. This table is + useful for debugging and auditing client behaviour. + """ + + __tablename__ = "client_logs" + id: int = Column(Integer, primary_key=True) + client_id: int = Column(Integer, ForeignKey("clients.id"), nullable=False) + timestamp: datetime.datetime = Column( + DateTime, nullable=False, server_default=func.now() + ) + level: str = Column(String(20), nullable=False, default="INFO") + message: str = Column(Text, nullable=False) + + client: Client = relationship("Client", back_populates="logs") + + +# ====================== Additional models for advanced features ====================== + +class ClientFile(Base): + """Represents a file or directory present on a client machine. + + The server stores the file structure sent by each client so that + administrators can browse the client's filesystem from the web UI. The + `is_dir` flag distinguishes directories from regular files. Entries are + updated wholesale when the client sends its file listing; old entries are + removed and replaced with the new listing. + """ + + __tablename__ = "client_files" + id: int = Column(Integer, primary_key=True) + client_id: int = Column(Integer, ForeignKey("clients.id", ondelete="CASCADE"), nullable=False) + path: str = Column(String(1024), nullable=False) + is_dir: bool = Column(Boolean, default=False, nullable=False) + # Timestamp when this entry was last reported by the client + reported_at: datetime.datetime = Column(DateTime, nullable=False, server_default=func.now(), onupdate=func.now()) + + client: Client = relationship("Client", back_populates="files") + + +class BackupTask(Base): + """Represents a scheduled backup task for a specific file on a client. + + A task defines a path on the client machine to back up at a regular + interval (specified in minutes). Optional shell commands may run before + backing up, and retention settings can override the user's defaults on a + per‑task basis. The `compress` flag indicates whether the client should + archive the file before uploading it. The server tracks the last and + next run times to aid scheduling logic on the client side. + """ + + __tablename__ = "backup_tasks" + id: int = Column(Integer, primary_key=True) + client_id: int = Column(Integer, ForeignKey("clients.id", ondelete="CASCADE"), nullable=False) + path: str = Column(String(1024), nullable=False) + # Frequency in minutes; the client should run this task at least this often + frequency_minutes: int = Column(Integer, nullable=False) + pre_commands: Optional[str] = Column(Text, nullable=True) + retention_days: Optional[int] = Column(Integer, nullable=True) + retention_versions: Optional[int] = Column(Integer, nullable=True) + compress: bool = Column(Boolean, default=False, nullable=False) + last_run: Optional[datetime.datetime] = Column(DateTime, nullable=True) + next_run: Optional[datetime.datetime] = Column(DateTime, nullable=True) + created_at: datetime.datetime = Column(DateTime, nullable=False, server_default=func.now()) + updated_at: datetime.datetime = Column(DateTime, nullable=False, server_default=func.now(), onupdate=func.now()) + + client: Client = relationship("Client", back_populates="tasks") + runs: List[TaskRun] = relationship( + "TaskRun", back_populates="task", cascade="all, delete-orphan" + ) + + +class TaskRun(Base): + """Represents a single execution of a backup task. + + The client reports the outcome of each run back to the server. This + information allows administrators to see whether tasks are succeeding and + inspect any error messages. Timestamps record when the run started and + ended. + """ + + __tablename__ = "task_runs" + id: int = Column(Integer, primary_key=True) + task_id: int = Column(Integer, ForeignKey("backup_tasks.id", ondelete="CASCADE"), nullable=False) + start_time: datetime.datetime = Column(DateTime, nullable=False, server_default=func.now()) + end_time: Optional[datetime.datetime] = Column(DateTime, nullable=True) + status: str = Column(String(50), nullable=False) + message: Optional[str] = Column(Text, nullable=True) + + task: BackupTask = relationship("BackupTask", back_populates="runs") \ No newline at end of file diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..7045fb9 --- /dev/null +++ b/server/requirements.txt @@ -0,0 +1,10 @@ +fastapi +uvicorn +sqlalchemy +psycopg2-binary +pydantic +passlib[bcrypt] +python-jose[cryptography] +boto3 +jinja2 +python-multipart \ No newline at end of file diff --git a/server/schemas.py b/server/schemas.py new file mode 100644 index 0000000..827d58f --- /dev/null +++ b/server/schemas.py @@ -0,0 +1,137 @@ +"""Pydantic models for request and response bodies. + +These schemas define the shape of data sent to and from the API. They are +used by FastAPI to validate input and generate OpenAPI documentation. +""" + +from __future__ import annotations + +from typing import Optional, List +from datetime import datetime + +from pydantic import BaseModel, Field, constr + + +# User schemas +class UserCreate(BaseModel): + username: constr(strip_whitespace=True, min_length=3, max_length=50) + password: constr(min_length=6) + is_admin: bool = False + retention_days: Optional[int] = Field( + None, description="Maximum age (in days) to retain old versions." + ) + retention_versions: Optional[int] = Field( + None, description="Maximum number of versions to retain per file." + ) + + +class UserOut(BaseModel): + id: int + username: str + is_admin: bool + retention_days: Optional[int] = None + retention_versions: Optional[int] = None + + class Config: + orm_mode = True + + +class Token(BaseModel): + access_token: str + token_type: str = "bearer" + + +# Client schemas +class ClientRegisterRequest(BaseModel): + name: str + owner_id: Optional[int] = None # Only used when admin registers on behalf of user + + +class ClientOut(BaseModel): + id: int + name: str + token: str + owner_id: int + last_ping: Optional[datetime] = None + last_backup: Optional[datetime] = None + pre_commands: Optional[str] = None + + class Config: + orm_mode = True + + +class ClientConfigOut(BaseModel): + """Configuration returned to the client. + + Currently it exposes the list of pre‑backup commands. Additional fields + could be added here in future (e.g. inclusion/exclusion patterns). + """ + pre_commands: List[str] + + +# Backup-related schemas +class BackupEntryOut(BaseModel): + id: int + original_path: str + version_time: datetime + size: Optional[int] + file_hash: str = Field(..., description="Content hash of the stored file") + + class Config: + orm_mode = True + + +class ClientLogEntry(BaseModel): + timestamp: datetime + level: str + message: str + + class Config: + orm_mode = True + + +# ==================== Additional schemas for advanced features ==================== + +class ClientFileOut(BaseModel): + id: int + path: str + is_dir: bool + + class Config: + orm_mode = True + + +class TaskOut(BaseModel): + id: int + path: str + frequency_minutes: int + pre_commands: List[str] = [] + retention_days: Optional[int] = None + retention_versions: Optional[int] = None + compress: bool + last_run: Optional[datetime] = None + next_run: Optional[datetime] = None + pending_run_id: Optional[int] = None + + class Config: + orm_mode = True + + +class TaskCreate(BaseModel): + path: str + frequency_minutes: int + pre_commands: Optional[str] = None + retention_days: Optional[int] = None + retention_versions: Optional[int] = None + compress: bool = False + + +class TaskRunOut(BaseModel): + id: int + start_time: datetime + end_time: Optional[datetime] + status: str + message: Optional[str] = None + + class Config: + orm_mode = True \ No newline at end of file diff --git a/server/storage.py b/server/storage.py new file mode 100644 index 0000000..8117d78 --- /dev/null +++ b/server/storage.py @@ -0,0 +1,140 @@ +"""Abstraction over storage backends for the backup service. + +The backup server supports storing files either on the local filesystem or in +Amazon S3. At runtime the storage backend is chosen based on environment +variables. Using S3 for storage enables durable, scalable backup storage and +facilitates lifecycle management (for example, using S3 expiration rules to +delete old versions of objects automatically【17949889377376†L188-L219】). When no +S3 configuration is provided the service falls back to storing files on disk +under a configurable directory. + +The Storage base class defines a common API for saving and deleting files. The +local storage implementation simply writes files to disk. The S3 +implementation uses boto3 to upload objects to a bucket and generate unique +keys. +""" + +from __future__ import annotations + +import hashlib +import os +import uuid +from pathlib import Path +from typing import Optional + +import boto3 + + +class Storage: + """Abstract base class for storage backends.""" + + async def save_file(self, data: bytes, filename: Optional[str] = None) -> str: + """Save a binary blob and return a storage key/path. + + Args: + data: The file content to store. + filename: Optional file name hint; ignored by some backends. + + Returns: + A string representing the storage location (e.g. file path or S3 key). + """ + raise NotImplementedError + + async def delete_file(self, key: str) -> None: + """Delete a file from storage. + + Args: + key: The storage key previously returned by ``save_file``. + """ + raise NotImplementedError + + +class LocalStorage(Storage): + """Filesystem storage backend. + + Files are stored inside a root directory defined by the ``BACKUP_STORAGE_PATH`` + environment variable (default: ``./data``). Each saved file is placed + under its hash name to avoid collisions; this also allows the backend to + deduplicate by content easily. + """ + + def __init__(self, root_dir: Optional[str] = None) -> None: + self.root_dir = Path(root_dir or os.getenv("BACKUP_STORAGE_PATH", "./data")).resolve() + self.root_dir.mkdir(parents=True, exist_ok=True) + + async def save_file(self, data: bytes, filename: Optional[str] = None) -> str: + # Use SHA256 of the content as file name to ensure uniqueness + hash_value = hashlib.sha256(data).hexdigest() + # Place files in subdirectories to avoid too many files in one folder + subdir = self.root_dir / hash_value[:2] + subdir.mkdir(parents=True, exist_ok=True) + file_path = subdir / hash_value + if not file_path.exists(): + with open(file_path, "wb") as f: + f.write(data) + # Return relative path from root to allow migration if root changes + return str(file_path.relative_to(self.root_dir)) + + async def delete_file(self, key: str) -> None: + file_path = self.root_dir / key + try: + file_path.unlink() + except FileNotFoundError: + pass + + +class S3Storage(Storage): + """Amazon S3 storage backend. + + Files are uploaded to an S3 bucket defined by the ``S3_BUCKET`` environment + variable. A UUID-based key is generated for each file. With S3 versioning + enabled, an object can have multiple versions, and lifecycle rules can be + applied to automatically expire old versions【17949889377376†L188-L219】. + """ + + def __init__(self, bucket_name: str, prefix: str = "backups/") -> None: + self.bucket_name = bucket_name + self.prefix = prefix + # boto3 will automatically use credentials from environment variables + # Allow overriding the S3 endpoint to support self‑hosted services like MinIO. + # When using a custom endpoint, you should also specify a region (any string), + # otherwise boto3 will attempt to infer AWS regions. We pass through + # ``S3_ENDPOINT`` from the environment if present. + endpoint_url = os.getenv("S3_ENDPOINT") + self.s3 = boto3.client( + "s3", + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + region_name=os.getenv("AWS_REGION"), + endpoint_url=endpoint_url, + ) + + async def save_file(self, data: bytes, filename: Optional[str] = None) -> str: + # Generate a random key; include original filename for readability if provided + key = f"{self.prefix}{uuid.uuid4().hex}" + if filename: + # Sanitize filename to avoid path traversal + basename = os.path.basename(filename) + key = f"{self.prefix}{uuid.uuid4().hex}-{basename}" + self.s3.put_object(Bucket=self.bucket_name, Key=key, Body=data) + return key + + async def delete_file(self, key: str) -> None: + self.s3.delete_object(Bucket=self.bucket_name, Key=key) + + +def get_storage() -> Storage: + """Factory function returning the configured storage backend. + + If the ``S3_BUCKET`` environment variable is set the service uses S3, + otherwise it falls back to local storage. Additional configuration options + such as ``S3_PREFIX`` and ``BACKUP_STORAGE_PATH`` can also be used to + customise the storage key prefix and local directory. + """ + bucket = os.getenv("S3_BUCKET") + if bucket: + prefix = os.getenv("S3_PREFIX", "backups/") + return S3Storage(bucket_name=bucket, prefix=prefix) + else: + root = os.getenv("BACKUP_STORAGE_PATH", "./data") + return LocalStorage(root_dir=root) \ No newline at end of file diff --git a/server/templates/base.html b/server/templates/base.html new file mode 100644 index 0000000..baca42b --- /dev/null +++ b/server/templates/base.html @@ -0,0 +1,21 @@ + + + + + {{ title if title else 'Backup Service' }} + + + + + +
+ +
+
+ {% block content %}{% endblock %} +
+ + \ No newline at end of file diff --git a/server/templates/client_detail.html b/server/templates/client_detail.html new file mode 100644 index 0000000..0b7b249 --- /dev/null +++ b/server/templates/client_detail.html @@ -0,0 +1,247 @@ +{% extends "base.html" %} +{% block content %} +

Client {{ client.id }} – {{ client.name }}

+

Owner: {{ client.owner.username }} | Token: {{ client.token }}

+

Last ping: {{ client.last_ping if client.last_ping else '-' }}
+ Last backup: {{ client.last_backup if client.last_backup else '-' }}

+ + +

Pre‑Backup Commands

+
+
+ + +
+ +
+ + +

Backups

+{% if backups %} +
+ + + + + + + + + + + + + {% for b in backups %} + + + + + + + + + {% endfor %} + +
IDPathVersion TimeSize (bytes)HashDownload
{{ b.id }}{{ b.original_path }}{{ b.version_time }}{{ b.size }}{{ b.file_hash.hash_value }} + Download +
+
+{% else %} +

No backups yet.

+{% endif %} + +

File Structure

+{% if files %} +
+
    + {% for f in files %} +
  • {{ f.path }}{% if f.is_dir %}/{% endif %}
  • + {% endfor %} +
+
+{% else %} +

No file structure available. The client will send its file list on the next backup cycle.

+{% endif %} + + +

Backup Tasks

+{% if tasks %} +
+ + + + + + + + + + + + + + {% for t in tasks %} + + + + + + + + + + {% endfor %} + +
PathFrequency (min)RetentionCompressLast RunNext RunActions
{{ t.path }}{{ t.frequency_minutes }} + {% if t.retention_versions %}{{ t.retention_versions }} versions{% endif %} + {% if t.retention_days %} + {% if t.retention_versions %}
{% endif %} + {{ t.retention_days }} days + {% endif %} + {% if not t.retention_versions and not t.retention_days %}-{% endif %} +
{% if t.compress %}Yes{% else %}No{% endif %}{{ t.last_run if t.last_run else '-' }}{{ t.next_run if t.next_run else '-' }} +
+ +
+
+
+{% else %} +

No backup tasks configured.

+{% endif %} + + +{% if tasks %} +

Task History

+
+ {% for t in tasks %} +
+

Path: {{ t.path }}

+ +
Runs
+ {% set runs = runs_map.get(t.id) %} + {% if runs and runs|length > 0 %} +
+ + + + + + + + + + + {% for run in runs %} + + + + + + + {% endfor %} + +
StartEndStatusMessage
{{ run.start_time }}{{ run.end_time if run.end_time else '-' }}{{ run.status }}{{ run.message or '-' }}
+
+ {% else %} +

No runs recorded for this task.

+ {% endif %} + +
Backups
+ {% set bks = backups_map.get(t.path) %} + {% if bks and bks|length > 0 %} +
+ + + + + + + + + + + {% for b in bks %} + + + + + + + {% endfor %} + +
IDVersion TimeSizeDownload
{{ b.id }}{{ b.version_time }}{{ b.size }}Download
+
+ {% else %} +

No backups available for this path.

+ {% endif %} +
+ {% endfor %} +
+{% endif %} + + +

Create New Task

+
+
+ + + + {% for f in files %} + {% if not f.is_dir %}{% endif %} + {% endfor %} + +
+
+ + +
+
+ + +
+
+
+ + +
+
+ + +
+
+
+ + +
+ +
+ + +

Recent Logs

+{% if logs %} +
+ + + + + + + + + + {% for log in logs %} + + + + + + {% endfor %} + +
TimeLevelMessage
{{ log.timestamp }}{{ log.level }}{{ log.message }}
+
+{% else %} +

No logs.

+{% endif %} + +

← Back to list

+{% endblock %} \ No newline at end of file diff --git a/server/templates/clients.html b/server/templates/clients.html new file mode 100644 index 0000000..ced5ba4 --- /dev/null +++ b/server/templates/clients.html @@ -0,0 +1,85 @@ +{% extends "base.html" %} +{% block content %} +

Clients

+

List of registered clients. Click on a client ID to view details.

+ + +
+ + + + + + + + + + + + + + {% for client in clients %} + + + + + + + + + + {% endfor %} + +
IDNameOwnerLast PingLast BackupCommandsActions
+ {{ client.id }} + {{ client.name }}{{ client.owner.username }}{{ client.last_ping if client.last_ping else '-' }}{{ client.last_backup if client.last_backup else '-' }} + {% if client.pre_commands %} + {{ client.pre_commands|replace('\n', '
')|safe }} + {% else %} + - + {% endif %} +
+ Details +
+
+ + +

Create New Client

+
+
+ + +
+
+ + +
+ +
+ + +

Create New User

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+{% endblock %} \ No newline at end of file diff --git a/server/templates/dashboard.html b/server/templates/dashboard.html new file mode 100644 index 0000000..ce76f13 --- /dev/null +++ b/server/templates/dashboard.html @@ -0,0 +1,74 @@ +{% extends "base.html" %} +{% block content %} +

Backup Service Admin Dashboard

+

Welcome, {{ user.username }}!

+ + +

Clients

+
+ + + + + + + + + + + + + {% for client in clients %} + + + + + + + + + {% endfor %} + +
Client IDNameOwnerLast PingLast BackupToken
{{ client.id }}{{ client.name }}{{ client.owner.username }}{{ client.last_ping if client.last_ping else '-' }}{{ client.last_backup if client.last_backup else '-' }}{{ client.token }}
+
+ + +

Create User

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
+ + +

Create Client

+
+
+ + +
+
+ + +
+ +
+{% endblock %} \ No newline at end of file diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py new file mode 100644 index 0000000..ec15fed --- /dev/null +++ b/tests/test_endpoints.py @@ -0,0 +1,187 @@ +"""End‑to‑end tests for the backup service API. + +These tests exercise key functionality of the server to ensure that +registration, file structure reporting, task scheduling and status +reporting all work as expected. A temporary SQLite database is +used so that the tests do not affect production data. To execute +these tests, run ``pytest`` in the root of the repository. + +Note: The tests import FastAPI and SQLAlchemy; ensure that these +dependencies are installed in your development environment. In +offline or minimal environments you may need to install them +manually before running the tests. +""" + +import os +import tempfile +import pytest +from datetime import datetime, timedelta + +from fastapi.testclient import TestClient +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from backup_service.server import models, auth, database, main as server_app + + +@pytest.fixture(scope="function") +def test_client(tmp_path): + """Set up a FastAPI TestClient with an isolated SQLite database. + + The fixture creates a temporary file for the SQLite database, + overrides the ``get_db`` dependency to use a session bound to + this database, and ensures that all tables are created before + returning the TestClient instance. After the test the overrides + are cleared. + """ + # Create a temporary SQLite database file + db_path = tmp_path / "test.db" + engine = create_engine(f"sqlite:///{db_path}", connect_args={"check_same_thread": False}) + # Bind a sessionmaker to the temporary engine + TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + # Create all tables on the temporary engine + models.Base.metadata.create_all(bind=engine) + + # Dependency override to use the test session + def override_get_db(): + db = TestingSessionLocal() + try: + yield db + finally: + db.close() + + server_app.app.dependency_overrides[database.get_db] = override_get_db + + # Yield a TestClient instance + with TestClient(server_app.app) as client: + yield client + + # Clear overrides after the test to avoid side effects + server_app.app.dependency_overrides.clear() + + +def _create_admin_user(db_session, username="admin", password="secret"): + """Helper to insert an admin user into the database. + + Returns the created User object. The password is hashed using + the server's authentication helper so that the login endpoint + functions correctly. + """ + user = models.User( + username=username, + hashed_password=auth.hash_password(password), + is_admin=True, + ) + db_session.add(user) + db_session.commit() + db_session.refresh(user) + return user + + +def test_register_and_backup_flow(test_client): + """End‑to‑end test of client registration, file listing, task + creation and status reporting. + + This test performs the following steps: + + 1. Insert an admin user into the test database. + 2. Log in as the admin to obtain a JWT token. + 3. Register a new client and capture its token and ID. + 4. Post a file structure for the client and verify it can be retrieved. + 5. Create a backup task via the admin API and verify it appears in the task list returned to the client. + 6. Schedule the task to run immediately and record a success status. + 7. Confirm that the task's ``last_run`` and ``next_run`` fields are updated accordingly. + """ + # Access the test DB via the overridden get_db dependency + db = next(server_app.app.dependency_overrides[database.get_db]()) + # Create an admin user in the DB so that login works + admin = _create_admin_user(db) + + # Step 1: login to get JWT + resp = test_client.post( + "/api/login", + data={"username": admin.username, "password": "secret"}, + ) + assert resp.status_code == 200 + token = resp.json()["access_token"] + headers = {"Authorization": f"Bearer {token}"} + + # Step 2: register a new client + resp = test_client.post( + "/api/clients/register", + json={"name": "TestClient"}, + headers=headers, + ) + assert resp.status_code == 200 + client_data = resp.json() + client_id = client_data["id"] + client_token = client_data["token"] + + # Step 3: post file structure + file_list = [ + {"path": "/data", "is_dir": True}, + {"path": "/data/file.txt", "is_dir": False}, + ] + resp = test_client.post(f"/api/clients/{client_token}/files", json=file_list) + assert resp.status_code == 200 + assert resp.json()["status"] == "updated" + # Verify admin can retrieve the file structure + resp = test_client.get(f"/api/clients/{client_id}/files", headers=headers) + assert resp.status_code == 200 + data = resp.json() + assert len(data) == len(file_list) + paths = {entry["path"] for entry in data} + assert "/data" in paths and "/data/file.txt" in paths + + # Step 4: create a task for backing up /data/file.txt every 5 minutes + resp = test_client.post( + f"/api/clients/{client_id}/tasks", + data={ + "path": "/data/file.txt", + "frequency_minutes": "5", + "pre_commands": "echo pre", + "retention_versions": "3", + "retention_days": "30", + "compress": "true", + }, + headers=headers, + ) + # The endpoint returns a 303 redirect on success + assert resp.status_code == 303 + # Fetch tasks as the client would + resp = test_client.get(f"/api/clients/{client_token}/tasks") + assert resp.status_code == 200 + tasks = resp.json() + assert len(tasks) == 1 + task = tasks[0] + assert task["path"] == "/data/file.txt" + assert task["frequency_minutes"] == 5 + assert task["retention_versions"] == 3 + assert task["retention_days"] == 30 + assert task["compress"] is True + + # Step 5: schedule the task to run immediately + resp = test_client.post( + f"/api/clients/{client_id}/tasks/{task['id']}/run", + headers=headers, + ) + assert resp.status_code == 200 + run_id = resp.json()["run_id"] + # Fetch tasks again; pending_run_id should be set + resp = test_client.get(f"/api/clients/{client_token}/tasks") + task_with_pending = resp.json()[0] + assert task_with_pending["pending_run_id"] == run_id + + # Step 6: simulate client reporting success + resp = test_client.post( + f"/api/clients/{client_token}/tasks/{task['id']}/status", + data={"run_id": str(run_id), "status": "SUCCESS", "message": "ok"}, + ) + assert resp.status_code == 200 + # After reporting, pending_run_id should be cleared + resp = test_client.get(f"/api/clients/{client_token}/tasks") + updated_task = resp.json()[0] + assert updated_task.get("pending_run_id") is None + # The last_run and next_run fields should be set + assert updated_task.get("last_run") is not None + assert updated_task.get("next_run") is not None