From 8ae87363ed2103a7b1993bb200e44ea4316e3029 Mon Sep 17 00:00:00 2001 From: jester Date: Wed, 15 Apr 2026 20:56:30 +0000 Subject: [PATCH] Add guarded Minecraft backup flow --- internal/backup/backup.go | 432 ++++++++++++++++++++++++++++++++ internal/handlers/backups.go | 106 ++++++++ internal/handlers/files.go | 22 ++ internal/handlers/mods.go | 15 ++ internal/handlers/operations.go | 22 ++ internal/http/agent.go | 209 +++++++++++---- internal/state/state.go | 50 ++++ internal/system/autostart.go | 47 +--- internal/system/process.go | 68 ++++- internal/system/supervisor.go | 2 +- internal/update/update.go | 27 +- 11 files changed, 893 insertions(+), 107 deletions(-) create mode 100644 internal/backup/backup.go create mode 100644 internal/handlers/backups.go create mode 100644 internal/handlers/operations.go diff --git a/internal/backup/backup.go b/internal/backup/backup.go new file mode 100644 index 0000000..8b302a4 --- /dev/null +++ b/internal/backup/backup.go @@ -0,0 +1,432 @@ +package backup + +import ( + "archive/tar" + "compress/gzip" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "zlh-agent/internal/provision" + "zlh-agent/internal/state" + "zlh-agent/internal/system" +) + +const ( + RootDir = "/opt/zlh-agent/backups" + defaultMaxCount = 10 + manifestName = "backup_manifest.json" +) + +type Manifest struct { + ID string `json:"id"` + CreatedAtUTC string `json:"created_at_utc"` + ContainerType string `json:"container_type"` + Game string `json:"game"` + Variant string `json:"variant"` + Version string `json:"version"` + VMID int `json:"vmid"` + Archive string `json:"archive"` + Paths []string `json:"paths"` + FileCount int `json:"file_count"` + TotalBytes int64 `json:"total_bytes"` +} + +func Create(cfg *state.Config) (Manifest, error) { + if err := requireMinecraft(cfg); err != nil { + return Manifest{}, err + } + if err := os.MkdirAll(RootDir, 0o755); err != nil { + return Manifest{}, err + } + + id := time.Now().UTC().Format("20060102T150405Z") + archiveName := id + ".tar.gz" + archivePath := filepath.Join(RootDir, archiveName) + serverRoot := provision.ServerDir(*cfg) + paths := defaultPaths(cfg, serverRoot) + + _, running := system.GetServerPID() + saveOff := false + if running { + state.SetOperationMessage("flushing minecraft saves") + if err := system.RunMinecraftSaveOff(); err != nil { + return Manifest{}, fmt.Errorf("disable minecraft saves: %w", err) + } + saveOff = true + defer func() { + if saveOff { + _ = system.RunMinecraftSaveOn() + } + }() + } + + state.SetOperationMessage("creating backup archive") + manifest := Manifest{ + ID: id, + CreatedAtUTC: time.Now().UTC().Format(time.RFC3339), + ContainerType: cfg.ContainerType, + Game: cfg.Game, + Variant: cfg.Variant, + Version: cfg.Version, + VMID: cfg.VMID, + Archive: archiveName, + Paths: paths, + } + if err := writeArchive(serverRoot, archivePath, &manifest); err != nil { + return Manifest{}, err + } + if saveOff { + if err := system.RunMinecraftSaveOn(); err != nil { + return Manifest{}, fmt.Errorf("enable minecraft saves: %w", err) + } + saveOff = false + } + if err := writeManifestSidecar(manifest); err != nil { + return Manifest{}, err + } + if err := prune(defaultMaxCount); err != nil { + return Manifest{}, err + } + return manifest, nil +} + +func List() ([]Manifest, error) { + entries, err := os.ReadDir(RootDir) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return []Manifest{}, nil + } + return nil, err + } + out := make([]Manifest, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") { + continue + } + manifest, err := readManifestSidecar(strings.TrimSuffix(entry.Name(), ".json")) + if err == nil { + out = append(out, manifest) + } + } + sort.Slice(out, func(i, j int) bool { + return out[i].CreatedAtUTC > out[j].CreatedAtUTC + }) + return out, nil +} + +func Restore(cfg *state.Config, id string) (Manifest, error) { + if err := requireMinecraft(cfg); err != nil { + return Manifest{}, err + } + id = strings.TrimSpace(id) + if !safeID(id) { + return Manifest{}, fmt.Errorf("invalid backup id") + } + + manifest, err := readManifestSidecar(id) + if err != nil { + return Manifest{}, err + } + archivePath := filepath.Join(RootDir, manifest.Archive) + if _, err := os.Stat(archivePath); err != nil { + return Manifest{}, err + } + + if _, running := system.GetServerPID(); running { + state.SetOperationMessage("stopping server before restore") + if err := system.StopServerAndWait(30 * time.Second); err != nil { + return Manifest{}, err + } + } + + state.SetOperationMessage("restoring backup archive") + if err := restoreArchive(provision.ServerDir(*cfg), archivePath, manifest.Paths); err != nil { + return Manifest{}, err + } + + state.SetOperationMessage("starting server after restore") + state.SetState(state.StateStarting) + state.SetReadyState(false, "", "") + if err := system.StartServerReady(cfg); err != nil { + return Manifest{}, err + } + return manifest, nil +} + +func requireMinecraft(cfg *state.Config) error { + if cfg == nil { + return fmt.Errorf("config required") + } + if !strings.EqualFold(cfg.ContainerType, "game") { + return fmt.Errorf("backups are only available for game containers") + } + if !strings.EqualFold(cfg.Game, "minecraft") { + return fmt.Errorf("backups are only implemented for minecraft") + } + return nil +} + +func defaultPaths(cfg *state.Config, serverRoot string) []string { + candidates := []string{} + world := strings.TrimSpace(cfg.World) + if world == "" { + world = "world" + } + candidates = append(candidates, world) + candidates = append(candidates, + "server.properties", + "whitelist.json", + "ops.json", + "banned-players.json", + "banned-ips.json", + "config", + ) + + paths := make([]string, 0, len(candidates)) + seen := map[string]struct{}{} + for _, rel := range candidates { + rel = filepath.ToSlash(filepath.Clean(strings.TrimSpace(rel))) + if rel == "." || rel == "" || strings.HasPrefix(rel, "../") || filepath.IsAbs(rel) { + continue + } + if _, ok := seen[rel]; ok { + continue + } + if _, err := os.Stat(filepath.Join(serverRoot, filepath.FromSlash(rel))); err == nil { + paths = append(paths, rel) + seen[rel] = struct{}{} + } + } + return paths +} + +func writeArchive(serverRoot, archivePath string, manifest *Manifest) error { + file, err := os.Create(archivePath) + if err != nil { + return err + } + defer file.Close() + + gz := gzip.NewWriter(file) + defer gz.Close() + tw := tar.NewWriter(gz) + defer tw.Close() + + for _, rel := range manifest.Paths { + if err := addPath(tw, serverRoot, rel, manifest); err != nil { + return err + } + } + + data, err := json.MarshalIndent(manifest, "", " ") + if err != nil { + return err + } + data = append(data, '\n') + header := &tar.Header{ + Name: manifestName, + Mode: 0o644, + Size: int64(len(data)), + ModTime: time.Now(), + } + if err := tw.WriteHeader(header); err != nil { + return err + } + _, err = tw.Write(data) + return err +} + +func addPath(tw *tar.Writer, serverRoot, rel string, manifest *Manifest) error { + abs := filepath.Join(serverRoot, filepath.FromSlash(rel)) + return filepath.WalkDir(abs, func(path string, d os.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + info, err := d.Info() + if err != nil { + return err + } + if info.Mode()&os.ModeSymlink != 0 { + return nil + } + name, err := filepath.Rel(serverRoot, path) + if err != nil { + return err + } + name = filepath.ToSlash(name) + if name == "." || strings.HasPrefix(name, ".zlh-shadow/") || name == ".zlh-shadow" { + return nil + } + + header, err := tar.FileInfoHeader(info, "") + if err != nil { + return err + } + header.Name = name + if err := tw.WriteHeader(header); err != nil { + return err + } + if info.IsDir() { + return nil + } + f, err := os.Open(path) + if err != nil { + return err + } + n, err := io.Copy(tw, f) + closeErr := f.Close() + if err != nil { + return err + } + if closeErr != nil { + return closeErr + } + manifest.FileCount++ + manifest.TotalBytes += n + return nil + }) +} + +func restoreArchive(serverRoot, archivePath string, paths []string) error { + for _, rel := range paths { + if !safeRel(rel) { + return fmt.Errorf("backup contains unsafe path: %s", rel) + } + if err := os.RemoveAll(filepath.Join(serverRoot, filepath.FromSlash(rel))); err != nil { + return err + } + } + + file, err := os.Open(archivePath) + if err != nil { + return err + } + defer file.Close() + gz, err := gzip.NewReader(file) + if err != nil { + return err + } + defer gz.Close() + tr := tar.NewReader(gz) + + for { + header, err := tr.Next() + if errors.Is(err, io.EOF) { + return nil + } + if err != nil { + return err + } + if header.Name == manifestName { + continue + } + if !safeRel(header.Name) { + return fmt.Errorf("archive contains unsafe path: %s", header.Name) + } + if !selectedPath(header.Name, paths) { + return fmt.Errorf("archive contains unexpected path: %s", header.Name) + } + target := filepath.Join(serverRoot, filepath.FromSlash(header.Name)) + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(target, os.FileMode(header.Mode)&0o777); err != nil { + return err + } + case tar.TypeReg: + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + out, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(header.Mode)&0o777) + if err != nil { + return err + } + if _, err := io.Copy(out, tr); err != nil { + out.Close() + return err + } + if err := out.Close(); err != nil { + return err + } + default: + return fmt.Errorf("unsupported archive entry type for %s", header.Name) + } + } +} + +func writeManifestSidecar(manifest Manifest) error { + data, err := json.MarshalIndent(manifest, "", " ") + if err != nil { + return err + } + data = append(data, '\n') + return os.WriteFile(filepath.Join(RootDir, manifest.ID+".json"), data, 0o644) +} + +func readManifestSidecar(id string) (Manifest, error) { + if !safeID(id) { + return Manifest{}, fmt.Errorf("invalid backup id") + } + data, err := os.ReadFile(filepath.Join(RootDir, id+".json")) + if err != nil { + return Manifest{}, err + } + var manifest Manifest + if err := json.Unmarshal(data, &manifest); err != nil { + return Manifest{}, err + } + if manifest.ID != id { + return Manifest{}, fmt.Errorf("backup manifest id mismatch") + } + return manifest, nil +} + +func prune(maxCount int) error { + if maxCount <= 0 { + return nil + } + backups, err := List() + if err != nil { + return err + } + for i := maxCount; i < len(backups); i++ { + _ = os.Remove(filepath.Join(RootDir, backups[i].Archive)) + _ = os.Remove(filepath.Join(RootDir, backups[i].ID+".json")) + } + return nil +} + +func safeID(id string) bool { + if id == "" { + return false + } + for _, r := range id { + if (r >= '0' && r <= '9') || (r >= 'A' && r <= 'Z') || r == 'T' || r == 'Z' || r == '-' || r == '_' { + continue + } + return false + } + return true +} + +func safeRel(rel string) bool { + rel = filepath.ToSlash(filepath.Clean(strings.TrimSpace(rel))) + return rel != "" && rel != "." && !filepath.IsAbs(rel) && rel != ".." && !strings.HasPrefix(rel, "../") +} + +func selectedPath(name string, roots []string) bool { + name = filepath.ToSlash(filepath.Clean(name)) + for _, root := range roots { + root = filepath.ToSlash(filepath.Clean(root)) + if name == root || strings.HasPrefix(name, root+"/") { + return true + } + } + return false +} diff --git a/internal/handlers/backups.go b/internal/handlers/backups.go new file mode 100644 index 0000000..f2a70ac --- /dev/null +++ b/internal/handlers/backups.go @@ -0,0 +1,106 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "strings" + + agentbackup "zlh-agent/internal/backup" + "zlh-agent/internal/state" +) + +func HandleGameBackups(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodGet: + handleGameBackupsList(w, r) + case http.MethodPost: + handleGameBackupCreate(w, r) + default: + writeJSONError(w, http.StatusMethodNotAllowed, "GET or POST only") + } +} + +func handleGameBackupsList(w http.ResponseWriter, r *http.Request) { + if _, ok := requireBackupConfig(w); !ok { + return + } + backups, err := agentbackup.List() + if err != nil { + writeJSONError(w, http.StatusInternalServerError, err.Error()) + return + } + writeJSON(w, http.StatusOK, map[string]any{"backups": backups}) +} + +func handleGameBackupCreate(w http.ResponseWriter, r *http.Request) { + endOp, ok := beginHandlerOperation(w, "backup_create", true, "creating backup") + if !ok { + return + } + defer endOp() + + cfg, ok := requireBackupConfig(w) + if !ok { + return + } + manifest, err := agentbackup.Create(cfg) + if err != nil { + writeJSONError(w, http.StatusInternalServerError, err.Error()) + return + } + writeJSON(w, http.StatusOK, manifest) +} + +func HandleGameBackupRestore(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + writeJSONError(w, http.StatusMethodNotAllowed, "POST only") + return + } + endOp, ok := beginHandlerOperation(w, "backup_restore", true, "restoring backup") + if !ok { + return + } + defer endOp() + + cfg, ok := requireBackupConfig(w) + if !ok { + return + } + + id := strings.TrimSpace(r.URL.Query().Get("id")) + if id == "" { + var req struct { + ID string `json:"id"` + } + _ = json.NewDecoder(r.Body).Decode(&req) + id = strings.TrimSpace(req.ID) + } + if id == "" { + writeJSONError(w, http.StatusBadRequest, "backup id required") + return + } + + manifest, err := agentbackup.Restore(cfg, id) + if err != nil { + writeJSONError(w, http.StatusInternalServerError, err.Error()) + return + } + writeJSON(w, http.StatusOK, map[string]any{"restored": true, "backup": manifest}) +} + +func requireBackupConfig(w http.ResponseWriter) (*state.Config, bool) { + cfg, err := state.LoadConfig() + if err != nil { + writeJSONError(w, http.StatusBadRequest, "no config loaded") + return nil, false + } + if !strings.EqualFold(cfg.ContainerType, "game") { + writeJSONError(w, http.StatusBadRequest, "not a game container") + return nil, false + } + if !strings.EqualFold(cfg.Game, "minecraft") { + writeJSONError(w, http.StatusNotImplemented, "backups are only implemented for minecraft") + return nil, false + } + return cfg, true +} diff --git a/internal/handlers/files.go b/internal/handlers/files.go index 9e6c773..0e336bc 100644 --- a/internal/handlers/files.go +++ b/internal/handlers/files.go @@ -114,6 +114,11 @@ func HandleGameFilesUpload(w http.ResponseWriter, r *http.Request) { writeJSONError(w, http.StatusMethodNotAllowed, "POST only") return } + endOp, ok := beginHandlerOperation(w, "file_upload", false, "uploading file") + if !ok { + return + } + defer endOp() cfg, serverRoot, ok := requireFileContainer(w) if !ok { @@ -182,6 +187,11 @@ func HandleGameFilesRevert(w http.ResponseWriter, r *http.Request) { writeJSONError(w, http.StatusMethodNotAllowed, "POST only") return } + endOp, ok := beginHandlerOperation(w, "file_revert", false, "reverting file") + if !ok { + return + } + defer endOp() cfg, serverRoot, ok := requireFileContainer(w) if !ok { @@ -202,6 +212,12 @@ func HandleGameFilesRevert(w http.ResponseWriter, r *http.Request) { } func handleGameFilesDelete(w http.ResponseWriter, r *http.Request) { + endOp, ok := beginHandlerOperation(w, "file_delete", false, "deleting file") + if !ok { + return + } + defer endOp() + cfg, serverRoot, ok := requireFileContainer(w) if !ok { return @@ -221,6 +237,12 @@ func handleGameFilesDelete(w http.ResponseWriter, r *http.Request) { } func handleGameFilesWrite(w http.ResponseWriter, r *http.Request) { + endOp, ok := beginHandlerOperation(w, "file_write", false, "writing file") + if !ok { + return + } + defer endOp() + cfg, serverRoot, ok := requireFileContainer(w) if !ok { return diff --git a/internal/handlers/mods.go b/internal/handlers/mods.go index 34ad9ad..a0bb406 100644 --- a/internal/handlers/mods.go +++ b/internal/handlers/mods.go @@ -46,6 +46,11 @@ func HandleGameModsInstall(w http.ResponseWriter, r *http.Request) { writeJSONError(w, http.StatusMethodNotAllowed, "POST only") return } + endOp, ok := beginHandlerOperation(w, "mod_install", false, "installing mod") + if !ok { + return + } + defer endOp() cfg, serverRoot, ok := requireMinecraftGame(w) if !ok { @@ -167,6 +172,11 @@ func HandleGameModByID(w http.ResponseWriter, r *http.Request) { switch r.Method { case http.MethodPatch: + endOp, ok := beginHandlerOperation(w, "mod_update", false, "updating mod") + if !ok { + return + } + defer endOp() var req mods.PatchRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeJSONError(w, http.StatusBadRequest, "invalid json") @@ -185,6 +195,11 @@ func HandleGameModByID(w http.ResponseWriter, r *http.Request) { modsLogf(cfg, "action=set_enabled mod_id=%s enabled=%t status=ok", modID, req.Enabled) writeJSON(w, http.StatusOK, resp) case http.MethodDelete: + endOp, ok := beginHandlerOperation(w, "mod_delete", false, "deleting mod") + if !ok { + return + } + defer endOp() resp, err := mods.DeleteMod(serverRoot, modID) if err != nil { modsLogf(cfg, "action=delete mod_id=%s status=failed err=%v", modID, err) diff --git a/internal/handlers/operations.go b/internal/handlers/operations.go new file mode 100644 index 0000000..330358f --- /dev/null +++ b/internal/handlers/operations.go @@ -0,0 +1,22 @@ +package handlers + +import ( + "fmt" + "net/http" + "time" + + "zlh-agent/internal/state" +) + +func beginHandlerOperation(w http.ResponseWriter, opType string, maintenance bool, message string) (func(), bool) { + end, ok, current := state.TryStartOperation(opType, maintenance, message) + if ok { + return end, true + } + msg := fmt.Sprintf("operation already in progress: %s", current.Type) + if !current.StartedAt.IsZero() { + msg = fmt.Sprintf("%s since %s", msg, current.StartedAt.UTC().Format(time.RFC3339)) + } + writeJSONError(w, http.StatusConflict, msg) + return nil, false +} diff --git a/internal/http/agent.go b/internal/http/agent.go index 896b3d7..d42025c 100755 --- a/internal/http/agent.go +++ b/internal/http/agent.go @@ -55,22 +55,6 @@ func lifecycleLog(cfg *state.Config, phase string, attempt int, started time.Tim util.LogLifecycle("[lifecycle] vmid=%d phase=%s attempt=%d elapsed_ms=%d %s", cfg.VMID, phase, attempt, elapsed, msg) } -func waitMinecraftReady(cfg *state.Config, phase string, started time.Time) error { - if strings.ToLower(cfg.Game) != "minecraft" { - return nil - } - - lifecycleLog(cfg, phase, 1, started, "probe_begin") - if err := mcstatus.WaitUntilReady(*cfg, ReadinessTimeout, 3*time.Second); err != nil { - state.SetReadyState(false, "minecraft_ping", err.Error()) - lifecycleLog(cfg, phase, 1, started, "probe_timeout err=%v", err) - return err - } - state.SetReadyState(true, "minecraft_ping", "") - lifecycleLog(cfg, phase, 1, started, "probe_ready") - return nil -} - func requireDevContainer() (*state.Config, error) { cfg, err := state.LoadConfig() if err != nil { @@ -85,6 +69,23 @@ func requireDevContainer() (*state.Config, error) { return cfg, nil } +func beginHTTPOperation(w http.ResponseWriter, opType string, maintenance bool, message string) (func(), bool) { + end, ok, current := state.TryStartOperation(opType, maintenance, message) + if ok { + return end, true + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusConflict) + _ = json.NewEncoder(w).Encode(map[string]any{ + "error": "operation already in progress", + "operationType": current.Type, + "maintenance": current.Maintenance, + "operationSince": current.StartedAt.UTC().Format(time.RFC3339), + "message": current.Message, + }) + return nil, false +} + /* -------------------------------------------------------------------------- Shared provision pipeline (installer + Minecraft verify) @@ -198,23 +199,30 @@ func handleConfig(w http.ResponseWriter, r *http.Request) { http.Error(w, "POST only", http.StatusMethodNotAllowed) return } + endOp, ok := beginHTTPOperation(w, "provision", true, "provisioning container") + if !ok { + return + } body, _ := io.ReadAll(r.Body) var cfg state.Config if err := json.Unmarshal(body, &cfg); err != nil { + endOp() http.Error(w, "bad json", http.StatusBadRequest) return } log.Printf("[http] vmid=%d action=config status=received type=%s runtime=%s game=%s variant=%s version=%s", cfg.VMID, cfg.ContainerType, cfg.Runtime, cfg.Game, cfg.Variant, cfg.Version) if err := state.SaveConfig(&cfg); err != nil { + endOp() log.Printf("[http] vmid=%d action=config status=save_failed err=%v", cfg.VMID, err) http.Error(w, "save config failed: "+err.Error(), http.StatusInternalServerError) return } go func(c state.Config) { + defer endOp() log.Printf("[http] vmid=%d async provision+start begin", c.VMID) started := time.Now() lifecycleLog(&c, "config_async", 1, started, "begin") @@ -228,7 +236,7 @@ func handleConfig(w http.ResponseWriter, r *http.Request) { state.SetState(state.StateStarting) state.SetReadyState(false, "", "") lifecycleLog(&c, "start", 1, started, "start_requested") - if err := system.StartServer(&c); err != nil { + if err := system.StartServerReady(&c); err != nil { log.Printf("[http] vmid=%d start error: %v", c.VMID, err) state.SetError(err) state.SetState(state.StateError) @@ -236,11 +244,6 @@ func handleConfig(w http.ResponseWriter, r *http.Request) { return } lifecycleLog(&c, "start", 1, started, "process_started") - if err := waitMinecraftReady(&c, "start_probe", started); err != nil { - state.SetError(err) - state.SetState(state.StateError) - return - } // ------------------------------------------------- // FORGE / NEOFORGE: wait → stop → patch → restart @@ -287,18 +290,13 @@ func handleConfig(w http.ResponseWriter, r *http.Request) { state.SetState(state.StateStarting) state.SetReadyState(false, "", "") - if err := system.StartServer(&c); err != nil { + if err := system.StartServerReady(&c); err != nil { log.Printf("[http] vmid=%d restart error: %v", c.VMID, err) state.SetError(err) state.SetState(state.StateError) lifecycleLog(&c, "forge_post", 1, started, "restart_failed err=%v", err) return } - if err := waitMinecraftReady(&c, "forge_restart_probe", started); err != nil { - state.SetError(err) - state.SetState(state.StateError) - return - } lifecycleLog(&c, "forge_post", 1, started, "complete") } } @@ -318,6 +316,16 @@ func handleConfig(w http.ResponseWriter, r *http.Request) { ---------------------------------------------------------------------------- */ func handleStart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + endOp, ok := beginHTTPOperation(w, "start", false, "starting server") + if !ok { + return + } + defer endOp() + cfg, err := state.LoadConfig() if err != nil { http.Error(w, "no config: "+err.Error(), http.StatusBadRequest) @@ -334,23 +342,16 @@ func handleStart(w http.ResponseWriter, r *http.Request) { state.SetState(state.StateStarting) state.SetReadyState(false, "", "") lifecycleLog(cfg, "start_manual", 1, started, "start_requested") - if err := system.StartServer(cfg); err != nil { + if err := system.StartServerReady(cfg); err != nil { log.Printf("[http] vmid=%d action=start status=failed err=%v", cfg.VMID, err) http.Error(w, "start error: "+err.Error(), http.StatusInternalServerError) lifecycleLog(cfg, "start_manual", 1, started, "start_failed err=%v", err) return } - if err := waitMinecraftReady(cfg, "start_manual_probe", started); err != nil { - log.Printf("[http] vmid=%d action=start status=readiness_failed err=%v", cfg.VMID, err) - state.SetError(err) - state.SetState(state.StateError) - http.Error(w, "start readiness error: "+err.Error(), http.StatusGatewayTimeout) - return - } log.Printf("[http] vmid=%d action=start status=ok", cfg.VMID) w.Header().Set("Content-Type", "application/json") - _, _ = w.Write([]byte(`{"ok": true, "state": "starting"}`)) + _, _ = w.Write([]byte(`{"ok": true, "state": "running"}`)) } /* @@ -360,6 +361,16 @@ func handleStart(w http.ResponseWriter, r *http.Request) { ---------------------------------------------------------------------------- */ func handleStop(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + endOp, ok := beginHTTPOperation(w, "stop", false, "stopping server") + if !ok { + return + } + defer endOp() + if cfg, err := state.LoadConfig(); err == nil && cfg != nil { log.Printf("[http] vmid=%d action=stop status=requested", cfg.VMID) } @@ -380,6 +391,16 @@ func handleStop(w http.ResponseWriter, r *http.Request) { ---------------------------------------------------------------------------- */ func handleRestart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + endOp, ok := beginHTTPOperation(w, "restart", false, "restarting server") + if !ok { + return + } + defer endOp() + cfg, err := state.LoadConfig() if err != nil { http.Error(w, "no config", http.StatusBadRequest) @@ -402,22 +423,16 @@ func handleRestart(w http.ResponseWriter, r *http.Request) { started := time.Now() state.SetState(state.StateStarting) state.SetReadyState(false, "", "") - if err := system.StartServer(cfg); err != nil { + if err := system.StartServerReady(cfg); err != nil { log.Printf("[http] vmid=%d action=restart status=start_failed err=%v", cfg.VMID, err) http.Error(w, "restart error: "+err.Error(), http.StatusInternalServerError) return } - if err := waitMinecraftReady(cfg, "restart_manual_probe", started); err != nil { - log.Printf("[http] vmid=%d action=restart status=readiness_failed err=%v", cfg.VMID, err) - state.SetError(err) - state.SetState(state.StateError) - http.Error(w, "restart readiness error: "+err.Error(), http.StatusGatewayTimeout) - return - } + lifecycleLog(cfg, "restart_manual", 1, started, "ready") log.Printf("[http] vmid=%d action=restart status=ok", cfg.VMID) w.Header().Set("Content-Type", "application/json") - _, _ = w.Write([]byte(`{"ok": true, "state": "starting"}`)) + _, _ = w.Write([]byte(`{"ok": true, "state": "running"}`)) } /* @@ -475,6 +490,7 @@ func handleStatus(w http.ResponseWriter, r *http.Request) { serverRoot = provision.ServerDir(*cfg) } } + op := state.GetOperation() resp := map[string]any{ "state": state.GetState(), @@ -501,8 +517,16 @@ func handleStatus(w http.ResponseWriter, r *http.Request) { "devReadyAt": devReadyAt, "codeServerInstalled": codeServerInstalled, "codeServerRunning": codeServerRunning, + "operationInProgress": op.InProgress, + "operationType": op.Type, + "maintenance": op.Maintenance, + "operationStartedAt": "", + "operationMessage": op.Message, "timestamp": time.Now().Unix(), } + if op.InProgress && !op.StartedAt.IsZero() { + resp["operationStartedAt"] = op.StartedAt.UTC().Format(time.RFC3339) + } if err := state.GetError(); err != nil { resp["error"] = err.Error() @@ -588,11 +612,46 @@ func handleCodeServerRestart(w http.ResponseWriter, r *http.Request) { ---------------------------------------------------------------------------- */ func handleSendCommand(w http.ResponseWriter, r *http.Request) { - cmd := r.URL.Query().Get("cmd") + if r.Method != http.MethodPost { + http.Error(w, "POST only", http.StatusMethodNotAllowed) + return + } + cmd := "" + contentType := r.Header.Get("Content-Type") + if strings.Contains(contentType, "application/json") { + var req struct { + Command string `json:"command"` + Cmd string `json:"cmd"` + } + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 4096)).Decode(&req); err != nil { + http.Error(w, "bad json", http.StatusBadRequest) + return + } + cmd = req.Command + if cmd == "" { + cmd = req.Cmd + } + } else { + if err := r.ParseForm(); err != nil { + http.Error(w, "bad form", http.StatusBadRequest) + return + } + cmd = r.Form.Get("command") + if cmd == "" { + cmd = r.Form.Get("cmd") + } + } + if cmd == "" { + cmd = r.URL.Query().Get("cmd") + } if cmd == "" { http.Error(w, "cmd required", http.StatusBadRequest) return } + if err := validateConsoleCommand(cmd); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } if err := system.SendConsoleCommand(cmd); err != nil { http.Error(w, "command error: "+err.Error(), http.StatusInternalServerError) @@ -602,6 +661,54 @@ func handleSendCommand(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } +func validateConsoleCommand(cmd string) error { + if len(cmd) > 512 { + return fmt.Errorf("command exceeds 512 byte limit") + } + for _, r := range cmd { + if r < 32 || r == 127 { + return fmt.Errorf("command contains control characters") + } + } + return nil +} + +func handleReady(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "GET only", http.StatusMethodNotAllowed) + return + } + + cfg, err := state.LoadConfig() + if err != nil || cfg == nil { + http.Error(w, "not configured", http.StatusServiceUnavailable) + return + } + op := state.GetOperation() + if op.InProgress { + http.Error(w, "operation in progress: "+op.Type, http.StatusServiceUnavailable) + return + } + switch state.GetState() { + case state.StateInstalling, state.StateStarting, state.StateStopping, state.StateError, state.StateCrashed: + http.Error(w, "not ready: "+string(state.GetState()), http.StatusServiceUnavailable) + return + } + if strings.EqualFold(cfg.ContainerType, "game") { + _, running := system.GetServerPID() + if !running { + http.Error(w, "server process not running", http.StatusServiceUnavailable) + return + } + if strings.EqualFold(cfg.Game, "minecraft") && !state.GetReady() { + http.Error(w, "minecraft not ready", http.StatusServiceUnavailable) + return + } + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{"ready": true}) +} + /* -------------------------------------------------------------------------- /agent/update ----------------------------------------------------------------------------*/ @@ -611,6 +718,11 @@ func handleAgentUpdate(w http.ResponseWriter, r *http.Request) { http.Error(w, "POST only", http.StatusMethodNotAllowed) return } + endOp, ok := beginHTTPOperation(w, "agent_update", true, "updating agent") + if !ok { + return + } + defer endOp() res := update.CheckAndUpdate(version.AgentVersion) w.Header().Set("Content-Type", "application/json") @@ -763,6 +875,7 @@ func NewMux() *http.ServeMux { m.HandleFunc("/stop", handleStop) m.HandleFunc("/restart", handleRestart) m.HandleFunc("/status", handleStatus) + m.HandleFunc("/ready", handleReady) m.HandleFunc("/dev/codeserver/start", handleCodeServerStart) m.HandleFunc("/dev/codeserver/stop", handleCodeServerStop) m.HandleFunc("/dev/codeserver/restart", handleCodeServerRestart) @@ -771,6 +884,8 @@ func NewMux() *http.ServeMux { m.HandleFunc("/agent/update/status", handleAgentUpdateStatus) m.HandleFunc("/version", handleVersion) m.HandleFunc("/game/players", handleGamePlayers) + m.HandleFunc("/game/backups", agenthandlers.HandleGameBackups) + m.HandleFunc("/game/backups/restore", agenthandlers.HandleGameBackupRestore) m.HandleFunc("/game/mods", agenthandlers.HandleGameMods) m.HandleFunc("/game/mods/install", agenthandlers.HandleGameModsInstall) m.HandleFunc("/game/mods/", agenthandlers.HandleGameModByID) diff --git a/internal/state/state.go b/internal/state/state.go index b845ca6..5517338 100755 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -80,6 +80,7 @@ type agentStatus struct { readySource string readyError string lastReadyAt time.Time + operation OperationInfo } type CrashInfo struct { @@ -91,6 +92,14 @@ type CrashInfo struct { LogTail []string `json:"logTail"` } +type OperationInfo struct { + InProgress bool `json:"inProgress"` + Type string `json:"type,omitempty"` + Maintenance bool `json:"maintenance"` + StartedAt time.Time `json:"startedAt,omitempty"` + Message string `json:"message,omitempty"` +} + var global = &agentStatus{ state: StateIdle, lastChange: time.Now(), @@ -189,6 +198,12 @@ func IsIntentionalStop() bool { return global.intentionalStop } +func GetOperation() OperationInfo { + global.mu.Lock() + defer global.mu.Unlock() + return global.operation +} + /* -------------------------------------------------------------------------- STATE SETTERS — unified with logging ----------------------------------------------------------------------------*/ @@ -275,6 +290,41 @@ func SetReadyState(ready bool, source, errText string) { } } +func TryStartOperation(opType string, maintenance bool, message string) (func(), bool, OperationInfo) { + global.mu.Lock() + defer global.mu.Unlock() + + if global.operation.InProgress { + return nil, false, global.operation + } + + global.operation = OperationInfo{ + InProgress: true, + Type: opType, + Maintenance: maintenance, + StartedAt: time.Now().UTC(), + Message: message, + } + + end := func() { + global.mu.Lock() + defer global.mu.Unlock() + if global.operation.Type == opType { + global.operation = OperationInfo{} + } + } + + return end, true, global.operation +} + +func SetOperationMessage(message string) { + global.mu.Lock() + defer global.mu.Unlock() + if global.operation.InProgress { + global.operation.Message = message + } +} + /* -------------------------------------------------------------------------- CONFIG SAVE / LOAD ----------------------------------------------------------------------------*/ diff --git a/internal/system/autostart.go b/internal/system/autostart.go index f88f32f..318015d 100644 --- a/internal/system/autostart.go +++ b/internal/system/autostart.go @@ -25,13 +25,6 @@ import ( var AutoStartEnabled = false // controlled by config or template var AutoRestartOnCrash = true // can be disabled for debugging -// optional exponential backoff (3 attempts max) -var backoffDelays = []time.Duration{ - 5 * time.Second, - 10 * time.Second, - 20 * time.Second, -} - /* -------------------------------------------------------------------------- InitAutoStart — called from main.go ----------------------------------------------------------------------------*/ @@ -50,48 +43,10 @@ func InitAutoStart() { cfg, err := state.LoadConfig() if err == nil && cfg != nil { log.Println("[autostart] config detected: boot-starting server") - _ = StartServer(cfg) + _ = StartServerReady(cfg) return } time.Sleep(3 * time.Second) } }() } - -/* -------------------------------------------------------------------------- - monitorCrashes — restarts server if AutoRestartOnCrash=true -----------------------------------------------------------------------------*/ - -func monitorCrashes(cfg *state.Config) { - if !AutoRestartOnCrash { - log.Println("[autostart] crash monitoring disabled") - return - } - - attempt := 0 - - for { - time.Sleep(3 * time.Second) - - if state.GetState() != state.StateCrashed { - continue - } - - log.Println("[autostart] SERVER CRASH DETECTED") - - if attempt >= len(backoffDelays) { - log.Println("[autostart] max crash retries reached, not restarting") - return - } - - wait := backoffDelays[attempt] - log.Printf("[autostart] waiting %s before restart", wait) - - time.Sleep(wait) - attempt++ - - if err := StartServer(cfg); err != nil { - log.Println("[autostart]", err) - } - } -} diff --git a/internal/system/process.go b/internal/system/process.go index 5a20676..2cb4fe0 100755 --- a/internal/system/process.go +++ b/internal/system/process.go @@ -16,12 +16,15 @@ import ( "syscall" "time" + mcstatus "zlh-agent/internal/minecraft" "zlh-agent/internal/provision" "zlh-agent/internal/provision/devcontainer" "zlh-agent/internal/runtime" "zlh-agent/internal/state" ) +const ReadinessTimeout = 60 * time.Second + /* -------------------------------------------------------------------------- GLOBAL PROCESS STATE ----------------------------------------------------------------------------*/ @@ -91,6 +94,34 @@ func StartServer(cfg *state.Config) error { return nil } +func StartServerReady(cfg *state.Config) error { + if err := StartServer(cfg); err != nil { + return err + } + if err := WaitForReady(cfg, ReadinessTimeout); err != nil { + state.SetError(err) + state.SetState(state.StateError) + return err + } + return nil +} + +func WaitForReady(cfg *state.Config, timeout time.Duration) error { + if cfg == nil { + return fmt.Errorf("config required") + } + if !strings.EqualFold(cfg.ContainerType, "game") || !strings.EqualFold(cfg.Game, "minecraft") { + return nil + } + + if err := mcstatus.WaitUntilReady(*cfg, timeout, 3*time.Second); err != nil { + state.SetReadyState(false, "minecraft_ping", err.Error()) + return err + } + state.SetReadyState(true, "minecraft_ping", "") + return nil +} + func buildServerCommand(cfg *state.Config, dir, startScript string) (*exec.Cmd, error) { variant := strings.ToLower(strings.TrimSpace(cfg.Variant)) if variant == "forge" || variant == "neoforge" { @@ -182,7 +213,7 @@ func StopServer() error { // Try graceful stop if serverPTY != nil { - _ = runtime.Write(serverPTY, []byte("save-all\n")) + _ = runtime.Write(serverPTY, []byte("save-all flush\n")) time.Sleep(2 * time.Second) _ = runtime.Write(serverPTY, []byte("stop\n")) } @@ -199,6 +230,9 @@ func StopServer() error { } func WaitForServerExit(timeout time.Duration) error { + if timeout <= 0 { + return fmt.Errorf("timeout waiting for server process to exit") + } deadline := time.Now().Add(timeout) for { mu.Lock() @@ -223,8 +257,18 @@ func RestartServer(cfg *state.Config) error { if err := StopServer(); err != nil { // ignore if not running } + if err := WaitForServerExit(20 * time.Second); err != nil { + return err + } - return StartServer(cfg) + return StartServerReady(cfg) +} + +func StopServerAndWait(timeout time.Duration) error { + if err := StopServer(); err != nil { + return err + } + return WaitForServerExit(timeout) } /* -------------------------------------------------------------------------- @@ -242,6 +286,26 @@ func SendConsoleCommand(cmd string) error { return runtime.Write(serverPTY, []byte(cmd+"\n")) } +func RunMinecraftSaveOff() error { + if err := SendConsoleCommand("save-all flush"); err != nil { + return err + } + time.Sleep(2 * time.Second) + if err := SendConsoleCommand("save-off"); err != nil { + return err + } + time.Sleep(500 * time.Millisecond) + return nil +} + +func RunMinecraftSaveOn() error { + if err := SendConsoleCommand("save-on"); err != nil { + return err + } + time.Sleep(500 * time.Millisecond) + return nil +} + /* -------------------------------------------------------------------------- Dev Shell PTY ----------------------------------------------------------------------------*/ diff --git a/internal/system/supervisor.go b/internal/system/supervisor.go index 9cb8fd0..dce510e 100644 --- a/internal/system/supervisor.go +++ b/internal/system/supervisor.go @@ -101,7 +101,7 @@ func (s *processSupervisor) Watch(cfg *state.Config, cmd *exec.Cmd, ptmx *os.Fil return } - if err := StartServer(cfg); err != nil { + if err := StartServerReady(cfg); err != nil { state.SetError(err) state.SetState(state.StateError) log.Printf("[process] vmid=%d restart attempt=%d failed err=%v", cfg.VMID, attempt, err) diff --git a/internal/update/update.go b/internal/update/update.go index f0d0dd7..d3ad579 100644 --- a/internal/update/update.go +++ b/internal/update/update.go @@ -21,15 +21,15 @@ import ( const ( defaultArtifactBaseURL = "http://zlh-artifacts.internal.zlh:8080" - releasesDir = "/opt/zlh-agent/releases" - currentLink = "/opt/zlh-agent/current" - previousLink = "/opt/zlh-agent/previous" - binaryPath = "/opt/zlh-agent/zlh-agent" - stateDir = "/opt/zlh-agent/state" - statusFile = "/opt/zlh-agent/state/update.json" - defaultUnit = "zlh-agent" - defaultMode = "notify" - defaultKeepReleases = 3 // current + 2 previous + releasesDir = "/opt/zlh-agent/releases" + currentLink = "/opt/zlh-agent/current" + previousLink = "/opt/zlh-agent/previous" + binaryPath = "/opt/zlh-agent/zlh-agent" + stateDir = "/opt/zlh-agent/state" + statusFile = "/opt/zlh-agent/state/update.json" + defaultUnit = "zlh-agent" + defaultMode = "notify" + defaultKeepReleases = 3 // current + 2 previous ) type Manifest struct { @@ -565,8 +565,13 @@ func updateSymlinks(target string) error { if err := os.RemoveAll(previousLink); err != nil && !errors.Is(err, os.ErrNotExist) { return err } - if _, err := os.Lstat(currentLink); err == nil { - if err := os.Symlink("current", previousLink); err != nil { + + if currentResolved, err := filepath.EvalSymlinks(currentLink); err == nil && currentResolved != "" { + linkTarget, relErr := filepath.Rel(filepath.Dir(previousLink), currentResolved) + if relErr != nil { + linkTarget = currentResolved + } + if err := os.Symlink(linkTarget, previousLink); err != nil { return err } }