updates 3-20-26

This commit is contained in:
jester 2026-03-20 23:17:19 +00:00
parent 6019d0bc1c
commit f1e71b66eb
16 changed files with 667 additions and 107 deletions

View File

@ -3,6 +3,7 @@ package handlers
import (
"errors"
"io"
"log"
"mime"
"net/http"
"os"
@ -13,6 +14,10 @@ import (
"zlh-agent/internal/state"
)
// filesLogf emits a "[files]"-tagged log line. The caller's format and args
// are appended after a fixed prefix carrying the container's VMID and type
// read from cfg.
func filesLogf(cfg *state.Config, format string, args ...any) {
	fields := make([]any, 0, len(args)+2)
	fields = append(fields, cfg.VMID, cfg.ContainerType)
	fields = append(fields, args...)
	log.Printf("[files] vmid=%d type=%s "+format, fields...)
}
func HandleGameFilesList(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
writeJSONError(w, http.StatusMethodNotAllowed, "GET only")
@ -151,9 +156,11 @@ func HandleGameFilesUpload(w http.ResponseWriter, r *http.Request) {
size, overwritten, err := agentfiles.Upload(cfg.ContainerType, serverRoot, normalizedPath, part, 0, overwrite)
part.Close()
if err != nil {
filesLogf(cfg, "action=upload path=%s overwrite=%t status=failed err=%v", normalizedPath, overwrite, err)
writeFilesError(w, err)
return
}
filesLogf(cfg, "action=upload path=%s size=%d overwritten=%t status=ok", normalizedPath, size, overwritten)
writeJSON(w, http.StatusOK, map[string]any{
"uploaded": true,
"path": normalizedPath,
@ -177,9 +184,11 @@ func HandleGameFilesRevert(w http.ResponseWriter, r *http.Request) {
revertedPath, err := agentfiles.Revert(cfg.ContainerType, serverRoot, r.URL.Query().Get("path"))
if err != nil {
filesLogf(cfg, "action=revert path=%s status=failed err=%v", r.URL.Query().Get("path"), err)
writeFilesError(w, err)
return
}
filesLogf(cfg, "action=revert path=%s status=ok", revertedPath)
writeJSON(w, http.StatusOK, map[string]any{
"reverted": true,
"path": revertedPath,
@ -194,9 +203,11 @@ func handleGameFilesDelete(w http.ResponseWriter, r *http.Request) {
deletedPath, err := agentfiles.Delete(cfg.ContainerType, serverRoot, r.URL.Query().Get("path"))
if err != nil {
filesLogf(cfg, "action=delete path=%s status=failed err=%v", r.URL.Query().Get("path"), err)
writeFilesError(w, err)
return
}
filesLogf(cfg, "action=delete path=%s status=ok", deletedPath)
writeJSON(w, http.StatusOK, map[string]any{
"deleted": true,
"path": deletedPath,
@ -223,9 +234,11 @@ func handleGameFilesWrite(w http.ResponseWriter, r *http.Request) {
}
backupCreated, err := agentfiles.Write(cfg.ContainerType, serverRoot, normalizedPath, data)
if err != nil {
filesLogf(cfg, "action=write path=%s size=%d status=failed err=%v", normalizedPath, len(data), err)
writeFilesError(w, err)
return
}
filesLogf(cfg, "action=write path=%s size=%d backup_created=%t status=ok", normalizedPath, len(data), backupCreated)
writeJSON(w, http.StatusOK, map[string]any{
"saved": true,
"path": normalizedPath,

View File

@ -3,6 +3,7 @@ package handlers
import (
"encoding/json"
"errors"
"log"
"net/http"
"net/url"
"os"
@ -13,6 +14,10 @@ import (
"zlh-agent/internal/state"
)
// modsLogf emits a "[mods]"-tagged log line. The caller's format and args
// are appended after a fixed prefix carrying the VMID, container type, game
// and variant read from cfg.
func modsLogf(cfg *state.Config, format string, args ...any) {
	fields := make([]any, 0, len(args)+4)
	fields = append(fields, cfg.VMID, cfg.ContainerType, cfg.Game, cfg.Variant)
	fields = append(fields, args...)
	log.Printf("[mods] vmid=%d type=%s game=%s variant=%s "+format, fields...)
}
type jsonError struct {
Error string `json:"error"`
}
@ -42,7 +47,7 @@ func HandleGameModsInstall(w http.ResponseWriter, r *http.Request) {
return
}
_, serverRoot, ok := requireMinecraftGame(w)
cfg, serverRoot, ok := requireMinecraftGame(w)
if !ok {
return
}
@ -106,6 +111,7 @@ func HandleGameModsInstall(w http.ResponseWriter, r *http.Request) {
enabledPath := filepath.Join(modsDir, filename)
disabledPath := enabledPath + ".disabled"
if _, err := os.Stat(enabledPath); err == nil {
modsLogf(cfg, "action=install mod_id=%s source=%s filename=%s status=already_installed enabled=true", req.ModID, req.Source, filename)
writeJSON(w, http.StatusOK, map[string]any{
"status": "already-installed",
"fileName": filename,
@ -114,6 +120,7 @@ func HandleGameModsInstall(w http.ResponseWriter, r *http.Request) {
return
}
if _, err := os.Stat(disabledPath); err == nil {
modsLogf(cfg, "action=install mod_id=%s source=%s filename=%s status=already_installed enabled=false", req.ModID, req.Source, filename)
writeJSON(w, http.StatusOK, map[string]any{
"status": "already-installed",
"fileName": filename,
@ -124,10 +131,12 @@ func HandleGameModsInstall(w http.ResponseWriter, r *http.Request) {
resp, err := mods.InstallCurated(serverRoot, req)
if err != nil {
modsLogf(cfg, "action=install mod_id=%s source=%s status=failed err=%v", req.ModID, req.Source, err)
writeJSONError(w, http.StatusBadRequest, err.Error())
return
}
_ = resp
modsLogf(cfg, "action=install mod_id=%s source=%s filename=%s status=ok", req.ModID, req.Source, filename)
writeJSON(w, http.StatusOK, map[string]any{
"status": "installed",
"fileName": filename,
@ -135,7 +144,7 @@ func HandleGameModsInstall(w http.ResponseWriter, r *http.Request) {
}
func HandleGameModByID(w http.ResponseWriter, r *http.Request) {
_, serverRoot, ok := requireMinecraftGame(w)
cfg, serverRoot, ok := requireMinecraftGame(w)
if !ok {
return
}
@ -155,6 +164,7 @@ func HandleGameModByID(w http.ResponseWriter, r *http.Request) {
}
resp, err := mods.SetEnabled(serverRoot, modID, req.Enabled)
if err != nil {
modsLogf(cfg, "action=set_enabled mod_id=%s enabled=%t status=failed err=%v", modID, req.Enabled, err)
if errors.Is(err, os.ErrNotExist) {
writeJSONError(w, http.StatusNotFound, "mod not found")
return
@ -162,10 +172,12 @@ func HandleGameModByID(w http.ResponseWriter, r *http.Request) {
writeJSONError(w, http.StatusBadRequest, err.Error())
return
}
modsLogf(cfg, "action=set_enabled mod_id=%s enabled=%t status=ok", modID, req.Enabled)
writeJSON(w, http.StatusOK, resp)
case http.MethodDelete:
resp, err := mods.DeleteMod(serverRoot, modID)
if err != nil {
modsLogf(cfg, "action=delete mod_id=%s status=failed err=%v", modID, err)
if errors.Is(err, os.ErrNotExist) {
writeJSONError(w, http.StatusNotFound, "mod not found")
return
@ -173,6 +185,7 @@ func HandleGameModByID(w http.ResponseWriter, r *http.Request) {
writeJSONError(w, http.StatusBadRequest, err.Error())
return
}
modsLogf(cfg, "action=delete mod_id=%s status=ok", modID)
writeJSON(w, http.StatusOK, resp)
default:
writeJSONError(w, http.StatusMethodNotAllowed, "PATCH or DELETE only")

View File

@ -12,9 +12,11 @@ import (
"strings"
"time"
agentfiles "zlh-agent/internal/files"
agenthandlers "zlh-agent/internal/handlers"
mcstatus "zlh-agent/internal/minecraft"
"zlh-agent/internal/provision"
"zlh-agent/internal/provision/addons/codeserver"
"zlh-agent/internal/provision/devcontainer"
"zlh-agent/internal/provision/devcontainer/dotnet"
"zlh-agent/internal/provision/devcontainer/go"
@ -69,6 +71,20 @@ func waitMinecraftReady(cfg *state.Config, phase string, started time.Time) erro
return nil
}
// requireDevContainer loads the saved agent config and verifies that the
// code-server controls may be used with it.
//
// It returns the loaded config on success. It returns an error when the
// config cannot be loaded, when no config is available, when the container
// type is not "dev", or when code-server is not enabled in the config.
func requireDevContainer() (*state.Config, error) {
	cfg, err := state.LoadConfig()
	if err != nil {
		return nil, fmt.Errorf("load config: %w", err)
	}
	// Other callers of LoadConfig in this code base guard against a nil
	// config alongside a nil error; do the same here rather than panic on
	// the field accesses below.
	if cfg == nil {
		return nil, fmt.Errorf("load config: no config available")
	}
	if cfg.ContainerType != "dev" {
		return nil, fmt.Errorf("code-server controls are only available for dev containers")
	}
	if !cfg.EnableCodeServer {
		return nil, fmt.Errorf("code-server is not enabled for this container")
	}
	return cfg, nil
}
/*
--------------------------------------------------------------------------
Shared provision pipeline (installer + Minecraft verify)
@ -190,8 +206,10 @@ func handleConfig(w http.ResponseWriter, r *http.Request) {
http.Error(w, "bad json", http.StatusBadRequest)
return
}
log.Printf("[http] vmid=%d action=config status=received type=%s runtime=%s game=%s variant=%s version=%s", cfg.VMID, cfg.ContainerType, cfg.Runtime, cfg.Game, cfg.Variant, cfg.Version)
if err := state.SaveConfig(&cfg); err != nil {
log.Printf("[http] vmid=%d action=config status=save_failed err=%v", cfg.VMID, err)
http.Error(w, "save config failed: "+err.Error(), http.StatusInternalServerError)
return
}
@ -310,22 +328,26 @@ func handleStart(w http.ResponseWriter, r *http.Request) {
http.Error(w, "dev containers do not support manual start", http.StatusBadRequest)
return
}
log.Printf("[http] vmid=%d action=start status=requested", cfg.VMID)
started := time.Now()
state.SetState(state.StateStarting)
state.SetReadyState(false, "", "")
lifecycleLog(cfg, "start_manual", 1, started, "start_requested")
if err := system.StartServer(cfg); err != nil {
log.Printf("[http] vmid=%d action=start status=failed err=%v", cfg.VMID, err)
http.Error(w, "start error: "+err.Error(), http.StatusInternalServerError)
lifecycleLog(cfg, "start_manual", 1, started, "start_failed err=%v", err)
return
}
if err := waitMinecraftReady(cfg, "start_manual_probe", started); err != nil {
log.Printf("[http] vmid=%d action=start status=readiness_failed err=%v", cfg.VMID, err)
state.SetError(err)
state.SetState(state.StateError)
http.Error(w, "start readiness error: "+err.Error(), http.StatusGatewayTimeout)
return
}
log.Printf("[http] vmid=%d action=start status=ok", cfg.VMID)
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"ok": true, "state": "starting"}`))
@ -338,10 +360,16 @@ func handleStart(w http.ResponseWriter, r *http.Request) {
----------------------------------------------------------------------------
*/
// handleStop stops the managed server process.
//
// It responds 204 No Content on success and 500 with the error text when
// system.StopServer fails. The config is loaded once, purely to attach the
// VMID to log lines; a missing or unreadable config does not prevent the stop.
func handleStop(w http.ResponseWriter, r *http.Request) {
	cfg, cfgErr := state.LoadConfig()
	haveCfg := cfgErr == nil && cfg != nil

	if haveCfg {
		log.Printf("[http] vmid=%d action=stop status=requested", cfg.VMID)
	}
	if err := system.StopServer(); err != nil {
		// Log the failure so stop matches the start/restart handlers, which
		// record every failed outcome.
		if haveCfg {
			log.Printf("[http] vmid=%d action=stop status=failed err=%v", cfg.VMID, err)
		} else {
			log.Printf("[http] action=stop status=failed err=%v", err)
		}
		http.Error(w, "stop error: "+err.Error(), http.StatusInternalServerError)
		return
	}
	if haveCfg {
		log.Printf("[http] vmid=%d action=stop status=ok", cfg.VMID)
	}
	w.WriteHeader(http.StatusNoContent)
}
@ -362,9 +390,11 @@ func handleRestart(w http.ResponseWriter, r *http.Request) {
http.Error(w, "dev containers do not support restart", http.StatusBadRequest)
return
}
log.Printf("[http] vmid=%d action=restart status=requested", cfg.VMID)
_ = system.StopServer()
if err := system.WaitForServerExit(20 * time.Second); err != nil {
log.Printf("[http] vmid=%d action=restart status=stop_wait_failed err=%v", cfg.VMID, err)
http.Error(w, "restart error: "+err.Error(), http.StatusInternalServerError)
return
}
@ -373,15 +403,18 @@ func handleRestart(w http.ResponseWriter, r *http.Request) {
state.SetState(state.StateStarting)
state.SetReadyState(false, "", "")
if err := system.StartServer(cfg); err != nil {
log.Printf("[http] vmid=%d action=restart status=start_failed err=%v", cfg.VMID, err)
http.Error(w, "restart error: "+err.Error(), http.StatusInternalServerError)
return
}
if err := waitMinecraftReady(cfg, "restart_manual_probe", started); err != nil {
log.Printf("[http] vmid=%d action=restart status=readiness_failed err=%v", cfg.VMID, err)
state.SetError(err)
state.SetState(state.StateError)
http.Error(w, "restart readiness error: "+err.Error(), http.StatusGatewayTimeout)
return
}
log.Printf("[http] vmid=%d action=restart status=ok", cfg.VMID)
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"ok": true, "state": "starting"}`))
@ -404,6 +437,7 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
lastCrashExitCode := 0
lastCrashSignal := 0
lastCrashUptimeSeconds := int64(0)
lastCrashClassification := ""
var lastCrashLogTail []string
if crash := state.GetLastCrash(); crash != nil {
if !crash.Time.IsZero() {
@ -412,26 +446,62 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
lastCrashExitCode = crash.ExitCode
lastCrashSignal = crash.Signal
lastCrashUptimeSeconds = crash.UptimeSeconds
lastCrashClassification = crash.Classification
lastCrashLogTail = crash.LogTail
}
workspaceRoot := ""
serverRoot := ""
runtimeInstallPath := ""
runtimeInstalled := false
devProvisioned := false
devReadyAt := ""
codeServerInstalled := false
codeServerRunning := false
if cfg != nil {
if cfg.ContainerType == "dev" {
workspaceRoot = agentfiles.RuntimeRoot(cfg)
runtimeInstallPath = devcontainer.RuntimeInstallDir(cfg.Runtime, cfg.Version)
runtimeInstalled = devcontainer.RuntimeInstalled(cfg.Runtime, cfg.Version)
devProvisioned = devcontainer.IsProvisioned()
if readyInfo, err := devcontainer.ReadReadyMarker(); err == nil && readyInfo != nil {
devReadyAt = readyInfo.ReadyAt
}
if cfg.EnableCodeServer {
codeServerInstalled = codeserver.Installed()
codeServerRunning = codeserver.Running()
}
} else {
serverRoot = provision.ServerDir(*cfg)
}
}
resp := map[string]any{
"state": state.GetState(),
"processRunning": processRunning,
"ready": state.GetReady(),
"readySource": state.GetReadySource(),
"readyError": state.GetReadyError(),
"lastReadyAt": readyAt,
"installStep": state.GetInstallStep(),
"crashCount": state.GetCrashCount(),
"lastCrashTime": lastCrashTime,
"lastCrashExitCode": lastCrashExitCode,
"lastCrashSignal": lastCrashSignal,
"lastCrashUptimeSeconds": lastCrashUptimeSeconds,
"lastCrashLogTail": lastCrashLogTail,
"error": nil,
"config": cfg,
"timestamp": time.Now().Unix(),
"state": state.GetState(),
"processRunning": processRunning,
"ready": state.GetReady(),
"readySource": state.GetReadySource(),
"readyError": state.GetReadyError(),
"lastReadyAt": readyAt,
"installStep": state.GetInstallStep(),
"crashCount": state.GetCrashCount(),
"lastCrashTime": lastCrashTime,
"lastCrashExitCode": lastCrashExitCode,
"lastCrashSignal": lastCrashSignal,
"lastCrashUptimeSeconds": lastCrashUptimeSeconds,
"lastCrashClassification": lastCrashClassification,
"lastCrashLogTail": lastCrashLogTail,
"error": nil,
"config": cfg,
"workspaceRoot": workspaceRoot,
"serverRoot": serverRoot,
"runtimeInstallPath": runtimeInstallPath,
"runtimeInstalled": runtimeInstalled,
"devProvisioned": devProvisioned,
"devReadyAt": devReadyAt,
"codeServerInstalled": codeServerInstalled,
"codeServerRunning": codeServerRunning,
"timestamp": time.Now().Unix(),
}
if err := state.GetError(); err != nil {
@ -442,6 +512,75 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
_ = json.NewEncoder(w).Encode(resp)
}
// handleCodeServerStart handles POST requests that start code-server.
//
// Non-POST methods get 405. A config that fails requireDevContainer gets 400
// with the error text. A failed start gets 500. On success the response is a
// JSON object with "status" and the current "running" flag.
func handleCodeServerStart(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "POST only", http.StatusMethodNotAllowed)
		return
	}

	cfg, cfgErr := requireDevContainer()
	if cfgErr != nil {
		http.Error(w, cfgErr.Error(), http.StatusBadRequest)
		return
	}
	vmid := cfg.VMID

	log.Printf("[http] vmid=%d action=codeserver_start status=requested", vmid)
	startErr := codeserver.Start(*cfg)
	if startErr != nil {
		log.Printf("[http] vmid=%d action=codeserver_start status=failed err=%v", vmid, startErr)
		http.Error(w, "code-server start failed: "+startErr.Error(), http.StatusInternalServerError)
		return
	}
	log.Printf("[http] vmid=%d action=codeserver_start status=ok", vmid)

	w.Header().Set("Content-Type", "application/json")
	payload := map[string]any{"status": "ok", "running": codeserver.Running()}
	// The encode error is discarded: the status line is already committed.
	_ = json.NewEncoder(w).Encode(payload)
}
// handleCodeServerStop handles POST requests that stop code-server.
//
// Non-POST methods get 405. A config that fails requireDevContainer gets 400
// with the error text. A failed stop gets 500. On success the response is a
// JSON object with "status" and the current "running" flag.
func handleCodeServerStop(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "POST only", http.StatusMethodNotAllowed)
		return
	}

	cfg, cfgErr := requireDevContainer()
	if cfgErr != nil {
		http.Error(w, cfgErr.Error(), http.StatusBadRequest)
		return
	}
	vmid := cfg.VMID

	log.Printf("[http] vmid=%d action=codeserver_stop status=requested", vmid)
	stopErr := codeserver.Stop()
	if stopErr != nil {
		log.Printf("[http] vmid=%d action=codeserver_stop status=failed err=%v", vmid, stopErr)
		http.Error(w, "code-server stop failed: "+stopErr.Error(), http.StatusInternalServerError)
		return
	}
	log.Printf("[http] vmid=%d action=codeserver_stop status=ok", vmid)

	w.Header().Set("Content-Type", "application/json")
	payload := map[string]any{"status": "ok", "running": codeserver.Running()}
	// The encode error is discarded: the status line is already committed.
	_ = json.NewEncoder(w).Encode(payload)
}
// handleCodeServerRestart handles POST requests that restart code-server.
//
// Non-POST methods get 405. A config that fails requireDevContainer gets 400
// with the error text. A failed restart gets 500. On success the response is
// a JSON object with "status" and the current "running" flag.
func handleCodeServerRestart(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "POST only", http.StatusMethodNotAllowed)
		return
	}

	cfg, cfgErr := requireDevContainer()
	if cfgErr != nil {
		http.Error(w, cfgErr.Error(), http.StatusBadRequest)
		return
	}
	vmid := cfg.VMID

	log.Printf("[http] vmid=%d action=codeserver_restart status=requested", vmid)
	restartErr := codeserver.Restart(*cfg)
	if restartErr != nil {
		log.Printf("[http] vmid=%d action=codeserver_restart status=failed err=%v", vmid, restartErr)
		http.Error(w, "code-server restart failed: "+restartErr.Error(), http.StatusInternalServerError)
		return
	}
	log.Printf("[http] vmid=%d action=codeserver_restart status=ok", vmid)

	w.Header().Set("Content-Type", "application/json")
	payload := map[string]any{"status": "ok", "running": codeserver.Running()}
	// The encode error is discarded: the status line is already committed.
	_ = json.NewEncoder(w).Encode(payload)
}
/*
--------------------------------------------------------------------------
/console/command
@ -624,6 +763,9 @@ func NewMux() *http.ServeMux {
m.HandleFunc("/stop", handleStop)
m.HandleFunc("/restart", handleRestart)
m.HandleFunc("/status", handleStatus)
m.HandleFunc("/dev/codeserver/start", handleCodeServerStart)
m.HandleFunc("/dev/codeserver/stop", handleCodeServerStop)
m.HandleFunc("/dev/codeserver/restart", handleCodeServerRestart)
m.HandleFunc("/console/command", handleSendCommand)
m.HandleFunc("/agent/update", handleAgentUpdate)
m.HandleFunc("/agent/update/status", handleAgentUpdateStatus)

View File

@ -2,21 +2,33 @@ package codeserver
import (
"fmt"
"path/filepath"
"strconv"
"zlh-agent/internal/provision/executil"
"zlh-agent/internal/state"
)
func Install(cfg state.Config) error {
scriptPath := filepath.Join(
executil.ScriptsRoot,
"addons",
"codeserver",
"install.sh",
)
// launchEnv builds the extra environment entries passed to the code-server
// script: a fixed port and workspace path, plus VMID when cfg carries a
// non-zero one.
func launchEnv(cfg state.Config) []string {
	env := []string{
		"CODE_SERVER_PORT=8080",
		"CODE_SERVER_WORKSPACE=/home/dev/workspace",
	}
	if cfg.VMID == 0 {
		return env
	}
	return append(env, "VMID="+strconv.Itoa(cfg.VMID))
}
if err := executil.RunScript(scriptPath); err != nil {
func Install(cfg state.Config) error {
if Running() {
if err := Stop(); err != nil {
return fmt.Errorf("codeserver stop failed before install: %w", err)
}
}
if err := executil.RunEmbeddedScript("addons/codeserver/install.sh", launchEnv(cfg)...); err != nil {
return fmt.Errorf("codeserver install failed: %w", err)
}
return nil

View File

@ -4,19 +4,155 @@ import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"zlh-agent/internal/provision/executil"
"zlh-agent/internal/state"
)
const marker = "/opt/zlh/.zlh/addons/code-server.installed"
const (
marker = "/opt/zlh/.zlh/addons/code-server.installed"
pidFile = "/opt/zlh/.zlh/addons/code-server.pid"
binPath = "/opt/zlh/services/code-server/bin/code-server"
)
// Installed reports whether the code-server install marker file can be
// stat'ed. Any stat error is treated as "not installed".
func Installed() bool {
	if _, statErr := os.Stat(marker); statErr != nil {
		return false
	}
	return true
}
// Running reports whether a code-server process appears to be alive.
//
// It first trusts the pid file: if it holds a positive pid that accepts
// signal 0, the answer is true. Otherwise a readable pid file is removed as
// stale and /proc is scanned via findRunningPID; a hit is written back to the
// pid file (best effort) before returning true.
func Running() bool {
	if raw, readErr := os.ReadFile(pidFile); readErr == nil {
		recorded, convErr := strconv.Atoi(strings.TrimSpace(string(raw)))
		if convErr == nil && recorded > 0 {
			if proc, findErr := os.FindProcess(recorded); findErr == nil {
				// Signal 0 probes the process without delivering a signal.
				if proc.Signal(syscall.Signal(0)) == nil {
					return true
				}
			}
		}
		// The pid file was readable but did not identify a live process.
		_ = os.Remove(pidFile)
	}

	found, scanErr := findRunningPID()
	if scanErr == nil && found > 0 {
		// Best effort: remember the discovered pid for later calls.
		_ = os.WriteFile(pidFile, []byte(strconv.Itoa(found)), 0o644)
		return true
	}
	return false
}
func Verify() error {
if _, err := os.Stat(marker); err != nil {
if !Installed() {
return fmt.Errorf("code-server addon marker missing")
}
if _, err := os.Stat(binPath); err != nil {
return fmt.Errorf("code-server binary missing at %s", binPath)
}
if _, err := exec.LookPath("code-server"); err != nil {
return fmt.Errorf("code-server binary not found in PATH")
}
return nil
}
// findRunningPID scans /proc/<pid>/cmdline entries and returns the pid of the
// first process whose command line contains both "code-server" and
// "--bind-addr 0.0.0.0:8080". It returns an error when the glob fails or no
// entry matches.
func findRunningPID() (int, error) {
	entries, globErr := filepath.Glob("/proc/[0-9]*/cmdline")
	if globErr != nil {
		return 0, globErr
	}

	for _, entry := range entries {
		data, readErr := os.ReadFile(entry)
		if readErr != nil || len(data) == 0 {
			continue
		}

		// cmdline separates arguments with NUL bytes; use spaces so the
		// flag and its value can be matched as one substring.
		args := strings.ReplaceAll(string(data), "\x00", " ")
		isCodeServer := strings.Contains(args, "code-server") &&
			strings.Contains(args, "--bind-addr 0.0.0.0:8080")
		if !isCodeServer {
			continue
		}

		// The pid is the name of the directory holding the cmdline file.
		pid, convErr := strconv.Atoi(filepath.Base(filepath.Dir(entry)))
		if convErr == nil && pid > 0 {
			return pid, nil
		}
	}

	return 0, fmt.Errorf("code-server process not found")
}
// Start launches code-server by running the embedded
// "addons/codeserver/install.sh" script with the environment from launchEnv.
//
// It fails when the addon is not installed. If a code-server process is
// already running it is stopped first, and a failure to stop it is returned
// as-is.
func Start(cfg state.Config) error {
	switch {
	case !Installed():
		return fmt.Errorf("code-server addon not installed")
	case Running():
		if stopErr := Stop(); stopErr != nil {
			return stopErr
		}
	}
	return executil.RunEmbeddedScript("addons/codeserver/install.sh", launchEnv(cfg)...)
}
// Stop terminates the code-server process recorded in the pid file.
//
// It sends SIGTERM, polls Running() up to 20 times at 250ms intervals, and
// sends SIGKILL if the process is still reported as running. The pid file is
// removed on every path that returns nil, except when the file was already
// absent.
//
// Returns nil when the pid file does not exist. Returns an error when the pid
// file cannot be read, holds an invalid pid, or a signal fails for a reason
// other than the process already being gone.
//
// NOTE(review): when the pid file is absent this returns nil without scanning
// for a running process, so a process only discoverable via findRunningPID is
// left alone unless Running() was called first — confirm this is intended.
func Stop() error {
raw, err := os.ReadFile(pidFile)
if err != nil {
// No pid file: nothing recorded to stop.
if os.IsNotExist(err) {
return nil
}
return fmt.Errorf("read code-server pid: %w", err)
}
pid, err := strconv.Atoi(strings.TrimSpace(string(raw)))
if err != nil || pid <= 0 {
// Unparseable or non-positive pid: drop the file so later calls start clean.
_ = os.Remove(pidFile)
return fmt.Errorf("invalid code-server pid")
}
process, err := os.FindProcess(pid)
if err != nil {
_ = os.Remove(pidFile)
return fmt.Errorf("find code-server process: %w", err)
}
// Ask for a graceful shutdown; an already-exited process is not an error.
if err := process.Signal(syscall.SIGTERM); err != nil && !errorsIsProcessDone(err) {
return fmt.Errorf("stop code-server: %w", err)
}
// Wait up to 20 * 250ms for Running() to report the process gone.
// NOTE(review): Running() falls back to a /proc scan and may rewrite the pid
// file with a different matching pid, so this loop is not tied to `pid` alone.
for i := 0; i < 20; i++ {
if !Running() {
_ = os.Remove(pidFile)
return nil
}
time.Sleep(250 * time.Millisecond)
}
// Still reported running after the grace period: force kill.
// NOTE(review): there is no wait after SIGKILL before returning nil.
if err := process.Signal(syscall.SIGKILL); err != nil && !errorsIsProcessDone(err) {
return fmt.Errorf("kill code-server: %w", err)
}
_ = os.Remove(pidFile)
return nil
}
// Restart stops code-server and then starts it again with cfg. If Stop
// fails, its error is returned and Start is not attempted.
func Restart(cfg state.Config) error {
	err := Stop()
	if err == nil {
		err = Start(cfg)
	}
	return err
}
// errorsIsProcessDone reports whether err means the target process has
// already exited: either it is os.ErrProcessDone itself, or its message
// contains "process already finished" (case-insensitive).
func errorsIsProcessDone(err error) bool {
	if err == os.ErrProcessDone {
		return true
	}
	message := strings.ToLower(err.Error())
	return strings.Contains(message, "process already finished")
}

View File

@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
@ -55,6 +56,11 @@ type CatalogRuntime struct {
Versions []string `json:"versions"`
}
// ReadyInfo is the JSON document decoded from the dev container ready marker
// file by ReadReadyMarker.
type ReadyInfo struct {
// Runtime is read from the "runtime" key.
Runtime string `json:"runtime"`
// ReadyAt is read from the "ready_at" key and kept as the raw string.
ReadyAt string `json:"ready_at"`
}
// ReadyMarkerPath returns the absolute path to the ready marker file.
func ReadyMarkerPath() string {
return filepath.Join(MarkerDir, ReadyMarker)
@ -66,6 +72,19 @@ func IsProvisioned() bool {
return err == nil
}
// ReadReadyMarker reads the ready marker file at ReadyMarkerPath and decodes
// its JSON content. Read and decode errors are returned unchanged, so callers
// can still inspect the underlying file-system error.
func ReadReadyMarker() (*ReadyInfo, error) {
	data, readErr := os.ReadFile(ReadyMarkerPath())
	if readErr != nil {
		return nil, readErr
	}

	info := new(ReadyInfo)
	if decodeErr := json.Unmarshal(data, info); decodeErr != nil {
		return nil, decodeErr
	}
	return info, nil
}
// WriteReadyMarker records successful dev container provisioning.
// This should be called by runtime installers AFTER all install steps succeed.
func WriteReadyMarker(runtime string) error {
@ -94,6 +113,7 @@ func EnsureWorkspace() error {
func LoadCatalog() (*Catalog, error) {
url := provcommon.BuildArtifactURL(CatalogRelativePath)
log.Printf("[provision] action=load_catalog url=%s", url)
resp, err := (&http.Client{Timeout: 15 * time.Second}).Get(url)
if err != nil {
return nil, fmt.Errorf("fetch dev runtime catalog: %w", err)
@ -113,6 +133,7 @@ func LoadCatalog() (*Catalog, error) {
if err := json.Unmarshal(raw, &catalog); err != nil {
return nil, fmt.Errorf("parse dev runtime catalog: %w", err)
}
log.Printf("[provision] action=load_catalog status=ok runtimes=%d", len(catalog.Runtimes))
return &catalog, nil
}
@ -135,6 +156,7 @@ func ValidateRuntimeSelection(cfg state.Config) error {
if strings.EqualFold(runtime.ID, runtimeID) {
for _, candidate := range runtime.Versions {
if candidate == version {
log.Printf("[provision] action=validate_runtime runtime=%s version=%s status=ok", runtimeID, version)
return nil
}
}

View File

@ -2,6 +2,7 @@ package devcontainer
import (
"fmt"
"log"
"strings"
"zlh-agent/internal/state"
@ -14,10 +15,13 @@ import (
)
func Provision(cfg state.Config) error {
log.Printf("[provision] vmid=%d type=dev runtime=%s version=%s action=dev_provision status=begin", cfg.VMID, cfg.Runtime, cfg.Version)
if err := ValidateRuntimeSelection(cfg); err != nil {
log.Printf("[provision] vmid=%d type=dev runtime=%s version=%s action=dev_provision status=failed err=%v", cfg.VMID, cfg.Runtime, cfg.Version, err)
return err
}
if err := EnsureDevUserEnvironment(); err != nil {
log.Printf("[provision] vmid=%d type=dev runtime=%s version=%s action=ensure_dev_user status=failed err=%v", cfg.VMID, cfg.Runtime, cfg.Version, err)
return err
}
@ -40,10 +44,13 @@ func Provision(cfg state.Config) error {
}
if err != nil {
log.Printf("[provision] vmid=%d type=dev runtime=%s version=%s action=runtime_install status=failed err=%v", cfg.VMID, cfg.Runtime, cfg.Version, err)
return err
}
if err := WriteReadyMarker(runtime); err != nil {
log.Printf("[provision] vmid=%d type=dev runtime=%s version=%s action=write_ready_marker status=failed err=%v", cfg.VMID, cfg.Runtime, cfg.Version, err)
return fmt.Errorf("write dev ready marker: %w", err)
}
log.Printf("[provision] vmid=%d type=dev runtime=%s version=%s action=dev_provision status=ok", cfg.VMID, cfg.Runtime, cfg.Version)
return nil
}

View File

@ -3,6 +3,8 @@ package executil
import (
"bytes"
"fmt"
"io"
"log"
"os"
"os/exec"
"path"
@ -31,10 +33,20 @@ func RunEmbeddedScript(scriptPath string, extraEnv ...string) error {
}
// Match RunScript behavior (executil.go)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
var buf bytes.Buffer
cmd.Stdout = io.MultiWriter(os.Stdout, &buf)
cmd.Stderr = io.MultiWriter(os.Stderr, &buf)
log.Printf("[provision] action=run_embedded_script path=%s", normalized)
return cmd.Run()
if err := cmd.Run(); err != nil {
log.Printf("[provision] action=run_embedded_script status=failed path=%s err=%v", normalized, err)
for _, line := range tailLogLines(buf.String(), 10) {
log.Printf("[provision] path=%s output=%s", normalized, line)
}
return err
}
log.Printf("[provision] action=run_embedded_script status=ok path=%s", normalized)
return nil
}
func loadEmbeddedPayload(normalized string) ([]byte, error) {

View File

@ -1,15 +1,45 @@
package executil
import (
"bytes"
"io"
"log"
"os"
"os/exec"
"strings"
)
const ScriptsRoot = "/opt/zlh-agent/scripts"
func RunScript(path string) error {
cmd := exec.Command("/bin/bash", path)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
var buf bytes.Buffer
cmd.Stdout = io.MultiWriter(os.Stdout, &buf)
cmd.Stderr = io.MultiWriter(os.Stderr, &buf)
log.Printf("[provision] action=run_script path=%s", path)
if err := cmd.Run(); err != nil {
log.Printf("[provision] action=run_script status=failed path=%s err=%v", path, err)
for _, line := range tailLogLines(buf.String(), 10) {
log.Printf("[provision] path=%s output=%s", path, line)
}
return err
}
log.Printf("[provision] action=run_script status=ok path=%s", path)
return nil
}
// tailLogLines returns the last max non-blank lines of raw, in their original
// order, with surrounding whitespace (including any trailing "\r") trimmed
// from each line.
//
// A non-positive max yields an empty slice instead of panicking. The input is
// walked from the end, so at most max lines are collected regardless of how
// much output was captured.
func tailLogLines(raw string, max int) []string {
	if max <= 0 {
		return []string{}
	}

	lines := strings.Split(raw, "\n")
	out := make([]string, 0, max)
	for i := len(lines) - 1; i >= 0 && len(out) < max; i-- {
		line := strings.TrimSpace(lines[i])
		if line == "" {
			continue
		}
		out = append(out, line)
	}

	// Lines were collected newest-first; restore chronological order.
	for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 {
		out[i], out[j] = out[j], out[i]
	}
	return out
}

View File

@ -82,11 +82,12 @@ type agentStatus struct {
}
type CrashInfo struct {
Time time.Time `json:"time"`
ExitCode int `json:"exitCode"`
Signal int `json:"signal"`
UptimeSeconds int64 `json:"uptimeSeconds"`
LogTail []string `json:"logTail"`
Time time.Time `json:"time"`
ExitCode int `json:"exitCode"`
Signal int `json:"signal"`
UptimeSeconds int64 `json:"uptimeSeconds"`
Classification string `json:"classification"`
LogTail []string `json:"logTail"`
}
var global = &agentStatus{
@ -130,6 +131,12 @@ func GetCrashCount() int {
return global.crashCount
}
// ResetCrashCount sets the global crash counter back to zero while holding
// the status mutex.
func ResetCrashCount() {
	global.mu.Lock()
	global.crashCount = 0
	global.mu.Unlock()
}
func GetLastChange() time.Time {
global.mu.Lock()
defer global.mu.Unlock()

View File

@ -51,7 +51,6 @@ func InitAutoStart() {
if err == nil && cfg != nil {
log.Println("[autostart] config detected: boot-starting server")
_ = StartServer(cfg)
go monitorCrashes(cfg)
return
}
time.Sleep(3 * time.Second)

View File

@ -74,48 +74,16 @@ func StartServer(cfg *state.Config) error {
serverPTY = ptmx
serverStartTime = time.Now()
if state.GetState() != state.StateCrashed {
gameSupervisor.Reset()
}
state.ClearIntentionalStop()
state.SetState(state.StateRunning)
state.SetError(nil)
state.SetReadyState(false, "", "")
log.Printf("[process] vmid=%d server process started", cfg.VMID)
go func() {
err := cmd.Wait()
mu.Lock()
defer mu.Unlock()
if serverPTY != nil {
_ = serverPTY.Close()
}
if state.IsIntentionalStop() {
state.ClearIntentionalStop()
state.SetState(state.StateIdle)
state.SetReadyState(false, "", "")
log.Printf("[process] vmid=%d server exited after intentional stop", cfg.VMID)
} else if err != nil {
crashInfo := buildCrashInfo(cfg, err, serverStartTime)
state.SetLastCrash(crashInfo)
log.Printf("[process] server crashed vmid=%d exit_code=%d signal=%d uptime=%ds", cfg.VMID, crashInfo.ExitCode, crashInfo.Signal, crashInfo.UptimeSeconds)
if len(crashInfo.LogTail) > 0 {
log.Printf("[process] crash log tail:")
for _, line := range lastLines(crashInfo.LogTail, 20) {
log.Printf("[process] %s", line)
}
}
state.RecordCrash(err)
} else {
state.SetState(state.StateIdle)
state.SetReadyState(false, "", "")
log.Printf("[process] vmid=%d server exited cleanly", cfg.VMID)
}
serverCmd = nil
serverPTY = nil
serverStartTime = time.Time{}
}()
go gameSupervisor.Watch(cfg, cmd, ptmx, serverStartTime)
return nil
}
@ -370,19 +338,63 @@ func buildCrashInfo(cfg *state.Config, waitErr error, startedAt time.Time) *stat
}
}
logTail := tailLogLines(cfg, 40)
return &state.CrashInfo{
Time: time.Now().UTC(),
ExitCode: exitCode,
Signal: signal,
UptimeSeconds: uptime,
LogTail: tailLogLines(cfg, 40),
Time: time.Now().UTC(),
ExitCode: exitCode,
Signal: signal,
UptimeSeconds: uptime,
Classification: classifyCrash(waitErr, exitCode, signal, logTail),
LogTail: logTail,
}
}
// classifyCrash derives a coarse crash category from the process wait error,
// exit code, terminating signal and the tail of the server log.
//
// Log-based categories take precedence over exit details:
//   - "oom" when any tail line mentions an out-of-memory condition. This is
//     checked across the whole tail first, so an earlier line that merely
//     mentions e.g. "plugin" cannot mask a later OutOfMemoryError.
//   - otherwise the first tail line matching a mod/plugin marker yields
//     "mod_or_plugin_error", or a missing class/file marker yields
//     "missing_dependency" (mod/plugin wins when one line matches both).
//
// With no log match the result is "readiness_failure" (wait error text
// contains "ready"), "killed" (signal 9), "signaled_exit", "nonzero_exit" or
// "unexpected_exit", in that order.
func classifyCrash(waitErr error, exitCode, signal int, logTail []string) string {
	lowered := make([]string, len(logTail))
	for i, line := range logTail {
		lowered[i] = strings.ToLower(line)
	}

	// Pass 1: out-of-memory is the most specific signal; look for it anywhere.
	for _, line := range lowered {
		if crashLineHasAny(line, "outofmemoryerror", "java heap space", "gc overhead limit exceeded") {
			return "oom"
		}
	}

	// Pass 2: remaining heuristics, first matching line wins.
	for _, line := range lowered {
		switch {
		case crashLineHasAny(line,
			"mixin",
			"failed to load mod",
			"loading errors encountered",
			"mod resolution encountered",
			"plugin"):
			return "mod_or_plugin_error"
		case crashLineHasAny(line,
			"classnotfoundexception",
			"noclassdeffounderror",
			"could not find or load main class",
			"no such file or directory"):
			return "missing_dependency"
		}
	}

	// fmt.Sprint renders a nil error as "<nil>", which matches nothing below.
	errText := strings.ToLower(fmt.Sprint(waitErr))
	switch {
	case strings.Contains(errText, "ready"):
		return "readiness_failure"
	case signal == 9:
		return "killed"
	case signal != 0:
		return "signaled_exit"
	case exitCode != 0:
		return "nonzero_exit"
	default:
		return "unexpected_exit"
	}
}

// crashLineHasAny reports whether line contains at least one of markers.
func crashLineHasAny(line string, markers ...string) bool {
	for _, marker := range markers {
		if strings.Contains(line, marker) {
			return true
		}
	}
	return false
}
func extractExitDetails(err error) (int, int) {
exitCode := -1
exitCode := 0
signal := 0
if err == nil {
return exitCode, signal
}
var exitErr *exec.ExitError
if !errors.As(err, &exitErr) {
return exitCode, signal

View File

@ -0,0 +1,126 @@
package system
import (
"fmt"
"log"
"os"
"os/exec"
"sync"
"time"
"zlh-agent/internal/state"
)
// Restart policy for the supervised server process.
const (
	// supervisorStabilityWindow is the uptime at or above which a run counts
	// as stable, resetting the restart attempt counter.
	supervisorStabilityWindow = 30 * time.Second
	// maxRestartAttempts is the highest attempt number that still triggers
	// an automatic restart.
	maxRestartAttempts = 3
)

// restartBackoffSchedule lists the delay applied before restart attempt
// 1, 2, 3, ... (indexed by attempt-1).
var restartBackoffSchedule = []time.Duration{
	30 * time.Second,
	time.Minute,
	2 * time.Minute,
}

// processSupervisor tracks how many consecutive automatic restarts have been
// attempted. The counter is guarded by mu.
type processSupervisor struct {
	mu              sync.Mutex
	restartAttempts int
}

// gameSupervisor is the package-wide supervisor instance.
var gameSupervisor = new(processSupervisor)

// Reset clears the restart attempt counter.
func (s *processSupervisor) Reset() {
	s.mu.Lock()
	s.restartAttempts = 0
	s.mu.Unlock()
}

// nextAttempt records one more restart attempt and returns its 1-based
// number. When uptime reached the stability window, counting starts over, so
// the returned value is 1.
func (s *processSupervisor) nextAttempt(uptime time.Duration) int {
	s.mu.Lock()
	defer s.mu.Unlock()

	previous := s.restartAttempts
	if uptime >= supervisorStabilityWindow {
		previous = 0
	}
	s.restartAttempts = previous + 1
	return s.restartAttempts
}
// Watch blocks until cmd exits and then decides what to do about it.
//
// Parameters:
//   - cfg: agent configuration; its VMID is used in log lines and it is passed
//     to StartServer on restart.
//   - cmd: the already-started process to wait on.
//   - ptmx: the PTY associated with the process (may be nil); it is closed
//     once the process has exited.
//   - startedAt: when the process was started, used to compute uptime.
//
// Behaviour after the process exits:
//  1. The package-level process handles are cleared via clearProcess.
//  2. If the stop was intentional, the flag is cleared, state goes to idle,
//     readiness is reset, and Watch returns without restarting.
//  3. Otherwise the exit is recorded as a crash (crash info, log tail, crash
//     counter). An uptime of at least supervisorStabilityWindow resets the
//     crash count beforehand.
//  4. If the attempt number exceeds maxRestartAttempts the server is put into
//     the error state and Watch returns.
//  5. Otherwise Watch sleeps for the backoff delay and calls StartServer,
//     unless an intentional stop or an error state was set in the meantime.
//
// Watch blocks for the whole process lifetime plus any backoff delay.
func (s *processSupervisor) Watch(cfg *state.Config, cmd *exec.Cmd, ptmx *os.File, startedAt time.Time) {
	err := cmd.Wait()
	uptime := time.Since(startedAt)
	s.clearProcess(cmd, ptmx)

	if state.IsIntentionalStop() {
		state.ClearIntentionalStop()
		state.SetState(state.StateIdle)
		state.SetReadyState(false, "", "")
		log.Printf("[process] vmid=%d server exited after intentional stop", cfg.VMID)
		return
	}

	// A sufficiently long run counts as stable: forget earlier crashes before
	// recording this one.
	if uptime >= supervisorStabilityWindow {
		state.ResetCrashCount()
	}

	crashInfo := buildCrashInfo(cfg, err, startedAt)
	state.SetLastCrash(crashInfo)
	log.Printf("[process] vmid=%d server crashed exit_code=%d signal=%d uptime=%ds", cfg.VMID, crashInfo.ExitCode, crashInfo.Signal, crashInfo.UptimeSeconds)
	if len(crashInfo.LogTail) > 0 {
		log.Printf("[process] vmid=%d crash log tail:", cfg.VMID)
		for _, line := range lastLines(crashInfo.LogTail, 20) {
			log.Printf("[process] vmid=%d %s", cfg.VMID, line)
		}
	}

	// A nil wait error means the process exited with status 0; that is still
	// unexpected here, so record a synthetic error.
	recordErr := err
	if recordErr == nil {
		recordErr = fmt.Errorf("server exited unexpectedly")
	}
	state.RecordCrash(recordErr)

	attempt := s.nextAttempt(uptime)
	if attempt > maxRestartAttempts {
		restartErr := fmt.Errorf("server crashed repeatedly")
		state.SetError(restartErr)
		state.SetState(state.StateError)
		log.Printf("[process] vmid=%d restart limit reached attempts=%d", cfg.VMID, attempt-1)
		return
	}

	// Pick the backoff for this attempt. The index is clamped so that raising
	// maxRestartAttempts without extending restartBackoffSchedule reuses the
	// last (longest) delay instead of panicking with an index out of range.
	var delay time.Duration
	if n := len(restartBackoffSchedule); n > 0 {
		idx := attempt - 1
		if idx >= n {
			idx = n - 1
		}
		delay = restartBackoffSchedule[idx]
	}
	log.Printf("[process] vmid=%d restart attempt=%d delay=%s", cfg.VMID, attempt, delay)
	time.Sleep(delay)

	// Conditions may have changed while waiting: do not restart a server that
	// was stopped on purpose or already marked as failed.
	if state.IsIntentionalStop() || state.GetState() == state.StateError {
		return
	}

	if err := StartServer(cfg); err != nil {
		state.SetError(err)
		state.SetState(state.StateError)
		log.Printf("[process] vmid=%d restart attempt=%d failed err=%v", cfg.VMID, attempt, err)
	}
}
// clearProcess drops the package-level references to an exited process.
//
// While holding the package-level mu it resets serverStartTime, clears
// serverCmd and serverPTY when they still point at cmd and ptmx, and closes
// ptmx if it is non-nil.
func (s *processSupervisor) clearProcess(cmd *exec.Cmd, ptmx *os.File) {
	mu.Lock()
	defer mu.Unlock()

	serverStartTime = time.Time{}

	// Only clear the globals if they still refer to the handles passed in.
	if serverCmd == cmd {
		serverCmd = nil
	}
	if serverPTY == ptmx {
		serverPTY = nil
	}

	if ptmx == nil {
		return
	}
	// The process is already gone; the close error is deliberately discarded.
	_ = ptmx.Close()
}

View File

@ -8,12 +8,14 @@ echo "[code-server] starting install"
# --------------------------------------------------
SERVICE_ROOT="/opt/zlh/services/code-server"
ZLH_ARTIFACT_BASE_URL="${ZLH_ARTIFACT_BASE_URL:-http://10.60.0.251:8080}"
ARTIFACT_URL="${ZLH_ARTIFACT_BASE_URL%/}/addons/code-server/code-server.zip"
ARTIFACT_TMP="/tmp/code-server.zip"
ARTIFACT_NAME="${ZLH_CODESERVER_ARTIFACT:-code-server.tar.gz}"
ARTIFACT_URL="${ZLH_ARTIFACT_BASE_URL%/}/addons/code-server/${ARTIFACT_NAME}"
ARTIFACT_TMP="/tmp/${ARTIFACT_NAME}"
MARKER="/opt/zlh/.zlh/addons/code-server.installed"
PID_FILE="/opt/zlh/.zlh/addons/code-server.pid"
LOG_FILE="/opt/zlh/logs/code-server.log"
WORKSPACE_DIR="/home/dev/workspace"
WORKSPACE_DIR="${CODE_SERVER_WORKSPACE:-/home/dev/workspace}"
PORT="${CODE_SERVER_PORT:-8080}"
BIN="${SERVICE_ROOT}/bin/code-server"
LINK_PATH="/usr/local/bin/code-server"
@ -21,7 +23,7 @@ mkdir -p "$(dirname "${MARKER}")"
mkdir -p "$(dirname "${LOG_FILE}")"
download_artifact() {
echo "[code-server] downloading ${ARTIFACT_URL}"
echo "[code-server] action=artifact_fetch step=download url=${ARTIFACT_URL} target=${ARTIFACT_TMP}"
if command -v curl >/dev/null 2>&1; then
curl -fL "${ARTIFACT_URL}" -o "${ARTIFACT_TMP}"
elif command -v wget >/dev/null 2>&1; then
@ -30,20 +32,33 @@ download_artifact() {
echo "[code-server][ERROR] curl or wget is required"
exit 1
fi
echo "[code-server] action=artifact_fetch step=download status=ok url=${ARTIFACT_URL} target=${ARTIFACT_TMP}"
}
extract_artifact() {
local tmp_dir
tmp_dir="$(mktemp -d)"
echo "[code-server] action=artifact_extract step=extract source=${ARTIFACT_TMP} target=${SERVICE_ROOT}"
if command -v unzip >/dev/null 2>&1; then
unzip -q "${ARTIFACT_TMP}" -d "${tmp_dir}"
elif command -v bsdtar >/dev/null 2>&1; then
bsdtar -xf "${ARTIFACT_TMP}" -C "${tmp_dir}"
else
echo "[code-server][ERROR] unzip or bsdtar is required"
exit 127
fi
case "${ARTIFACT_NAME}" in
*.tar.gz|*.tgz)
tar -xzf "${ARTIFACT_TMP}" -C "${tmp_dir}"
;;
*.zip)
if command -v unzip >/dev/null 2>&1; then
unzip -q "${ARTIFACT_TMP}" -d "${tmp_dir}"
elif command -v bsdtar >/dev/null 2>&1; then
bsdtar -xf "${ARTIFACT_TMP}" -C "${tmp_dir}"
else
echo "[code-server][ERROR] unzip or bsdtar is required for zip artifacts"
exit 127
fi
;;
*)
echo "[code-server][ERROR] unsupported artifact format: ${ARTIFACT_NAME}"
exit 1
;;
esac
EXTRACTED_DIR="$(find "${tmp_dir}" -maxdepth 1 -type d -name 'code-server*' | head -n1)"
if [ -z "${EXTRACTED_DIR}" ]; then
@ -53,17 +68,26 @@ extract_artifact() {
mv "${EXTRACTED_DIR}"/* "${SERVICE_ROOT}/"
rm -rf "${tmp_dir}"
echo "[code-server] action=artifact_extract step=extract status=ok source=${ARTIFACT_TMP} target=${SERVICE_ROOT}"
}
# Check that a regular file exists at ${BIN}.
# Prints an error and exits the script with status 1 when it does not.
validate_artifact_shape() {
  if [ -f "${BIN}" ]; then
    return 0
  fi

  echo "[code-server][ERROR] artifact missing expected binary path ${BIN}; expected a runnable release archive, got an incompatible archive"
  exit 1
}
# --------------------------------------------------
# Idempotency
# --------------------------------------------------
if [ ! -x "${BIN}" ]; then
echo "[code-server] action=artifact_install target=${SERVICE_ROOT} status=begin"
download_artifact
echo "[code-server] extracting ${ARTIFACT_TMP}"
rm -rf "${SERVICE_ROOT}"
mkdir -p "${SERVICE_ROOT}"
extract_artifact
validate_artifact_shape
chmod +x "${BIN}"
ln -sfn "${BIN}" "${LINK_PATH}"
fi
@ -74,11 +98,13 @@ if [ -f "${PID_FILE}" ] && kill -0 "$(cat "${PID_FILE}")" 2>/dev/null; then
echo "[code-server] already running"
else
rm -f "${PID_FILE}"
nohup "${BIN}" --bind-addr 0.0.0.0:8080 "${WORKSPACE_DIR}" >"${LOG_FILE}" 2>&1 &
echo "[code-server] action=service_launch command=\"${BIN} --bind-addr 0.0.0.0:${PORT} --auth none --disable-telemetry ${WORKSPACE_DIR}\""
HOME="/home/dev" USER="dev" LOGNAME="dev" \
nohup "${BIN}" --bind-addr "0.0.0.0:${PORT}" --auth none --disable-telemetry "${WORKSPACE_DIR}" >"${LOG_FILE}" 2>&1 &
echo $! > "${PID_FILE}"
fi
touch "${MARKER}"
rm -f "${ARTIFACT_TMP}"
echo "[code-server] install complete"
echo "[code-server] action=artifact_install target=${SERVICE_ROOT} status=ok"

View File

@ -21,7 +21,7 @@ fail() {
}
download_installer() {
log "Downloading ${INSTALLER_URL}"
log "action=artifact_fetch step=download_installer url=${INSTALLER_URL} target=${INSTALLER_TMP}"
if command -v curl >/dev/null 2>&1; then
curl -fL "${INSTALLER_URL}" -o "${INSTALLER_TMP}"
elif command -v wget >/dev/null 2>&1; then
@ -30,16 +30,17 @@ download_installer() {
fail "curl or wget is required"
fi
chmod +x "${INSTALLER_TMP}"
log "action=artifact_fetch step=download_installer status=ok url=${INSTALLER_URL} target=${INSTALLER_TMP}"
}
mkdir -p "${RUNTIME_ROOT}"
if [[ -d "${DEST_DIR}" ]]; then
log "Version already installed at ${DEST_DIR}"
log "action=artifact_install runtime=${RUNTIME} version=${RUNTIME_VERSION} target=${DEST_DIR} status=already_installed"
else
mkdir -p "${DEST_DIR}"
download_installer
log "Installing dotnet ${RUNTIME_VERSION} into ${DEST_DIR}"
log "action=artifact_install runtime=${RUNTIME} version=${RUNTIME_VERSION} target=${DEST_DIR} step=install status=begin"
bash "${INSTALLER_TMP}" --channel "${RUNTIME_VERSION}" --install-dir "${DEST_DIR}"
fi
@ -51,4 +52,4 @@ chmod +x /etc/profile.d/zlh-dotnet.sh
chmod -R 755 "${DEST_DIR}"
rm -f "${INSTALLER_TMP}"
log "Install complete"
log "action=artifact_install runtime=${RUNTIME} version=${RUNTIME_VERSION} target=${DEST_DIR} status=ok"

View File

@ -59,7 +59,7 @@ download_artifact() {
url="$(artifact_url)"
out="/tmp/$(artifact_name)"
log "Downloading ${url}"
log "action=artifact_fetch step=download url=${url} target=${out}"
if command -v curl >/dev/null 2>&1; then
curl -fL "${url}" -o "${out}"
elif command -v wget >/dev/null 2>&1; then
@ -67,13 +67,14 @@ download_artifact() {
else
fail "curl or wget is required"
fi
log "action=artifact_fetch step=download status=ok url=${url} target=${out}"
}
extract_artifact() {
local out
out="/tmp/$(artifact_name)"
log "Extracting to ${DEST_DIR}"
log "action=artifact_extract step=extract source=${out} target=${DEST_DIR}"
mkdir -p "${DEST_DIR}"
case "${ARCHIVE_EXT}" in
@ -84,6 +85,7 @@ extract_artifact() {
fail "Unsupported archive type: ${ARCHIVE_EXT}"
;;
esac
log "action=artifact_extract step=extract status=ok source=${out} target=${DEST_DIR}"
}
############################################
@ -132,12 +134,12 @@ ensure_ssh_host_keys() {
############################################
install_runtime() {
log "Installing ${RUNTIME} ${RUNTIME_VERSION}"
log "action=artifact_install runtime=${RUNTIME} version=${RUNTIME_VERSION} target=${DEST_DIR} status=begin"
mkdir -p "${RUNTIME_ROOT}"
if [[ -d "${DEST_DIR}" ]]; then
log "Version already installed at ${DEST_DIR}"
log "action=artifact_install runtime=${RUNTIME} version=${RUNTIME_VERSION} target=${DEST_DIR} status=already_installed"
else
download_artifact
extract_artifact
@ -149,5 +151,5 @@ install_runtime() {
ensure_ssh_host_keys
log "Install complete"
log "action=artifact_install runtime=${RUNTIME} version=${RUNTIME_VERSION} target=${DEST_DIR} status=ok"
}