zlh-agent/internal/system/process.go
2026-04-19 21:51:00 +00:00

565 lines
13 KiB
Go
Executable File

package system
import (
"bytes"
"errors"
"fmt"
"log"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
mcstatus "zlh-agent/internal/minecraft"
"zlh-agent/internal/provision"
"zlh-agent/internal/provision/devcontainer"
"zlh-agent/internal/runtime"
"zlh-agent/internal/state"
)
// ReadinessTimeout is the default time StartServerReady waits for the server
// to report ready (see readinessTimeoutForConfig).
const ReadinessTimeout = 60 * time.Second
// ForgeReadinessTimeout is the longer readiness window that
// readinessTimeoutForConfig selects for Minecraft forge/neoforge variants.
const ForgeReadinessTimeout = 5 * time.Minute
/* --------------------------------------------------------------------------
GLOBAL PROCESS STATE
----------------------------------------------------------------------------*/
// Package-level process state. The functions in this file take mu before
// reading or writing any of the variables below.
var (
// mu serializes access to the server and dev-shell globals.
mu sync.Mutex
// serverCmd is the running game server command; nil means no server is tracked.
serverCmd *exec.Cmd
// serverPTY is the PTY attached to serverCmd, used for console input.
serverPTY *os.File
// serverStartTime is recorded by StartServer when the process is launched.
serverStartTime time.Time
// devCmd is the interactive dev shell command; nil means no shell is tracked.
devCmd *exec.Cmd
// devPTY is the PTY attached to devCmd.
devPTY *os.File
)
// GetServerPID reports the PID of the tracked server process.
// The boolean is false when no server command is tracked or the command has
// no associated process.
func GetServerPID() (int, bool) {
	mu.Lock()
	defer mu.Unlock()

	if serverCmd != nil && serverCmd.Process != nil {
		return serverCmd.Process.Pid, true
	}
	return 0, false
}
/* --------------------------------------------------------------------------
StartServer (fixed)
----------------------------------------------------------------------------*/
// StartServer launches the game server described by cfg on a new PTY and
// hands the process to gameSupervisor.Watch in a background goroutine.
//
// It returns an error when cfg is nil, when a server is already tracked, when
// the command cannot be built, or when the PTY/process cannot be created.
// On success the global state is set to running, any previous error and
// readiness information are cleared, and the intentional-stop marker is reset.
func StartServer(cfg *state.Config) error {
	// Guard before dereferencing cfg below; mirrors the check in WaitForReady.
	if cfg == nil {
		return fmt.Errorf("config required")
	}

	mu.Lock()
	defer mu.Unlock()

	// Already running?
	if serverCmd != nil {
		return fmt.Errorf("server already running")
	}

	dir := provision.ServerDir(*cfg)
	startScript := filepath.Join(dir, "start.sh")
	log.Printf("[process] vmid=%d server start requested dir=%s", cfg.VMID, dir)

	cmd, err := buildServerCommand(cfg, dir, startScript)
	if err != nil {
		return err
	}
	ptmx, err := runtime.CreatePTY(cmd)
	if err != nil {
		return fmt.Errorf("start server: %w", err)
	}

	serverCmd = cmd
	serverPTY = ptmx
	serverStartTime = time.Now()

	// The supervisor is only reset when we are not coming out of a crashed
	// state. NOTE(review): presumably so crash/restart bookkeeping survives
	// an automatic restart — confirm against gameSupervisor.
	if state.GetState() != state.StateCrashed {
		gameSupervisor.Reset()
	}
	state.ClearIntentionalStop()
	state.SetState(state.StateRunning)
	state.SetError(nil)
	state.SetReadyState(false, "", "")
	log.Printf("[process] vmid=%d server process started", cfg.VMID)

	go gameSupervisor.Watch(cfg, cmd, ptmx, serverStartTime)
	return nil
}
// StartServerReady starts the server and then blocks until it reports ready
// or the variant-specific readiness timeout elapses. A readiness failure is
// stored as the current error, moves the state to StateError, and is returned.
func StartServerReady(cfg *state.Config) error {
	if err := StartServer(cfg); err != nil {
		return err
	}

	readyErr := WaitForReady(cfg, readinessTimeoutForConfig(cfg))
	if readyErr == nil {
		return nil
	}
	state.SetError(readyErr)
	state.SetState(state.StateError)
	return readyErr
}
// readinessTimeoutForConfig picks the readiness window for cfg:
// ForgeReadinessTimeout for Minecraft game containers whose variant is forge
// or neoforge, ReadinessTimeout for everything else (including a nil cfg).
func readinessTimeoutForConfig(cfg *state.Config) time.Duration {
	if cfg == nil {
		return ReadinessTimeout
	}

	isMinecraftGame := strings.EqualFold(cfg.ContainerType, "game") &&
		strings.EqualFold(cfg.Game, "minecraft")
	if !isMinecraftGame {
		return ReadinessTimeout
	}

	switch strings.ToLower(strings.TrimSpace(cfg.Variant)) {
	case "forge", "neoforge":
		return ForgeReadinessTimeout
	default:
		return ReadinessTimeout
	}
}
// WaitForReady blocks until the server answers the Minecraft status probe or
// timeout elapses, recording the outcome via state.SetReadyState.
//
// A nil cfg yields an error. Containers that are not Minecraft game
// containers have no readiness probe and return nil immediately.
func WaitForReady(cfg *state.Config, timeout time.Duration) error {
	if cfg == nil {
		return fmt.Errorf("config required")
	}

	isMinecraftGame := strings.EqualFold(cfg.ContainerType, "game") &&
		strings.EqualFold(cfg.Game, "minecraft")
	if !isMinecraftGame {
		return nil
	}

	// Probe every 3 seconds until the overall timeout is reached.
	err := mcstatus.WaitUntilReady(*cfg, timeout, 3*time.Second)
	if err != nil {
		state.SetReadyState(false, "minecraft_ping", err.Error())
		return err
	}
	state.SetReadyState(true, "minecraft_ping", "")
	return nil
}
// buildServerCommand constructs the command used to launch the server in dir.
//
// Forge/neoforge variants and every non-Minecraft game run startScript through
// /bin/bash. Other Minecraft variants run the Java binary under
// provision.JavaRoot directly against the resolved server jar, with heap
// bounds derived from cfg.MemoryMB.
func buildServerCommand(cfg *state.Config, dir, startScript string) (*exec.Cmd, error) {
	loader := strings.ToLower(strings.TrimSpace(cfg.Variant))
	scriptLaunch := loader == "forge" || loader == "neoforge" ||
		!strings.EqualFold(cfg.Game, "minecraft")

	if scriptLaunch {
		script := exec.Command("/bin/bash", startScript)
		script.Dir = dir
		return script, nil
	}

	jarPath, err := resolveMinecraftRuntimeJar(provision.MinecraftRuntime(*cfg), dir)
	if err != nil {
		return nil, err
	}
	xms, xmx := minecraftHeap(cfg.MemoryMB)

	javaArgs := []string{
		fmt.Sprintf("-Xms%dM", xms),
		fmt.Sprintf("-Xmx%dM", xmx),
		"-jar", jarPath, "nogui",
	}
	java := exec.Command(filepath.Join(provision.JavaRoot, "java"), javaArgs...)
	java.Dir = dir
	return java, nil
}
// resolveMinecraftRuntimeJar returns the path of the jar to launch for the
// given runtime type. Every runtime type currently resolves to server.jar
// inside dir; the runtime type is only logged. The error is always nil.
func resolveMinecraftRuntimeJar(runtimeType, dir string) (string, error) {
	jarPath := filepath.Join(dir, "server.jar")
	log.Printf("[runtime] type=%s jar=server.jar", runtimeType)
	return jarPath, nil
}
// minecraftHeap derives the JVM heap bounds, in megabytes, from the
// container memory allotment.
//
// A non-positive memoryMB falls back to 2048. xmx is the full allotment; xms
// is half of it, raised to at least 512 but never above xmx.
func minecraftHeap(memoryMB int) (xms int, xmx int) {
	xmx = memoryMB
	if xmx <= 0 {
		xmx = 2048
	}

	xms = xmx / 2
	if xms < 512 {
		xms = 512
	}
	if xms > xmx {
		xms = xmx
	}
	return xms, xmx
}
/* --------------------------------------------------------------------------
StopServer
----------------------------------------------------------------------------*/
// StopServer asks the tracked server to shut down and then force-kills it.
//
// It returns an error when no server is tracked. Otherwise it marks the stop
// as intentional, moves the state to StateStopping, clears readiness, writes
// "save-all flush" and "stop" to the console, waits, and finally kills the
// process. It does not clear serverCmd itself; use WaitForServerExit to wait
// until the process is no longer tracked.
//
// The mutex is held only while the globals are snapshotted and the state is
// updated. The multi-second waits run without it so that other users of mu
// (GetServerPID, SendConsoleCommand, WaitForServerExit, ...) are not blocked
// for the whole shutdown window.
func StopServer() error {
	mu.Lock()
	cmd, pty := serverCmd, serverPTY
	if cmd == nil {
		mu.Unlock()
		return fmt.Errorf("server not running")
	}

	// The config is only used to enrich the log line, so a load failure is
	// deliberately ignored.
	cfg, _ := state.LoadConfig()
	if cfg != nil {
		log.Printf("[process] vmid=%d stop requested", cfg.VMID)
	} else {
		log.Printf("[process] stop requested")
	}

	state.SetState(state.StateStopping)
	state.MarkIntentionalStop()
	state.SetReadyState(false, "", "")
	mu.Unlock()

	// Try graceful stop. Write errors are ignored on purpose: the console may
	// already be gone, and the kill below is the fallback.
	if pty != nil {
		_ = runtime.Write(pty, []byte("save-all flush\n"))
		time.Sleep(2 * time.Second)
		_ = runtime.Write(pty, []byte("stop\n"))
	}

	// Give the server a moment to exit on its own.
	time.Sleep(4 * time.Second)

	// Force kill the process captured above. Killing an already-exited
	// process only yields an error, which is ignored.
	if cmd.Process != nil {
		_ = cmd.Process.Kill()
	}
	return nil
}
// WaitForServerExit polls every 200ms until no server command is tracked.
// It returns nil once serverCmd is nil and an error when timeout elapses
// first. A non-positive timeout is reported as a timeout immediately.
func WaitForServerExit(timeout time.Duration) error {
	timedOut := func() error {
		return fmt.Errorf("timeout waiting for server process to exit")
	}
	if timeout <= 0 {
		return timedOut()
	}

	// tracked reports, under the lock, whether a server command is still set.
	tracked := func() bool {
		mu.Lock()
		defer mu.Unlock()
		return serverCmd != nil
	}

	for deadline := time.Now().Add(timeout); tracked(); time.Sleep(200 * time.Millisecond) {
		if time.Now().After(deadline) {
			return timedOut()
		}
	}
	return nil
}
/* --------------------------------------------------------------------------
RestartServer
----------------------------------------------------------------------------*/
// RestartServer stops the server if one is running, waits up to 20 seconds
// for it to stop being tracked, and then starts it again and waits for
// readiness.
func RestartServer(cfg *state.Config) error {
	// A stop error only means there was nothing to stop; that is fine for a
	// restart, so it is deliberately discarded.
	_ = StopServer()

	if err := WaitForServerExit(20 * time.Second); err != nil {
		return err
	}
	return StartServerReady(cfg)
}
// StopServerAndWait stops the server and then waits up to timeout for it to
// stop being tracked. A failure to stop is returned without waiting.
func StopServerAndWait(timeout time.Duration) error {
	stopErr := StopServer()
	if stopErr != nil {
		return stopErr
	}
	return WaitForServerExit(timeout)
}
/* --------------------------------------------------------------------------
SendConsoleCommand
----------------------------------------------------------------------------*/
// SendConsoleCommand writes cmd followed by a newline to the server console.
// It returns an error when no server PTY is available or the write fails.
func SendConsoleCommand(cmd string) error {
	mu.Lock()
	defer mu.Unlock()

	if console := serverPTY; console != nil {
		return runtime.Write(console, []byte(cmd+"\n"))
	}
	return fmt.Errorf("server console not available")
}
// RunMinecraftSaveOff sends "save-all flush" and then "save-off" to the
// server console, pausing after each command (2s and 500ms respectively).
// The first console error aborts the sequence and is returned.
func RunMinecraftSaveOff() error {
	steps := []struct {
		command string
		settle  time.Duration
	}{
		{"save-all flush", 2 * time.Second},
		{"save-off", 500 * time.Millisecond},
	}

	for _, step := range steps {
		if err := SendConsoleCommand(step.command); err != nil {
			return err
		}
		time.Sleep(step.settle)
	}
	return nil
}
// RunMinecraftSaveOn sends "save-on" to the server console and, when the
// write succeeds, pauses 500ms before returning.
func RunMinecraftSaveOn() error {
	err := SendConsoleCommand("save-on")
	if err == nil {
		time.Sleep(500 * time.Millisecond)
	}
	return err
}
/* --------------------------------------------------------------------------
Dev Shell PTY
----------------------------------------------------------------------------*/
// StartDevShell returns the PTY of the interactive dev shell, starting the
// shell first when none is tracked.
//
// The shell is /bin/bash (login + interactive) when present, otherwise
// /bin/sh, started in devcontainer.WorkspaceDir with the credentials of
// devcontainer.DevUser and HOME/USER/LOGNAME/TERM set on top of the agent's
// environment. Errors are returned when the dev environment cannot be
// prepared, the dev user cannot be resolved, or the PTY cannot be created.
//
// A background goroutine waits for the shell to exit and then clears the
// tracked command/PTY, recording a crash on a non-nil wait error and setting
// the state to idle otherwise.
func StartDevShell() (*os.File, error) {
	mu.Lock()
	defer mu.Unlock()

	if devPTY != nil && devCmd != nil {
		return devPTY, nil
	}
	if err := devcontainer.EnsureDevUserEnvironment(); err != nil {
		return nil, fmt.Errorf("prepare dev environment: %w", err)
	}

	shell := "/bin/bash"
	if _, err := os.Stat(shell); err != nil {
		shell = "/bin/sh"
	}
	var cmd *exec.Cmd
	if shell == "/bin/bash" {
		cmd = exec.Command(shell, "-l", "-i")
	} else {
		cmd = exec.Command(shell, "-i")
	}
	cmd.Dir = devcontainer.WorkspaceDir

	devUser, err := user.Lookup(devcontainer.DevUser)
	if err != nil {
		return nil, fmt.Errorf("lookup dev user: %w", err)
	}
	uid, err := strconv.Atoi(devUser.Uid)
	if err != nil {
		return nil, fmt.Errorf("parse dev uid: %w", err)
	}
	gid, err := strconv.Atoi(devUser.Gid)
	if err != nil {
		return nil, fmt.Errorf("parse dev gid: %w", err)
	}

	cmd.Env = append(os.Environ(),
		"HOME="+devcontainer.DevHome,
		"USER="+devcontainer.DevUser,
		"LOGNAME="+devcontainer.DevUser,
		"TERM=xterm-256color",
	)
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Credential: &syscall.Credential{
			Uid: uint32(uid),
			Gid: uint32(gid),
		},
	}

	ptmx, err := runtime.CreatePTY(cmd)
	if err != nil {
		return nil, fmt.Errorf("start dev shell: %w", err)
	}
	devCmd = cmd
	devPTY = ptmx
	state.SetState(state.StateRunning)
	state.SetError(nil)

	go func() {
		waitErr := cmd.Wait()

		mu.Lock()
		defer mu.Unlock()

		// If the globals no longer point at this shell, StopDevShell already
		// cleaned up (and possibly a newer shell was started). Touching the
		// globals here would close the new shell's PTY, and the kill issued by
		// StopDevShell would be misreported as a crash.
		if devCmd != cmd {
			return
		}

		if devPTY != nil {
			_ = devPTY.Close()
		}
		if waitErr != nil {
			state.RecordCrash(waitErr)
		} else {
			state.SetState(state.StateIdle)
		}
		devCmd = nil
		devPTY = nil
	}()

	return ptmx, nil
}
// GetConsolePTY returns the PTY backing the console for cfg. Dev containers
// get the dev shell PTY (started on demand); every other container type gets
// the server PTY, or an error when no server console exists.
func GetConsolePTY(cfg *state.Config) (*os.File, error) {
	if cfg.ContainerType == "dev" {
		return StartDevShell()
	}

	mu.Lock()
	defer mu.Unlock()

	if console := serverPTY; console != nil {
		return console, nil
	}
	return nil, fmt.Errorf("server console not available")
}
// WriteConsoleInput writes input, terminated by exactly one trailing newline
// appended here, to the console for cfg: the dev shell PTY for dev
// containers, the server PTY otherwise. One trailing "\n" already present in
// input is dropped first so it is not doubled. An error is returned when the
// target PTY is not available or the write fails.
func WriteConsoleInput(cfg *state.Config, input string) error {
	payload := []byte(strings.TrimSuffix(input, "\n") + "\n")
	toDevShell := cfg.ContainerType == "dev"

	mu.Lock()
	defer mu.Unlock()

	var (
		target      *os.File
		unavailable error
	)
	if toDevShell {
		target, unavailable = devPTY, fmt.Errorf("dev shell not available")
	} else {
		target, unavailable = serverPTY, fmt.Errorf("server console not available")
	}

	if target == nil {
		return unavailable
	}
	return runtime.Write(target, payload)
}
/* --------------------------------------------------------------------------
Stop Dev Shell
----------------------------------------------------------------------------*/
// StopDevShell terminates the tracked dev shell, if any, and always returns
// nil. It writes "exit" to the shell, pauses one second, kills the process,
// closes the PTY, clears the tracked command/PTY, and sets the state to idle.
// The mutex is held for the whole call, including the one-second pause.
func StopDevShell() error {
	mu.Lock()
	defer mu.Unlock()

	shell := devCmd
	if shell == nil {
		return nil
	}

	// Ask the shell to leave on its own; a failed write is ignored because
	// the kill below is the fallback.
	if devPTY != nil {
		_ = runtime.Write(devPTY, []byte("exit\n"))
	}
	time.Sleep(1 * time.Second)

	if proc := shell.Process; proc != nil {
		_ = proc.Kill()
	}
	if pty := devPTY; pty != nil {
		_ = pty.Close()
		devPTY = nil
	}
	devCmd = nil

	state.SetState(state.StateIdle)
	return nil
}
// buildCrashInfo assembles a crash report from the process wait error, the
// time the process was started, and the last 40 non-empty log lines.
// Uptime is reported in whole seconds and is 0 when startedAt is the zero
// time or lies in the future.
func buildCrashInfo(cfg *state.Config, waitErr error, startedAt time.Time) *state.CrashInfo {
	code, sig := extractExitDetails(waitErr)

	var uptimeSeconds int64
	if !startedAt.IsZero() {
		if elapsed := int64(time.Since(startedAt).Seconds()); elapsed > 0 {
			uptimeSeconds = elapsed
		}
	}

	tail := tailLogLines(cfg, 40)

	info := &state.CrashInfo{
		Time:           time.Now().UTC(),
		ExitCode:       code,
		Signal:         sig,
		UptimeSeconds:  uptimeSeconds,
		Classification: classifyCrash(waitErr, code, sig, tail),
		LogTail:        tail,
	}
	return info
}
// classifyCrash maps a crash to a coarse category string.
//
// Log lines are scanned in order and the first line containing a known
// marker decides the result; within a single line, out-of-memory markers win
// over mod/plugin markers, which win over missing-dependency markers. Only
// when no log line matches are the wait error text, signal, and exit code
// consulted, in that order. All matching is case-insensitive.
func classifyCrash(waitErr error, exitCode, signal int, logTail []string) string {
	errText := strings.ToLower(fmt.Sprint(waitErr))

	// mentions reports whether text contains at least one of the markers.
	mentions := func(text string, markers ...string) bool {
		for _, marker := range markers {
			if strings.Contains(text, marker) {
				return true
			}
		}
		return false
	}

	for _, entry := range logTail {
		text := strings.ToLower(entry)
		if mentions(text, "outofmemoryerror", "java heap space", "gc overhead limit exceeded") {
			return "oom"
		}
		if mentions(text, "mixin", "failed to load mod", "loading errors encountered",
			"mod resolution encountered", "plugin") {
			return "mod_or_plugin_error"
		}
		if mentions(text, "classnotfoundexception", "noclassdeffounderror",
			"could not find or load main class", "no such file or directory") {
			return "missing_dependency"
		}
	}

	if strings.Contains(errText, "ready") {
		return "readiness_failure"
	}
	if signal == 9 {
		return "killed"
	}
	if signal != 0 {
		return "signaled_exit"
	}
	if exitCode != 0 {
		return "nonzero_exit"
	}
	return "unexpected_exit"
}
// extractExitDetails returns the exit code and terminating signal number
// carried by err. Both are 0 when err is nil or does not wrap an
// *exec.ExitError; the signal is 0 unless the wait status reports that the
// process was signaled.
func extractExitDetails(err error) (int, int) {
	var exitErr *exec.ExitError
	if err == nil || !errors.As(err, &exitErr) {
		return 0, 0
	}

	code := exitErr.ExitCode()
	sig := 0
	if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok && ws.Signaled() {
		sig = int(ws.Signal())
	}
	return code, sig
}
// tailLogLines reads up to the last 64 KiB of the latest log via
// TailLatestLog and returns at most maxLines of its trailing non-blank
// lines, each trimmed of surrounding whitespace. It returns nil when the log
// cannot be read or is empty.
func tailLogLines(cfg *state.Config, maxLines int) []string {
	data, err := TailLatestLog(cfg, 64*1024)
	if err != nil || len(data) == 0 {
		return nil
	}

	segments := bytes.Split(data, []byte("\n"))
	kept := make([]string, 0, len(segments))
	for i := range segments {
		if trimmed := strings.TrimSpace(string(segments[i])); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	return lastLines(kept, maxLines)
}
// lastLines returns a copy of the final maxLines entries of lines, or a copy
// of all of them when there are no more than maxLines. The result never
// aliases the input slice.
func lastLines(lines []string, maxLines int) []string {
	start := len(lines) - maxLines
	if start < 0 {
		start = 0
	}
	return append([]string(nil), lines[start:]...)
}