zlh-agent/internal/system/process.go
2026-03-15 11:06:08 +00:00

422 lines
9.1 KiB
Go
Executable File

package system
import (
"bytes"
"errors"
"fmt"
"log"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
"zlh-agent/internal/provision"
"zlh-agent/internal/provision/devcontainer"
"zlh-agent/internal/runtime"
"zlh-agent/internal/state"
)
/* --------------------------------------------------------------------------
GLOBAL PROCESS STATE
----------------------------------------------------------------------------*/
// Package-level process state. Every function in this file that reads or
// writes these variables does so while holding mu.
var (
// mu guards all of the variables declared in this block.
mu sync.Mutex
// serverCmd is the currently tracked server process; nil when no server is
// running (StartServer refuses to start while it is non-nil).
serverCmd *exec.Cmd
// serverPTY is the PTY returned by runtime.CreatePTY for serverCmd; console
// input for the server is written to it.
serverPTY *os.File
// serverStartTime is when serverCmd was started; it is used to compute the
// uptime reported in crash info and is reset to the zero time on exit.
serverStartTime time.Time
// devCmd is the currently tracked dev shell process; nil when none is running.
devCmd *exec.Cmd
// devPTY is the PTY returned by runtime.CreatePTY for devCmd.
devPTY *os.File
)
// GetServerPID returns the PID of the tracked server process.
//
// The boolean is false (and the PID 0) when no server command is tracked or
// the command has no associated process.
func GetServerPID() (int, bool) {
	mu.Lock()
	defer mu.Unlock()

	if serverCmd != nil && serverCmd.Process != nil {
		return serverCmd.Process.Pid, true
	}
	return 0, false
}
/* --------------------------------------------------------------------------
StartServer (fixed)
----------------------------------------------------------------------------*/
// StartServer launches the server's start.sh script with /bin/bash on a new
// PTY and records it as the tracked server process.
//
// It returns an error if a server process is already tracked or if the PTY
// could not be created. On success the agent state is set to running and a
// background goroutine waits for the process to exit; that goroutine then
// closes the PTY, updates the agent state (intentional stop, crash, or clean
// exit) and clears the tracked process.
func StartServer(cfg *state.Config) error {
	mu.Lock()
	defer mu.Unlock()

	// Only one server process is tracked at a time.
	if serverCmd != nil {
		return fmt.Errorf("server already running")
	}

	serverDir := provision.ServerDir(*cfg)
	log.Printf("[process] vmid=%d server start requested dir=%s", cfg.VMID, serverDir)

	proc := exec.Command("/bin/bash", filepath.Join(serverDir, "start.sh"))
	proc.Dir = serverDir

	pty, err := runtime.CreatePTY(proc)
	if err != nil {
		return fmt.Errorf("start server: %w", err)
	}

	serverCmd = proc
	serverPTY = pty
	serverStartTime = time.Now()

	state.ClearIntentionalStop()
	state.SetState(state.StateRunning)
	state.SetError(nil)
	state.SetReadyState(false, "", "")
	log.Printf("[process] vmid=%d server process started", cfg.VMID)

	go func() {
		waitErr := proc.Wait()

		mu.Lock()
		defer mu.Unlock()

		if serverPTY != nil {
			_ = serverPTY.Close()
		}

		switch {
		case state.IsIntentionalStop():
			// The exit was requested, so a non-nil wait error is not a crash.
			state.ClearIntentionalStop()
			state.SetState(state.StateIdle)
			state.SetReadyState(false, "", "")
			log.Printf("[process] vmid=%d server exited after intentional stop", cfg.VMID)

		case waitErr != nil:
			crash := buildCrashInfo(cfg, waitErr, serverStartTime)
			state.SetLastCrash(crash)
			log.Printf("[process] server crashed vmid=%d exit_code=%d signal=%d uptime=%ds", cfg.VMID, crash.ExitCode, crash.Signal, crash.UptimeSeconds)
			if len(crash.LogTail) > 0 {
				log.Printf("[process] crash log tail:")
				tail := lastLines(crash.LogTail, 20)
				for i := range tail {
					log.Printf("[process] %s", tail[i])
				}
			}
			state.RecordCrash(waitErr)

		default:
			state.SetState(state.StateIdle)
			state.SetReadyState(false, "", "")
			log.Printf("[process] vmid=%d server exited cleanly", cfg.VMID)
		}

		// Forget the process so a new server can be started.
		serverCmd = nil
		serverPTY = nil
		serverStartTime = time.Time{}
	}()

	return nil
}
/* --------------------------------------------------------------------------
StopServer
----------------------------------------------------------------------------*/
// StopServer asks the tracked server to shut down and force-kills it if it is
// still running once the grace period has elapsed.
//
// It marks the stop as intentional, writes "save-all" and then (two seconds
// later) "stop" to the server console when a PTY is available, and waits up
// to four more seconds for the process to exit. It returns an error only when
// no server is tracked.
//
// mu is released while waiting so that console writes, PID lookups and the
// exit-watching goroutine are not blocked for the whole grace period, and the
// process is killed only if it is still the tracked one at the deadline.
func StopServer() error {
	const (
		saveDelay    = 2 * time.Second
		stopGrace    = 4 * time.Second
		pollInterval = 100 * time.Millisecond
	)

	mu.Lock()
	cmd := serverCmd
	if cmd == nil {
		mu.Unlock()
		return fmt.Errorf("server not running")
	}

	cfg, _ := state.LoadConfig()
	if cfg != nil {
		log.Printf("[process] vmid=%d stop requested", cfg.VMID)
	} else {
		log.Printf("[process] stop requested")
	}

	// Mark the stop as intentional before releasing the lock so the exit
	// goroutine never mistakes this shutdown for a crash.
	state.SetState(state.StateStopping)
	state.MarkIntentionalStop()
	state.SetReadyState(false, "", "")

	// Try graceful stop
	hasConsole := serverPTY != nil
	if hasConsole {
		_ = runtime.Write(serverPTY, []byte("save-all\n"))
	}
	mu.Unlock()

	if hasConsole {
		time.Sleep(saveDelay)

		mu.Lock()
		// Only write if the process we were asked to stop is still tracked.
		if serverCmd == cmd && serverPTY != nil {
			_ = runtime.Write(serverPTY, []byte("stop\n"))
		}
		mu.Unlock()
	}

	// Wait for the exit goroutine to clear the tracked process; force kill
	// if that has not happened by the deadline.
	deadline := time.Now().Add(stopGrace)
	for {
		mu.Lock()
		exited := serverCmd != cmd
		if !exited && !time.Now().Before(deadline) {
			if cmd.Process != nil {
				// Best effort: Kill reports an error if the process has
				// already finished, which is fine here.
				_ = cmd.Process.Kill()
			}
			mu.Unlock()
			return nil
		}
		mu.Unlock()

		if exited {
			return nil
		}
		time.Sleep(pollInterval)
	}
}
// WaitForServerExit blocks until no server process is tracked any more,
// polling every 200ms.
//
// It returns nil as soon as the tracked process has been cleared, or an error
// if the process is still tracked after timeout has elapsed.
func WaitForServerExit(timeout time.Duration) error {
	const pollInterval = 200 * time.Millisecond

	giveUpAt := time.Now().Add(timeout)

	// stillTracked reports, under the lock, whether a server is tracked.
	stillTracked := func() bool {
		mu.Lock()
		defer mu.Unlock()
		return serverCmd != nil
	}

	for stillTracked() {
		if time.Now().After(giveUpAt) {
			return fmt.Errorf("timeout waiting for server process to exit")
		}
		time.Sleep(pollInterval)
	}
	return nil
}
/* --------------------------------------------------------------------------
RestartServer
----------------------------------------------------------------------------*/
// RestartServer stops the tracked server (if any), waits for it to exit, and
// then starts it again with cfg.
//
// StopServer returning an error means no server was running, in which case
// the server is simply started. When a stop was issued, the tracked process
// is only cleared asynchronously once the process has been reaped, so this
// waits for that to happen; starting immediately would fail with
// "server already running".
func RestartServer(cfg *state.Config) error {
	const exitTimeout = 10 * time.Second

	if err := StopServer(); err == nil {
		if err := WaitForServerExit(exitTimeout); err != nil {
			return fmt.Errorf("restart server: %w", err)
		}
	}
	return StartServer(cfg)
}
/* --------------------------------------------------------------------------
SendConsoleCommand
----------------------------------------------------------------------------*/
// SendConsoleCommand writes cmd followed by a newline to the server console.
//
// It returns an error when no server PTY is available, otherwise whatever
// runtime.Write returns.
func SendConsoleCommand(cmd string) error {
	line := []byte(cmd + "\n")

	mu.Lock()
	defer mu.Unlock()

	if pty := serverPTY; pty != nil {
		return runtime.Write(pty, line)
	}
	return fmt.Errorf("server console not available")
}
/* --------------------------------------------------------------------------
Dev Shell PTY
----------------------------------------------------------------------------*/
// StartDevShell returns the PTY of the interactive dev shell, starting the
// shell first if none is currently tracked.
//
// The shell is /bin/bash (login, interactive) when that path exists and
// /bin/sh (interactive) otherwise. It runs in devcontainer.WorkspaceDir with
// the uid/gid of devcontainer.DevUser and with HOME, USER, LOGNAME and TERM
// set on top of the agent's environment.
//
// An error is returned if the dev environment cannot be prepared, the dev
// user cannot be resolved, or the PTY cannot be created. On success a
// background goroutine waits for the shell to exit and cleans up after it.
func StartDevShell() (*os.File, error) {
	mu.Lock()
	defer mu.Unlock()

	// Reuse the running shell if there is one.
	if devPTY != nil && devCmd != nil {
		return devPTY, nil
	}

	if err := devcontainer.EnsureDevUserEnvironment(); err != nil {
		return nil, fmt.Errorf("prepare dev environment: %w", err)
	}

	// Prefer bash; fall back to sh when bash is not installed.
	shell := "/bin/bash"
	if _, err := os.Stat(shell); err != nil {
		shell = "/bin/sh"
	}

	var cmd *exec.Cmd
	if shell == "/bin/bash" {
		cmd = exec.Command(shell, "-l", "-i")
	} else {
		cmd = exec.Command(shell, "-i")
	}
	cmd.Dir = devcontainer.WorkspaceDir

	devUser, err := user.Lookup(devcontainer.DevUser)
	if err != nil {
		return nil, fmt.Errorf("lookup dev user: %w", err)
	}
	uid, err := strconv.Atoi(devUser.Uid)
	if err != nil {
		return nil, fmt.Errorf("parse dev uid: %w", err)
	}
	gid, err := strconv.Atoi(devUser.Gid)
	if err != nil {
		return nil, fmt.Errorf("parse dev gid: %w", err)
	}

	cmd.Env = append(os.Environ(),
		"HOME="+devcontainer.DevHome,
		"USER="+devcontainer.DevUser,
		"LOGNAME="+devcontainer.DevUser,
		"TERM=xterm-256color",
	)
	// Run the shell as the dev user rather than as the agent's user.
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Credential: &syscall.Credential{
			Uid: uint32(uid),
			Gid: uint32(gid),
		},
	}

	ptmx, err := runtime.CreatePTY(cmd)
	if err != nil {
		return nil, fmt.Errorf("start dev shell: %w", err)
	}

	devCmd = cmd
	devPTY = ptmx
	state.SetState(state.StateRunning)
	state.SetError(nil)

	go func() {
		waitErr := cmd.Wait()

		mu.Lock()
		defer mu.Unlock()

		// Close this shell's own PTY rather than whatever devPTY currently
		// points at. Closing an already-closed file only returns an error.
		_ = ptmx.Close()

		// If the tracked shell is no longer this one, it was stopped
		// explicitly (and possibly replaced by a newer shell). In that case
		// the shared state does not belong to this goroutine any more, and
		// the exit must not be recorded as a crash.
		if devCmd != cmd {
			return
		}

		if waitErr != nil {
			state.RecordCrash(waitErr)
		} else {
			state.SetState(state.StateIdle)
		}
		devCmd = nil
		devPTY = nil
	}()

	return devPTY, nil
}
// GetConsolePTY returns the PTY backing the console for cfg.
//
// For a container of type "dev" this is the dev shell's PTY, which is started
// on demand through StartDevShell. For any other type it is the server PTY,
// and an error is returned when no server PTY is available.
func GetConsolePTY(cfg *state.Config) (*os.File, error) {
	if cfg.ContainerType == "dev" {
		return StartDevShell()
	}

	mu.Lock()
	defer mu.Unlock()

	if pty := serverPTY; pty != nil {
		return pty, nil
	}
	return nil, fmt.Errorf("server console not available")
}
// WriteConsoleInput writes input to the console for cfg, terminated by exactly
// one newline appended here (a single trailing newline already present on
// input is dropped first).
//
// Input goes to the dev shell PTY for a container of type "dev" and to the
// server PTY otherwise. An error is returned when the target PTY is not
// available; otherwise the result of runtime.Write is returned.
func WriteConsoleInput(cfg *state.Config, input string) error {
	payload := []byte(strings.TrimSuffix(input, "\n") + "\n")
	isDev := cfg.ContainerType == "dev"

	mu.Lock()
	defer mu.Unlock()

	switch {
	case isDev && devPTY == nil:
		return fmt.Errorf("dev shell not available")
	case isDev:
		return runtime.Write(devPTY, payload)
	case serverPTY == nil:
		return fmt.Errorf("server console not available")
	default:
		return runtime.Write(serverPTY, payload)
	}
}
/* --------------------------------------------------------------------------
Stop Dev Shell
----------------------------------------------------------------------------*/
// StopDevShell terminates the tracked dev shell, if any.
//
// It writes "exit" to the shell's PTY when one is available, waits one
// second, kills the process, closes the PTY, clears the tracked shell and
// sets the agent state to idle. It is a no-op when no dev shell is tracked
// and always returns nil. mu is held for the whole call.
func StopDevShell() error {
	mu.Lock()
	defer mu.Unlock()

	if devCmd == nil {
		return nil
	}

	// The lock is held throughout, so these locals stay in sync with the
	// package-level variables until they are cleared below.
	shell, pty := devCmd, devPTY

	if pty != nil {
		_ = runtime.Write(pty, []byte("exit\n"))
	}

	time.Sleep(1 * time.Second)

	if proc := shell.Process; proc != nil {
		_ = proc.Kill()
	}
	if pty != nil {
		_ = pty.Close()
	}

	devPTY = nil
	devCmd = nil
	state.SetState(state.StateIdle)
	return nil
}
// buildCrashInfo assembles a crash report for a server process whose Wait
// returned waitErr.
//
// The report carries the current UTC time, the exit code and signal derived
// from waitErr, the whole seconds elapsed since startedAt (0 when startedAt
// is the zero time or lies in the future), and up to 40 trailing lines of the
// latest log for cfg.
func buildCrashInfo(cfg *state.Config, waitErr error, startedAt time.Time) *state.CrashInfo {
	code, sig := extractExitDetails(waitErr)

	var uptimeSecs int64
	if !startedAt.IsZero() {
		if elapsed := int64(time.Since(startedAt).Seconds()); elapsed > 0 {
			uptimeSecs = elapsed
		}
	}

	info := &state.CrashInfo{}
	info.Time = time.Now().UTC()
	info.ExitCode = code
	info.Signal = sig
	info.UptimeSeconds = uptimeSecs
	info.LogTail = tailLogLines(cfg, 40)
	return info
}
// extractExitDetails returns the exit code and terminating signal carried by
// err.
//
// When err is not (and does not wrap) an *exec.ExitError the result is
// (-1, 0). Otherwise the exit code is the one reported by the ExitError, and
// the signal is non-zero only when the underlying syscall.WaitStatus says the
// process was terminated by a signal.
func extractExitDetails(err error) (int, int) {
	var exitErr *exec.ExitError
	if !errors.As(err, &exitErr) {
		return -1, 0
	}

	sig := 0
	if ws, isWaitStatus := exitErr.Sys().(syscall.WaitStatus); isWaitStatus && ws.Signaled() {
		sig = int(ws.Signal())
	}
	return exitErr.ExitCode(), sig
}
// tailLogLines returns at most maxLines trailing non-blank lines from the
// last 64 KiB requested from TailLatestLog for cfg.
//
// Each line has surrounding whitespace trimmed and lines that are empty after
// trimming are dropped. The result is nil when the log cannot be read or is
// empty.
func tailLogLines(cfg *state.Config, maxLines int) []string {
	data, err := TailLatestLog(cfg, 64*1024)
	if err != nil || len(data) == 0 {
		return nil
	}

	segments := bytes.Split(data, []byte{'\n'})
	kept := make([]string, 0, len(segments))
	for i := range segments {
		if text := strings.TrimSpace(string(segments[i])); text != "" {
			kept = append(kept, text)
		}
	}
	return lastLines(kept, maxLines)
}
// lastLines returns a copy of the final maxLines elements of lines, or a copy
// of all of them when there are fewer than maxLines.
//
// The returned slice never shares its backing array with lines. A maxLines
// of zero or less yields nil instead of slicing out of range.
func lastLines(lines []string, maxLines int) []string {
	if maxLines <= 0 {
		return nil
	}
	start := len(lines) - maxLines
	if start < 0 {
		start = 0
	}
	return append([]string(nil), lines[start:]...)
}