diff --git a/internal/alloy/alloy.go b/internal/alloy/alloy.go new file mode 100644 index 0000000..2163529 --- /dev/null +++ b/internal/alloy/alloy.go @@ -0,0 +1,360 @@ +package alloy + +import ( + "bytes" + "errors" + "fmt" + "log" + "net" + "os" + "os/exec" + "sort" + "strconv" + "strings" + "time" + + "zlh-agent/internal/state" +) + +const ( + ConfigPath = "/etc/alloy/config.alloy" + tmpConfigPath = "/etc/alloy/config.alloy.tmp" + metricsPort = "12345" + unixJob = "integrations/unix" + collector = "alloy" + alloyServiceName = "alloy" + updateAttempts = 3 + retryDelay = time.Second +) + +type Result struct { + Applied bool + Labels map[string]string +} + +var ( + writeFile = os.WriteFile + readFile = os.ReadFile + removeFile = os.Remove + renameFile = os.Rename + localIPFunc = localIPv4 + runCommand = runSystemCommand + dialTimeout = net.DialTimeout + sleepFunc = time.Sleep +) + +func EnsureConfig(cfg state.Config) (Result, error) { + var last Result + var lastErr error + + for attempt := 1; attempt <= updateAttempts; attempt++ { + result, err := ensureConfigOnce(cfg, attempt, last.Applied) + if err == nil { + return result, nil + } + last = result + lastErr = err + log.Printf("[alloy] vmid=%d action=update status=failed attempt=%d/%d err=%v", cfg.VMID, attempt, updateAttempts, err) + if attempt < updateAttempts { + sleepFunc(retryDelay) + } + } + + return last, lastErr +} + +func ensureConfigOnce(cfg state.Config, attempt int, forceRestart bool) (Result, error) { + log.Printf("[alloy] vmid=%d action=render status=attempt type=%s", cfg.VMID, cfg.ContainerType) + + if !managedContainerType(cfg.ContainerType) { + log.Printf("[alloy] vmid=%d action=render status=skipped type=%s", cfg.VMID, cfg.ContainerType) + return Result{}, nil + } + + labels, err := Labels(cfg) + if err != nil { + log.Printf("[alloy] vmid=%d action=render status=failed err=%v", cfg.VMID, err) + return Result{}, err + } + log.Printf("[alloy] vmid=%d action=render status=labels labels=%s", 
cfg.VMID, formatLabels(labels)) + + existing, err := readFile(ConfigPath) + if err != nil { + return Result{Labels: labels}, fmt.Errorf("read alloy config: %w", err) + } + + rendered, err := ReplaceExternalLabelsBlock(string(existing), labels) + if err != nil { + return Result{Labels: labels}, err + } + + if err := writeFile(tmpConfigPath, []byte(rendered), 0o644); err != nil { + return Result{Labels: labels}, fmt.Errorf("write alloy temp config: %w", err) + } + + if bytes.Equal(existing, []byte(rendered)) { + if removeErr := removeFile(tmpConfigPath); removeErr != nil && !errors.Is(removeErr, os.ErrNotExist) { + return Result{Labels: labels}, fmt.Errorf("remove unchanged alloy temp config: %w", removeErr) + } + if forceRestart { + log.Printf("[alloy] vmid=%d action=render status=unchanged restart_retry=true", cfg.VMID) + if err := restartAndValidate(cfg.VMID, labels, attempt); err != nil { + return Result{Applied: true, Labels: labels}, err + } + return Result{Applied: true, Labels: labels}, nil + } + log.Printf("[alloy] vmid=%d action=render status=unchanged", cfg.VMID) + return Result{Applied: false, Labels: labels}, nil + } + + if err := renameFile(tmpConfigPath, ConfigPath); err != nil { + _ = removeFile(tmpConfigPath) + return Result{Labels: labels}, fmt.Errorf("replace alloy config: %w", err) + } + log.Printf("[alloy] vmid=%d action=render status=changed path=%s", cfg.VMID, ConfigPath) + + if err := restartAndValidate(cfg.VMID, labels, attempt); err != nil { + return Result{Applied: true, Labels: labels}, err + } + + return Result{Applied: true, Labels: labels}, nil +} + +func restartAndValidate(vmid int, labels map[string]string, attempt int) error { + log.Printf("[alloy] vmid=%d action=restart status=attempt attempt=%d/%d service=%s", vmid, attempt, updateAttempts, alloyServiceName) + if err := runCommand("systemctl", "restart", alloyServiceName); err != nil { + log.Printf("[alloy] vmid=%d action=restart status=failed err=%v labels=%s", vmid, err, 
formatLabels(labels)) + return fmt.Errorf("restart alloy: %w", err) + } + if err := validateAlloy(); err != nil { + log.Printf("[alloy] vmid=%d action=restart status=failed err=%v labels=%s", vmid, err, formatLabels(labels)) + return err + } + log.Printf("[alloy] vmid=%d action=restart status=success labels=%s", vmid, formatLabels(labels)) + return nil +} + +func RenderExternalLabelsBlock(labels map[string]string) string { + order := []string{ + "job", + "instance", + "collector", + "role", + "vmid", + } + + var b strings.Builder + b.WriteString(" external_labels = {\n") + for _, key := range order { + value, ok := labels[key] + if !ok { + continue + } + fmt.Fprintf(&b, " %-9s = \"%s\",\n", key, escapeAlloyString(value)) + } + b.WriteString(" }\n") + + return b.String() +} + +func ReplaceExternalLabelsBlock(config string, labels map[string]string) (string, error) { + start, end, err := externalLabelsRange(config) + if err != nil { + return "", err + } + return config[:start] + RenderExternalLabelsBlock(labels) + config[end:], nil +} + +func Labels(cfg state.Config) (map[string]string, error) { + containerType := strings.ToLower(strings.TrimSpace(cfg.ContainerType)) + if !managedContainerType(containerType) { + return nil, fmt.Errorf("unsupported alloy container type: %q", cfg.ContainerType) + } + + if cfg.VMID <= 0 { + return nil, fmt.Errorf("missing required container metadata: vmid") + } + + ip := strings.TrimSpace(cfg.ContainerIP) + if ip == "" { + var err error + ip, err = localIPFunc() + if err != nil { + return nil, err + } + } + if parsed := net.ParseIP(ip); parsed == nil { + return nil, fmt.Errorf("invalid container_ip metadata: %q", ip) + } + + role := "game-container" + if containerType == "dev" { + role = "dev-container" + } + + labels := map[string]string{ + "job": unixJob, + "instance": net.JoinHostPort(ip, metricsPort), + "collector": collector, + "role": role, + "vmid": strconv.Itoa(cfg.VMID), + } + + return labels, nil +} + +func 
managedContainerType(containerType string) bool { + switch strings.ToLower(strings.TrimSpace(containerType)) { + case "game", "dev": + return true + default: + return false + } +} + +func formatLabels(labels map[string]string) string { + keys := make([]string, 0, len(labels)) + for key := range labels { + keys = append(keys, key) + } + sort.Strings(keys) + + parts := make([]string, 0, len(keys)) + for _, key := range keys { + parts = append(parts, key+"="+strconv.Quote(labels[key])) + } + return strings.Join(parts, " ") +} + +func externalLabelsRange(config string) (int, int, error) { + const marker = "external_labels" + + start := strings.Index(config, marker) + if start < 0 { + return 0, 0, fmt.Errorf("alloy config missing external_labels block") + } + if next := strings.Index(config[start+len(marker):], marker); next >= 0 { + return 0, 0, fmt.Errorf("alloy config contains multiple external_labels blocks") + } + + eq := strings.Index(config[start+len(marker):], "=") + if eq < 0 { + return 0, 0, fmt.Errorf("alloy external_labels block missing assignment") + } + eq += start + len(marker) + + open := strings.Index(config[eq+1:], "{") + if open < 0 { + return 0, 0, fmt.Errorf("alloy external_labels block missing opening brace") + } + open += eq + 1 + + end, err := matchingBraceEnd(config, open) + if err != nil { + return 0, 0, err + } + + lineStart := strings.LastIndex(config[:start], "\n") + if lineStart < 0 { + lineStart = 0 + } else { + lineStart++ + } + if strings.TrimSpace(config[lineStart:start]) == "" { + start = lineStart + } + if end < len(config) && config[end] == '\n' { + end++ + } + + return start, end, nil +} + +func matchingBraceEnd(config string, open int) (int, error) { + depth := 0 + inString := false + escaped := false + + for i := open; i < len(config); i++ { + ch := config[i] + if inString { + if escaped { + escaped = false + continue + } + if ch == '\\' { + escaped = true + continue + } + if ch == '"' { + inString = false + } + continue + } + + 
switch ch { + case '"': + inString = true + case '{': + depth++ + case '}': + depth-- + if depth == 0 { + return i + 1, nil + } + } + } + + return 0, fmt.Errorf("alloy external_labels block missing closing brace") +} + +func escapeAlloyString(value string) string { + value = strings.ReplaceAll(value, "\\", "\\\\") + return strings.ReplaceAll(value, "\"", "\\\"") +} + +func validateAlloy() error { + if err := runCommand("systemctl", "is-active", "--quiet", alloyServiceName); err != nil { + return fmt.Errorf("alloy service is not active: %w", err) + } + + conn, err := dialTimeout("tcp", net.JoinHostPort("127.0.0.1", metricsPort), 3*time.Second) + if err != nil { + return fmt.Errorf("alloy is not listening on :%s: %w", metricsPort, err) + } + _ = conn.Close() + return nil +} + +func runSystemCommand(name string, args ...string) error { + out, err := exec.Command(name, args...).CombinedOutput() + if err != nil { + return fmt.Errorf("%s %s: %w: %s", name, strings.Join(args, " "), err, strings.TrimSpace(string(out))) + } + return nil +} + +func localIPv4() (string, error) { + addrs, err := net.InterfaceAddrs() + if err != nil { + return "", fmt.Errorf("read local addresses: %w", err) + } + + candidates := make([]string, 0) + for _, addr := range addrs { + ipNet, ok := addr.(*net.IPNet) + if !ok { + continue + } + ip := ipNet.IP.To4() + if ip == nil || ip.IsLoopback() || ip.IsLinkLocalUnicast() { + continue + } + candidates = append(candidates, ip.String()) + } + if len(candidates) == 0 { + return "", fmt.Errorf("missing required container metadata: non-loopback IPv4") + } + sort.Strings(candidates) + return candidates[0], nil +} diff --git a/internal/alloy/alloy_test.go b/internal/alloy/alloy_test.go new file mode 100644 index 0000000..b21c1d3 --- /dev/null +++ b/internal/alloy/alloy_test.go @@ -0,0 +1,347 @@ +package alloy + +import ( + "errors" + "net" + "os" + "strings" + "testing" + "time" + + "zlh-agent/internal/state" +) + +const templateConfig = `logging { + level 
= "info" +} + +prometheus.exporter.unix "local" {} + +prometheus.scrape "local" { + targets = prometheus.exporter.unix.local.targets + forward_to = [prometheus.remote_write.zlh_monitor.receiver] +} + +prometheus.remote_write "zlh_monitor" { + endpoint { + url = "http://10.60.0.25:9090/api/v1/write" + } + + external_labels = { + job = "old", + instance = "old", + collector = "old", + role = "old", + vmid = "old", + } +} +` + +func TestReplaceExternalLabelsBlockPreservesTemplate(t *testing.T) { + labels := map[string]string{ + "job": "integrations/unix", + "instance": "10.200.0.46:12345", + "collector": "alloy", + "role": "game-container", + "vmid": "5173", + } + + rendered, err := ReplaceExternalLabelsBlock(templateConfig, labels) + if err != nil { + t.Fatalf("ReplaceExternalLabelsBlock: %v", err) + } + + if !strings.Contains(rendered, `url = "http://10.60.0.25:9090/api/v1/write"`) { + t.Fatalf("rendered config did not preserve template body:\n%s", rendered) + } + if !strings.Contains(rendered, `job = "integrations/unix",`) { + t.Fatalf("rendered config missing job label:\n%s", rendered) + } + if !strings.Contains(rendered, `instance = "10.200.0.46:12345",`) { + t.Fatalf("rendered config missing instance label:\n%s", rendered) + } + if strings.Contains(rendered, `job = "old"`) { + t.Fatalf("rendered config retained old labels:\n%s", rendered) + } +} + +func TestLabelsDevAndGame(t *testing.T) { + restoreTestHooks(t) + + devLabels, err := Labels(state.Config{ + VMID: 6001, + ContainerIP: "10.60.0.223", + ContainerType: "dev", + }) + if err != nil { + t.Fatalf("Labels dev: %v", err) + } + assertLabel(t, devLabels, "instance", "10.60.0.223:12345") + assertLabel(t, devLabels, "role", "dev-container") + assertLabel(t, devLabels, "vmid", "6001") + + gameLabels, err := Labels(state.Config{ + VMID: 5001, + ContainerIP: "10.60.0.224", + ContainerType: "game", + }) + if err != nil { + t.Fatalf("Labels game: %v", err) + } + assertLabel(t, gameLabels, "instance", 
"10.60.0.224:12345") + assertLabel(t, gameLabels, "role", "game-container") + assertLabel(t, gameLabels, "vmid", "5001") +} + +func TestEnsureConfigUnchangedDoesNotRestart(t *testing.T) { + restoreTestHooks(t) + + cfg := state.Config{ + VMID: 6001, + ContainerIP: "10.60.0.223", + ContainerType: "dev", + } + labels, err := Labels(cfg) + if err != nil { + t.Fatalf("Labels: %v", err) + } + rendered, err := ReplaceExternalLabelsBlock(templateConfig, labels) + if err != nil { + t.Fatalf("ReplaceExternalLabelsBlock: %v", err) + } + + restartCalls := 0 + removeCalls := 0 + writeFile = func(path string, data []byte, perm os.FileMode) error { + if path != tmpConfigPath { + t.Fatalf("write path = %q, want %q", path, tmpConfigPath) + } + return nil + } + readFile = func(path string) ([]byte, error) { + if path != ConfigPath { + t.Fatalf("read path = %q, want %q", path, ConfigPath) + } + return []byte(rendered), nil + } + removeFile = func(path string) error { + removeCalls++ + if path != tmpConfigPath { + t.Fatalf("remove path = %q, want %q", path, tmpConfigPath) + } + return nil + } + renameFile = func(string, string) error { + t.Fatalf("rename should not be called for unchanged config") + return nil + } + runCommand = func(string, ...string) error { + restartCalls++ + return nil + } + + result, err := EnsureConfig(cfg) + if err != nil { + t.Fatalf("EnsureConfig: %v", err) + } + if result.Applied { + t.Fatalf("Applied = true, want false") + } + if removeCalls != 1 { + t.Fatalf("removeCalls = %d, want 1", removeCalls) + } + if restartCalls != 0 { + t.Fatalf("restartCalls = %d, want 0", restartCalls) + } +} + +func TestEnsureConfigChangedRestartsAndValidates(t *testing.T) { + restoreTestHooks(t) + + restartCalls := 0 + activeChecks := 0 + listenChecks := 0 + writeFile = func(path string, data []byte, perm os.FileMode) error { + if path != tmpConfigPath { + t.Fatalf("write path = %q, want %q", path, tmpConfigPath) + } + if !strings.Contains(string(data), `role = 
"game-container",`) { + t.Fatalf("temp config missing rendered labels:\n%s", string(data)) + } + return nil + } + readFile = func(path string) ([]byte, error) { + return []byte(templateConfig), nil + } + removeFile = func(string) error { return nil } + renameFile = func(oldPath, newPath string) error { + if oldPath != tmpConfigPath || newPath != ConfigPath { + t.Fatalf("rename = %q -> %q, want %q -> %q", oldPath, newPath, tmpConfigPath, ConfigPath) + } + return nil + } + runCommand = func(name string, args ...string) error { + if name != "systemctl" { + t.Fatalf("command name = %q, want systemctl", name) + } + joined := strings.Join(args, " ") + switch joined { + case "restart alloy": + restartCalls++ + case "is-active --quiet alloy": + activeChecks++ + default: + t.Fatalf("unexpected systemctl args: %s", joined) + } + return nil + } + dialTimeout = func(network, address string, timeout time.Duration) (net.Conn, error) { + if network != "tcp" || address != "127.0.0.1:12345" { + t.Fatalf("dial = %s %s, want tcp 127.0.0.1:12345", network, address) + } + listenChecks++ + left, right := net.Pipe() + _ = right.Close() + return left, nil + } + + result, err := EnsureConfig(state.Config{ + VMID: 5001, + ContainerIP: "10.60.0.224", + ContainerType: "game", + }) + if err != nil { + t.Fatalf("EnsureConfig: %v", err) + } + if !result.Applied { + t.Fatalf("Applied = false, want true") + } + if restartCalls != 1 { + t.Fatalf("restartCalls = %d, want 1", restartCalls) + } + if activeChecks != 1 { + t.Fatalf("activeChecks = %d, want 1", activeChecks) + } + if listenChecks != 1 { + t.Fatalf("listenChecks = %d, want 1", listenChecks) + } +} + +func TestEnsureConfigRetriesFailedUpdate(t *testing.T) { + restoreTestHooks(t) + + attempts := 0 + readFile = func(path string) ([]byte, error) { + attempts++ + if attempts == 1 { + return nil, errors.New("temporary read failure") + } + return []byte(templateConfig), nil + } + writeFile = func(string, []byte, os.FileMode) error { return nil } 
+ removeFile = func(string) error { return nil } + renameFile = func(string, string) error { return nil } + runCommand = func(string, ...string) error { return nil } + + result, err := EnsureConfig(state.Config{ + VMID: 5001, + ContainerIP: "10.60.0.224", + ContainerType: "game", + }) + if err != nil { + t.Fatalf("EnsureConfig: %v", err) + } + if !result.Applied { + t.Fatalf("Applied = false, want true after retry") + } + if attempts != 2 { + t.Fatalf("attempts = %d, want 2", attempts) + } +} + +func TestEnsureConfigRetriesRestartAfterFileChanged(t *testing.T) { + restoreTestHooks(t) + + restarts := 0 + readFile = func(path string) ([]byte, error) { + if restarts == 0 { + return []byte(templateConfig), nil + } + labels, err := Labels(state.Config{VMID: 5001, ContainerIP: "10.60.0.224", ContainerType: "game"}) + if err != nil { + t.Fatalf("Labels: %v", err) + } + rendered, err := ReplaceExternalLabelsBlock(templateConfig, labels) + if err != nil { + t.Fatalf("ReplaceExternalLabelsBlock: %v", err) + } + return []byte(rendered), nil + } + writeFile = func(string, []byte, os.FileMode) error { return nil } + removeFile = func(string) error { return nil } + renameFile = func(string, string) error { return nil } + runCommand = func(name string, args ...string) error { + if name == "systemctl" && strings.Join(args, " ") == "restart alloy" { + restarts++ + if restarts == 1 { + return errors.New("temporary restart failure") + } + } + return nil + } + + result, err := EnsureConfig(state.Config{ + VMID: 5001, + ContainerIP: "10.60.0.224", + ContainerType: "game", + }) + if err != nil { + t.Fatalf("EnsureConfig: %v", err) + } + if !result.Applied { + t.Fatalf("Applied = false, want true after restart retry") + } + if restarts != 2 { + t.Fatalf("restarts = %d, want 2", restarts) + } +} + +func assertLabel(t *testing.T, labels map[string]string, key, want string) { + t.Helper() + if got := labels[key]; got != want { + t.Fatalf("labels[%q] = %q, want %q", key, got, want) + } +} + 
+func restoreTestHooks(t *testing.T) { + t.Helper() + + oldLocalIP := localIPFunc + oldRunCommand := runCommand + oldDialTimeout := dialTimeout + oldSleep := sleepFunc + oldWriteFile := writeFile + oldReadFile := readFile + oldRemoveFile := removeFile + oldRenameFile := renameFile + + t.Cleanup(func() { + localIPFunc = oldLocalIP + runCommand = oldRunCommand + dialTimeout = oldDialTimeout + sleepFunc = oldSleep + writeFile = oldWriteFile + readFile = oldReadFile + removeFile = oldRemoveFile + renameFile = oldRenameFile + }) + + localIPFunc = func() (string, error) { return "10.60.0.200", nil } + runCommand = func(string, ...string) error { return nil } + dialTimeout = func(string, string, time.Duration) (net.Conn, error) { + left, right := net.Pipe() + _ = right.Close() + return left, nil + } + sleepFunc = func(time.Duration) {} +} diff --git a/internal/http/agent.go b/internal/http/agent.go index dc11d6d..83b5d77 100755 --- a/internal/http/agent.go +++ b/internal/http/agent.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "zlh-agent/internal/alloy" agentfiles "zlh-agent/internal/files" agenthandlers "zlh-agent/internal/handlers" mcstatus "zlh-agent/internal/minecraft" @@ -49,6 +50,17 @@ func dirExists(path string) bool { return err == nil && s.IsDir() } +var ( + provisionAll = provision.ProvisionAll + devIsProvisioned = devcontainer.IsProvisioned + devRuntimeInstalled = devcontainer.RuntimeInstalled + codeServerInstall = codeserver.Install + codeServerStart = codeserver.Start + codeServerVerify = codeserver.Verify + codeServerInstalled = codeserver.Installed + codeServerRunning = codeserver.Running +) + func lifecycleLog(cfg *state.Config, phase string, attempt int, started time.Time, format string, args ...any) { elapsed := time.Since(started).Milliseconds() msg := fmt.Sprintf(format, args...) 
@@ -96,7 +108,7 @@ func runProvisionPipeline(cfg *state.Config) error { state.SetState(state.StateInstalling) state.SetInstallStep("provision_all") - if err := provision.ProvisionAll(*cfg); err != nil { + if err := provisionAll(*cfg); err != nil { state.SetError(err) state.SetState(state.StateError) return err @@ -125,7 +137,7 @@ func ensureProvisioned(cfg *state.Config) error { if cfg.ContainerType == "dev" { - if !devcontainer.IsProvisioned() || !devcontainer.RuntimeInstalled(cfg.Runtime, cfg.Version) { + if !devIsProvisioned() || !devRuntimeInstalled(cfg.Runtime, cfg.Version) { if err := runProvisionPipeline(cfg); err != nil { return err } @@ -152,6 +164,10 @@ func ensureProvisioned(cfg *state.Config) error { return err } + if err := ensureDevCodeServer(cfg); err != nil { + return err + } + state.SetState(state.StateIdle) state.SetError(nil) return nil @@ -188,6 +204,42 @@ func ensureProvisioned(cfg *state.Config) error { return runProvisionPipeline(cfg) } +func ensureDevCodeServer(cfg *state.Config) error { + if cfg == nil || !devCodeServerRequested(*cfg) { + return nil + } + + if !codeServerInstalled() { + if err := codeServerInstall(*cfg); err != nil { + return fmt.Errorf("code-server install failed: %w", err) + } + } + if !codeServerRunning() { + if err := codeServerStart(*cfg); err != nil { + return fmt.Errorf("code-server start failed: %w", err) + } + } + if err := codeServerVerify(); err != nil { + return fmt.Errorf("code-server verification failed: %w", err) + } + if !codeServerRunning() { + return fmt.Errorf("code-server did not stay running") + } + return nil +} + +func devCodeServerRequested(cfg state.Config) bool { + if cfg.EnableCodeServer { + return true + } + for _, addon := range cfg.Addons { + if strings.EqualFold(addon, "codeserver") { + return true + } + } + return false +} + /* -------------------------------------------------------------------------- /config — the REAL provisioning trigger (async) @@ -220,6 +272,9 @@ func handleConfig(w 
http.ResponseWriter, r *http.Request) { http.Error(w, "save config failed: "+err.Error(), http.StatusInternalServerError) return } + if _, err := alloy.EnsureConfig(cfg); err != nil { + log.Printf("[http] vmid=%d action=config status=alloy_failed non_fatal=true err=%v", cfg.VMID, err) + } go func(c state.Config) { defer endOp() diff --git a/internal/http/agent_test.go b/internal/http/agent_test.go new file mode 100644 index 0000000..641e721 --- /dev/null +++ b/internal/http/agent_test.go @@ -0,0 +1,125 @@ +package agenthttp + +import ( + "testing" + + "zlh-agent/internal/state" +) + +func TestEnsureDevCodeServerInstallsAndStartsWhenRequested(t *testing.T) { + restoreCodeServerTestHooks(t) + + cfg := &state.Config{ + ContainerType: "dev", + Runtime: "node", + EnableCodeServer: true, + } + + installed := false + running := false + installCalls := 0 + startCalls := 0 + verifyCalls := 0 + + codeServerInstalled = func() bool { return installed } + codeServerRunning = func() bool { return running } + codeServerInstall = func(state.Config) error { + installCalls++ + installed = true + return nil + } + codeServerStart = func(state.Config) error { + startCalls++ + running = true + return nil + } + codeServerVerify = func() error { + verifyCalls++ + return nil + } + + if err := ensureDevCodeServer(cfg); err != nil { + t.Fatalf("ensureDevCodeServer: %v", err) + } + if installCalls != 1 { + t.Fatalf("installCalls = %d, want 1", installCalls) + } + if startCalls != 1 { + t.Fatalf("startCalls = %d, want 1", startCalls) + } + if verifyCalls != 1 { + t.Fatalf("verifyCalls = %d, want 1", verifyCalls) + } +} + +func TestEnsureDevCodeServerStartsInstalledStoppedAddon(t *testing.T) { + restoreCodeServerTestHooks(t) + + cfg := &state.Config{ + ContainerType: "dev", + Runtime: "node", + Addons: []string{"codeserver"}, + } + + running := false + installCalls := 0 + startCalls := 0 + + codeServerInstalled = func() bool { return true } + codeServerRunning = func() bool { return running } + 
codeServerInstall = func(state.Config) error { + installCalls++ + return nil + } + codeServerStart = func(state.Config) error { + startCalls++ + running = true + return nil + } + codeServerVerify = func() error { return nil } + + if err := ensureDevCodeServer(cfg); err != nil { + t.Fatalf("ensureDevCodeServer: %v", err) + } + if installCalls != 0 { + t.Fatalf("installCalls = %d, want 0", installCalls) + } + if startCalls != 1 { + t.Fatalf("startCalls = %d, want 1", startCalls) + } +} + +func TestEnsureDevCodeServerSkipsWhenNotRequested(t *testing.T) { + restoreCodeServerTestHooks(t) + + called := false + codeServerInstalled = func() bool { + called = true + return false + } + + if err := ensureDevCodeServer(&state.Config{ContainerType: "dev", Runtime: "node"}); err != nil { + t.Fatalf("ensureDevCodeServer: %v", err) + } + if called { + t.Fatalf("code-server hooks were called for config without code-server request") + } +} + +func restoreCodeServerTestHooks(t *testing.T) { + t.Helper() + + oldInstall := codeServerInstall + oldStart := codeServerStart + oldVerify := codeServerVerify + oldInstalled := codeServerInstalled + oldRunning := codeServerRunning + + t.Cleanup(func() { + codeServerInstall = oldInstall + codeServerStart = oldStart + codeServerVerify = oldVerify + codeServerInstalled = oldInstalled + codeServerRunning = oldRunning + }) +} diff --git a/internal/provision/addons/codeserver/verify.go b/internal/provision/addons/codeserver/verify.go index 493655b..3976639 100644 --- a/internal/provision/addons/codeserver/verify.go +++ b/internal/provision/addons/codeserver/verify.go @@ -31,7 +31,7 @@ func Running() bool { pid, convErr := strconv.Atoi(strings.TrimSpace(string(raw))) if convErr == nil && pid > 0 { process, findErr := os.FindProcess(pid) - if findErr == nil && process.Signal(syscall.Signal(0)) == nil { + if findErr == nil && process.Signal(syscall.Signal(0)) == nil && pidMatchesCodeServer(pid) { return true } } @@ -79,7 +79,7 @@ func findRunningPID() 
(int, error) { if !strings.Contains(cmdline, "code-server") { continue } - if !strings.Contains(cmdline, "--bind-addr 0.0.0.0:6000") { + if !strings.Contains(cmdline, "0.0.0.0:6000") { continue } @@ -94,16 +94,33 @@ func findRunningPID() (int, error) { return 0, fmt.Errorf("code-server process not found") } +func pidMatchesCodeServer(pid int) bool { + if pid <= 0 { + return false + } + raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline")) + if err != nil || len(raw) == 0 { + return false + } + cmdline := strings.ReplaceAll(string(raw), "\x00", " ") + return strings.Contains(cmdline, "code-server") && + strings.Contains(cmdline, "0.0.0.0:6000") +} + func Start(cfg state.Config) error { if !Installed() { return fmt.Errorf("code-server addon not installed") } if Running() { - if err := Stop(); err != nil { - return err - } + return nil } - return executil.RunEmbeddedScript("addons/codeserver/install.sh", launchEnv(cfg)...) + if err := executil.RunEmbeddedScript("addons/codeserver/start.sh", launchEnv(cfg)...); err != nil { + return err + } + if !Running() { + return fmt.Errorf("code-server did not stay running") + } + return nil } func Stop() error { diff --git a/internal/provision/provision.go b/internal/provision/provision.go index 27a5312..fe037ca 100644 --- a/internal/provision/provision.go +++ b/internal/provision/provision.go @@ -151,10 +151,10 @@ func ProvisionAll(cfg state.Config) error { /* --------------------------------------------------------- ADDONS (OPTIONAL, ROLE-AGNOSTIC) --------------------------------------------------------- */ - if cfg.ContainerType == "dev" && cfg.EnableCodeServer { + if strings.EqualFold(cfg.ContainerType, "dev") && codeServerRequested(cfg) { seen := false for _, addon := range cfg.Addons { - if addon == "codeserver" { + if strings.EqualFold(addon, "codeserver") { seen = true break } @@ -173,6 +173,18 @@ func ProvisionAll(cfg state.Config) error { return nil } +func codeServerRequested(cfg state.Config) 
bool { + if cfg.EnableCodeServer { + return true + } + for _, addon := range cfg.Addons { + if strings.EqualFold(addon, "codeserver") { + return true + } + } + return false +} + func normalizeMinecraftConfig(cfg *state.Config) { if cfg == nil { return diff --git a/internal/state/state.go b/internal/state/state.go index 5517338..fc9faf8 100755 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -18,6 +18,7 @@ type Config struct { VMID int `json:"vmid"` // Container identity + ContainerIP string `json:"container_ip,omitempty"` ContainerType string `json:"container_type,omitempty"` // Dev runtime (only for dev containers) diff --git a/logs/agent.log b/logs/agent.log index e2af5a5..daaec0e 100644 --- a/logs/agent.log +++ b/logs/agent.log @@ -359,3 +359,96 @@ 2026/04/16 17:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host 2026/04/16 18:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host 2026/04/16 18:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 19:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 19:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 20:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 20:36:30 [update] notify 
check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 21:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 21:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 22:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 22:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 23:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/16 23:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 00:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 00:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 01:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 01:36:30 [update] notify check failed 
status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 02:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 02:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 03:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 03:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 04:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 04:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 05:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 05:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 06:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 06:36:30 [update] notify check failed status=error 
current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 07:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 07:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 08:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 08:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 09:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 09:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 10:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 10:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 11:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 11:36:30 [update] notify check failed status=error current=0.0.0-dev 
target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 12:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 12:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 13:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 13:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 14:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 14:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 15:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 15:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 16:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 16:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get 
"http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 17:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 17:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 18:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 18:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 19:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 19:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 20:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 20:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 21:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 21:37:24 [update] notify check failed status=error current=0.0.0-dev target= err=Get 
"http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 22:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 22:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 23:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/17 23:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 00:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 00:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 01:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 01:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 02:06:32 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 02:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get 
"http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 03:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 03:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 04:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 04:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 05:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 05:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 06:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 06:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 07:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 07:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get 
"http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 08:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 08:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 09:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 09:36:32 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 10:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 10:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 11:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 11:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 12:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 12:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get 
"http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 13:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 13:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host +2026/04/18 13:52:39 [agent] file logging enabled +2026/04/18 13:52:39 [agent] lifecycle logging enabled +2026/04/18 13:52:39 [agent] routes registered +2026/04/18 13:52:39 [autostart] disabled (ok) +2026/04/18 13:52:39 [update] periodic checks enabled (mode=notify interval=30m0s) +2026/04/18 13:52:39 [agent] listening on :18888 +2026/04/18 13:52:52 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host diff --git a/scripts/addons/codeserver/install.sh b/scripts/addons/codeserver/install.sh index be48f4b..7827c95 100644 --- a/scripts/addons/codeserver/install.sh +++ b/scripts/addons/codeserver/install.sh @@ -12,8 +12,6 @@ ARTIFACT_NAME="${ZLH_CODESERVER_ARTIFACT:-code-server.tar.gz}" ARTIFACT_URL="${ZLH_ARTIFACT_BASE_URL%/}/addons/code-server/${ARTIFACT_NAME}" ARTIFACT_TMP="/tmp/${ARTIFACT_NAME}" MARKER="/opt/zlh/.zlh/addons/code-server.installed" -PID_FILE="/opt/zlh/.zlh/addons/code-server.pid" -LOG_FILE="/opt/zlh/logs/code-server.log" WORKSPACE_DIR="${CODE_SERVER_WORKSPACE:-/home/dev/workspace}" PORT="${CODE_SERVER_PORT:-6000}" BIN="${SERVICE_ROOT}/bin/code-server" @@ -22,7 +20,6 @@ CONFIG_DIR="/home/dev/.config/code-server" CONFIG_FILE="${CONFIG_DIR}/config.yaml" mkdir -p "$(dirname "${MARKER}")" -mkdir -p "$(dirname "${LOG_FILE}")" download_artifact() { echo "[code-server] action=artifact_fetch step=download url=${ARTIFACT_URL} target=${ARTIFACT_TMP}" @@ 
-86,7 +83,6 @@ write_config() { bind-addr: 0.0.0.0:${PORT} auth: none disable-telemetry: true -proxy-domain: "" EOF chown -R dev:dev "${CONFIG_DIR}" 2>/dev/null || true echo "[code-server] action=config_write path=${CONFIG_FILE} status=ok" @@ -109,16 +105,6 @@ fi mkdir -p "${WORKSPACE_DIR}" write_config -if [ -f "${PID_FILE}" ] && kill -0 "$(cat "${PID_FILE}")" 2>/dev/null; then - echo "[code-server] already running" -else - rm -f "${PID_FILE}" - echo "[code-server] action=service_launch command=\"${BIN} --bind-addr 0.0.0.0:${PORT} --auth none --disable-telemetry ${WORKSPACE_DIR}\"" - HOME="/home/dev" USER="dev" LOGNAME="dev" \ - nohup "${BIN}" --bind-addr "0.0.0.0:${PORT}" --auth none --disable-telemetry "${WORKSPACE_DIR}" >"${LOG_FILE}" 2>&1 & - echo $! > "${PID_FILE}" -fi - touch "${MARKER}" rm -f "${ARTIFACT_TMP}" diff --git a/scripts/addons/codeserver/start.sh b/scripts/addons/codeserver/start.sh new file mode 100644 index 0000000..435b93e --- /dev/null +++ b/scripts/addons/codeserver/start.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "[code-server] starting service" + +SERVICE_ROOT="/opt/zlh/services/code-server" +MARKER="/opt/zlh/.zlh/addons/code-server.installed" +PID_FILE="/opt/zlh/.zlh/addons/code-server.pid" +LOG_FILE="/opt/zlh/logs/code-server.log" +WORKSPACE_DIR="${CODE_SERVER_WORKSPACE:-/home/dev/workspace}" +PORT="${CODE_SERVER_PORT:-6000}" +BIN="${SERVICE_ROOT}/bin/code-server" +CONFIG_DIR="/home/dev/.config/code-server" +CONFIG_FILE="${CONFIG_DIR}/config.yaml" + +if [ ! -f "${MARKER}" ]; then + echo "[code-server][ERROR] addon marker missing at ${MARKER}" + exit 1 +fi + +if [ ! -x "${BIN}" ]; then + echo "[code-server][ERROR] binary missing or not executable at ${BIN}" + exit 1 +fi + +is_code_server_pid() { + local pid="$1" + if [ -z "${pid}" ] || [ ! 
-r "/proc/${pid}/cmdline" ]; then + return 1 + fi + local cmdline + cmdline="$(tr '\000' ' ' < "/proc/${pid}/cmdline")" + case "${cmdline}" in + *code-server*"0.0.0.0:${PORT}"*) return 0 ;; + *) return 1 ;; + esac +} + +find_running_pid() { + local cmdline_path pid + for cmdline_path in /proc/[0-9]*/cmdline; do + pid="$(basename "$(dirname "${cmdline_path}")")" + if is_code_server_pid "${pid}"; then + echo "${pid}" + return 0 + fi + done + return 1 +} + +mkdir -p "$(dirname "${PID_FILE}")" +mkdir -p "$(dirname "${LOG_FILE}")" +mkdir -p "${WORKSPACE_DIR}" +mkdir -p "${CONFIG_DIR}" +chown -R dev:dev "${WORKSPACE_DIR}" 2>/dev/null || true +touch "${LOG_FILE}" +chown dev:dev "${LOG_FILE}" 2>/dev/null || true +cat > "${CONFIG_FILE}" <<EOF +bind-addr: 0.0.0.0:${PORT} +auth: none +disable-telemetry: true +EOF +chown -R dev:dev "${CONFIG_DIR}" 2>/dev/null || true + +if [ -f "${PID_FILE}" ] && is_code_server_pid "$(cat "${PID_FILE}")"; then + echo "[code-server] already running" + exit 0 +fi + +rm -f "${PID_FILE}" +echo "[code-server] action=service_launch command=\"${BIN} --bind-addr 0.0.0.0:${PORT} --auth none --disable-telemetry ${WORKSPACE_DIR}\"" + +if id dev >/dev/null 2>&1; then + pid_tmp="${PID_FILE}.tmp" + rm -f "${pid_tmp}" + su -s /bin/sh dev -c "HOME=/home/dev USER=dev LOGNAME=dev nohup '${BIN}' --bind-addr '0.0.0.0:${PORT}' --auth none --disable-telemetry '${WORKSPACE_DIR}' >'${LOG_FILE}' 2>&1 & echo \$!" > "${pid_tmp}" + mv "${pid_tmp}" "${PID_FILE}" +else + HOME="/home/dev" USER="dev" LOGNAME="dev" \ + nohup "${BIN}" --bind-addr "0.0.0.0:${PORT}" --auth none --disable-telemetry "${WORKSPACE_DIR}" >"${LOG_FILE}" 2>&1 & + echo $! > "${PID_FILE}" +fi + +sleep 2 +recorded_pid="" +if [ -f "${PID_FILE}" ]; then + recorded_pid="$(cat "${PID_FILE}")" +fi +if ! is_code_server_pid "${recorded_pid}"; then + discovered_pid="$(find_running_pid || true)" + if [ -n "${discovered_pid}" ]; then + echo "${discovered_pid}" > "${PID_FILE}" + fi +fi + +recorded_pid="" +if [ -f "${PID_FILE}" ]; then + recorded_pid="$(cat "${PID_FILE}")" +fi +if ! 
is_code_server_pid "${recorded_pid}"; then + echo "[code-server][ERROR] process exited after launch" + if [ -f "${LOG_FILE}" ]; then + tail -n 40 "${LOG_FILE}" || true + fi + rm -f "${PID_FILE}" + exit 1 +fi + +echo "[code-server] action=service_launch status=ok pid=$(cat "${PID_FILE}")" diff --git a/state/update.json b/state/update.json index b082969..8cd87a4 100644 --- a/state/update.json +++ b/state/update.json @@ -2,5 +2,5 @@ "status": "error", "current": "0.0.0-dev", "error": "Get \"http://10.60.0.251:8080/agents/manifest.json\": dial tcp 10.60.0.251:8080: connect: no route to host", - "checked_at_utc": "2026-04-16T18:36:27Z" + "checked_at_utc": "2026-04-18T13:52:49Z" } \ No newline at end of file