updates 4-17-26
This commit is contained in:
parent
94bcdf2e78
commit
312e350a1c
360
internal/alloy/alloy.go
Normal file
360
internal/alloy/alloy.go
Normal file
@ -0,0 +1,360 @@
|
||||
package alloy
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"zlh-agent/internal/state"
|
||||
)
|
||||
|
||||
// Fixed paths, label values and retry tuning used when managing the local
// Alloy configuration.
const (
	// ConfigPath is the Alloy configuration file that is read and replaced.
	ConfigPath = "/etc/alloy/config.alloy"
	// tmpConfigPath is the staging file that is renamed over ConfigPath.
	tmpConfigPath = "/etc/alloy/config.alloy.tmp"

	// metricsPort is used for the "instance" label and for the post-restart
	// listen check on 127.0.0.1.
	metricsPort = "12345"
	// unixJob is the value of the "job" label.
	unixJob = "integrations/unix"
	// collector is the value of the "collector" label.
	collector = "alloy"

	// alloyServiceName is the systemd unit passed to systemctl.
	alloyServiceName = "alloy"

	// updateAttempts is the maximum number of tries made by EnsureConfig.
	updateAttempts = 3
	// retryDelay is the pause between two consecutive tries.
	retryDelay = time.Second
)
|
||||
|
||||
// Result describes the outcome of an EnsureConfig run.
type Result struct {
	// Applied is true once the rendered configuration has replaced the file
	// on disk (or a forced restart was attempted for an already-replaced file).
	Applied bool
	// Labels holds the external labels computed for the container; it stays
	// nil when the labels could not be derived or the container is unmanaged.
	Labels map[string]string
}
|
||||
|
||||
var (
|
||||
writeFile = os.WriteFile
|
||||
readFile = os.ReadFile
|
||||
removeFile = os.Remove
|
||||
renameFile = os.Rename
|
||||
localIPFunc = localIPv4
|
||||
runCommand = runSystemCommand
|
||||
dialTimeout = net.DialTimeout
|
||||
sleepFunc = time.Sleep
|
||||
)
|
||||
|
||||
func EnsureConfig(cfg state.Config) (Result, error) {
|
||||
var last Result
|
||||
var lastErr error
|
||||
|
||||
for attempt := 1; attempt <= updateAttempts; attempt++ {
|
||||
result, err := ensureConfigOnce(cfg, attempt, last.Applied)
|
||||
if err == nil {
|
||||
return result, nil
|
||||
}
|
||||
last = result
|
||||
lastErr = err
|
||||
log.Printf("[alloy] vmid=%d action=update status=failed attempt=%d/%d err=%v", cfg.VMID, attempt, updateAttempts, err)
|
||||
if attempt < updateAttempts {
|
||||
sleepFunc(retryDelay)
|
||||
}
|
||||
}
|
||||
|
||||
return last, lastErr
|
||||
}
|
||||
|
||||
func ensureConfigOnce(cfg state.Config, attempt int, forceRestart bool) (Result, error) {
|
||||
log.Printf("[alloy] vmid=%d action=render status=attempt type=%s", cfg.VMID, cfg.ContainerType)
|
||||
|
||||
if !managedContainerType(cfg.ContainerType) {
|
||||
log.Printf("[alloy] vmid=%d action=render status=skipped type=%s", cfg.VMID, cfg.ContainerType)
|
||||
return Result{}, nil
|
||||
}
|
||||
|
||||
labels, err := Labels(cfg)
|
||||
if err != nil {
|
||||
log.Printf("[alloy] vmid=%d action=render status=failed err=%v", cfg.VMID, err)
|
||||
return Result{}, err
|
||||
}
|
||||
log.Printf("[alloy] vmid=%d action=render status=labels labels=%s", cfg.VMID, formatLabels(labels))
|
||||
|
||||
existing, err := readFile(ConfigPath)
|
||||
if err != nil {
|
||||
return Result{Labels: labels}, fmt.Errorf("read alloy config: %w", err)
|
||||
}
|
||||
|
||||
rendered, err := ReplaceExternalLabelsBlock(string(existing), labels)
|
||||
if err != nil {
|
||||
return Result{Labels: labels}, err
|
||||
}
|
||||
|
||||
if err := writeFile(tmpConfigPath, []byte(rendered), 0o644); err != nil {
|
||||
return Result{Labels: labels}, fmt.Errorf("write alloy temp config: %w", err)
|
||||
}
|
||||
|
||||
if bytes.Equal(existing, []byte(rendered)) {
|
||||
if removeErr := removeFile(tmpConfigPath); removeErr != nil && !errors.Is(removeErr, os.ErrNotExist) {
|
||||
return Result{Labels: labels}, fmt.Errorf("remove unchanged alloy temp config: %w", removeErr)
|
||||
}
|
||||
if forceRestart {
|
||||
log.Printf("[alloy] vmid=%d action=render status=unchanged restart_retry=true", cfg.VMID)
|
||||
if err := restartAndValidate(cfg.VMID, labels, attempt); err != nil {
|
||||
return Result{Applied: true, Labels: labels}, err
|
||||
}
|
||||
return Result{Applied: true, Labels: labels}, nil
|
||||
}
|
||||
log.Printf("[alloy] vmid=%d action=render status=unchanged", cfg.VMID)
|
||||
return Result{Applied: false, Labels: labels}, nil
|
||||
}
|
||||
|
||||
if err := renameFile(tmpConfigPath, ConfigPath); err != nil {
|
||||
_ = removeFile(tmpConfigPath)
|
||||
return Result{Labels: labels}, fmt.Errorf("replace alloy config: %w", err)
|
||||
}
|
||||
log.Printf("[alloy] vmid=%d action=render status=changed path=%s", cfg.VMID, ConfigPath)
|
||||
|
||||
if err := restartAndValidate(cfg.VMID, labels, attempt); err != nil {
|
||||
return Result{Applied: true, Labels: labels}, err
|
||||
}
|
||||
|
||||
return Result{Applied: true, Labels: labels}, nil
|
||||
}
|
||||
|
||||
func restartAndValidate(vmid int, labels map[string]string, attempt int) error {
|
||||
log.Printf("[alloy] vmid=%d action=restart status=attempt attempt=%d/%d service=%s", vmid, attempt, updateAttempts, alloyServiceName)
|
||||
if err := runCommand("systemctl", "restart", alloyServiceName); err != nil {
|
||||
log.Printf("[alloy] vmid=%d action=restart status=failed err=%v labels=%s", vmid, err, formatLabels(labels))
|
||||
return fmt.Errorf("restart alloy: %w", err)
|
||||
}
|
||||
if err := validateAlloy(); err != nil {
|
||||
log.Printf("[alloy] vmid=%d action=restart status=failed err=%v labels=%s", vmid, err, formatLabels(labels))
|
||||
return err
|
||||
}
|
||||
log.Printf("[alloy] vmid=%d action=restart status=success labels=%s", vmid, formatLabels(labels))
|
||||
return nil
|
||||
}
|
||||
|
||||
func RenderExternalLabelsBlock(labels map[string]string) string {
|
||||
order := []string{
|
||||
"job",
|
||||
"instance",
|
||||
"collector",
|
||||
"role",
|
||||
"vmid",
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
b.WriteString(" external_labels = {\n")
|
||||
for _, key := range order {
|
||||
value, ok := labels[key]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(&b, " %-9s = \"%s\",\n", key, escapeAlloyString(value))
|
||||
}
|
||||
b.WriteString(" }\n")
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func ReplaceExternalLabelsBlock(config string, labels map[string]string) (string, error) {
|
||||
start, end, err := externalLabelsRange(config)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return config[:start] + RenderExternalLabelsBlock(labels) + config[end:], nil
|
||||
}
|
||||
|
||||
func Labels(cfg state.Config) (map[string]string, error) {
|
||||
containerType := strings.ToLower(strings.TrimSpace(cfg.ContainerType))
|
||||
if !managedContainerType(containerType) {
|
||||
return nil, fmt.Errorf("unsupported alloy container type: %q", cfg.ContainerType)
|
||||
}
|
||||
|
||||
if cfg.VMID <= 0 {
|
||||
return nil, fmt.Errorf("missing required container metadata: vmid")
|
||||
}
|
||||
|
||||
ip := strings.TrimSpace(cfg.ContainerIP)
|
||||
if ip == "" {
|
||||
var err error
|
||||
ip, err = localIPFunc()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if parsed := net.ParseIP(ip); parsed == nil {
|
||||
return nil, fmt.Errorf("invalid container_ip metadata: %q", ip)
|
||||
}
|
||||
|
||||
role := "game-container"
|
||||
if containerType == "dev" {
|
||||
role = "dev-container"
|
||||
}
|
||||
|
||||
labels := map[string]string{
|
||||
"job": unixJob,
|
||||
"instance": net.JoinHostPort(ip, metricsPort),
|
||||
"collector": collector,
|
||||
"role": role,
|
||||
"vmid": strconv.Itoa(cfg.VMID),
|
||||
}
|
||||
|
||||
return labels, nil
|
||||
}
|
||||
|
||||
// managedContainerType reports whether containerType, after trimming
// whitespace and lower-casing, is one of the types this package manages
// ("game" or "dev").
func managedContainerType(containerType string) bool {
	normalized := strings.ToLower(strings.TrimSpace(containerType))
	return normalized == "game" || normalized == "dev"
}
|
||||
|
||||
// formatLabels renders labels as space-separated key="value" pairs for log
// output. Keys are sorted so the result is deterministic, and values are
// quoted with strconv.Quote. An empty or nil map yields "".
func formatLabels(labels map[string]string) string {
	names := make([]string, 0, len(labels))
	for name := range labels {
		names = append(names, name)
	}
	sort.Strings(names)

	var out strings.Builder
	for i, name := range names {
		if i > 0 {
			out.WriteByte(' ')
		}
		out.WriteString(name)
		out.WriteByte('=')
		out.WriteString(strconv.Quote(labels[name]))
	}
	return out.String()
}
|
||||
|
||||
func externalLabelsRange(config string) (int, int, error) {
|
||||
const marker = "external_labels"
|
||||
|
||||
start := strings.Index(config, marker)
|
||||
if start < 0 {
|
||||
return 0, 0, fmt.Errorf("alloy config missing external_labels block")
|
||||
}
|
||||
if next := strings.Index(config[start+len(marker):], marker); next >= 0 {
|
||||
return 0, 0, fmt.Errorf("alloy config contains multiple external_labels blocks")
|
||||
}
|
||||
|
||||
eq := strings.Index(config[start+len(marker):], "=")
|
||||
if eq < 0 {
|
||||
return 0, 0, fmt.Errorf("alloy external_labels block missing assignment")
|
||||
}
|
||||
eq += start + len(marker)
|
||||
|
||||
open := strings.Index(config[eq+1:], "{")
|
||||
if open < 0 {
|
||||
return 0, 0, fmt.Errorf("alloy external_labels block missing opening brace")
|
||||
}
|
||||
open += eq + 1
|
||||
|
||||
end, err := matchingBraceEnd(config, open)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
lineStart := strings.LastIndex(config[:start], "\n")
|
||||
if lineStart < 0 {
|
||||
lineStart = 0
|
||||
} else {
|
||||
lineStart++
|
||||
}
|
||||
if strings.TrimSpace(config[lineStart:start]) == "" {
|
||||
start = lineStart
|
||||
}
|
||||
if end < len(config) && config[end] == '\n' {
|
||||
end++
|
||||
}
|
||||
|
||||
return start, end, nil
|
||||
}
|
||||
|
||||
// matchingBraceEnd returns the index just past the "}" that closes the "{"
// located at config[open].
//
// Braces are counted from open onwards. Text inside double-quoted strings
// (honouring backslash escapes), backtick-delimited raw strings, "//" line
// comments and "/* */" block comments is skipped, so braces or quotes that
// appear there do not affect the nesting depth. An error is returned when the
// input ends before the depth returns to zero.
func matchingBraceEnd(config string, open int) (int, error) {
	depth := 0

	for i := open; i < len(config); i++ {
		switch config[i] {
		case '"':
			// Advance to the closing quote; a backslash consumes the next byte.
			for i++; i < len(config) && config[i] != '"'; i++ {
				if config[i] == '\\' {
					i++
				}
			}
		case '`':
			// Raw string: no escapes, runs to the next backtick.
			for i++; i < len(config) && config[i] != '`'; i++ {
			}
		case '/':
			if i+1 >= len(config) {
				continue
			}
			switch config[i+1] {
			case '/':
				// Line comment: resume at the newline that ends it.
				nl := strings.IndexByte(config[i:], '\n')
				if nl < 0 {
					return 0, fmt.Errorf("alloy external_labels block missing closing brace")
				}
				i += nl
			case '*':
				// Block comment: resume at the "/" of the closing "*/".
				stop := strings.Index(config[i+2:], "*/")
				if stop < 0 {
					return 0, fmt.Errorf("alloy external_labels block missing closing brace")
				}
				i += 2 + stop + 1
			}
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return i + 1, nil
			}
		}
	}

	return 0, fmt.Errorf("alloy external_labels block missing closing brace")
}
|
||||
|
||||
// escapeAlloyString escapes value for use inside a double-quoted string:
// every backslash becomes two backslashes and every double quote is prefixed
// with a backslash.
func escapeAlloyString(value string) string {
	escaper := strings.NewReplacer(
		`\`, `\\`,
		`"`, `\"`,
	)
	return escaper.Replace(value)
}
|
||||
|
||||
func validateAlloy() error {
|
||||
if err := runCommand("systemctl", "is-active", "--quiet", alloyServiceName); err != nil {
|
||||
return fmt.Errorf("alloy service is not active: %w", err)
|
||||
}
|
||||
|
||||
conn, err := dialTimeout("tcp", net.JoinHostPort("127.0.0.1", metricsPort), 3*time.Second)
|
||||
if err != nil {
|
||||
return fmt.Errorf("alloy is not listening on :%s: %w", metricsPort, err)
|
||||
}
|
||||
_ = conn.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
// runSystemCommand runs name with args and waits for it to finish.
//
// It returns nil on success. On failure the error wraps the underlying
// execution error and is prefixed with the command line; the trimmed combined
// stdout/stderr is appended only when the command actually produced output,
// so quiet commands do not yield a message ending in a dangling ": ".
func runSystemCommand(name string, args ...string) error {
	out, err := exec.Command(name, args...).CombinedOutput()
	if err == nil {
		return nil
	}

	// Join name and args together so a command without arguments does not get
	// a stray space before the colon.
	cmdline := strings.Join(append([]string{name}, args...), " ")
	if detail := strings.TrimSpace(string(out)); detail != "" {
		return fmt.Errorf("%s: %w: %s", cmdline, err, detail)
	}
	return fmt.Errorf("%s: %w", cmdline, err)
}
|
||||
|
||||
// localIPv4 returns one IPv4 address of this host that is neither loopback
// nor link-local. When several qualify, the lexicographically smallest string
// form is chosen so the result is stable between calls. An error is returned
// when the interface addresses cannot be read or none qualifies.
func localIPv4() (string, error) {
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return "", fmt.Errorf("read local addresses: %w", err)
	}

	var usable []string
	for _, addr := range addrs {
		network, isNet := addr.(*net.IPNet)
		if !isNet {
			continue
		}
		v4 := network.IP.To4()
		if v4 != nil && !v4.IsLoopback() && !v4.IsLinkLocalUnicast() {
			usable = append(usable, v4.String())
		}
	}

	if len(usable) == 0 {
		return "", fmt.Errorf("missing required container metadata: non-loopback IPv4")
	}
	sort.Strings(usable)
	return usable[0], nil
}
|
||||
347
internal/alloy/alloy_test.go
Normal file
347
internal/alloy/alloy_test.go
Normal file
@ -0,0 +1,347 @@
|
||||
package alloy
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"zlh-agent/internal/state"
|
||||
)
|
||||
|
||||
// templateConfig is the Alloy configuration fixture shared by the tests. Its
// external_labels block carries placeholder "old" values that the code under
// test is expected to replace.
const templateConfig = `logging {
level = "info"
}

prometheus.exporter.unix "local" {}

prometheus.scrape "local" {
targets = prometheus.exporter.unix.local.targets
forward_to = [prometheus.remote_write.zlh_monitor.receiver]
}

prometheus.remote_write "zlh_monitor" {
endpoint {
url = "http://10.60.0.25:9090/api/v1/write"
}

external_labels = {
job = "old",
instance = "old",
collector = "old",
role = "old",
vmid = "old",
}
}
`
|
||||
|
||||
func TestReplaceExternalLabelsBlockPreservesTemplate(t *testing.T) {
|
||||
labels := map[string]string{
|
||||
"job": "integrations/unix",
|
||||
"instance": "10.200.0.46:12345",
|
||||
"collector": "alloy",
|
||||
"role": "game-container",
|
||||
"vmid": "5173",
|
||||
}
|
||||
|
||||
rendered, err := ReplaceExternalLabelsBlock(templateConfig, labels)
|
||||
if err != nil {
|
||||
t.Fatalf("ReplaceExternalLabelsBlock: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(rendered, `url = "http://10.60.0.25:9090/api/v1/write"`) {
|
||||
t.Fatalf("rendered config did not preserve template body:\n%s", rendered)
|
||||
}
|
||||
if !strings.Contains(rendered, `job = "integrations/unix",`) {
|
||||
t.Fatalf("rendered config missing job label:\n%s", rendered)
|
||||
}
|
||||
if !strings.Contains(rendered, `instance = "10.200.0.46:12345",`) {
|
||||
t.Fatalf("rendered config missing instance label:\n%s", rendered)
|
||||
}
|
||||
if strings.Contains(rendered, `job = "old"`) {
|
||||
t.Fatalf("rendered config retained old labels:\n%s", rendered)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLabelsDevAndGame(t *testing.T) {
|
||||
restoreTestHooks(t)
|
||||
|
||||
devLabels, err := Labels(state.Config{
|
||||
VMID: 6001,
|
||||
ContainerIP: "10.60.0.223",
|
||||
ContainerType: "dev",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Labels dev: %v", err)
|
||||
}
|
||||
assertLabel(t, devLabels, "instance", "10.60.0.223:12345")
|
||||
assertLabel(t, devLabels, "role", "dev-container")
|
||||
assertLabel(t, devLabels, "vmid", "6001")
|
||||
|
||||
gameLabels, err := Labels(state.Config{
|
||||
VMID: 5001,
|
||||
ContainerIP: "10.60.0.224",
|
||||
ContainerType: "game",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Labels game: %v", err)
|
||||
}
|
||||
assertLabel(t, gameLabels, "instance", "10.60.0.224:12345")
|
||||
assertLabel(t, gameLabels, "role", "game-container")
|
||||
assertLabel(t, gameLabels, "vmid", "5001")
|
||||
}
|
||||
|
||||
func TestEnsureConfigUnchangedDoesNotRestart(t *testing.T) {
|
||||
restoreTestHooks(t)
|
||||
|
||||
cfg := state.Config{
|
||||
VMID: 6001,
|
||||
ContainerIP: "10.60.0.223",
|
||||
ContainerType: "dev",
|
||||
}
|
||||
labels, err := Labels(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("Labels: %v", err)
|
||||
}
|
||||
rendered, err := ReplaceExternalLabelsBlock(templateConfig, labels)
|
||||
if err != nil {
|
||||
t.Fatalf("ReplaceExternalLabelsBlock: %v", err)
|
||||
}
|
||||
|
||||
restartCalls := 0
|
||||
removeCalls := 0
|
||||
writeFile = func(path string, data []byte, perm os.FileMode) error {
|
||||
if path != tmpConfigPath {
|
||||
t.Fatalf("write path = %q, want %q", path, tmpConfigPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
readFile = func(path string) ([]byte, error) {
|
||||
if path != ConfigPath {
|
||||
t.Fatalf("read path = %q, want %q", path, ConfigPath)
|
||||
}
|
||||
return []byte(rendered), nil
|
||||
}
|
||||
removeFile = func(path string) error {
|
||||
removeCalls++
|
||||
if path != tmpConfigPath {
|
||||
t.Fatalf("remove path = %q, want %q", path, tmpConfigPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
renameFile = func(string, string) error {
|
||||
t.Fatalf("rename should not be called for unchanged config")
|
||||
return nil
|
||||
}
|
||||
runCommand = func(string, ...string) error {
|
||||
restartCalls++
|
||||
return nil
|
||||
}
|
||||
|
||||
result, err := EnsureConfig(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureConfig: %v", err)
|
||||
}
|
||||
if result.Applied {
|
||||
t.Fatalf("Applied = true, want false")
|
||||
}
|
||||
if removeCalls != 1 {
|
||||
t.Fatalf("removeCalls = %d, want 1", removeCalls)
|
||||
}
|
||||
if restartCalls != 0 {
|
||||
t.Fatalf("restartCalls = %d, want 0", restartCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureConfigChangedRestartsAndValidates(t *testing.T) {
|
||||
restoreTestHooks(t)
|
||||
|
||||
restartCalls := 0
|
||||
activeChecks := 0
|
||||
listenChecks := 0
|
||||
writeFile = func(path string, data []byte, perm os.FileMode) error {
|
||||
if path != tmpConfigPath {
|
||||
t.Fatalf("write path = %q, want %q", path, tmpConfigPath)
|
||||
}
|
||||
if !strings.Contains(string(data), `role = "game-container",`) {
|
||||
t.Fatalf("temp config missing rendered labels:\n%s", string(data))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
readFile = func(path string) ([]byte, error) {
|
||||
return []byte(templateConfig), nil
|
||||
}
|
||||
removeFile = func(string) error { return nil }
|
||||
renameFile = func(oldPath, newPath string) error {
|
||||
if oldPath != tmpConfigPath || newPath != ConfigPath {
|
||||
t.Fatalf("rename = %q -> %q, want %q -> %q", oldPath, newPath, tmpConfigPath, ConfigPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
runCommand = func(name string, args ...string) error {
|
||||
if name != "systemctl" {
|
||||
t.Fatalf("command name = %q, want systemctl", name)
|
||||
}
|
||||
joined := strings.Join(args, " ")
|
||||
switch joined {
|
||||
case "restart alloy":
|
||||
restartCalls++
|
||||
case "is-active --quiet alloy":
|
||||
activeChecks++
|
||||
default:
|
||||
t.Fatalf("unexpected systemctl args: %s", joined)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
dialTimeout = func(network, address string, timeout time.Duration) (net.Conn, error) {
|
||||
if network != "tcp" || address != "127.0.0.1:12345" {
|
||||
t.Fatalf("dial = %s %s, want tcp 127.0.0.1:12345", network, address)
|
||||
}
|
||||
listenChecks++
|
||||
left, right := net.Pipe()
|
||||
_ = right.Close()
|
||||
return left, nil
|
||||
}
|
||||
|
||||
result, err := EnsureConfig(state.Config{
|
||||
VMID: 5001,
|
||||
ContainerIP: "10.60.0.224",
|
||||
ContainerType: "game",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureConfig: %v", err)
|
||||
}
|
||||
if !result.Applied {
|
||||
t.Fatalf("Applied = false, want true")
|
||||
}
|
||||
if restartCalls != 1 {
|
||||
t.Fatalf("restartCalls = %d, want 1", restartCalls)
|
||||
}
|
||||
if activeChecks != 1 {
|
||||
t.Fatalf("activeChecks = %d, want 1", activeChecks)
|
||||
}
|
||||
if listenChecks != 1 {
|
||||
t.Fatalf("listenChecks = %d, want 1", listenChecks)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureConfigRetriesFailedUpdate(t *testing.T) {
|
||||
restoreTestHooks(t)
|
||||
|
||||
attempts := 0
|
||||
readFile = func(path string) ([]byte, error) {
|
||||
attempts++
|
||||
if attempts == 1 {
|
||||
return nil, errors.New("temporary read failure")
|
||||
}
|
||||
return []byte(templateConfig), nil
|
||||
}
|
||||
writeFile = func(string, []byte, os.FileMode) error { return nil }
|
||||
removeFile = func(string) error { return nil }
|
||||
renameFile = func(string, string) error { return nil }
|
||||
runCommand = func(string, ...string) error { return nil }
|
||||
|
||||
result, err := EnsureConfig(state.Config{
|
||||
VMID: 5001,
|
||||
ContainerIP: "10.60.0.224",
|
||||
ContainerType: "game",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureConfig: %v", err)
|
||||
}
|
||||
if !result.Applied {
|
||||
t.Fatalf("Applied = false, want true after retry")
|
||||
}
|
||||
if attempts != 2 {
|
||||
t.Fatalf("attempts = %d, want 2", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureConfigRetriesRestartAfterFileChanged(t *testing.T) {
|
||||
restoreTestHooks(t)
|
||||
|
||||
restarts := 0
|
||||
readFile = func(path string) ([]byte, error) {
|
||||
if restarts == 0 {
|
||||
return []byte(templateConfig), nil
|
||||
}
|
||||
labels, err := Labels(state.Config{VMID: 5001, ContainerIP: "10.60.0.224", ContainerType: "game"})
|
||||
if err != nil {
|
||||
t.Fatalf("Labels: %v", err)
|
||||
}
|
||||
rendered, err := ReplaceExternalLabelsBlock(templateConfig, labels)
|
||||
if err != nil {
|
||||
t.Fatalf("ReplaceExternalLabelsBlock: %v", err)
|
||||
}
|
||||
return []byte(rendered), nil
|
||||
}
|
||||
writeFile = func(string, []byte, os.FileMode) error { return nil }
|
||||
removeFile = func(string) error { return nil }
|
||||
renameFile = func(string, string) error { return nil }
|
||||
runCommand = func(name string, args ...string) error {
|
||||
if name == "systemctl" && strings.Join(args, " ") == "restart alloy" {
|
||||
restarts++
|
||||
if restarts == 1 {
|
||||
return errors.New("temporary restart failure")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
result, err := EnsureConfig(state.Config{
|
||||
VMID: 5001,
|
||||
ContainerIP: "10.60.0.224",
|
||||
ContainerType: "game",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureConfig: %v", err)
|
||||
}
|
||||
if !result.Applied {
|
||||
t.Fatalf("Applied = false, want true after restart retry")
|
||||
}
|
||||
if restarts != 2 {
|
||||
t.Fatalf("restarts = %d, want 2", restarts)
|
||||
}
|
||||
}
|
||||
|
||||
// assertLabel fails the test immediately unless labels[key] equals want. A
// missing key is compared as the empty string.
func assertLabel(t *testing.T, labels map[string]string, key, want string) {
	t.Helper()

	got := labels[key]
	if got == want {
		return
	}
	t.Fatalf("labels[%q] = %q, want %q", key, got, want)
}
|
||||
|
||||
func restoreTestHooks(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
oldLocalIP := localIPFunc
|
||||
oldRunCommand := runCommand
|
||||
oldDialTimeout := dialTimeout
|
||||
oldSleep := sleepFunc
|
||||
oldWriteFile := writeFile
|
||||
oldReadFile := readFile
|
||||
oldRemoveFile := removeFile
|
||||
oldRenameFile := renameFile
|
||||
|
||||
t.Cleanup(func() {
|
||||
localIPFunc = oldLocalIP
|
||||
runCommand = oldRunCommand
|
||||
dialTimeout = oldDialTimeout
|
||||
sleepFunc = oldSleep
|
||||
writeFile = oldWriteFile
|
||||
readFile = oldReadFile
|
||||
removeFile = oldRemoveFile
|
||||
renameFile = oldRenameFile
|
||||
})
|
||||
|
||||
localIPFunc = func() (string, error) { return "10.60.0.200", nil }
|
||||
runCommand = func(string, ...string) error { return nil }
|
||||
dialTimeout = func(string, string, time.Duration) (net.Conn, error) {
|
||||
left, right := net.Pipe()
|
||||
_ = right.Close()
|
||||
return left, nil
|
||||
}
|
||||
sleepFunc = func(time.Duration) {}
|
||||
}
|
||||
@ -12,6 +12,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"zlh-agent/internal/alloy"
|
||||
agentfiles "zlh-agent/internal/files"
|
||||
agenthandlers "zlh-agent/internal/handlers"
|
||||
mcstatus "zlh-agent/internal/minecraft"
|
||||
@ -49,6 +50,17 @@ func dirExists(path string) bool {
|
||||
return err == nil && s.IsDir()
|
||||
}
|
||||
|
||||
var (
|
||||
provisionAll = provision.ProvisionAll
|
||||
devIsProvisioned = devcontainer.IsProvisioned
|
||||
devRuntimeInstalled = devcontainer.RuntimeInstalled
|
||||
codeServerInstall = codeserver.Install
|
||||
codeServerStart = codeserver.Start
|
||||
codeServerVerify = codeserver.Verify
|
||||
codeServerInstalled = codeserver.Installed
|
||||
codeServerRunning = codeserver.Running
|
||||
)
|
||||
|
||||
func lifecycleLog(cfg *state.Config, phase string, attempt int, started time.Time, format string, args ...any) {
|
||||
elapsed := time.Since(started).Milliseconds()
|
||||
msg := fmt.Sprintf(format, args...)
|
||||
@ -96,7 +108,7 @@ func runProvisionPipeline(cfg *state.Config) error {
|
||||
state.SetState(state.StateInstalling)
|
||||
state.SetInstallStep("provision_all")
|
||||
|
||||
if err := provision.ProvisionAll(*cfg); err != nil {
|
||||
if err := provisionAll(*cfg); err != nil {
|
||||
state.SetError(err)
|
||||
state.SetState(state.StateError)
|
||||
return err
|
||||
@ -125,7 +137,7 @@ func ensureProvisioned(cfg *state.Config) error {
|
||||
|
||||
if cfg.ContainerType == "dev" {
|
||||
|
||||
if !devcontainer.IsProvisioned() || !devcontainer.RuntimeInstalled(cfg.Runtime, cfg.Version) {
|
||||
if !devIsProvisioned() || !devRuntimeInstalled(cfg.Runtime, cfg.Version) {
|
||||
if err := runProvisionPipeline(cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -152,6 +164,10 @@ func ensureProvisioned(cfg *state.Config) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := ensureDevCodeServer(cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state.SetState(state.StateIdle)
|
||||
state.SetError(nil)
|
||||
return nil
|
||||
@ -188,6 +204,42 @@ func ensureProvisioned(cfg *state.Config) error {
|
||||
return runProvisionPipeline(cfg)
|
||||
}
|
||||
|
||||
func ensureDevCodeServer(cfg *state.Config) error {
|
||||
if cfg == nil || !devCodeServerRequested(*cfg) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if !codeServerInstalled() {
|
||||
if err := codeServerInstall(*cfg); err != nil {
|
||||
return fmt.Errorf("code-server install failed: %w", err)
|
||||
}
|
||||
}
|
||||
if !codeServerRunning() {
|
||||
if err := codeServerStart(*cfg); err != nil {
|
||||
return fmt.Errorf("code-server start failed: %w", err)
|
||||
}
|
||||
}
|
||||
if err := codeServerVerify(); err != nil {
|
||||
return fmt.Errorf("code-server verification failed: %w", err)
|
||||
}
|
||||
if !codeServerRunning() {
|
||||
return fmt.Errorf("code-server did not stay running")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func devCodeServerRequested(cfg state.Config) bool {
|
||||
if cfg.EnableCodeServer {
|
||||
return true
|
||||
}
|
||||
for _, addon := range cfg.Addons {
|
||||
if strings.EqualFold(addon, "codeserver") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/*
|
||||
--------------------------------------------------------------------------
|
||||
/config — the REAL provisioning trigger (async)
|
||||
@ -220,6 +272,9 @@ func handleConfig(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "save config failed: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if _, err := alloy.EnsureConfig(cfg); err != nil {
|
||||
log.Printf("[http] vmid=%d action=config status=alloy_failed non_fatal=true err=%v", cfg.VMID, err)
|
||||
}
|
||||
|
||||
go func(c state.Config) {
|
||||
defer endOp()
|
||||
|
||||
125
internal/http/agent_test.go
Normal file
125
internal/http/agent_test.go
Normal file
@ -0,0 +1,125 @@
|
||||
package agenthttp
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"zlh-agent/internal/state"
|
||||
)
|
||||
|
||||
func TestEnsureDevCodeServerInstallsAndStartsWhenRequested(t *testing.T) {
|
||||
restoreCodeServerTestHooks(t)
|
||||
|
||||
cfg := &state.Config{
|
||||
ContainerType: "dev",
|
||||
Runtime: "node",
|
||||
EnableCodeServer: true,
|
||||
}
|
||||
|
||||
installed := false
|
||||
running := false
|
||||
installCalls := 0
|
||||
startCalls := 0
|
||||
verifyCalls := 0
|
||||
|
||||
codeServerInstalled = func() bool { return installed }
|
||||
codeServerRunning = func() bool { return running }
|
||||
codeServerInstall = func(state.Config) error {
|
||||
installCalls++
|
||||
installed = true
|
||||
return nil
|
||||
}
|
||||
codeServerStart = func(state.Config) error {
|
||||
startCalls++
|
||||
running = true
|
||||
return nil
|
||||
}
|
||||
codeServerVerify = func() error {
|
||||
verifyCalls++
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := ensureDevCodeServer(cfg); err != nil {
|
||||
t.Fatalf("ensureDevCodeServer: %v", err)
|
||||
}
|
||||
if installCalls != 1 {
|
||||
t.Fatalf("installCalls = %d, want 1", installCalls)
|
||||
}
|
||||
if startCalls != 1 {
|
||||
t.Fatalf("startCalls = %d, want 1", startCalls)
|
||||
}
|
||||
if verifyCalls != 1 {
|
||||
t.Fatalf("verifyCalls = %d, want 1", verifyCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureDevCodeServerStartsInstalledStoppedAddon(t *testing.T) {
|
||||
restoreCodeServerTestHooks(t)
|
||||
|
||||
cfg := &state.Config{
|
||||
ContainerType: "dev",
|
||||
Runtime: "node",
|
||||
Addons: []string{"codeserver"},
|
||||
}
|
||||
|
||||
running := false
|
||||
installCalls := 0
|
||||
startCalls := 0
|
||||
|
||||
codeServerInstalled = func() bool { return true }
|
||||
codeServerRunning = func() bool { return running }
|
||||
codeServerInstall = func(state.Config) error {
|
||||
installCalls++
|
||||
return nil
|
||||
}
|
||||
codeServerStart = func(state.Config) error {
|
||||
startCalls++
|
||||
running = true
|
||||
return nil
|
||||
}
|
||||
codeServerVerify = func() error { return nil }
|
||||
|
||||
if err := ensureDevCodeServer(cfg); err != nil {
|
||||
t.Fatalf("ensureDevCodeServer: %v", err)
|
||||
}
|
||||
if installCalls != 0 {
|
||||
t.Fatalf("installCalls = %d, want 0", installCalls)
|
||||
}
|
||||
if startCalls != 1 {
|
||||
t.Fatalf("startCalls = %d, want 1", startCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureDevCodeServerSkipsWhenNotRequested(t *testing.T) {
|
||||
restoreCodeServerTestHooks(t)
|
||||
|
||||
called := false
|
||||
codeServerInstalled = func() bool {
|
||||
called = true
|
||||
return false
|
||||
}
|
||||
|
||||
if err := ensureDevCodeServer(&state.Config{ContainerType: "dev", Runtime: "node"}); err != nil {
|
||||
t.Fatalf("ensureDevCodeServer: %v", err)
|
||||
}
|
||||
if called {
|
||||
t.Fatalf("code-server hooks were called for config without code-server request")
|
||||
}
|
||||
}
|
||||
|
||||
func restoreCodeServerTestHooks(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
oldInstall := codeServerInstall
|
||||
oldStart := codeServerStart
|
||||
oldVerify := codeServerVerify
|
||||
oldInstalled := codeServerInstalled
|
||||
oldRunning := codeServerRunning
|
||||
|
||||
t.Cleanup(func() {
|
||||
codeServerInstall = oldInstall
|
||||
codeServerStart = oldStart
|
||||
codeServerVerify = oldVerify
|
||||
codeServerInstalled = oldInstalled
|
||||
codeServerRunning = oldRunning
|
||||
})
|
||||
}
|
||||
@ -31,7 +31,7 @@ func Running() bool {
|
||||
pid, convErr := strconv.Atoi(strings.TrimSpace(string(raw)))
|
||||
if convErr == nil && pid > 0 {
|
||||
process, findErr := os.FindProcess(pid)
|
||||
if findErr == nil && process.Signal(syscall.Signal(0)) == nil {
|
||||
if findErr == nil && process.Signal(syscall.Signal(0)) == nil && pidMatchesCodeServer(pid) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
@ -79,7 +79,7 @@ func findRunningPID() (int, error) {
|
||||
if !strings.Contains(cmdline, "code-server") {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(cmdline, "--bind-addr 0.0.0.0:6000") {
|
||||
if !strings.Contains(cmdline, "0.0.0.0:6000") {
|
||||
continue
|
||||
}
|
||||
|
||||
@ -94,16 +94,33 @@ func findRunningPID() (int, error) {
|
||||
return 0, fmt.Errorf("code-server process not found")
|
||||
}
|
||||
|
||||
// pidMatchesCodeServer reports whether the process with the given pid looks
// like the managed code-server instance: its /proc/<pid>/cmdline must contain
// both "code-server" and "0.0.0.0:6000". Non-positive pids and unreadable or
// empty cmdline files yield false.
func pidMatchesCodeServer(pid int) bool {
	if pid <= 0 {
		return false
	}

	cmdlinePath := filepath.Join("/proc", strconv.Itoa(pid), "cmdline")
	data, err := os.ReadFile(cmdlinePath)
	if err != nil {
		return false
	}
	if len(data) == 0 {
		return false
	}

	// Arguments in cmdline are NUL-separated; turn them into one
	// space-separated line before searching.
	joined := strings.ReplaceAll(string(data), "\x00", " ")
	for _, needle := range []string{"code-server", "0.0.0.0:6000"} {
		if !strings.Contains(joined, needle) {
			return false
		}
	}
	return true
}
|
||||
|
||||
func Start(cfg state.Config) error {
|
||||
if !Installed() {
|
||||
return fmt.Errorf("code-server addon not installed")
|
||||
}
|
||||
if Running() {
|
||||
if err := Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return executil.RunEmbeddedScript("addons/codeserver/install.sh", launchEnv(cfg)...)
|
||||
if err := executil.RunEmbeddedScript("addons/codeserver/start.sh", launchEnv(cfg)...); err != nil {
|
||||
return err
|
||||
}
|
||||
if !Running() {
|
||||
return fmt.Errorf("code-server did not stay running")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func Stop() error {
|
||||
|
||||
@ -151,10 +151,10 @@ func ProvisionAll(cfg state.Config) error {
|
||||
/* ---------------------------------------------------------
|
||||
ADDONS (OPTIONAL, ROLE-AGNOSTIC)
|
||||
--------------------------------------------------------- */
|
||||
if cfg.ContainerType == "dev" && cfg.EnableCodeServer {
|
||||
if strings.EqualFold(cfg.ContainerType, "dev") && codeServerRequested(cfg) {
|
||||
seen := false
|
||||
for _, addon := range cfg.Addons {
|
||||
if addon == "codeserver" {
|
||||
if strings.EqualFold(addon, "codeserver") {
|
||||
seen = true
|
||||
break
|
||||
}
|
||||
@ -173,6 +173,18 @@ func ProvisionAll(cfg state.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// codeServerRequested reports whether the config asks for the code-server
// addon: either cfg.EnableCodeServer is set, or cfg.Addons contains an entry
// equal to "codeserver" under case-insensitive comparison.
func codeServerRequested(cfg state.Config) bool {
|
||||
if cfg.EnableCodeServer {
|
||||
return true
|
||||
}
|
||||
for _, addon := range cfg.Addons {
|
||||
if strings.EqualFold(addon, "codeserver") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func normalizeMinecraftConfig(cfg *state.Config) {
|
||||
if cfg == nil {
|
||||
return
|
||||
|
||||
@ -18,6 +18,7 @@ type Config struct {
|
||||
VMID int `json:"vmid"`
|
||||
|
||||
// Container identity
|
||||
ContainerIP string `json:"container_ip,omitempty"`
|
||||
ContainerType string `json:"container_type,omitempty"`
|
||||
|
||||
// Dev runtime (only for dev containers)
|
||||
|
||||
@ -359,3 +359,96 @@
|
||||
2026/04/16 17:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 18:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 18:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 19:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 19:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 20:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 20:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 21:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 21:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 22:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 22:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 23:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/16 23:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 00:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 00:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 01:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 01:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 02:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 02:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 03:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 03:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 04:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 04:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 05:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 05:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 06:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 06:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 07:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 07:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 08:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 08:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 09:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 09:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 10:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 10:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 11:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 11:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 12:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 12:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 13:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 13:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 14:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 14:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 15:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 15:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 16:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 16:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 17:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 17:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 18:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 18:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 19:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 19:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 20:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 20:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 21:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 21:37:24 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 22:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 22:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 23:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/17 23:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 00:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 00:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 01:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 01:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 02:06:32 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 02:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 03:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 03:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 04:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 04:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 05:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 05:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 06:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 06:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 07:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 07:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 08:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 08:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 09:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 09:36:32 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 10:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 10:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 11:06:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 11:36:31 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 12:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 12:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 13:06:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 13:36:30 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
2026/04/18 13:52:39 [agent] file logging enabled
|
||||
2026/04/18 13:52:39 [agent] lifecycle logging enabled
|
||||
2026/04/18 13:52:39 [agent] routes registered
|
||||
2026/04/18 13:52:39 [autostart] disabled (ok)
|
||||
2026/04/18 13:52:39 [update] periodic checks enabled (mode=notify interval=30m0s)
|
||||
2026/04/18 13:52:39 [agent] listening on :18888
|
||||
2026/04/18 13:52:52 [update] notify check failed status=error current=0.0.0-dev target= err=Get "http://10.60.0.251:8080/agents/manifest.json": dial tcp 10.60.0.251:8080: connect: no route to host
|
||||
|
||||
@ -12,8 +12,6 @@ ARTIFACT_NAME="${ZLH_CODESERVER_ARTIFACT:-code-server.tar.gz}"
|
||||
ARTIFACT_URL="${ZLH_ARTIFACT_BASE_URL%/}/addons/code-server/${ARTIFACT_NAME}"
|
||||
ARTIFACT_TMP="/tmp/${ARTIFACT_NAME}"
|
||||
MARKER="/opt/zlh/.zlh/addons/code-server.installed"
|
||||
PID_FILE="/opt/zlh/.zlh/addons/code-server.pid"
|
||||
LOG_FILE="/opt/zlh/logs/code-server.log"
|
||||
WORKSPACE_DIR="${CODE_SERVER_WORKSPACE:-/home/dev/workspace}"
|
||||
PORT="${CODE_SERVER_PORT:-6000}"
|
||||
BIN="${SERVICE_ROOT}/bin/code-server"
|
||||
@ -22,7 +20,6 @@ CONFIG_DIR="/home/dev/.config/code-server"
|
||||
CONFIG_FILE="${CONFIG_DIR}/config.yaml"
|
||||
|
||||
mkdir -p "$(dirname "${MARKER}")"
|
||||
mkdir -p "$(dirname "${LOG_FILE}")"
|
||||
|
||||
download_artifact() {
|
||||
echo "[code-server] action=artifact_fetch step=download url=${ARTIFACT_URL} target=${ARTIFACT_TMP}"
|
||||
@ -86,7 +83,6 @@ write_config() {
|
||||
bind-addr: 0.0.0.0:${PORT}
|
||||
auth: none
|
||||
disable-telemetry: true
|
||||
proxy-domain: ""
|
||||
EOF
|
||||
chown -R dev:dev "${CONFIG_DIR}" 2>/dev/null || true
|
||||
echo "[code-server] action=config_write path=${CONFIG_FILE} status=ok"
|
||||
@ -109,16 +105,6 @@ fi
|
||||
mkdir -p "${WORKSPACE_DIR}"
|
||||
write_config
|
||||
|
||||
if [ -f "${PID_FILE}" ] && kill -0 "$(cat "${PID_FILE}")" 2>/dev/null; then
|
||||
echo "[code-server] already running"
|
||||
else
|
||||
rm -f "${PID_FILE}"
|
||||
echo "[code-server] action=service_launch command=\"${BIN} --bind-addr 0.0.0.0:${PORT} --auth none --disable-telemetry ${WORKSPACE_DIR}\""
|
||||
HOME="/home/dev" USER="dev" LOGNAME="dev" \
|
||||
nohup "${BIN}" --bind-addr "0.0.0.0:${PORT}" --auth none --disable-telemetry "${WORKSPACE_DIR}" >"${LOG_FILE}" 2>&1 &
|
||||
echo $! > "${PID_FILE}"
|
||||
fi
|
||||
|
||||
touch "${MARKER}"
|
||||
rm -f "${ARTIFACT_TMP}"
|
||||
|
||||
|
||||
109
scripts/addons/codeserver/start.sh
Normal file
109
scripts/addons/codeserver/start.sh
Normal file
@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env bash
|
||||
# Abort on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -euo pipefail
|
||||
|
||||
echo "[code-server] starting service"
|
||||
|
||||
# Fixed install locations; the workspace directory and port can be overridden
# through CODE_SERVER_WORKSPACE and CODE_SERVER_PORT.
SERVICE_ROOT="/opt/zlh/services/code-server"
|
||||
MARKER="/opt/zlh/.zlh/addons/code-server.installed"
|
||||
PID_FILE="/opt/zlh/.zlh/addons/code-server.pid"
|
||||
LOG_FILE="/opt/zlh/logs/code-server.log"
|
||||
WORKSPACE_DIR="${CODE_SERVER_WORKSPACE:-/home/dev/workspace}"
|
||||
PORT="${CODE_SERVER_PORT:-6000}"
|
||||
BIN="${SERVICE_ROOT}/bin/code-server"
|
||||
CONFIG_DIR="/home/dev/.config/code-server"
|
||||
CONFIG_FILE="${CONFIG_DIR}/config.yaml"
|
||||
|
||||
# Precondition: the marker file must exist, otherwise exit with status 1.
if [ ! -f "${MARKER}" ]; then
|
||||
echo "[code-server][ERROR] addon marker missing at ${MARKER}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Precondition: the code-server binary must exist and be executable.
if [ ! -x "${BIN}" ]; then
|
||||
echo "[code-server][ERROR] binary missing or not executable at ${BIN}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# is_code_server_pid PID
# Returns 0 when /proc/PID/cmdline is readable and its contents contain
# "code-server" followed somewhere later by "0.0.0.0:${PORT}".
# Returns 1 when PID is empty, the cmdline file is not readable, or the
# pattern does not match.
is_code_server_pid() {
|
||||
local pid="$1"
|
||||
if [ -z "${pid}" ] || [ ! -r "/proc/${pid}/cmdline" ]; then
|
||||
return 1
|
||||
fi
|
||||
local cmdline
|
||||
# The cmdline file uses NUL bytes between arguments; convert them to spaces
|
||||
# so the glob match below sees one flat string.
cmdline="$(tr '\000' ' ' < "/proc/${pid}/cmdline")"
|
||||
case "${cmdline}" in
|
||||
*code-server*"0.0.0.0:${PORT}"*) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
# find_running_pid
# Walks every /proc/<number>/cmdline entry and prints the first PID for which
# is_code_server_pid succeeds, returning 0. Returns 1 and prints nothing when
# no entry matches.
find_running_pid() {
|
||||
local cmdline_path pid
|
||||
for cmdline_path in /proc/[0-9]*/cmdline; do
|
||||
# The PID is the name of the directory that holds the cmdline file.
pid="$(basename "$(dirname "${cmdline_path}")")"
|
||||
if is_code_server_pid "${pid}"; then
|
||||
echo "${pid}"
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
# Create the directories that hold the PID file, the log file, the workspace
# and the config. The chown calls are best-effort: their errors are discarded
# and "|| true" keeps "set -e" from aborting the script.
mkdir -p "$(dirname "${PID_FILE}")"
|
||||
mkdir -p "$(dirname "${LOG_FILE}")"
|
||||
mkdir -p "${WORKSPACE_DIR}"
|
||||
mkdir -p "${CONFIG_DIR}"
|
||||
chown -R dev:dev "${WORKSPACE_DIR}" 2>/dev/null || true
|
||||
touch "${LOG_FILE}"
|
||||
chown dev:dev "${LOG_FILE}" 2>/dev/null || true
|
||||
# Overwrite the code-server config on every start: bind to all interfaces on
# ${PORT}, no authentication, telemetry disabled.
cat > "${CONFIG_FILE}" <<EOF
|
||||
bind-addr: 0.0.0.0:${PORT}
|
||||
auth: none
|
||||
disable-telemetry: true
|
||||
EOF
|
||||
chown -R dev:dev "${CONFIG_DIR}" 2>/dev/null || true
|
||||
|
||||
# If the PID file exists and the recorded PID still passes is_code_server_pid,
# there is nothing to do: exit successfully.
if [ -f "${PID_FILE}" ] && is_code_server_pid "$(cat "${PID_FILE}")"; then
|
||||
echo "[code-server] already running"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Remove the PID file (if any) before launching a new process.
rm -f "${PID_FILE}"
|
||||
echo "[code-server] action=service_launch command=\"${BIN} --bind-addr 0.0.0.0:${PORT} --auth none --disable-telemetry ${WORKSPACE_DIR}\""
|
||||
|
||||
# When a "dev" user exists, launch code-server through su as that user and
# capture the background PID echoed by the inner shell; otherwise launch it
# directly from this shell with HOME/USER/LOGNAME set to the dev values.
if id dev >/dev/null 2>&1; then
|
||||
pid_tmp="${PID_FILE}.tmp"
|
||||
rm -f "${pid_tmp}"
|
||||
# NOTE(review): BIN, PORT, WORKSPACE_DIR and LOG_FILE are interpolated into
|
||||
# the su -c command string inside single quotes; a value containing a single
|
||||
# quote would break that quoting. Confirm these inputs are trusted.
su -s /bin/sh dev -c "HOME=/home/dev USER=dev LOGNAME=dev nohup '${BIN}' --bind-addr '0.0.0.0:${PORT}' --auth none --disable-telemetry '${WORKSPACE_DIR}' >'${LOG_FILE}' 2>&1 & echo \$!" > "${pid_tmp}"
|
||||
# The PID is written to a temporary file first and then moved into place.
mv "${pid_tmp}" "${PID_FILE}"
|
||||
else
|
||||
HOME="/home/dev" USER="dev" LOGNAME="dev" \
|
||||
nohup "${BIN}" --bind-addr "0.0.0.0:${PORT}" --auth none --disable-telemetry "${WORKSPACE_DIR}" >"${LOG_FILE}" 2>&1 &
|
||||
echo $! > "${PID_FILE}"
|
||||
fi
|
||||
|
||||
# Wait before verifying the launch (presumably to give code-server time to
# either come up or fail).
sleep 2
|
||||
recorded_pid=""
|
||||
if [ -f "${PID_FILE}" ]; then
|
||||
recorded_pid="$(cat "${PID_FILE}")"
|
||||
fi
|
||||
# If the recorded PID does not look like code-server, scan /proc for a
# matching process and record that PID instead. "|| true" keeps a failed scan
# from aborting the script under "set -e".
if ! is_code_server_pid "${recorded_pid}"; then
|
||||
discovered_pid="$(find_running_pid || true)"
|
||||
if [ -n "${discovered_pid}" ]; then
|
||||
echo "${discovered_pid}" > "${PID_FILE}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Re-read the PID file (it may have just been rewritten above) and make the
# final decision: on failure print the last 40 log lines, remove the PID file
# and exit with status 1.
recorded_pid=""
|
||||
if [ -f "${PID_FILE}" ]; then
|
||||
recorded_pid="$(cat "${PID_FILE}")"
|
||||
fi
|
||||
if ! is_code_server_pid "${recorded_pid}"; then
|
||||
echo "[code-server][ERROR] process exited after launch"
|
||||
if [ -f "${LOG_FILE}" ]; then
|
||||
tail -n 40 "${LOG_FILE}" || true
|
||||
fi
|
||||
rm -f "${PID_FILE}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[code-server] action=service_launch status=ok pid=$(cat "${PID_FILE}")"
|
||||
@ -2,5 +2,5 @@
|
||||
"status": "error",
|
||||
"current": "0.0.0-dev",
|
||||
"error": "Get \"http://10.60.0.251:8080/agents/manifest.json\": dial tcp 10.60.0.251:8080: connect: no route to host",
|
||||
"checked_at_utc": "2026-04-16T18:36:27Z"
|
||||
"checked_at_utc": "2026-04-18T13:52:49Z"
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user