feat(index-builder): merge latest site build artifact before Pages deploy (§8.4, §11.3)

Add crane CLI to the runtime Dockerfile so the index builder can pull the
latest SPA shell from the Forgejo container registry on each cycle. The
new syncSiteBuild logic checks for a newer image digest, extracts
the dist/ assets via crane export, and overlays generated JSON data files
on top before deploying to Cloudflare Pages.

- Dockerfile: install go-containerregistry crane binary (v0.20.2)
- sitebuild.go: new file with syncSiteBuild, craneDigest, craneExport,
  digest caching, fallback to baked-in /app/web/dist
- main.go: wire initCraneAuth at startup, replace hardcoded webDistDir
  with syncSiteBuild call in runBuildCycle
- sitebuild_test.go: 16 tests for extractRegistry, digest caching,
  fallback logic, crane auth config, and copyWebAssets overlay behavior

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
jedarden 2026-04-22 17:52:03 -04:00
parent b2e9ba8319
commit 32d7dd07e7
4 changed files with 468 additions and 2 deletions

View file

@ -30,6 +30,14 @@ FROM node:22-alpine
# Install wrangler CLI for Cloudflare Pages deployment
RUN npm install -g wrangler@3
# Install crane for pulling site build images from container registry.
# The crane version is a build argument (default v0.20.2). curl is installed
# only for the download and removed again in the same RUN step; only the
# `crane` member of the release tarball is unpacked into /usr/local/bin.
# NOTE(review): the release asset name hard-codes Linux_x86_64, so this step
# presumably only yields a usable binary on amd64 builds — confirm the target
# platforms. The download is also not checksum-verified.
ARG CRANE_VERSION=v0.20.2
RUN apk add --no-cache curl && \
curl -fsSL "https://github.com/google/go-containerregistry/releases/download/${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz" \
| tar -xz -C /usr/local/bin crane && \
chmod +x /usr/local/bin/crane && \
apk del curl
# Install ca-certificates for HTTPS calls
RUN apk add --no-cache ca-certificates tzdata

View file

@ -58,6 +58,11 @@ func main() {
os.Exit(1)
}
// Initialize crane auth for pulling site build images from the registry
if err := initCraneAuth(cfg); err != nil {
slog.Warn("Failed to initialize crane auth, site builds will use baked-in assets", "error", err)
}
// Handle graceful shutdown
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT)
@ -143,8 +148,9 @@ func runBuildCycle(ctx context.Context, db *sql.DB, cfg *Config) error {
}
}
// Copy web frontend assets into output directory
const webDistDir = "/app/web/dist"
// Sync site build from registry (if configured) and copy into output directory.
// Falls back to baked-in assets when registry is unreachable.
webDistDir := syncSiteBuild(ctx, cfg)
if _, err := os.Stat(webDistDir); err == nil {
if err := copyWebAssets(cfg, webDistDir); err != nil {
slog.Error("Failed to copy web assets", "error", err)

View file

@ -0,0 +1,185 @@
package main
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strings"
)
// Filesystem locations used by the site build sync. They are variables rather
// than constants; the tests reassign them to point at temporary directories.
var (
	// craneConfigDir is where initCraneAuth writes config.json; craneEnviron
	// passes it to crane as DOCKER_CONFIG when that file exists.
	craneConfigDir = "/tmp/crane-config"
	// siteBuildDigestFile is the name of the digest cache file that
	// readCachedDigest/writeCachedDigest place inside the output directory.
	siteBuildDigestFile = ".site-build-digest"
	// siteBuildExtractDir is the directory craneExport unpacks the image
	// filesystem into.
	siteBuildExtractDir = "/tmp/acb-site-build"
	// bakedInWebDist is the asset directory returned whenever no registry
	// build is configured or available.
	bakedInWebDist = "/app/web/dist"
)
// initCraneAuth prepares registry credentials for crane by writing a
// Docker-style config.json into craneConfigDir.
//
// It does nothing and returns nil when either the registry username or the
// site build image is unset. Otherwise it creates craneConfigDir (mode 0700)
// and writes config.json (mode 0600) with a single "auths" entry keyed by
// extractRegistry(cfg.SiteBuildImage) whose "auth" value is the base64 of
// "username:password". Directory creation, marshalling, and write failures
// are returned to the caller.
func initCraneAuth(cfg *Config) error {
	if cfg.SiteBuildImage == "" || cfg.RegistryUsername == "" {
		return nil
	}

	if err := os.MkdirAll(craneConfigDir, 0700); err != nil {
		return fmt.Errorf("create crane config dir: %w", err)
	}

	credentials := cfg.RegistryUsername + ":" + cfg.RegistryPassword
	dockerConfig := map[string]map[string]map[string]string{
		"auths": {
			extractRegistry(cfg.SiteBuildImage): {
				"auth": base64.StdEncoding.EncodeToString([]byte(credentials)),
			},
		},
	}

	payload, err := json.Marshal(dockerConfig)
	if err != nil {
		return fmt.Errorf("marshal docker config: %w", err)
	}

	configPath := filepath.Join(craneConfigDir, "config.json")
	return os.WriteFile(configPath, payload, 0600)
}
// craneEnviron builds the environment for crane subprocesses: a copy of the
// current process environment, with DOCKER_CONFIG=<craneConfigDir> appended
// only when craneConfigDir/config.json can be stat'ed.
func craneEnviron() []string {
	environ := os.Environ()
	configPath := filepath.Join(craneConfigDir, "config.json")
	if _, statErr := os.Stat(configPath); statErr != nil {
		// No auth config on disk: run crane with the inherited environment.
		return environ
	}
	return append(environ, "DOCKER_CONFIG="+craneConfigDir)
}
// syncSiteBuild resolves the directory of web assets to use for this build
// cycle and returns its path.
//
// Resolution order:
//   - No site build image configured, or crane not on PATH: bakedInWebDist.
//   - Remote digest cannot be queried: fallbackWebDir (previous extraction if
//     present, otherwise baked-in assets).
//   - Remote digest equals the cached digest AND the extracted asset
//     directory still exists: the extracted directory, without re-exporting.
//   - Otherwise the image is exported again. On export failure, or when the
//     exported tree does not contain cfg.SiteBuildPath, fallbackWebDir is
//     used and the digest is NOT cached, so the next cycle retries.
//
// The function never returns an error; every failure is logged and degraded
// to a fallback directory.
func syncSiteBuild(ctx context.Context, cfg *Config) string {
	if cfg.SiteBuildImage == "" {
		return bakedInWebDist
	}
	if _, err := exec.LookPath("crane"); err != nil {
		slog.Warn("crane not found in PATH, using baked-in web assets")
		return bakedInWebDist
	}

	remoteDigest, err := craneDigest(ctx, cfg)
	if err != nil {
		slog.Warn("Failed to query remote site build digest, using cached or baked-in assets", "error", err)
		return fallbackWebDir(cfg)
	}

	distPath := extractedDistPath(cfg)
	cachedDigest := readCachedDigest(cfg.OutputDir)
	if cachedDigest == remoteDigest {
		if fi, err := os.Stat(distPath); err == nil && fi.IsDir() {
			slog.Debug("Site build image unchanged", "digest", remoteDigest)
			return distPath
		}
		// The digest cache is stored in cfg.OutputDir while the extraction
		// lives under siteBuildExtractDir, so the two can drift apart (for
		// example when the extraction directory has been removed). A matching
		// digest alone does not prove the assets are still on disk.
		slog.Info("Site build digest unchanged but extracted assets are missing, re-extracting",
			"image", cfg.SiteBuildImage,
			"digest", remoteDigest,
			"path", distPath,
		)
	} else {
		slog.Info("New site build image detected",
			"image", cfg.SiteBuildImage,
			"old_digest", cachedDigest,
			"new_digest", remoteDigest,
		)
	}

	if err := craneExport(ctx, cfg); err != nil {
		slog.Error("Failed to extract site build image", "error", err)
		return fallbackWebDir(cfg)
	}

	// Guard against an image that exported cleanly but does not contain the
	// configured asset path; returning it would make the caller skip web
	// assets entirely instead of using the baked-in copy.
	if fi, err := os.Stat(distPath); err != nil || !fi.IsDir() {
		slog.Error("Extracted site build image does not contain the configured asset path", "path", distPath)
		return fallbackWebDir(cfg)
	}

	writeCachedDigest(cfg.OutputDir, remoteDigest)
	return distPath
}
// extractedDistPath joins siteBuildExtractDir with cfg.SiteBuildPath, giving
// the location of the web assets inside the extracted image filesystem. It
// does not check that the path exists.
func extractedDistPath(cfg *Config) string {
	assetSubPath := cfg.SiteBuildPath
	return filepath.Join(siteBuildExtractDir, assetSubPath)
}
// craneDigest runs `crane digest <image>` for cfg.SiteBuildImage using the
// environment from craneEnviron and returns the command's stdout with
// surrounding whitespace trimmed. A failing command yields an empty string
// and a wrapped error naming the image.
func craneDigest(ctx context.Context, cfg *Config) (string, error) {
	image := cfg.SiteBuildImage

	digestCmd := exec.CommandContext(ctx, "crane", "digest", image)
	digestCmd.Env = craneEnviron()

	stdout, err := digestCmd.Output()
	if err != nil {
		return "", fmt.Errorf("crane digest %s: %w", image, err)
	}

	digest := strings.TrimSpace(string(stdout))
	return digest, nil
}
// craneExport streams the filesystem of cfg.SiteBuildImage from
// `crane export <image> -` into `tar -xf -` and installs the result at
// siteBuildExtractDir.
//
// The image is unpacked into a sibling staging directory first and is only
// swapped into place after both crane and tar have succeeded, so a failed or
// cancelled export leaves any previously extracted build untouched for
// fallbackWebDir to use.
//
// It returns a wrapped error when the directories cannot be prepared, when
// either process fails, or when the final swap fails.
func craneExport(ctx context.Context, cfg *Config) error {
	stagingDir := filepath.Clean(siteBuildExtractDir) + ".staging"
	if err := os.RemoveAll(stagingDir); err != nil {
		return fmt.Errorf("clean staging dir: %w", err)
	}
	if err := os.MkdirAll(stagingDir, 0755); err != nil {
		return fmt.Errorf("create extract dir: %w", err)
	}
	// Best-effort cleanup of leftovers on failure. After a successful rename
	// the staging path no longer exists and this is a no-op.
	defer os.RemoveAll(stagingDir)

	craneCmd := exec.CommandContext(ctx, "crane", "export", cfg.SiteBuildImage, "-")
	craneCmd.Env = craneEnviron()
	tarCmd := exec.CommandContext(ctx, "tar", "-xf", "-", "-C", stagingDir)

	pipe, err := craneCmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("crane pipe: %w", err)
	}
	tarCmd.Stdin = pipe

	if err := craneCmd.Start(); err != nil {
		return fmt.Errorf("start crane: %w", err)
	}

	tarErr := tarCmd.Run()
	if tarErr != nil {
		// tar has stopped reading, so crane could block forever writing to a
		// full pipe. Kill it so the Wait below returns; the error is ignored
		// because crane may already have exited.
		_ = craneCmd.Process.Kill()
	}
	// Always reap crane, even when tar failed, to release the process and
	// the pipe file descriptor.
	craneErr := craneCmd.Wait()

	if tarErr != nil {
		return fmt.Errorf("extract tar: %w", tarErr)
	}
	if craneErr != nil {
		return fmt.Errorf("crane export: %w", craneErr)
	}

	// Both processes succeeded: replace the previous extraction.
	if err := os.RemoveAll(siteBuildExtractDir); err != nil {
		return fmt.Errorf("remove previous extract dir: %w", err)
	}
	if err := os.Rename(stagingDir, siteBuildExtractDir); err != nil {
		return fmt.Errorf("install extract dir: %w", err)
	}

	slog.Info("Extracted site build image", "path", siteBuildExtractDir)
	return nil
}
// fallbackWebDir picks the asset directory to use when a fresh site build
// could not be obtained. A previously extracted dist directory wins if it
// exists and is a directory; in every other case bakedInWebDist is returned,
// with a warning logged when that path cannot be stat'ed either.
func fallbackWebDir(cfg *Config) string {
	extracted := extractedDistPath(cfg)
	info, statErr := os.Stat(extracted)
	if statErr == nil && info.IsDir() {
		slog.Info("Using previously extracted site build")
		return extracted
	}

	// Both remaining outcomes return the baked-in path; only the log differs.
	if _, bakedErr := os.Stat(bakedInWebDist); bakedErr != nil {
		slog.Warn("No web assets available")
	} else {
		slog.Info("Using baked-in web assets")
	}
	return bakedInWebDist
}
// readCachedDigest returns the whitespace-trimmed contents of the digest
// cache file inside outputDir, or "" when the file cannot be read.
func readCachedDigest(outputDir string) string {
	digestPath := filepath.Join(outputDir, siteBuildDigestFile)
	raw, readErr := os.ReadFile(digestPath)
	if readErr != nil {
		return ""
	}
	digest := strings.TrimSpace(string(raw))
	return digest
}
// writeCachedDigest stores digest, followed by a newline, in the digest cache
// file inside outputDir (mode 0644). A write failure is logged as a warning
// and otherwise ignored.
func writeCachedDigest(outputDir, digest string) {
	digestPath := filepath.Join(outputDir, siteBuildDigestFile)
	contents := []byte(digest + "\n")

	err := os.WriteFile(digestPath, contents, 0644)
	if err == nil {
		return
	}
	slog.Warn("Failed to cache site build digest", "error", err)
}
// extractRegistry derives the Docker config "auths" key for an image
// reference.
//
//	"forgejo.example.com/ns/image:tag" → "forgejo.example.com"
//	"nginx:latest"                      → "https://index.docker.io/v1/"
//
// The text before the first "/" is treated as a registry host only when it
// contains a dot; every other reference maps to the Docker Hub key.
// NOTE(review): a host:port registry without a dot (e.g.
// "localhost:5000/img") therefore also maps to the Docker Hub key — confirm
// this is intended.
func extractRegistry(imageRef string) string {
	const dockerHubAuthKey = "https://index.docker.io/v1/"

	host, _, hasPath := strings.Cut(imageRef, "/")
	if !hasPath || !strings.Contains(host, ".") {
		return dockerHubAuthKey
	}
	return host
}

View file

@ -0,0 +1,267 @@
package main
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
)
// TestExtractRegistry pins the mapping from image references to Docker config
// auth keys, including the references that fall through to the Docker Hub key.
func TestExtractRegistry(t *testing.T) {
	const hubKey = "https://index.docker.io/v1/"

	cases := []struct {
		ref      string
		expected string
	}{
		{ref: "forgejo.ardenone.com/ai-code-battle/acb-site-build:latest", expected: "forgejo.ardenone.com"},
		{ref: "forgejo.ardenone.com/ai-code-battle/acb-site-build", expected: "forgejo.ardenone.com"},
		{ref: "docker.io/library/nginx:latest", expected: "docker.io"},
		{ref: "nginx:latest", expected: hubKey},
		// First component has no dot, so it is not treated as a registry host.
		{ref: "localhost:5000/myimage:tag", expected: hubKey},
		{ref: "", expected: hubKey},
	}

	for i := range cases {
		tc := cases[i]
		if actual := extractRegistry(tc.ref); actual != tc.expected {
			t.Errorf("extractRegistry(%q) = %q, want %q", tc.ref, actual, tc.expected)
		}
	}
}
// TestReadWriteCachedDigest checks that a missing cache reads as "", and that
// each written digest (here passed with a trailing newline) is read back
// trimmed, including after an overwrite.
func TestReadWriteCachedDigest(t *testing.T) {
	dir := t.TempDir()

	if got := readCachedDigest(dir); got != "" {
		t.Errorf("expected empty digest for missing file, got %q", got)
	}

	steps := []struct {
		label   string
		written string
		want    string
	}{
		{label: "readCachedDigest", written: "sha256:abc123\n", want: "sha256:abc123"},
		{label: "readCachedDigest after overwrite", written: "sha256:def456\n", want: "sha256:def456"},
	}
	for _, step := range steps {
		writeCachedDigest(dir, step.written)
		if got := readCachedDigest(dir); got != step.want {
			t.Errorf("%s = %q, want %q", step.label, got, step.want)
		}
	}
}
// TestReadCachedDigest_InvalidPath checks that an unreadable cache location
// is reported as an empty digest.
func TestReadCachedDigest_InvalidPath(t *testing.T) {
	const missingDir = "/nonexistent/path/that/does/not/exist"
	if got := readCachedDigest(missingDir); got != "" {
		t.Errorf("expected empty digest for nonexistent dir, got %q", got)
	}
}
// TestWriteCachedDigest_InvalidPath exercises the write-failure path. The
// function returns nothing, so the only expectation is that the call
// completes without panicking.
func TestWriteCachedDigest_InvalidPath(t *testing.T) {
	const (
		unwritableDir = "/nonexistent/path"
		digest        = "sha256:abc"
	)
	writeCachedDigest(unwritableDir, digest)
}
// TestExtractedDistPath checks that the configured asset sub-path, single or
// nested, is joined onto siteBuildExtractDir.
func TestExtractedDistPath(t *testing.T) {
	for _, subPath := range []string{"dist", "build/output"} {
		cfg := &Config{SiteBuildPath: subPath}
		expected := filepath.Join(siteBuildExtractDir, subPath)
		if actual := extractedDistPath(cfg); actual != expected {
			t.Errorf("extractedDistPath() = %q, want %q", actual, expected)
		}
	}
}
// TestFallbackWebDir_NothingExists checks that the baked-in path is still
// returned when neither an extracted build nor the baked-in directory exists.
func TestFallbackWebDir_NothingExists(t *testing.T) {
	oldBakedIn := bakedInWebDist
	bakedInWebDist = filepath.Join(t.TempDir(), "baked-in")
	defer func() { bakedInWebDist = oldBakedIn }()

	// Point the extraction directory at an empty temp location as well.
	// Without this the result depends on whether the default extraction
	// directory happens to exist on the machine running the tests.
	oldExtractDir := siteBuildExtractDir
	siteBuildExtractDir = filepath.Join(t.TempDir(), "extract")
	defer func() { siteBuildExtractDir = oldExtractDir }()

	cfg := &Config{
		SiteBuildPath: "dist",
		OutputDir:     t.TempDir(),
	}
	got := fallbackWebDir(cfg)
	if got != bakedInWebDist {
		t.Errorf("fallbackWebDir() = %q, want %q (baked-in)", got, bakedInWebDist)
	}
}
// TestFallbackWebDir_ExtractedExists checks that an existing extracted dist
// directory is the one fallbackWebDir returns.
func TestFallbackWebDir_ExtractedExists(t *testing.T) {
	savedExtractDir := siteBuildExtractDir
	t.Cleanup(func() { siteBuildExtractDir = savedExtractDir })
	siteBuildExtractDir = filepath.Join(t.TempDir(), "extract")

	cfg := &Config{SiteBuildPath: "dist", OutputDir: t.TempDir()}

	want := extractedDistPath(cfg)
	if err := os.MkdirAll(want, 0755); err != nil {
		t.Fatal(err)
	}

	if got := fallbackWebDir(cfg); got != want {
		t.Errorf("fallbackWebDir() = %q, want %q (extracted)", got, want)
	}
}
// TestFallbackWebDir_BakedInExists checks that the baked-in directory is
// returned when it exists and no extracted build is present.
func TestFallbackWebDir_BakedInExists(t *testing.T) {
	savedBakedIn, savedExtractDir := bakedInWebDist, siteBuildExtractDir
	t.Cleanup(func() {
		bakedInWebDist = savedBakedIn
		siteBuildExtractDir = savedExtractDir
	})

	want := filepath.Join(t.TempDir(), "baked-in")
	bakedInWebDist = want
	// Never created, so the extracted-build branch cannot match.
	siteBuildExtractDir = filepath.Join(t.TempDir(), "extract")

	if err := os.MkdirAll(want, 0755); err != nil {
		t.Fatal(err)
	}

	cfg := &Config{SiteBuildPath: "dist", OutputDir: t.TempDir()}
	if got := fallbackWebDir(cfg); got != want {
		t.Errorf("fallbackWebDir() = %q, want %q (baked-in)", got, want)
	}
}
// TestInitCraneAuth_NoAuth checks that an empty username and image make
// initCraneAuth return nil.
func TestInitCraneAuth_NoAuth(t *testing.T) {
	unconfigured := &Config{RegistryUsername: "", SiteBuildImage: ""}
	err := initCraneAuth(unconfigured)
	if err != nil {
		t.Errorf("initCraneAuth with no auth should be no-op, got %v", err)
	}
}
// TestInitCraneAuth_WithAuth checks that configured credentials produce a
// config.json whose "auths" map has an entry for the image's registry host,
// that the entry carries the expected base64 credentials, and that the file
// is not accessible to group or other users.
func TestInitCraneAuth_WithAuth(t *testing.T) {
	tmpDir := t.TempDir()
	oldDir := craneConfigDir
	craneConfigDir = filepath.Join(tmpDir, "crane-cfg")
	defer func() { craneConfigDir = oldDir }()

	cfg := &Config{
		RegistryUsername: "testuser",
		RegistryPassword: "testpass",
		SiteBuildImage:   "forgejo.example.com/ns/image:tag",
	}
	if err := initCraneAuth(cfg); err != nil {
		t.Fatalf("initCraneAuth: %v", err)
	}

	configPath := filepath.Join(craneConfigDir, "config.json")
	info, err := os.Stat(configPath)
	if err != nil {
		t.Fatalf("stat config.json: %v", err)
	}
	// The file holds credentials, so group/other permission bits must be clear.
	if perm := info.Mode().Perm(); perm&0077 != 0 {
		t.Errorf("config.json permissions = %o, want no group/other access", perm)
	}

	data, err := os.ReadFile(configPath)
	if err != nil {
		t.Fatalf("read config.json: %v", err)
	}
	var config map[string]interface{}
	if err := json.Unmarshal(data, &config); err != nil {
		t.Fatalf("parse config.json: %v", err)
	}
	auths, ok := config["auths"].(map[string]interface{})
	if !ok {
		t.Fatal("config.json missing 'auths' key")
	}
	entry, ok := auths["forgejo.example.com"].(map[string]interface{})
	if !ok {
		t.Fatal("config.json missing registry entry for forgejo.example.com")
	}

	// Standard base64 encoding of "testuser:testpass".
	const wantAuth = "dGVzdHVzZXI6dGVzdHBhc3M="
	if got, _ := entry["auth"].(string); got != wantAuth {
		t.Errorf("auth for forgejo.example.com = %q, want %q", got, wantAuth)
	}
}
// TestInitCraneAuth_PasswordOnly checks that a password without a username
// makes initCraneAuth return nil.
func TestInitCraneAuth_PasswordOnly(t *testing.T) {
	passwordOnly := &Config{
		RegistryUsername: "",
		RegistryPassword: "pass",
		SiteBuildImage:   "img",
	}
	err := initCraneAuth(passwordOnly)
	if err != nil {
		t.Errorf("initCraneAuth with no username should be no-op, got %v", err)
	}
}
func TestCraneEnviron_NoConfig(t *testing.T) {
env := craneEnviron()
hasDockerConfig := false
for _, e := range env {
if strings.HasPrefix(e, "DOCKER_CONFIG=") {
hasDockerConfig = true
}
}
if hasDockerConfig {
t.Error("craneEnviron should not set DOCKER_CONFIG when config.json doesn't exist")
}
}
// TestCopyWebAssets checks that files at the top level and in subdirectories
// of the source are present with identical content in cfg.OutputDir after
// copyWebAssets runs.
func TestCopyWebAssets(t *testing.T) {
	srcDir := t.TempDir()
	dstDir := t.TempDir()

	// writeSrc creates a fixture file (and its parent directory) under srcDir,
	// failing the test immediately if setup goes wrong instead of letting a
	// broken fixture surface later as a confusing assertion failure.
	writeSrc := func(rel, content string) {
		t.Helper()
		path := filepath.Join(srcDir, rel)
		if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
			t.Fatalf("mkdir for %s: %v", path, err)
		}
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
	}
	writeSrc("index.html", "<html></html>")
	writeSrc(filepath.Join("js", "app.js"), "// app")
	writeSrc(filepath.Join("css", "style.css"), "body{}")

	cfg := &Config{OutputDir: dstDir}
	if err := copyWebAssets(cfg, srcDir); err != nil {
		t.Fatalf("copyWebAssets: %v", err)
	}

	assertFileContent(t, filepath.Join(dstDir, "index.html"), "<html></html>")
	assertFileContent(t, filepath.Join(dstDir, "js", "app.js"), "// app")
	assertFileContent(t, filepath.Join(dstDir, "css", "style.css"), "body{}")
}
// TestCopyWebAssets_OverlaysOnExistingData checks that a file already present
// in the output directory is replaced by the same-named file from the site
// build when copyWebAssets runs.
func TestCopyWebAssets_OverlaysOnExistingData(t *testing.T) {
	srcDir := t.TempDir()
	dstDir := t.TempDir()

	// writeFixture creates a file (and its parent directory), failing the
	// test immediately if setup goes wrong.
	writeFixture := func(path, content string) {
		t.Helper()
		if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
			t.Fatalf("mkdir for %s: %v", path, err)
		}
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
	}

	// Pre-existing data files in output directory
	writeFixture(filepath.Join(dstDir, "data", "leaderboard.json"), `{"old": true}`)

	// Site build has its own data placeholder
	writeFixture(filepath.Join(srcDir, "index.html"), "<html>")
	writeFixture(filepath.Join(srcDir, "data", "leaderboard.json"), `{"placeholder": true}`)

	cfg := &Config{OutputDir: dstDir}
	if err := copyWebAssets(cfg, srcDir); err != nil {
		t.Fatalf("copyWebAssets: %v", err)
	}

	// Should have the site build's data (will be overwritten by generateAllIndexes later)
	assertFileContent(t, filepath.Join(dstDir, "index.html"), "<html>")
	assertFileContent(t, filepath.Join(dstDir, "data", "leaderboard.json"), `{"placeholder": true}`)
}
func TestCopyWebAssets_EmptySource(t *testing.T) {
srcDir := t.TempDir()
dstDir := t.TempDir()
cfg := &Config{OutputDir: dstDir}
if err := copyWebAssets(cfg, srcDir); err != nil {
t.Fatalf("copyWebAssets with empty source: %v", err)
}
}
// TestCopyWebAssets_NonexistentSource checks that a missing source directory
// is reported as an error.
func TestCopyWebAssets_NonexistentSource(t *testing.T) {
	const missingSrc = "/nonexistent/path"

	outCfg := &Config{OutputDir: t.TempDir()}
	if err := copyWebAssets(outCfg, missingSrc); err == nil {
		t.Error("expected error for nonexistent source")
	}
}
// assertFileContent is a test helper that reads the file at path and compares
// its full contents with want. An unreadable file aborts the test; a content
// mismatch is reported as a non-fatal error.
func assertFileContent(t *testing.T, path, want string) {
	t.Helper()

	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatalf("read %s: %v", path, readErr)
	}

	if got := string(raw); got != want {
		t.Errorf("content of %s = %q, want %q", path, got, want)
	}
}