diff --git a/README.md b/README.md index 22beaaf..792c76b 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Resolution order: package override, then ecosystem override, then global default | Conan | C/C++ | | ✓ | | Conda | Python/R | Yes | ✓ | | CRAN | R | | ✓ | +| Julia | Julia | | ✓ | | Container | Docker/OCI | | ✓ | | Debian | Debian/Ubuntu | | ✓ | | RPM | RHEL/Fedora | | ✓ | @@ -312,6 +313,21 @@ local({ }) ``` +### Julia + +Set the Pkg server before starting Julia: + +```bash +export JULIA_PKG_SERVER=http://localhost:8080/julia +``` + +Or inside a running session: + +```julia +ENV["JULIA_PKG_SERVER"] = "http://localhost:8080/julia" +using Pkg; Pkg.update() +``` + ### Docker / Container Registry Configure Docker to use the proxy as a registry mirror in `/etc/docker/daemon.json`: @@ -593,6 +609,7 @@ Recently cached: | `GET /conan/*` | Conan C/C++ protocol | | `GET /conda/*` | Conda/Anaconda protocol | | `GET /cran/*` | CRAN (R) protocol | +| `GET /julia/*` | Julia Pkg server protocol | | `GET /v2/*` | OCI/Docker registry protocol | | `GET /debian/*` | Debian/APT repository protocol | | `GET /rpm/*` | RPM/Yum repository protocol | diff --git a/docs/architecture.md b/docs/architecture.md index 81c41cf..f6c5458 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -14,7 +14,7 @@ The proxy is a caching HTTP server that sits between package manager clients and │ │ /npm/* -> NPMHandler /health -> healthHandler │ │ │ │ /cargo/* -> CargoHandler /stats -> statsHandler │ │ │ │ /gem/* -> GemHandler /metrics -> prometheus │ │ -│ │ ...16 ecosystems /api/* -> APIHandler │ │ +│ │ ...17 ecosystems /api/* -> APIHandler │ │ │ │ / -> Web UI │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ │ │ │ diff --git a/go.mod b/go.mod index 87fd2db..e91ab4f 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/git-pkgs/proxy go 1.25.6 require ( + github.com/BurntSushi/toml v1.6.0 github.com/CycloneDX/cyclonedx-go v0.10.0 
github.com/git-pkgs/archives v0.3.0 github.com/git-pkgs/enrichment v0.2.2 @@ -49,7 +50,6 @@ require ( github.com/Azure/go-autorest v14.2.0+incompatible // indirect github.com/Azure/go-autorest/autorest/to v0.4.1 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect - github.com/BurntSushi/toml v1.6.0 // indirect github.com/Djarvur/go-err113 v0.1.1 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/Masterminds/semver/v3 v3.4.0 // indirect diff --git a/internal/handler/julia.go b/internal/handler/julia.go new file mode 100644 index 0000000..08b1fdf --- /dev/null +++ b/internal/handler/julia.go @@ -0,0 +1,347 @@ +package handler + +import ( + "archive/tar" + "bufio" + "bytes" + "compress/gzip" + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "sync" + + "github.com/BurntSushi/toml" +) + +const ( + juliaUpstream = "https://pkg.julialang.org" + juliaGeneralRegistryUUID = "23338594-aafe-5451-b93e-139f81909106" + juliaArtifactName = "_artifact" + juliaRegistryName = "_registry" +) + +var ( + juliaHexPattern = regexp.MustCompile(`^[0-9a-f]{40,64}$`) + juliaUUIDPattern = regexp.MustCompile(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`) +) + +// JuliaHandler handles Julia Pkg server protocol requests. +// +// See https://pkgdocs.julialang.org/v1/registries/ and the PkgServer.jl +// reference implementation. The protocol is content-addressed: registry, +// package and artifact resources are all identified by git tree hashes +// and are immutable once published. +type JuliaHandler struct { + proxy *Proxy + upstreamURL string + + mu sync.RWMutex + names map[string]string + namesHash string + loadMu sync.Mutex +} + +// NewJuliaHandler creates a new Julia Pkg server handler. 
+func NewJuliaHandler(proxy *Proxy, _ string) *JuliaHandler { + return &JuliaHandler{ + proxy: proxy, + upstreamURL: juliaUpstream, + names: make(map[string]string), + } +} + +// Routes returns the HTTP handler for Julia requests. +func (h *JuliaHandler) Routes() http.Handler { + mux := http.NewServeMux() + + mux.HandleFunc("GET /registries", h.handleRegistries) + mux.HandleFunc("GET /registries.eager", h.handleRegistries) + mux.HandleFunc("GET /registries.conservative", h.handleRegistries) + mux.HandleFunc("GET /registry/{uuid}/{hash}", h.handleRegistry) + mux.HandleFunc("GET /package/{uuid}/{hash}", h.handlePackage) + mux.HandleFunc("GET /artifact/{hash}", h.handleArtifact) + mux.HandleFunc("GET /meta", h.proxyUpstream) + + return mux +} + +// handleRegistries serves the list of available registries. This is the only +// mutable endpoint in the protocol so it goes through the metadata cache. +func (h *JuliaHandler) handleRegistries(w http.ResponseWriter, r *http.Request) { + cacheKey := strings.TrimPrefix(r.URL.Path, "/") + h.proxy.ProxyCached(w, r, h.upstreamURL+r.URL.Path, "julia", cacheKey, "*/*") +} + +// handleRegistry serves an immutable registry tarball and refreshes the +// UUID→name map from its Registry.toml. 
+func (h *JuliaHandler) handleRegistry(w http.ResponseWriter, r *http.Request) { + uuid := r.PathValue("uuid") + hash := r.PathValue("hash") + if !validJuliaUUID(uuid) || !juliaHexPattern.MatchString(hash) { + http.Error(w, "invalid registry reference", http.StatusBadRequest) + return + } + + h.proxy.Logger.Info("julia registry request", "uuid", uuid, "hash", hash) + + upstreamURL := h.upstreamURL + r.URL.Path + result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", juliaRegistryName, hash, hash+".tar.gz", upstreamURL) + if err != nil { + h.proxy.Logger.Error("failed to get registry", "error", err) + http.Error(w, "failed to fetch registry", http.StatusBadGateway) + return + } + + go h.refreshNamesFromRegistry(uuid, hash) + + ServeArtifact(w, result) +} + +// handlePackage serves an immutable package source tarball. +func (h *JuliaHandler) handlePackage(w http.ResponseWriter, r *http.Request) { + uuid := r.PathValue("uuid") + hash := r.PathValue("hash") + if !validJuliaUUID(uuid) || !juliaHexPattern.MatchString(hash) { + http.Error(w, "invalid package reference", http.StatusBadRequest) + return + } + + if err := h.ensureNames(r.Context()); err != nil { + h.proxy.Logger.Warn("julia name map unavailable, using uuid", "error", err) + } + name := h.resolveName(uuid) + + h.proxy.Logger.Info("julia package request", "name", name, "uuid", uuid, "hash", hash) + + upstreamURL := h.upstreamURL + r.URL.Path + result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", name, hash, hash+".tar.gz", upstreamURL) + if err != nil { + h.proxy.Logger.Error("failed to get package", "error", err) + http.Error(w, "failed to fetch package", http.StatusBadGateway) + return + } + + ServeArtifact(w, result) +} + +// handleArtifact serves an immutable binary artifact tarball. Artifacts are +// anonymous content-addressed blobs with no associated package name. 
+func (h *JuliaHandler) handleArtifact(w http.ResponseWriter, r *http.Request) { + hash := r.PathValue("hash") + if !juliaHexPattern.MatchString(hash) { + http.Error(w, "invalid artifact hash", http.StatusBadRequest) + return + } + + h.proxy.Logger.Info("julia artifact request", "hash", hash) + + upstreamURL := h.upstreamURL + r.URL.Path + result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", juliaArtifactName, hash, hash+".tar.gz", upstreamURL) + if err != nil { + h.proxy.Logger.Error("failed to get artifact", "error", err) + http.Error(w, "failed to fetch artifact", http.StatusBadGateway) + return + } + + ServeArtifact(w, result) +} + +// proxyUpstream forwards a request to the upstream Pkg server without caching. +func (h *JuliaHandler) proxyUpstream(w http.ResponseWriter, r *http.Request) { + h.proxy.ProxyUpstream(w, r, h.upstreamURL+r.URL.Path, nil) +} + +// resolveName returns the human-readable package name for a UUID, falling +// back to the UUID itself if it is not present in the loaded registry. +func (h *JuliaHandler) resolveName(uuid string) string { + h.mu.RLock() + defer h.mu.RUnlock() + if name, ok := h.names[uuid]; ok { + return name + } + return uuid +} + +// ensureNames lazily populates the UUID→name map from the General registry. +// Returns immediately if the map is already populated; otherwise blocks until +// a single in-flight load completes. Failed loads are retried on the next call. +func (h *JuliaHandler) ensureNames(ctx context.Context) error { + if h.namesLoaded() { + return nil + } + + h.loadMu.Lock() + defer h.loadMu.Unlock() + + if h.namesLoaded() { + return nil + } + return h.loadNamesFromUpstream(ctx) +} + +func (h *JuliaHandler) namesLoaded() bool { + h.mu.RLock() + defer h.mu.RUnlock() + return len(h.names) > 0 +} + +// loadNamesFromUpstream fetches the current /registries listing, downloads the +// General registry tarball at its current hash, and parses Registry.toml. 
+func (h *JuliaHandler) loadNamesFromUpstream(ctx context.Context) error { + hash, err := h.fetchGeneralRegistryHash(ctx) + if err != nil { + return err + } + return h.loadRegistryTarball(ctx, juliaGeneralRegistryUUID, hash) +} + +// fetchGeneralRegistryHash reads /registries and returns the current tree hash +// for the General registry. +func (h *JuliaHandler) fetchGeneralRegistryHash(ctx context.Context) (string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, h.upstreamURL+"/registries", nil) + if err != nil { + return "", err + } + resp, err := h.proxy.HTTPClient.Do(req) + if err != nil { + return "", err + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("upstream /registries returned %d", resp.StatusCode) + } + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + uuid, hash, ok := parseRegistryLine(scanner.Text()) + if ok && uuid == juliaGeneralRegistryUUID { + return hash, nil + } + } + if err := scanner.Err(); err != nil { + return "", err + } + return "", fmt.Errorf("general registry not listed in /registries") +} + +// refreshNamesFromRegistry reloads the UUID→name map from a registry tarball +// that has just been cached. Errors are logged but do not affect the response. +func (h *JuliaHandler) refreshNamesFromRegistry(uuid, hash string) { + if uuid != juliaGeneralRegistryUUID { + return + } + h.mu.RLock() + current := h.namesHash + h.mu.RUnlock() + if current == hash { + return + } + if err := h.loadRegistryTarball(context.Background(), uuid, hash); err != nil { + h.proxy.Logger.Warn("failed to refresh julia name map", "error", err) + } +} + +// loadRegistryTarball downloads a registry tarball and replaces the name map +// with the contents of its Registry.toml. 
+func (h *JuliaHandler) loadRegistryTarball(ctx context.Context, uuid, hash string) error { + url := fmt.Sprintf("%s/registry/%s/%s", h.upstreamURL, uuid, hash) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return err + } + resp, err := h.proxy.HTTPClient.Do(req) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("upstream registry returned %d", resp.StatusCode) + } + + names, err := extractRegistryNames(resp.Body) + if err != nil { + return err + } + + h.mu.Lock() + h.names = names + h.namesHash = hash + h.mu.Unlock() + + h.proxy.Logger.Info("loaded julia registry name map", "packages", len(names), "hash", hash) + return nil +} + +// extractRegistryNames reads a gzipped registry tarball, finds Registry.toml +// at the root, and returns its [packages] table as a UUID→name map. +func extractRegistryNames(r io.Reader) (map[string]string, error) { + gz, err := gzip.NewReader(r) + if err != nil { + return nil, fmt.Errorf("opening gzip stream: %w", err) + } + defer func() { _ = gz.Close() }() + + tr := tar.NewReader(gz) + for { + hdr, err := tr.Next() + if err == io.EOF { + return nil, fmt.Errorf("no Registry.toml in tarball") + } + if err != nil { + return nil, err + } + if strings.TrimPrefix(hdr.Name, "./") != "Registry.toml" { + continue + } + + data, err := io.ReadAll(tr) + if err != nil { + return nil, err + } + return parseRegistryToml(data) + } +} + +type juliaRegistryFile struct { + Packages map[string]struct { + Name string `toml:"name"` + } `toml:"packages"` +} + +// parseRegistryToml decodes the [packages] table of a Registry.toml file. 
+func parseRegistryToml(data []byte) (map[string]string, error) { + var reg juliaRegistryFile + if _, err := toml.NewDecoder(bytes.NewReader(data)).Decode(&reg); err != nil { + return nil, fmt.Errorf("parsing Registry.toml: %w", err) + } + + names := make(map[string]string, len(reg.Packages)) + for uuid, pkg := range reg.Packages { + if pkg.Name != "" { + names[uuid] = pkg.Name + } + } + return names, nil +} + +// parseRegistryLine parses a single line from /registries of the form +// "/registry/{uuid}/{hash}" and returns the uuid and hash. +func parseRegistryLine(line string) (uuid, hash string, ok bool) { + line = strings.TrimSpace(line) + line = strings.TrimPrefix(line, "/registry/") + uuid, hash, found := strings.Cut(line, "/") + if !found || !validJuliaUUID(uuid) || !juliaHexPattern.MatchString(hash) { + return "", "", false + } + return uuid, hash, true +} + +// validJuliaUUID reports whether s looks like a lowercase RFC 4122 UUID. +func validJuliaUUID(s string) bool { + return juliaUUIDPattern.MatchString(s) +} diff --git a/internal/handler/julia_test.go b/internal/handler/julia_test.go new file mode 100644 index 0000000..68fb975 --- /dev/null +++ b/internal/handler/julia_test.go @@ -0,0 +1,167 @@ +package handler + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "log/slog" + "net/http" + "net/http/httptest" + "testing" +) + +func TestJuliaParseRegistryLine(t *testing.T) { + tests := []struct { + line string + wantUUID string + wantHash string + wantOK bool + }{ + { + "/registry/23338594-aafe-5451-b93e-139f81909106/342327538ed6c1ec54c69fa145e7b6bf5934201e", + "23338594-aafe-5451-b93e-139f81909106", + "342327538ed6c1ec54c69fa145e7b6bf5934201e", + true, + }, + { + " /registry/23338594-aafe-5451-b93e-139f81909106/342327538ed6c1ec54c69fa145e7b6bf5934201e\n", + "23338594-aafe-5451-b93e-139f81909106", + "342327538ed6c1ec54c69fa145e7b6bf5934201e", + true, + }, + {"/registry/not-a-uuid/0000", "", "", false}, + {"junk", "", "", false}, + {"", "", "", false}, + } 
+ + for _, tt := range tests { + uuid, hash, ok := parseRegistryLine(tt.line) + if uuid != tt.wantUUID || hash != tt.wantHash || ok != tt.wantOK { + t.Errorf("parseRegistryLine(%q) = (%q, %q, %v), want (%q, %q, %v)", + tt.line, uuid, hash, ok, tt.wantUUID, tt.wantHash, tt.wantOK) + } + } +} + +func TestJuliaValidUUID(t *testing.T) { + tests := []struct { + s string + want bool + }{ + {"23338594-aafe-5451-b93e-139f81909106", true}, + {"295af30f-e4ad-537b-8983-00126c2a3abe", true}, + {"23338594-AAFE-5451-b93e-139f81909106", false}, + {"23338594aafe5451b93e139f81909106", false}, + {"23338594-aafe-5451-b93e-139f8190910", false}, + {"23338594-aafe-5451-b93e-139f81909106-", false}, + {"23338594-gafe-5451-b93e-139f81909106", false}, + {"", false}, + } + + for _, tt := range tests { + if got := validJuliaUUID(tt.s); got != tt.want { + t.Errorf("validJuliaUUID(%q) = %v, want %v", tt.s, got, tt.want) + } + } +} + +func TestJuliaParseRegistryToml(t *testing.T) { + data := []byte(`name = "General" +uuid = "23338594-aafe-5451-b93e-139f81909106" + +[packages] +295af30f-e4ad-537b-8983-00126c2a3abe = { name = "Revise", path = "R/Revise" } +91a5bcdd-55d7-5caf-9e0b-520d859cae80 = { name = "Plots", path = "P/Plots" } +`) + + names, err := parseRegistryToml(data) + if err != nil { + t.Fatalf("parseRegistryToml: %v", err) + } + if got := names["295af30f-e4ad-537b-8983-00126c2a3abe"]; got != "Revise" { + t.Errorf("names[Revise uuid] = %q, want Revise", got) + } + if got := names["91a5bcdd-55d7-5caf-9e0b-520d859cae80"]; got != "Plots" { + t.Errorf("names[Plots uuid] = %q, want Plots", got) + } + if len(names) != 2 { + t.Errorf("len(names) = %d, want 2", len(names)) + } +} + +func TestJuliaExtractRegistryNames(t *testing.T) { + registryToml := `name = "General" +[packages] +295af30f-e4ad-537b-8983-00126c2a3abe = { name = "Revise", path = "R/Revise" } +` + var buf bytes.Buffer + gw := gzip.NewWriter(&buf) + tw := tar.NewWriter(gw) + + for _, f := range []struct{ name, body string }{ + 
{"R/Revise/Package.toml", "name = \"Revise\"\n"}, + {"Registry.toml", registryToml}, + } { + if err := tw.WriteHeader(&tar.Header{Name: f.name, Mode: 0o644, Size: int64(len(f.body))}); err != nil { + t.Fatalf("WriteHeader: %v", err) + } + if _, err := tw.Write([]byte(f.body)); err != nil { + t.Fatalf("Write: %v", err) + } + } + if err := tw.Close(); err != nil { + t.Fatalf("tar Close: %v", err) + } + if err := gw.Close(); err != nil { + t.Fatalf("gzip Close: %v", err) + } + + names, err := extractRegistryNames(bytes.NewReader(buf.Bytes())) + if err != nil { + t.Fatalf("extractRegistryNames: %v", err) + } + if got := names["295af30f-e4ad-537b-8983-00126c2a3abe"]; got != "Revise" { + t.Errorf("names[Revise uuid] = %q, want Revise", got) + } +} + +func TestJuliaResolveName(t *testing.T) { + h := &JuliaHandler{ + proxy: &Proxy{Logger: slog.Default()}, + names: map[string]string{ + "295af30f-e4ad-537b-8983-00126c2a3abe": "Revise", + }, + } + + if got := h.resolveName("295af30f-e4ad-537b-8983-00126c2a3abe"); got != "Revise" { + t.Errorf("resolveName(known) = %q, want Revise", got) + } + if got := h.resolveName("00000000-0000-0000-0000-000000000000"); got != "00000000-0000-0000-0000-000000000000" { + t.Errorf("resolveName(unknown) = %q, want uuid fallback", got) + } +} + +func TestJuliaRoutesValidation(t *testing.T) { + h := NewJuliaHandler(&Proxy{Logger: slog.Default()}, "") + routes := h.Routes() + + tests := []struct { + path string + want int + }{ + {"/package/not-a-uuid/342327538ed6c1ec54c69fa145e7b6bf5934201e", http.StatusBadRequest}, + {"/package/295af30f-e4ad-537b-8983-00126c2a3abe/short", http.StatusBadRequest}, + {"/registry/295af30f-e4ad-537b-8983-00126c2a3abe/zzzz", http.StatusBadRequest}, + {"/artifact/nothex", http.StatusBadRequest}, + {"/nope", http.StatusNotFound}, + } + + for _, tt := range tests { + req := httptest.NewRequest(http.MethodGet, tt.path, nil) + rr := httptest.NewRecorder() + routes.ServeHTTP(rr, req) + if rr.Code != tt.want { + t.Errorf("GET 
%s = %d, want %d", tt.path, rr.Code, tt.want) + } + } +} diff --git a/internal/server/dashboard.go b/internal/server/dashboard.go index 1fe5388..1de294c 100644 --- a/internal/server/dashboard.go +++ b/internal/server/dashboard.go @@ -127,6 +127,7 @@ func supportedEcosystems() []string { "gem", "golang", "hex", + "julia", "maven", "npm", "nuget", @@ -176,6 +177,8 @@ func ecosystemBadgeClasses(ecosystem string) string { return base + " bg-green-100 text-green-700 dark:bg-green-900/50 dark:text-green-300" case "cran": return base + " bg-slate-100 text-slate-700 dark:bg-slate-800 dark:text-slate-300" + case "julia": + return base + " bg-emerald-100 text-emerald-700 dark:bg-emerald-900/50 dark:text-emerald-300" case "oci": return base + " bg-sky-100 text-sky-700 dark:bg-sky-900/50 dark:text-sky-300" case "deb": @@ -377,6 +380,17 @@ local({ r["CRAN"] <- "` + baseURL + `/cran" options(repos = r) })`), + }, + { + ID: "julia", + Name: "Julia", + Language: "Julia", + Endpoint: "/julia/", + Instructions: template.HTML(`

Set the Pkg server before starting Julia:

+
export JULIA_PKG_SERVER=` + baseURL + `/julia
+

Or inside a running session:

+
ENV["JULIA_PKG_SERVER"] = "` + baseURL + `/julia"
+using Pkg; Pkg.update()
`), }, { ID: "oci", diff --git a/internal/server/server.go b/internal/server/server.go index a0983e5..7274365 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -15,6 +15,7 @@ // - /conan/* - Conan C/C++ protocol // - /conda/* - Conda/Anaconda protocol // - /cran/* - CRAN (R) protocol +// - /julia/* - Julia Pkg server protocol // - /v2/* - OCI/Docker container registry protocol // - /debian/* - Debian/APT repository protocol // - /rpm/* - RPM/Yum repository protocol @@ -189,6 +190,7 @@ func (s *Server) Start() error { conanHandler := handler.NewConanHandler(proxy, s.cfg.BaseURL) condaHandler := handler.NewCondaHandler(proxy, s.cfg.BaseURL) cranHandler := handler.NewCRANHandler(proxy, s.cfg.BaseURL) + juliaHandler := handler.NewJuliaHandler(proxy, s.cfg.BaseURL) containerHandler := handler.NewContainerHandler(proxy, s.cfg.BaseURL) debianHandler := handler.NewDebianHandler(proxy, s.cfg.BaseURL) rpmHandler := handler.NewRPMHandler(proxy, s.cfg.BaseURL) @@ -207,6 +209,7 @@ func (s *Server) Start() error { r.Mount("/conan", http.StripPrefix("/conan", conanHandler.Routes())) r.Mount("/conda", http.StripPrefix("/conda", condaHandler.Routes())) r.Mount("/cran", http.StripPrefix("/cran", cranHandler.Routes())) + r.Mount("/julia", http.StripPrefix("/julia", juliaHandler.Routes())) r.Mount("/v2", http.StripPrefix("/v2", containerHandler.Routes())) r.Mount("/debian", http.StripPrefix("/debian", debianHandler.Routes())) r.Mount("/rpm", http.StripPrefix("/rpm", rpmHandler.Routes()))