From 0ad3eaacc2350d4e6af889a4e64f050d7b4b630c Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sun, 21 Jun 2026 22:09:25 +0000 Subject: [PATCH] [minor] Add UptimeRobot bypass and tighten lifecycle handling --- .github/workflows/lint-test.yml | 2 +- .github/workflows/stress-test.yaml | 2 +- README.md | 7 +- ci/docker-compose.yml | 2 + ci_behavior_test.go | 22 ++++++ internal/helper/google.go | 21 +++++- internal/helper/google_test.go | 23 +++++++ internal/helper/uptimerobot.go | 96 ++++++++++++++++++++++++++ internal/helper/uptimerobot_test.go | 101 ++++++++++++++++++++++++++++ internal/state/lock.go | 5 +- main.go | 89 +++++++++++++++++------- main_test.go | 43 +++++++++++- 12 files changed, 377 insertions(+), 36 deletions(-) create mode 100644 internal/helper/uptimerobot.go create mode 100644 internal/helper/uptimerobot_test.go diff --git a/.github/workflows/lint-test.yml b/.github/workflows/lint-test.yml index 76ba854..9a9a730 100644 --- a/.github/workflows/lint-test.yml +++ b/.github/workflows/lint-test.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: - go-version: "1.26.3" + go-version: "1.26.4" - name: golangci-lint uses: golangci/golangci-lint-action@82606bf257cbaff209d206a39f5134f0cfbfd2ee # v9 diff --git a/.github/workflows/stress-test.yaml b/.github/workflows/stress-test.yaml index ca4531c..8c6a91d 100644 --- a/.github/workflows/stress-test.yaml +++ b/.github/workflows/stress-test.yaml @@ -14,7 +14,7 @@ jobs: - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: - go-version: "1.26.3" + go-version: "1.26.4" - name: Run stress tests id: stress_test diff --git a/README.md b/README.md index e7a3fcc..ef2166f 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ services: traefik.http.middlewares.captcha-protect.plugin.captcha-protect.persistentStateFile: /tmp/state.json traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableStateReconciliation: "false" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableGooglebotIPCheck: "true" + traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableUptimeRobotBypass: "false" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.periodSeconds: 30 traefik.http.middlewares.captcha-protect.plugin.captcha-protect.failureThreshold: 3 networks: @@ -90,7 +91,7 @@ services: --providers.docker=true --providers.docker.network=default --experimental.plugins.captcha-protect.modulename=github.com/libops/captcha-protect - --experimental.plugins.captcha-protect.version=v1.12.5 + --experimental.plugins.captcha-protect.version=v1.13.0 volumes: - /var/run/docker.sock:/var/run/docker.sock:z - /CHANGEME/TO/A/HOST/PATH/FOR/STATE/FILE:/tmp/state.json:rw @@ -126,6 +127,7 @@ services: | `ipDepth` | `int` | `0` | How deep past the last non-exempt IP to fetch the real IP from `ipForwardedHeader`. Default 0 returns the last IP in the forward header | | `goodBots` | `[]string` (encouraged) | *see below* | List of second-level domains for bots that are never challenged or rate-limited. | | `enableGooglebotIPCheck`| `string`. | `"false"` | Treat IPs coming from googlebot's known IP ranges as good bots | +| `enableUptimeRobotBypass` | `string` | `"false"` | When `"true"`, bypass challenges for IP ranges published by UptimeRobot. The ranges are refreshed every 24 hours. | | `protectParameters` | `string` | `"false"` | Forces rate limiting even for good bots if URL parameters are present. Useful for protecting faceted search pages. | | `protectFileExtensions` | `[]string` | `""` | Comma-separated file extensions to protect. By default, your protected routes only protect html files. This is to prevent files like CSS/JS/img from tripping the rate limit. | | `protectHttpMethods` | `[]string` | `"GET,HEAD"` | Comma-separated list of HTTP methods to protect against | @@ -167,11 +169,14 @@ A good default value for `goodBots` would be: ``` enableGooglebotIPCheck: "true" +enableUptimeRobotBypass: "true" goodBots: apple.com,archive.org,duckduckgo.com,facebook.com,google.com,instagram.com,kagibot.org,linkedin.com,msn.com,openalex.org,twitter.com,x.com ``` Since google publishes their bot IPs, we can also leverage their API to let google crawl the site unchallenged based on client IP. This can be enabled with `enableGooglebotIPCheck: "true"` +UptimeRobot publishes its monitoring IP ranges at `https://api.uptimerobot.com/meta/ips`. Set `enableUptimeRobotBypass: "true"` to exempt those IPs; the list is fetched at startup and refreshed every 24 hours. The default is `"false"`. + **However** if you set the config parameter `protectParameters="true"`, even good bots won't be allowed to crawl protected routes if a URL parameter is on the request (e.g. `/foo?bar=baz`). This `protectParameters` feature is meant to help protect faceted search pages. diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 3559bb4..de8bfec 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -21,6 +21,7 @@ services: traefik.http.middlewares.captcha-protect.plugin.captcha-protect.protectParameters: "${PROTECT_PARAMETERS:-false}" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.goodBots: "" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableGooglebotIPCheck: "false" + traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableUptimeRobotBypass: "false" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.mode: "regex" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.protectRoutes: "^/" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.excludeRoutes: "\\/oai\\/request,\\/node\\/\\d+\\/(book-)?manifest" @@ -55,6 +56,7 @@ services: traefik.http.middlewares.captcha-protect.plugin.captcha-protect.protectParameters: "${PROTECT_PARAMETERS:-false}" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.goodBots: "" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableGooglebotIPCheck: "false" + traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableUptimeRobotBypass: "false" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.mode: "regex" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.protectRoutes: "^/" traefik.http.middlewares.captcha-protect.plugin.captcha-protect.excludeRoutes: "\\/oai\\/request,\\/node\\/\\d+\\/(book-)?manifest" diff --git a/ci_behavior_test.go b/ci_behavior_test.go index 1c1668f..edfe291 100644 --- a/ci_behavior_test.go +++ b/ci_behavior_test.go @@ -66,6 +66,27 @@ func TestCILabelEquivalentGooglebotParameterBehavior(t *testing.T) { assertRedirect(t, protectedParams, googleIP, "/?foo=bar", "/challenge?destination=%2F%3Ffoo%3Dbar") } +func TestCILabelEquivalentUptimeRobotBypassBehavior(t *testing.T) { + uptimeRobotIP := "203.0.113.10" + + bypass := newCILabelEquivalentMiddleware(t, nil) + bypass.uptimeRobotIPs = helper.NewUptimeRobotIPs() + bypass.uptimeRobotIPs.Update([]string{"203.0.113.10/32"}, discardLogger()) + bypass.config.EnableUptimeRobotBypass = "true" + + for i := uint(0); i < ciRateLimit+1; i++ { + assertNoRedirect(t, bypass, uptimeRobotIP, "/") + } + + disabled := newCILabelEquivalentMiddleware(t, nil) + disabled.uptimeRobotIPs = helper.NewUptimeRobotIPs() + disabled.uptimeRobotIPs.Update([]string{"203.0.113.10/32"}, discardLogger()) + for i := uint(0); i < ciRateLimit; i++ { + assertNoRedirect(t, disabled, uptimeRobotIP, "/") + } + assertRedirect(t, disabled, uptimeRobotIP, "/", "/challenge?destination=%2F") +} + func TestPersistentStateSharingWithSynctest(t *testing.T) { synctest.Test(t, func(t *testing.T) { stateFile := filepath.Join(t.TempDir(), "state.json") @@ -133,6 +154,7 @@ func ciLabelEquivalentConfig() *Config { config.ProtectParameters = "false" config.GoodBots = []string{} config.EnableGooglebotIPCheck = "false" + config.EnableUptimeRobotBypass = "false" config.Mode = "regex" config.ProtectRoutes = []string{"^/"} config.ExcludeRoutes = []string{ diff --git a/internal/helper/google.go b/internal/helper/google.go index ac82d5a..03a0d7a 100644 --- a/internal/helper/google.go +++ b/internal/helper/google.go @@ -78,6 +78,11 @@ func (g *GooglebotIPs) Contains(ip net.IP) bool { // FetchGooglebotIPs fetches the list of Googlebot IPs from Google's official endpoint, // parses the JSON response, and returns a slice of CIDR strings. func FetchGooglebotIPs(log *slog.Logger, httpClient *http.Client, url string) ([]string, error) { + return FetchGooglebotIPsContext(context.Background(), log, httpClient, url) +} + +// FetchGooglebotIPsContext fetches Googlebot IPs and cancels the request with ctx. +func FetchGooglebotIPsContext(parent context.Context, log *slog.Logger, httpClient *http.Client, url string) ([]string, error) { log.Debug("Fetching Googlebot IPs") req, err := http.NewRequest(http.MethodGet, url, nil) @@ -85,7 +90,7 @@ func FetchGooglebotIPs(log *slog.Logger, httpClient *http.Client, url string) ([ return nil, fmt.Errorf("failed to create Googlebot IP request: %w", err) } - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancel := context.WithTimeout(parent, 30*time.Second) defer cancel() req = req.WithContext(ctx) @@ -121,13 +126,18 @@ func FetchGooglebotIPs(log *slog.Logger, httpClient *http.Client, url string) ([ // FetchGoogleCrawlerIPs fetches crawler IP ranges from multiple Google-managed endpoints, // then returns a canonical, unique list where broader prefixes replace narrower prefixes. func FetchGoogleCrawlerIPs(log *slog.Logger, httpClient *http.Client, urls []string) ([]string, error) { + return FetchGoogleCrawlerIPsContext(context.Background(), log, httpClient, urls) +} + +// FetchGoogleCrawlerIPsContext fetches all configured Google crawler ranges with cancellation. +func FetchGoogleCrawlerIPsContext(ctx context.Context, log *slog.Logger, httpClient *http.Client, urls []string) ([]string, error) { if len(urls) == 0 { return nil, nil } allCIDRs := make([]string, 0) for _, url := range urls { - cidrs, err := FetchGooglebotIPs(log, httpClient, url) + cidrs, err := FetchGooglebotIPsContext(ctx, log, httpClient, url) if err != nil { return nil, err } @@ -140,7 +150,12 @@ func FetchGoogleCrawlerIPs(log *slog.Logger, httpClient *http.Client, urls []str // RefreshGoogleCrawlerIPs fetches crawler IPs from all configured URLs and updates // the provided GooglebotIPs set. Returns the number of CIDRs loaded. func RefreshGoogleCrawlerIPs(log *slog.Logger, httpClient *http.Client, target *GooglebotIPs, urls []string) (int, error) { - cidrs, err := FetchGoogleCrawlerIPs(log, httpClient, urls) + return RefreshGoogleCrawlerIPsContext(context.Background(), log, httpClient, target, urls) +} + +// RefreshGoogleCrawlerIPsContext refreshes the active crawler ranges with cancellation. +func RefreshGoogleCrawlerIPsContext(ctx context.Context, log *slog.Logger, httpClient *http.Client, target *GooglebotIPs, urls []string) (int, error) { + cidrs, err := FetchGoogleCrawlerIPsContext(ctx, log, httpClient, urls) if err != nil { return 0, err } diff --git a/internal/helper/google_test.go b/internal/helper/google_test.go index 955d9bc..83ce309 100644 --- a/internal/helper/google_test.go +++ b/internal/helper/google_test.go @@ -1,6 +1,7 @@ package helper import ( + "context" "log/slog" "net" "net/http" @@ -41,6 +42,28 @@ func TestGooglebotIPs(t *testing.T) { t.Error("Expected 2001:db8::1 not to be a Googlebot IP") } } + +func TestFetchGooglebotIPsContextHonorsCancellation(t *testing.T) { + requestStarted := make(chan struct{}) + server := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) { + close(requestStarted) + <-r.Context().Done() + })) + defer server.Close() + + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan error, 1) + go func() { + _, err := FetchGooglebotIPsContext(ctx, slog.Default(), server.Client(), server.URL) + done <- err + }() + <-requestStarted + cancel() + + if err := <-done; err == nil { + t.Fatal("expected canceled fetch to fail") + } +} func TestFetchGooglebotIPs(t *testing.T) { log := slog.New(slog.NewTextHandler(os.Stdout, nil)) // Mock server diff --git a/internal/helper/uptimerobot.go b/internal/helper/uptimerobot.go new file mode 100644 index 0000000..80b93c6 --- /dev/null +++ b/internal/helper/uptimerobot.go @@ -0,0 +1,96 @@ +package helper + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net" + "net/http" + "time" +) + +const maxUptimeRobotIPResponseSize = 1 << 20 + +// UptimeRobotIPRangeURL is the official UptimeRobot checker range endpoint. +var UptimeRobotIPRangeURL = "https://api.uptimerobot.com/meta/ips" + +// UptimeRobotIPs is a thread-safe set of UptimeRobot IP ranges. +type UptimeRobotIPs = GooglebotIPs + +// NewUptimeRobotIPs creates an empty UptimeRobot IP range set. +func NewUptimeRobotIPs() *UptimeRobotIPs { + return NewGooglebotIPs() +} + +type uptimeRobotIPsJSON struct { + Prefixes []struct { + IPv4Prefix string `json:"ip_prefix"` + IPv6Prefix string `json:"ipv6_prefix"` + } `json:"prefixes"` +} + +// FetchUptimeRobotIPs fetches and validates UptimeRobot's published checker IP ranges. +func FetchUptimeRobotIPs(ctx context.Context, httpClient *http.Client, endpoint string) ([]string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return nil, fmt.Errorf("failed to create UptimeRobot IP request: %w", err) + } + + resp, err := httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to fetch UptimeRobot IPs: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to fetch UptimeRobot IPs, status code: %d", resp.StatusCode) + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, maxUptimeRobotIPResponseSize+1)) + if err != nil { + return nil, fmt.Errorf("failed to read UptimeRobot IPs: %w", err) + } + if len(body) > maxUptimeRobotIPResponseSize { + return nil, fmt.Errorf("UptimeRobot IP response exceeds %d bytes", maxUptimeRobotIPResponseSize) + } + + var payload uptimeRobotIPsJSON + if err := json.Unmarshal(body, &payload); err != nil { + return nil, fmt.Errorf("failed to decode UptimeRobot IPs: %w", err) + } + + cidrs := make([]string, 0, len(payload.Prefixes)) + for _, prefix := range payload.Prefixes { + for _, cidr := range []string{prefix.IPv4Prefix, prefix.IPv6Prefix} { + if cidr == "" { + continue + } + if _, _, err := net.ParseCIDR(cidr); err != nil { + return nil, fmt.Errorf("invalid UptimeRobot CIDR %q: %w", cidr, err) + } + cidrs = append(cidrs, cidr) + } + } + if len(cidrs) == 0 { + return nil, fmt.Errorf("UptimeRobot IP response contained no ranges") + } + + return cidrs, nil +} + +// RefreshUptimeRobotIPs atomically replaces the active ranges after a successful fetch. +func RefreshUptimeRobotIPs(parent context.Context, log *slog.Logger, httpClient *http.Client, target *UptimeRobotIPs, endpoint string) (int, error) { + ctx, cancel := context.WithTimeout(parent, 30*time.Second) + defer cancel() + + cidrs, err := FetchUptimeRobotIPs(ctx, httpClient, endpoint) + if err != nil { + return 0, err + } + cidrs = ReduceCIDRs(cidrs, log) + target.Update(cidrs, log) + + return len(cidrs), nil +} diff --git a/internal/helper/uptimerobot_test.go b/internal/helper/uptimerobot_test.go new file mode 100644 index 0000000..1da371a --- /dev/null +++ b/internal/helper/uptimerobot_test.go @@ -0,0 +1,101 @@ +package helper + +import ( + "context" + "io" + "log/slog" + "net" + "net/http" + "net/http/httptest" + "reflect" + "testing" +) + +func TestFetchUptimeRobotIPs(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "prefixes": [ + {"ip_prefix":"203.0.113.10/32","service":"checker"}, + {"ipv6_prefix":"2001:db8::10/128","service":"checker"} + ] + }`)) + })) + defer server.Close() + + got, err := FetchUptimeRobotIPs(context.Background(), server.Client(), server.URL) + if err != nil { + t.Fatal(err) + } + want := []string{"203.0.113.10/32", "2001:db8::10/128"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("UptimeRobot CIDRs = %v, want %v", got, want) + } +} + +func TestFetchUptimeRobotIPsRejectsInvalidResponses(t *testing.T) { + tests := []struct { + name string + statusCode int + body string + }{ + {name: "non-200", statusCode: http.StatusBadGateway, body: `{}`}, + {name: "invalid JSON", statusCode: http.StatusOK, body: `{`}, + {name: "empty ranges", statusCode: http.StatusOK, body: `{"prefixes":[]}`}, + {name: "invalid CIDR", statusCode: http.StatusOK, body: `{"prefixes":[{"ip_prefix":"not-a-cidr"}]}`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(tt.statusCode) + _, _ = w.Write([]byte(tt.body)) + })) + defer server.Close() + + if _, err := FetchUptimeRobotIPs(context.Background(), server.Client(), server.URL); err == nil { + t.Fatal("expected invalid response to fail") + } + }) + } +} + +func TestRefreshUptimeRobotIPsKeepsLastGoodSetOnError(t *testing.T) { + log := slog.New(slog.NewTextHandler(io.Discard, nil)) + ips := NewUptimeRobotIPs() + ips.Update([]string{"203.0.113.10/32"}, log) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "boom", http.StatusInternalServerError) + })) + defer server.Close() + + if _, err := RefreshUptimeRobotIPs(context.Background(), log, server.Client(), ips, server.URL); err == nil { + t.Fatal("expected refresh error") + } + if !ips.Contains(net.ParseIP("203.0.113.10")) { + t.Fatal("expected failed refresh to retain the previous ranges") + } +} + +func TestFetchUptimeRobotIPsHonorsCancellation(t *testing.T) { + requestStarted := make(chan struct{}) + server := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) { + close(requestStarted) + <-r.Context().Done() + })) + defer server.Close() + + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan error, 1) + go func() { + _, err := FetchUptimeRobotIPs(ctx, server.Client(), server.URL) + done <- err + }() + <-requestStarted + cancel() + + if err := <-done; err == nil { + t.Fatal("expected canceled fetch to fail") + } +} diff --git a/internal/state/lock.go b/internal/state/lock.go index 53b077f..3387b37 100644 --- a/internal/state/lock.go +++ b/internal/state/lock.go @@ -51,7 +51,8 @@ func newLockOwner(pid int) string { // Lock acquires an exclusive lock by creating a lock file. // It will retry for up to 5 seconds if the lock is held by another process. func (fl *FileLock) Lock() error { - timeout := time.After(5 * time.Second) + timeout := time.NewTimer(5 * time.Second) + defer timeout.Stop() ticker := time.NewTicker(10 * time.Millisecond) defer ticker.Stop() @@ -78,7 +79,7 @@ func (fl *FileLock) Lock() error { // If stat failed (e.g., file removed between OpenFile and Stat) // or lock is not stale, wait for next tick select { - case <-timeout: + case <-timeout.C: return fmt.Errorf("timeout waiting for file lock") case <-ticker.C: // Continue to next iteration diff --git a/main.go b/main.go index 81aa754..e0241e5 100644 --- a/main.go +++ b/main.go @@ -81,6 +81,7 @@ type Config struct { // Performance warning: Not recommended for sites with >1M unique visitors (see internal/state/state_stress_test.go). EnableStateReconciliation string `json:"enableStateReconciliation"` EnableGooglebotIPCheck string `json:"enableGooglebotIPCheck"` + EnableUptimeRobotBypass string `json:"enableUptimeRobotBypass"` Mode string `json:"mode"` PeriodSeconds int `json:"periodSeconds"` FailureThreshold int `json:"failureThreshold"` @@ -96,6 +97,7 @@ type CaptchaProtect struct { verifiedCache *lru.Cache botCache *lru.Cache googlebotIPs *helper.GooglebotIPs + uptimeRobotIPs *helper.UptimeRobotIPs captchaConfig CaptchaConfig exemptIps []*net.IPNet tmpl *htemplate.Template @@ -159,6 +161,7 @@ func CreateConfig() *Config { Mode: "prefix", EnableStateReconciliation: "false", EnableGooglebotIPCheck: "false", + EnableUptimeRobotBypass: "false", PeriodSeconds: DefaultHealthCheckPeriodSeconds, FailureThreshold: DefaultHealthCheckFailureThreshold, } @@ -339,46 +342,63 @@ func NewCaptchaProtect(ctx context.Context, next http.Handler, config *Config, n "failureThreshold", config.FailureThreshold) // Start health check goroutine - childCtx, cancel := context.WithCancel(ctx) - go bc.healthCheckLoop(childCtx) - go func() { - <-ctx.Done() - log.Debug("Context canceled, stopping health check") - cancel() - }() + go bc.healthCheckLoop(ctx) } if config.PersistentStateFile != "" { bc.loadState() - childCtx, cancel := context.WithCancel(ctx) - go bc.saveState(childCtx) - go func() { - <-ctx.Done() - bc.log.Debug("Context canceled, calling child cancel") - cancel() - }() + go bc.saveState(ctx) } if config.EnableGooglebotIPCheck == "true" { log.Info("Googlebot IP check enabled") bc.googlebotIPs = helper.NewGooglebotIPs() - childCtx, cancel := context.WithCancel(ctx) - go bc.googlebotIPCheckLoop(childCtx) - go func() { - <-ctx.Done() - log.Debug("Context canceled, stopping Googlebot IP check loop") - cancel() - }() + go bc.googlebotIPCheckLoop(ctx) + } + if config.EnableUptimeRobotBypass == "true" { + log.Info("UptimeRobot bypass enabled") + bc.uptimeRobotIPs = helper.NewUptimeRobotIPs() + go bc.uptimeRobotIPCheckLoop(ctx) } return &bc, nil } +func (bc *CaptchaProtect) uptimeRobotIPCheckLoop(ctx context.Context) { + ticker := time.NewTicker(24 * time.Hour) + defer ticker.Stop() + + refresh := func() { + count, err := helper.RefreshUptimeRobotIPs(ctx, bc.log, bc.httpClient, bc.uptimeRobotIPs, helper.UptimeRobotIPRangeURL) + if err != nil { + bc.log.Error("failed to fetch UptimeRobot IPs", "err", err) + return + } + bc.log.Info("Updated UptimeRobot IPs", "count", count) + } + + if ctx.Err() != nil { + return + } + refresh() + for { + select { + case <-ticker.C: + refresh() + case <-ctx.Done(): + return + } + } +} + func (bc *CaptchaProtect) googlebotIPCheckLoop(ctx context.Context) { ticker := time.NewTicker(24 * time.Hour) defer ticker.Stop() // Initial fetch - count, err := helper.RefreshGoogleCrawlerIPs(bc.log, bc.httpClient, bc.googlebotIPs, helper.GoogleCrawlerIPRangeURLs) + if ctx.Err() != nil { + return + } + count, err := helper.RefreshGoogleCrawlerIPsContext(ctx, bc.log, bc.httpClient, bc.googlebotIPs, helper.GoogleCrawlerIPRangeURLs) if err != nil { bc.log.Error("failed to fetch googlebot ips", "err", err) } else { @@ -388,7 +408,7 @@ func (bc *CaptchaProtect) googlebotIPCheckLoop(ctx context.Context) { for { select { case <-ticker.C: - count, err := helper.RefreshGoogleCrawlerIPs(bc.log, bc.httpClient, bc.googlebotIPs, helper.GoogleCrawlerIPRangeURLs) + count, err := helper.RefreshGoogleCrawlerIPsContext(ctx, bc.log, bc.httpClient, bc.googlebotIPs, helper.GoogleCrawlerIPRangeURLs) if err != nil { bc.log.Error("failed to fetch googlebot ips", "err", err) continue @@ -463,7 +483,7 @@ func (bc *CaptchaProtect) healthCheckLoop(ctx context.Context) { for { select { case <-ticker.C: - bc.performHealthCheck() + bc.performHealthCheckContext(ctx) case <-ctx.Done(): bc.log.Debug("Health check loop stopped") return @@ -474,6 +494,10 @@ func (bc *CaptchaProtect) healthCheckLoop(ctx context.Context) { // performHealthCheck executes a HEAD request to the primary captcha provider's JS file // and updates the circuit breaker state based on the response. func (bc *CaptchaProtect) performHealthCheck() { + bc.performHealthCheckContext(context.Background()) +} + +func (bc *CaptchaProtect) performHealthCheckContext(parent context.Context) { // Perform HEAD request to primary provider's JS URL req, err := http.NewRequest(http.MethodHead, bc.captchaConfig.js, nil) if err != nil { @@ -482,7 +506,7 @@ func (bc *CaptchaProtect) performHealthCheck() { return } - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(parent, 10*time.Second) defer cancel() req = req.WithContext(ctx) @@ -663,7 +687,14 @@ func (bc *CaptchaProtect) verifyChallengePage(rw http.ResponseWriter, req *http. var body = url.Values{} body.Add("secret", bc.config.SecretKey) body.Add("response", response) - resp, err := bc.httpClient.PostForm(activeConfig.validate, body) + validationReq, err := http.NewRequestWithContext(req.Context(), http.MethodPost, activeConfig.validate, strings.NewReader(body.Encode())) + if err != nil { + bc.log.Error("unable to create captcha validation request", "url", activeConfig.validate, "err", err) + http.Error(rw, "Internal error", http.StatusInternalServerError) + return http.StatusInternalServerError + } + validationReq.Header.Set("Content-Type", "application/x-www-form-urlencoded") + resp, err := bc.httpClient.Do(validationReq) if err != nil { bc.log.Error("unable to validate captcha", "url", activeConfig.validate, "err", err) http.Error(rw, "Internal error", http.StatusInternalServerError) @@ -1006,6 +1037,12 @@ func (bc *CaptchaProtect) isGoodBot(req *http.Request, clientIP string) bool { return false } } + if bc.config.EnableUptimeRobotBypass == "true" && bc.uptimeRobotIPs != nil { + ip := net.ParseIP(clientIP) + if ip != nil && bc.uptimeRobotIPs.Contains(ip) { + return true + } + } bot, ok := bc.botCache.Get(clientIP) if ok { diff --git a/main_test.go b/main_test.go index b63dc8c..c7fc803 100644 --- a/main_test.go +++ b/main_test.go @@ -1721,7 +1721,7 @@ func TestServePojJS(t *testing.T) { config.SecretKey = "test-secret" config.ProtectRoutes = []string{"/"} - bc, err := NewCaptchaProtect(context.Background(), http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {}), config, "test") + bc, err := NewCaptchaProtect(t.Context(), http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {}), config, "test") if err != nil { t.Fatalf("Failed to create CaptchaProtect: %v", err) } @@ -1801,7 +1801,7 @@ func TestCircuitBreakerUsesPojProvider(t *testing.T) { config.PeriodSeconds = 30 config.FailureThreshold = 3 - bc, err := NewCaptchaProtect(context.Background(), http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {}), config, "test") + bc, err := NewCaptchaProtect(t.Context(), http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {}), config, "test") if err != nil { t.Fatalf("Failed to create CaptchaProtect: %v", err) } @@ -2091,6 +2091,45 @@ func TestGooglebotIPCheckLoopInitialFetchError(t *testing.T) { } } +func TestUptimeRobotIPCheckLoopInitialFetch(t *testing.T) { + originalURL := helper.UptimeRobotIPRangeURL + defer func() { helper.UptimeRobotIPRangeURL = originalURL }() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"prefixes":[{"ip_prefix":"203.0.113.10/32"}]}`)) + })) + defer server.Close() + helper.UptimeRobotIPRangeURL = server.URL + + bc := &CaptchaProtect{ + log: slog.New(slog.NewTextHandler(os.Stdout, nil)), + httpClient: server.Client(), + uptimeRobotIPs: helper.NewUptimeRobotIPs(), + } + + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan struct{}) + go func() { + bc.uptimeRobotIPCheckLoop(ctx) + close(done) + }() + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if bc.uptimeRobotIPs.Contains(net.ParseIP("203.0.113.10")) { + cancel() + <-done + return + } + time.Sleep(20 * time.Millisecond) + } + + cancel() + <-done + t.Fatal("expected UptimeRobot IPs to be updated from initial fetch") +} + func TestServeChallengePageEscapesDestination(t *testing.T) { config := CreateConfig() config.SiteKey = "test-site-key"