Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/workflows/lint-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,14 @@ jobs:
permissions:
contents: read
runs-on: ubuntu-24.04
strategy:
matrix:
traefik: [latest]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

- name: run
run: go run test.go
working-directory: ./ci
env:
TRAEFIK_TAG: ${{ matrix.traefik }}
TRAEFIK_TAG: latest

- name: cleanup
if: ${{ always() }}
Expand Down
8 changes: 7 additions & 1 deletion ci/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ services:
traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableStateReconciliation: "true"
healthcheck:
test: curl -fs http://localhost/healthz | grep -q OK || exit 1
start_period: 5s
volumes:
- ./conf/nginx/default.conf:/etc/nginx/conf.d/default.conf:r
networks:
Expand Down Expand Up @@ -57,6 +58,7 @@ services:
traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableStateReconciliation: "true"
healthcheck:
test: curl -fs http://localhost/healthz | grep -q OK || exit 1
start_period: 5s
volumes:
- ./conf/nginx/default.conf:/etc/nginx/conf.d/default.conf:r
networks:
Expand All @@ -71,7 +73,8 @@ services:
--api.debug=true
--ping=true
--entryPoints.http.address=:80
--entryPoints.http.forwardedHeaders.trustedIPs=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16
--entryPoints.http.forwardedHeaders.insecure=true
--entryPoints.http.forwardedHeaders.trustedIPs=127.0.0.1/32,::1/128,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16
--providers.docker=true
--providers.docker.network=default
--experimental.localPlugins.captcha-protect.moduleName=github.com/libops/captcha-protect
Expand All @@ -90,6 +93,9 @@ services:
- traefik
healthcheck:
test: traefik healthcheck --ping
start_period: 5s
depends_on:
nginx:
condition: service_healthy
nginx2:
condition: service_healthy
62 changes: 52 additions & 10 deletions ci/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ const parallelism = 10

func main() {
log := slog.New(slog.NewTextHandler(os.Stdout, nil))
googleCIDRs, err := helper.FetchGooglebotIPs(log, http.DefaultClient, "https://developers.google.com/static/search/apis/ipranges/googlebot.json")
googleCIDRs, err := helper.FetchGoogleCrawlerIPs(log, http.DefaultClient, helper.GoogleCrawlerIPRangeURLs)
if err != nil {
slog.Error("unable to fetch google bot ips", "err", err)
slog.Error("unable to fetch google crawler ips", "err", err)
os.Exit(1)
}

Expand All @@ -56,18 +56,18 @@ func main() {
runCommand("docker", "compose", "up", "-d")
waitForService("http://localhost")
waitForService("http://localhost/app2")
waitForGoogleExemptionReady(googleCIDRs)

fmt.Printf("Making sure %d attempt(s) pass\n", rateLimit)
runParallelChecks(ips, rateLimit, "http://localhost")

time.Sleep(cp.StateSaveInterval + cp.StateSaveJitter + (1 * time.Second))
runCommand("jq", ".", "tmp/state.json")
statePath := "./tmp/state.json"
runCommand("jq", ".", statePath)

fmt.Printf("Making sure attempt #%d causes a redirect to the challenge page\n", rateLimit+1)
ensureRedirect(ips, "http://localhost")

fmt.Println("\nTesting state sharing between nginx instances...")
time.Sleep(cp.StateSaveInterval + cp.StateSaveJitter + (1 * time.Second))
time.Sleep(cp.StateSaveInterval + cp.StateSaveJitter + (5 * time.Second))

testStateSharing(ips)
testGoogleBotGetsThrough(googleCIDRs)
Expand All @@ -81,7 +81,7 @@ func main() {
time.Sleep(10 * time.Second)
checkStateReload()

runCommand("rm", "tmp/state.json")
runCommand("rm", "-f", statePath)

}

Expand Down Expand Up @@ -147,7 +147,7 @@ func runParallelChecks(ips []string, rateLimit int, url string) {
var wg sync.WaitGroup
sem := make(chan struct{}, parallelism)

for i := 0; i < rateLimit; i++ {
for range rateLimit {
for _, ip := range ips {
wg.Add(1)
sem <- struct{}{}
Expand Down Expand Up @@ -305,7 +305,7 @@ func checkStateReload() {
os.Exit(1)
}

if len(botsMap) != numIPs {
if len(botsMap) < numIPs {
slog.Error("Unexpected number of bots", "expected", numIPs, "received", len(botsMap))
os.Exit(1)
}
Expand Down Expand Up @@ -400,7 +400,7 @@ func testGoogleBotGetsThrough(googleCIDRs []string) {

// Prime the rate limiter for the GoogleBot IP with parameters
fmt.Printf("Priming rate limiter for GoogleBot IP %s with params (%d requests)\n", googleIP, rateLimit)
for i := 0; i < rateLimit; i++ {
for i := range rateLimit {
output = httpRequest(googleIP, "http://localhost/?foo=bar") // Assign value
if output != "" {
slog.Error(fmt.Sprintf("GoogleBot with params was challenged prematurely on request #%d", i+1), "ip", googleIP, "output", output)
Expand All @@ -421,3 +421,45 @@ func testGoogleBotGetsThrough(googleCIDRs []string) {
// set things back to normal for other tests
runCommand("docker", "compose", "down")
}

// waitForGoogleExemptionReady polls the stack until requests from a Google
// crawler IP pass through unchallenged, which shows the plugin has loaded its
// exemption list. If no IPv4 can be derived from the CIDR list it warns and
// skips the warmup; if the exemption never activates within the deadline it
// aborts the test run.
func waitForGoogleExemptionReady(googleCIDRs []string) {
	googleIP, err := firstUsableIPv4FromCIDRs(googleCIDRs)
	if err != nil {
		slog.Warn("Unable to select Google IP for readiness check; skipping warmup", "err", err)
		return
	}

	deadline := time.Now().Add(90 * time.Second)
	for time.Now().Before(deadline) {
		ready := true
		// Probe with one request more than the rate limit allows: a non-exempt
		// IP would be challenged (non-empty output) on attempt rateLimit+1, so
		// an empty output on every attempt proves the exemption is active.
		// Matches the `for range n` idiom used elsewhere in this file.
		for range rateLimit + 1 {
			if output := httpRequest(googleIP, "http://localhost"); output != "" {
				ready = false
				break
			}
		}
		if ready {
			fmt.Printf("Google exemption is active for %s\n", googleIP)
			return
		}
		time.Sleep(500 * time.Millisecond)
	}

	slog.Error("Timed out waiting for Google crawler IP exemption to become active", "googleIP", googleIP)
	os.Exit(1)
}

// firstUsableIPv4FromCIDRs walks the CIDR list and returns the first address
// produced by getIPFromCIDR that parses as an IPv4 address. It reports an
// error only when no CIDR in the list yields a usable IPv4.
func firstUsableIPv4FromCIDRs(cidrs []string) (string, error) {
	for _, block := range cidrs {
		candidate, err := getIPFromCIDR(block)
		if err != nil {
			// Unusable CIDR (e.g. parse failure) — move on to the next one.
			continue
		}
		if addr := net.ParseIP(candidate); addr != nil && addr.To4() != nil {
			return candidate, nil
		}
	}

	return "", fmt.Errorf("no usable IPv4 found in CIDR list")
}
95 changes: 95 additions & 0 deletions internal/helper/google.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,19 @@ import (
"log/slog"
"net"
"net/http"
"net/netip"
"sort"
"sync"
"time"
)

var GoogleCrawlerIPRangeURLs = []string{
"https://developers.google.com/static/search/apis/ipranges/googlebot.json",
"https://developers.google.com/static/crawling/ipranges/common-crawlers.json",
"https://developers.google.com/static/crawling/ipranges/special-crawlers.json",
"https://developers.google.com/static/crawling/ipranges/user-triggered-fetchers-google.json",
}

// GooglebotIPs holds the list of Googlebot IP ranges, providing a thread-safe way to check if an IP is a Googlebot.
type GooglebotIPs struct {
cidrs []*net.IPNet
Expand Down Expand Up @@ -108,3 +117,89 @@ func FetchGooglebotIPs(log *slog.Logger, httpClient *http.Client, url string) ([

return cidrs, nil
}

// FetchGoogleCrawlerIPs fetches crawler IP ranges from multiple Google-managed
// endpoints and returns a canonical, de-duplicated list in which broader
// prefixes replace the narrower prefixes they cover. A nil or empty URL list
// yields a nil result with no error.
func FetchGoogleCrawlerIPs(log *slog.Logger, httpClient *http.Client, urls []string) ([]string, error) {
	if len(urls) == 0 {
		return nil, nil
	}

	var combined []string
	for _, endpoint := range urls {
		ranges, err := FetchGooglebotIPs(log, httpClient, endpoint)
		if err != nil {
			// Fail fast: returning a partial list could wrongly challenge real crawlers.
			return nil, err
		}
		combined = append(combined, ranges...)
	}

	return ReduceCIDRs(combined, log), nil
}

// RefreshGoogleCrawlerIPs pulls the crawler IP ranges from every configured
// URL and replaces the contents of target with the result. It returns how
// many CIDRs were loaded; on fetch failure the target is left untouched.
func RefreshGoogleCrawlerIPs(log *slog.Logger, httpClient *http.Client, target *GooglebotIPs, urls []string) (int, error) {
	fetched, err := FetchGoogleCrawlerIPs(log, httpClient, urls)
	if err != nil {
		return 0, err
	}
	target.Update(fetched, log)
	return len(fetched), nil
}

// ReduceCIDRs canonicalizes CIDRs, drops exact duplicates, and removes any
// narrower range that is fully covered by a broader one. Entries that fail to
// parse are logged (when a logger is provided) and skipped. The result is
// deterministic: IPv4 before IPv6, broader before narrower, then by address.
func ReduceCIDRs(cidrs []string, log *slog.Logger) []string {
	parsed := make([]netip.Prefix, 0, len(cidrs))
	for _, raw := range cidrs {
		p, err := netip.ParsePrefix(raw)
		if err != nil {
			if log != nil {
				log.Error("error parsing CIDR", "cidr", raw, "err", err)
			}
			continue
		}
		// Masked() zeroes host bits so duplicates compare equal.
		parsed = append(parsed, p.Masked())
	}

	// Broader-first order within each family lets the covering pass below do
	// a single forward sweep: anything covering a candidate is already kept.
	sort.Slice(parsed, func(i, j int) bool {
		left, right := parsed[i], parsed[j]
		if l4, r4 := left.Addr().Is4(), right.Addr().Is4(); l4 != r4 {
			return l4
		}
		if left.Bits() != right.Bits() {
			return left.Bits() < right.Bits()
		}
		return left.Addr().Compare(right.Addr()) < 0
	})

	kept := make([]netip.Prefix, 0, len(parsed))
candidates:
	for _, candidate := range parsed {
		for _, broad := range kept {
			// A kept prefix with fewer-or-equal bits that contains the
			// candidate's base address fully covers the candidate.
			if broad.Bits() <= candidate.Bits() && broad.Contains(candidate.Addr()) {
				continue candidates
			}
		}
		kept = append(kept, candidate)
	}

	out := make([]string, 0, len(kept))
	for _, p := range kept {
		out = append(out, p.String())
	}

	return out
}
Loading