Skip to content

Commit af40707

Browse files
committed
Fixes for e2e race conditions
Signed-off-by: Tero Saarni <[email protected]>
1 parent 3fcf197 commit af40707

14 files changed

Lines changed: 115 additions & 45 deletions

File tree

test/e2e/bootstrap/bootstrap_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ var _ = Describe("Bootstrap", func() {
7979

8080
// Wait for Envoy to be healthy.
8181
require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated())
82+
require.NoError(f.T(), f.WaitForReachable())
8283

8384
kubectlCmd, err = f.Kubectl.StartKubectlPortForward(19001, 9001, "projectcontour", f.Deployment.EnvoyResourceAndName())
8485
require.NoError(f.T(), err)

test/e2e/deployment.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,9 @@ func (d *Deployment) EnsureRateLimitResources(namespace, configContents string)
416416
if err := d.ensureResource(deployment, new(apps_v1.Deployment)); err != nil {
417417
return err
418418
}
419+
if err := WaitForDeployment(deployment, d.client); err != nil {
420+
return err
421+
}
419422

420423
service := d.RateLimitService.DeepCopy()
421424
service.Namespace = setNamespace
@@ -439,6 +442,9 @@ func (d *Deployment) EnsureGlobalExternalAuthResources(namespace string) error {
439442
if err := d.ensureResource(deployment, new(apps_v1.Deployment)); err != nil {
440443
return err
441444
}
445+
if err := WaitForDeployment(deployment, d.client); err != nil {
446+
return err
447+
}
442448

443449
service := d.GlobalExtAuthService.DeepCopy()
444450
service.Namespace = setNamespace

test/e2e/fixtures.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ func (e *Echo) DeployN(ns, name string, replicas int32) (func(), *apps_v1.Deploy
159159
}
160160
require.NoError(e.t, e.client.Create(context.TODO(), deployment))
161161

162+
if err := WaitForDeployment(deployment, e.client); err != nil {
163+
require.NoError(e.t, err)
164+
}
165+
162166
service := &core_v1.Service{
163167
ObjectMeta: meta_v1.ObjectMeta{
164168
Namespace: ns,
@@ -421,6 +425,10 @@ func (e *EchoSecure) Deploy(ns, name string, preApplyHook func(deployment *apps_
421425
require.NoError(e.t, e.client.Create(context.TODO(), deployment))
422426
require.NoError(e.t, e.client.Create(context.TODO(), service))
423427

428+
if err := WaitForDeployment(deployment, e.client); err != nil {
429+
require.NoError(e.t, err)
430+
}
431+
424432
return func() {
425433
require.NoError(e.t, e.client.Delete(context.TODO(), service))
426434
require.NoError(e.t, e.client.Delete(context.TODO(), deployment))
@@ -514,6 +522,10 @@ func (g *GRPC) Deploy(ns, name string) func() {
514522
}
515523
require.NoError(g.t, g.client.Create(context.TODO(), deployment))
516524

525+
if err := WaitForDeployment(deployment, g.client); err != nil {
526+
require.NoError(g.t, err)
527+
}
528+
517529
service := &core_v1.Service{
518530
ObjectMeta: meta_v1.ObjectMeta{
519531
Namespace: ns,

test/e2e/framework.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,3 +572,15 @@ func VerifyTLSServerCert(caCert []byte) func(*tls.Config) {
572572
c.InsecureSkipVerify = false
573573
}
574574
}
575+
576+
// WaitForReachable blocks until an HTTP request to path "/" issued through
// f.HTTP.Request returns a non-nil response without error.
//
// The request is retried every f.RetryInterval for up to f.RetryTimeout, with
// the first attempt made immediately. Request errors are deliberately swallowed
// and treated as "not reachable yet" so that polling continues; only a non-nil
// response ends the wait. The response status code is not inspected. The
// returned error is whatever wait.PollUntilContextTimeout reports (for example
// when the timeout elapses before any response is received).
func (f *Framework) WaitForReachable() error {
577+
return wait.PollUntilContextTimeout(context.Background(), f.RetryInterval, f.RetryTimeout, true, func(context.Context) (bool, error) {
578+
res, err := f.HTTP.Request(&HTTPRequestOpts{
579+
Path: "/",
580+
})
581+
if err != nil {
582+
return false, nil
583+
}
584+
return res != nil, nil
585+
})
586+
}

test/e2e/gateway/gateway_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ var _ = Describe("Gateway API", func() {
128128

129129
// Wait for Envoy to be healthy.
130130
require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated())
131+
require.NoError(f.T(), f.WaitForReachable())
131132

132133
// Since we're reconciling a specific Gateway,
133134
// we don't expect GatewayClasses to be reconciled

test/e2e/httpproxy/cookie_rewrite_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,10 @@ func deployEchoServer(t require.TestingT, c client.Client, ns, name string) {
878878
}
879879
require.NoError(t, c.Create(context.TODO(), deployment))
880880

881+
if err := e2e.WaitForDeployment(deployment, c); err != nil {
882+
require.NoError(t, err)
883+
}
884+
881885
service := &core_v1.Service{
882886
ObjectMeta: meta_v1.ObjectMeta{
883887
Namespace: ns,

test/e2e/httpproxy/httpproxy_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ var _ = Describe("HTTPProxy", func() {
102102

103103
// Wait for Envoy to be healthy.
104104
require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated())
105+
require.NoError(f.T(), f.WaitForReachable())
105106
})
106107

107108
AfterEach(func() {

test/e2e/incluster/incluster_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ var _ = Describe("Incluster", func() {
7979
require.NoError(f.T(), f.Deployment.EnsureContourDeployment())
8080
require.NoError(f.T(), f.Deployment.WaitForContourDeploymentUpdated())
8181
require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated())
82+
require.NoError(f.T(), f.WaitForReachable())
8283
})
8384

8485
AfterEach(func() {

test/e2e/infra/infra_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ var _ = Describe("Infra", func() {
113113

114114
// Wait for Envoy to be healthy.
115115
require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated())
116+
require.NoError(f.T(), f.WaitForReachable())
116117

117118
kubectlCmd, err = f.Kubectl.StartKubectlPortForward(19001, 9001, "projectcontour", f.Deployment.EnvoyResourceAndName(), additionalContourArgs...)
118119
require.NoError(f.T(), err)

test/e2e/infra/metrics_test.go

Lines changed: 26 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -18,77 +18,40 @@ package infra
1818
import (
1919
"crypto/tls"
2020
"net/http"
21-
"time"
2221

2322
. "github.com/onsi/ginkgo/v2"
2423
"github.com/onsi/gomega"
24+
"github.com/onsi/gomega/gexec"
2525
"github.com/stretchr/testify/require"
2626

2727
"github.com/projectcontour/contour/test/e2e"
2828
)
2929

3030
func testMetrics() {
3131
Specify("requests to default metrics listener are served", func() {
32-
t := f.T()
33-
3432
res, ok := f.HTTP.MetricsRequestUntil(&e2e.HTTPRequestOpts{
3533
Path: "/stats",
3634
Condition: e2e.HasStatusCode(200),
3735
})
38-
require.NotNil(t, res, "request never succeeded")
39-
require.Truef(t, ok, "expected 200 response code, got %d", res.StatusCode)
36+
require.NotNil(f.T(), res, "request never succeeded")
37+
require.Truef(f.T(), ok, "expected 200 response code, got %d", res.StatusCode)
4038
})
4139
}
4240

4341
func testReady() {
4442
Specify("requests to default ready listener are served", func() {
45-
t := f.T()
46-
4743
res, ok := f.HTTP.MetricsRequestUntil(&e2e.HTTPRequestOpts{
4844
Path: "/ready",
4945
Condition: e2e.HasStatusCode(200),
5046
})
51-
require.NotNil(t, res, "request never succeeded")
52-
require.Truef(t, ok, "expected 200 response code, got %d", res.StatusCode)
47+
require.NotNil(f.T(), res, "request never succeeded")
48+
require.Truef(f.T(), ok, "expected 200 response code, got %d", res.StatusCode)
5349
})
5450
}
5551

5652
func testEnvoyMetricsOverHTTPS() {
5753
// Flake tracking issue: https://github.com/projectcontour/contour/issues/5932
5854
Specify("requests to metrics listener are served", FlakeAttempts(3), func() {
59-
t := f.T()
60-
61-
// Port-forward seems to be flaky. The following sequence happens:
62-
//
63-
// 1. Envoy becomes ready.
64-
// 2. Port-forward is started.
65-
// 3. HTTPS request is sent but the connection times out with errors
66-
// "error creating error stream for port 18003 -> 8003: Timeout occurred",
67-
// "error creating forwarding stream for port 18003 -> 8003: Timeout occurred"
68-
// 4. Meanwhile the metrics listener gets added.
69-
// 5. Sometimes (one out of ~1-50 runs) port-forward gets stuck and packets are not forwarded
70-
// even after listener is up and connection attempts are still regularly retried.
71-
//
72-
// When the problem occurs, Wireshark does not show any traffic on the container side.
73-
// The problem could be, e.g., an undiscovered race condition with Kubernetes port-forward.
74-
//
75-
// The following workarounds seem to work:
76-
//
77-
// a) Add a fixed delay before port-forwarding.
78-
// b) Wait for Envoy to have listener by observing Envoy logs before port-forwarding.
79-
// c) Restart port-forwarding when connection attempts fail.
80-
//
81-
// Executing port-forward started in BeforeEach(), JustBeforeEach() or combining metrics
82-
// port with the admin port-forward command (127.0.0.1:19001 -> 9001) did not help.
83-
//
84-
// The simplest workaround (a) is taken here.
85-
time.Sleep(5 * time.Second)
86-
87-
// Port-forward for metrics over HTTPS
88-
kubectlCmd, err := f.Kubectl.StartKubectlPortForward(18003, 8003, "projectcontour", f.Deployment.EnvoyResourceAndName())
89-
require.NoError(t, err)
90-
defer f.Kubectl.StopKubectlPortForward(kubectlCmd)
91-
9255
clientCert, caBundle := f.Certs.GetTLSCertificate("projectcontour", "metrics-client")
9356
client := http.Client{
9457
Transport: &http.Transport{
@@ -100,13 +63,32 @@ func testEnvoyMetricsOverHTTPS() {
10063
},
10164
}
10265

66+
var kubectlCmd *gexec.Session
67+
defer func() {
68+
if kubectlCmd != nil {
69+
f.Kubectl.StopKubectlPortForward(kubectlCmd)
70+
}
71+
}()
72+
10373
gomega.Eventually(func() int {
74+
var err error
75+
if kubectlCmd == nil {
76+
kubectlCmd, err = f.Kubectl.StartKubectlPortForward(18003, 8003, "projectcontour", f.Deployment.EnvoyResourceAndName())
77+
if err != nil {
78+
GinkgoWriter.Println("failed to start port-forward:", err)
79+
return 0
80+
}
81+
}
82+
10483
resp, err := client.Get("https://localhost:18003/stats")
10584
if err != nil {
106-
GinkgoWriter.Println(err)
85+
GinkgoWriter.Println("request failed, restarting port-forward:", err)
86+
f.Kubectl.StopKubectlPortForward(kubectlCmd)
87+
kubectlCmd = nil
10788
return 0
10889
}
90+
defer resp.Body.Close()
10991
return resp.StatusCode
110-
}, "10s", "1s").Should(gomega.Equal(http.StatusOK))
92+
}, "30s", "1s").Should(gomega.Equal(http.StatusOK))
11193
})
11294
}

0 commit comments

Comments
 (0)