diff --git a/test/e2e/bootstrap/bootstrap_test.go b/test/e2e/bootstrap/bootstrap_test.go index 7dc61afaf84..994dd90b45a 100644 --- a/test/e2e/bootstrap/bootstrap_test.go +++ b/test/e2e/bootstrap/bootstrap_test.go @@ -76,17 +76,15 @@ var _ = Describe("Bootstrap", func() { var err error contourCmd, contourConfigFile, err = f.Deployment.StartLocalContour(contourConfig, contourConfiguration) require.NoError(f.T(), err) + DeferCleanup(f.Deployment.StopLocalContour, contourCmd, contourConfigFile) // Wait for Envoy to be healthy. require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated()) + require.NoError(f.T(), f.WaitForReachable()) kubectlCmd, err = f.Kubectl.StartKubectlPortForward(19001, 9001, "projectcontour", f.Deployment.EnvoyResourceAndName()) require.NoError(f.T(), err) - }) - - AfterEach(func() { - f.Kubectl.StopKubectlPortForward(kubectlCmd) - require.NoError(f.T(), f.Deployment.StopLocalContour(contourCmd, contourConfigFile)) + DeferCleanup(f.Kubectl.StopKubectlPortForward, kubectlCmd) }) f.Test(func() { diff --git a/test/e2e/deployment.go b/test/e2e/deployment.go index 1bd83c6c56f..d366711e78c 100644 --- a/test/e2e/deployment.go +++ b/test/e2e/deployment.go @@ -326,7 +326,7 @@ func (d *Deployment) EnsureCertgenJob() error { return err } } - if err := wait.PollUntilContextTimeout(context.Background(), time.Millisecond*50, time.Minute, true, jobDeleted); err != nil { + if err := wait.PollUntilContextTimeout(context.Background(), time.Millisecond*200, time.Minute, true, jobDeleted); err != nil { return err } return d.client.Create(context.TODO(), d.CertgenJob) @@ -416,6 +416,9 @@ func (d *Deployment) EnsureRateLimitResources(namespace, configContents string) if err := d.ensureResource(deployment, new(apps_v1.Deployment)); err != nil { return err } + if err := WaitForDeployment(deployment, d.client); err != nil { + return err + } service := d.RateLimitService.DeepCopy() service.Namespace = setNamespace @@ -439,6 +442,9 @@ func (d *Deployment) EnsureGlobalExternalAuthResources(namespace string) error { if err := d.ensureResource(deployment, new(apps_v1.Deployment)); err != nil { return err } + if err := WaitForDeployment(deployment, d.client); err != nil { + return err + } service := d.GlobalExtAuthService.DeepCopy() service.Namespace = setNamespace diff --git a/test/e2e/fixtures.go b/test/e2e/fixtures.go index 17e2d944844..6cdd1d05534 100644 --- a/test/e2e/fixtures.go +++ b/test/e2e/fixtures.go @@ -159,6 +159,10 @@ func (e *Echo) DeployN(ns, name string, replicas int32) (func(), *apps_v1.Deploy } require.NoError(e.t, e.client.Create(context.TODO(), deployment)) + if err := WaitForDeployment(deployment, e.client); err != nil { + require.NoError(e.t, err) + } + service := &core_v1.Service{ ObjectMeta: meta_v1.ObjectMeta{ Namespace: ns, @@ -297,6 +301,7 @@ func (e *EchoSecure) Deploy(ns, name string, preApplyHook func(deployment *apps_ Name: name, }, Spec: apps_v1.DeploymentSpec{ + Replicas: ptr.To(int32(1)), Selector: &meta_v1.LabelSelector{ MatchLabels: map[string]string{"app.kubernetes.io/name": name}, }, @@ -421,6 +426,10 @@ func (e *EchoSecure) Deploy(ns, name string, preApplyHook func(deployment *apps_ require.NoError(e.t, e.client.Create(context.TODO(), deployment)) require.NoError(e.t, e.client.Create(context.TODO(), service)) + if err := WaitForDeployment(deployment, e.client); err != nil { + require.NoError(e.t, err) + } + return func() { require.NoError(e.t, e.client.Delete(context.TODO(), service)) require.NoError(e.t, e.client.Delete(context.TODO(), deployment)) @@ -514,6 +523,10 @@ func (g *GRPC) Deploy(ns, name string) func() { } require.NoError(g.t, g.client.Create(context.TODO(), deployment)) + if err := WaitForDeployment(deployment, g.client); err != nil { + require.NoError(g.t, err) + } + service := &core_v1.Service{ ObjectMeta: meta_v1.ObjectMeta{ Namespace: ns, diff --git a/test/e2e/framework.go b/test/e2e/framework.go index 497e996a0c3..1c292343e49 100644 --- a/test/e2e/framework.go +++ b/test/e2e/framework.go @@ -572,3 +572,16 @@ func VerifyTLSServerCert(caCert []byte) func(*tls.Config) { c.InsecureSkipVerify = false } } + +func (f *Framework) WaitForReachable() error { + return wait.PollUntilContextTimeout(context.Background(), f.RetryInterval, f.RetryTimeout, true, func(context.Context) (bool, error) { + res, err := f.HTTP.Request(&HTTPRequestOpts{ + OverrideURL: f.HTTP.HTTPURLMetricsBase, + Path: "/ready", + }) + if err != nil { + return false, nil + } + return res.StatusCode == http.StatusOK, nil + }) +} diff --git a/test/e2e/gateway/gateway_test.go b/test/e2e/gateway/gateway_test.go index f5cfe470fbd..1816b99ad03 100644 --- a/test/e2e/gateway/gateway_test.go +++ b/test/e2e/gateway/gateway_test.go @@ -125,6 +125,7 @@ var _ = Describe("Gateway API", func() { var err error contourCmd, contourConfigFile, err = f.Deployment.StartLocalContour(contourConfig, contourConfiguration, additionalContourArgs...) require.NoError(f.T(), err) + DeferCleanup(f.Deployment.StopLocalContour, contourCmd, contourConfigFile) // Wait for Envoy to be healthy. require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated()) @@ -135,12 +136,11 @@ var _ = Describe("Gateway API", func() { gatewayClassCond := func(*gatewayapi_v1.GatewayClass) bool { return true } require.True(f.T(), f.CreateGatewayClassAndWaitFor(contourGatewayClass, gatewayClassCond)) + DeferCleanup(f.DeleteGatewayClass, contourGatewayClass, false) + require.True(f.T(), f.CreateGatewayAndWaitFor(contourGateway, e2e.GatewayProgrammed)) - }) - AfterEach(func() { - require.NoError(f.T(), f.DeleteGatewayClass(contourGatewayClass, false)) - require.NoError(f.T(), f.Deployment.StopLocalContour(contourCmd, contourConfigFile)) + require.NoError(f.T(), f.WaitForReachable()) }) Describe("Gateway with one HTTP listener", func() { diff --git a/test/e2e/httpproxy/cookie_rewrite_test.go b/test/e2e/httpproxy/cookie_rewrite_test.go index de8bf09965b..ff88feec909 100644 --- a/test/e2e/httpproxy/cookie_rewrite_test.go +++ b/test/e2e/httpproxy/cookie_rewrite_test.go @@ -878,6 +878,10 @@ func deployEchoServer(t require.TestingT, c client.Client, ns, name string) { } require.NoError(t, c.Create(context.TODO(), deployment)) + if err := e2e.WaitForDeployment(deployment, c); err != nil { + require.NoError(t, err) + } + service := &core_v1.Service{ ObjectMeta: meta_v1.ObjectMeta{ Namespace: ns, diff --git a/test/e2e/httpproxy/httpproxy_test.go b/test/e2e/httpproxy/httpproxy_test.go index 0a838a2294a..9005016bd65 100644 --- a/test/e2e/httpproxy/httpproxy_test.go +++ b/test/e2e/httpproxy/httpproxy_test.go @@ -99,13 +99,11 @@ var _ = Describe("HTTPProxy", func() { var err error contourCmd, contourConfigFile, err = f.Deployment.StartLocalContour(contourConfig, contourConfiguration, additionalContourArgs...) require.NoError(f.T(), err) + DeferCleanup(f.Deployment.StopLocalContour, contourCmd, contourConfigFile) // Wait for Envoy to be healthy. require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated()) - }) - - AfterEach(func() { - require.NoError(f.T(), f.Deployment.StopLocalContour(contourCmd, contourConfigFile)) + require.NoError(f.T(), f.WaitForReachable()) }) f.NamespacedTest("httpproxy-direct-response-policy", testDirectResponseRule) diff --git a/test/e2e/incluster/incluster_test.go b/test/e2e/incluster/incluster_test.go index 50801b6b7b0..957a99bc818 100644 --- a/test/e2e/incluster/incluster_test.go +++ b/test/e2e/incluster/incluster_test.go @@ -79,25 +79,26 @@ var _ = Describe("Incluster", func() { require.NoError(f.T(), f.Deployment.EnsureContourDeployment()) require.NoError(f.T(), f.Deployment.WaitForContourDeploymentUpdated()) require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated()) - }) - - AfterEach(func() { - require.NoError(f.T(), f.Deployment.DumpContourLogs()) - - // Clean out contour after each test. - require.NoError(f.T(), f.Deployment.EnsureDeleted(f.Deployment.ContourDeployment)) - require.NoError(f.T(), f.Deployment.EnsureDeleted(contourConfig)) - require.Eventually(f.T(), func() bool { - pods := new(core_v1.PodList) - podListOptions := &client.ListOptions{ - LabelSelector: labels.SelectorFromSet(f.Deployment.ContourDeployment.Spec.Selector.MatchLabels), - Namespace: f.Deployment.ContourDeployment.Namespace, - } - if err := f.Client.List(context.TODO(), pods, podListOptions); err != nil { - return false - } - return len(pods.Items) == 0 - }, time.Minute, time.Millisecond*50) + require.NoError(f.T(), f.WaitForReachable()) + + DeferCleanup(func() { + require.NoError(f.T(), f.Deployment.DumpContourLogs()) + + // Clean out contour after each test. + require.NoError(f.T(), f.Deployment.EnsureDeleted(f.Deployment.ContourDeployment)) + require.NoError(f.T(), f.Deployment.EnsureDeleted(contourConfig)) + require.Eventually(f.T(), func() bool { + pods := new(core_v1.PodList) + podListOptions := &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(f.Deployment.ContourDeployment.Spec.Selector.MatchLabels), + Namespace: f.Deployment.ContourDeployment.Namespace, + } + if err := f.Client.List(context.TODO(), pods, podListOptions); err != nil { + return false + } + return len(pods.Items) == 0 + }, time.Minute, time.Millisecond*50) + }) }) f.NamespacedTest("smoke-test", testSimpleSmoke) diff --git a/test/e2e/infra/infra_test.go b/test/e2e/infra/infra_test.go index 4453633cec9..94e7a782aff 100644 --- a/test/e2e/infra/infra_test.go +++ b/test/e2e/infra/infra_test.go @@ -110,17 +110,15 @@ var _ = Describe("Infra", func() { var err error contourCmd, contourConfigFile, err = f.Deployment.StartLocalContour(contourConfig, contourConfiguration, additionalContourArgs...) require.NoError(f.T(), err) + DeferCleanup(f.Deployment.StopLocalContour, contourCmd, contourConfigFile) // Wait for Envoy to be healthy. require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated()) + require.NoError(f.T(), f.WaitForReachable()) kubectlCmd, err = f.Kubectl.StartKubectlPortForward(19001, 9001, "projectcontour", f.Deployment.EnvoyResourceAndName(), additionalContourArgs...) require.NoError(f.T(), err) - }) - - AfterEach(func() { - f.Kubectl.StopKubectlPortForward(kubectlCmd) - require.NoError(f.T(), f.Deployment.StopLocalContour(contourCmd, contourConfigFile)) + DeferCleanup(f.Kubectl.StopKubectlPortForward, kubectlCmd) }) f.Test(testMetrics) diff --git a/test/e2e/infra/metrics_test.go b/test/e2e/infra/metrics_test.go index 05e2502d4d7..5fd0c188446 100644 --- a/test/e2e/infra/metrics_test.go +++ b/test/e2e/infra/metrics_test.go @@ -18,10 +18,10 @@ package infra import ( "crypto/tls" "net/http" - "time" . "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/onsi/gomega/gexec" "github.com/stretchr/testify/require" "github.com/projectcontour/contour/test/e2e" @@ -29,66 +29,29 @@ import ( func testMetrics() { Specify("requests to default metrics listener are served", func() { - t := f.T() - res, ok := f.HTTP.MetricsRequestUntil(&e2e.HTTPRequestOpts{ Path: "/stats", Condition: e2e.HasStatusCode(200), }) - require.NotNil(t, res, "request never succeeded") - require.Truef(t, ok, "expected 200 response code, got %d", res.StatusCode) + require.NotNil(f.T(), res, "request never succeeded") + require.Truef(f.T(), ok, "expected 200 response code, got %d", res.StatusCode) }) } func testReady() { Specify("requests to default ready listener are served", func() { - t := f.T() - res, ok := f.HTTP.MetricsRequestUntil(&e2e.HTTPRequestOpts{ Path: "/ready", Condition: e2e.HasStatusCode(200), }) - require.NotNil(t, res, "request never succeeded") - require.Truef(t, ok, "expected 200 response code, got %d", res.StatusCode) + require.NotNil(f.T(), res, "request never succeeded") + require.Truef(f.T(), ok, "expected 200 response code, got %d", res.StatusCode) }) } func testEnvoyMetricsOverHTTPS() { // Flake tracking issue: https://github.com/projectcontour/contour/issues/5932 Specify("requests to metrics listener are served", FlakeAttempts(3), func() { - t := f.T() - - // Port-forward seems to be flaky. Following sequence happens: - // - // 1. Envoy becomes ready. - // 2. Port-forward is started. - // 3. HTTPS request is sent but the connection times out with errors - // "error creating error stream for port 18003 -> 8003: Timeout occurred", - // "error creating forwarding stream for port 18003 -> 8003: Timeout occurred" - // 4. Meanwhile the metrics listener gets added. - // 5. Sometimes (one out of ~1-50 runs) port-forward gets stuck and packets are not forwarded - // even after listener is up and connection attempts are still regularly retried. - // - // When the problem occurs, Wireshark does not show any traffic on the container side. - // The problem could be e.g. undiscovered race condition with Kubernetes port-forward. - // - // Following workarounds seem to work: - // - // a) Add a fixed delay before port-forwarding. - // b) Wait for Envoy to have listener by observing Envoy logs before port-forwarding. - // c) Restart port-forwarding when connection attempts fail. - // - // Executing port-forward started in BeforeEach(), JustBeforeEach() or combining metrics - // port with the admin port-forward command (127.0.0.1:19001 -> 9001) did not help. - // - // The simplest workaround (a) is taken here. - time.Sleep(5 * time.Second) - - // Port-forward for metrics over HTTPS - kubectlCmd, err := f.Kubectl.StartKubectlPortForward(18003, 8003, "projectcontour", f.Deployment.EnvoyResourceAndName()) - require.NoError(t, err) - defer f.Kubectl.StopKubectlPortForward(kubectlCmd) - clientCert, caBundle := f.Certs.GetTLSCertificate("projectcontour", "metrics-client") client := http.Client{ Transport: &http.Transport{ @@ -100,13 +63,32 @@ func testEnvoyMetricsOverHTTPS() { }, } + var kubectlCmd *gexec.Session + defer func() { + if kubectlCmd != nil { + f.Kubectl.StopKubectlPortForward(kubectlCmd) + } + }() + gomega.Eventually(func() int { + var err error + if kubectlCmd == nil { + kubectlCmd, err = f.Kubectl.StartKubectlPortForward(18003, 8003, "projectcontour", f.Deployment.EnvoyResourceAndName()) + if err != nil { + GinkgoWriter.Println("failed to start port-forward:", err) + return 0 + } + } + resp, err := client.Get("https://localhost:18003/stats") if err != nil { - GinkgoWriter.Println(err) + GinkgoWriter.Println("request failed, restarting port-forward:", err) + f.Kubectl.StopKubectlPortForward(kubectlCmd) + kubectlCmd = nil return 0 } + defer resp.Body.Close() return resp.StatusCode - }, "10s", "1s").Should(gomega.Equal(http.StatusOK)) + }, "30s", "1s").Should(gomega.Equal(http.StatusOK)) }) } diff --git a/test/e2e/ingress/ingress_test.go b/test/e2e/ingress/ingress_test.go index 0e48a3cf481..7f82577692a 100644 --- a/test/e2e/ingress/ingress_test.go +++ b/test/e2e/ingress/ingress_test.go @@ -81,15 +81,12 @@ var _ = Describe("Ingress", func() { var err error contourCmd, contourConfigFile, err = f.Deployment.StartLocalContour(contourConfig, contourConfiguration, additionalContourArgs...) require.NoError(f.T(), err) + DeferCleanup(f.Deployment.StopLocalContour, contourCmd, contourConfigFile) // Wait for Envoy to be healthy. require.NoError(f.T(), f.Deployment.WaitForEnvoyUpdated()) }) - AfterEach(func() { - require.NoError(f.T(), f.Deployment.StopLocalContour(contourCmd, contourConfigFile)) - }) - f.NamespacedTest("ingress-tls-wildcard-host", testTLSWildcardHost) f.NamespacedTest("backend-tls", func(namespace string) { diff --git a/test/e2e/kubectl.go b/test/e2e/kubectl.go index 87381705681..2d0eea46baf 100644 --- a/test/e2e/kubectl.go +++ b/test/e2e/kubectl.go @@ -18,6 +18,7 @@ package e2e import ( "fmt" "io" + "net" "os/exec" "time" @@ -46,9 +47,36 @@ func (k *Kubectl) StartKubectlPortForward(localPort, containerPort int, namespac } // Wait until port-forward to be up and running. gomega.Eventually(session).Should(gbytes.Say("Forwarding from")) + + // Also wait for the local port to be open. + if err := k.WaitPort(localPort); err != nil { + session.Terminate().Wait(time.Minute) + return nil, err + } + return session, nil } +func (k *Kubectl) WaitPort(port int) error { + address := net.JoinHostPort("127.0.0.1", fmt.Sprintf("%d", port)) + timeout := time.After(30 * time.Second) + tick := time.NewTicker(100 * time.Millisecond) + defer tick.Stop() + + for { + select { + case <-timeout: + return fmt.Errorf("timed out waiting for port %d to be open", port) + case <-tick.C: + conn, err := net.DialTimeout("tcp", address, 500*time.Millisecond) + if err == nil { + conn.Close() + return nil + } + } + } +} + func (k *Kubectl) StopKubectlPortForward(cmd *gexec.Session) { // Default timeout of 1s produces test flakes, // a minute should be more than enough to avoid them. diff --git a/test/e2e/provisioner.go b/test/e2e/provisioner.go index 60bb0bc94da..a0cc71e36f5 100644 --- a/test/e2e/provisioner.go +++ b/test/e2e/provisioner.go @@ -174,7 +174,7 @@ func (p *Provisioner) EnsureResourcesForInclusterProvisioner() error { } } - return nil + return WaitForDeployment(p.Deployment, p.client) } // DeleteResourcesForInclusterContour ensures deletion of all resources diff --git a/test/e2e/provisioner/provisioner_test.go b/test/e2e/provisioner/provisioner_test.go index dba410892b3..21cfc65f2c0 100644 --- a/test/e2e/provisioner/provisioner_test.go +++ b/test/e2e/provisioner/provisioner_test.go @@ -756,6 +756,7 @@ var _ = Describe("Gateway provisioner", func() { })) By("Skip reconciling the TLSRoute if disabledFeatures includes it") + f.Certs.CreateSelfSignedCert(namespace, "backend-server-cert", "backend-server-cert", "echo-secure") f.Fixtures.EchoSecure.Deploy(namespace, "echo-secure", nil) route := &gatewayapi_v1alpha2.TLSRoute{ ObjectMeta: meta_v1.ObjectMeta{ diff --git a/test/e2e/waiter.go b/test/e2e/waiter.go index 6d9e4988496..26d597e8c9e 100644 --- a/test/e2e/waiter.go +++ b/test/e2e/waiter.go @@ -45,7 +45,7 @@ func WaitForContourDeploymentUpdated(deployment *apps_v1.Deployment, cli client. return updatedPods == int(*deployment.Spec.Replicas), nil } - return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*50, time.Minute*3, true, updatedPods) + return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*200, time.Minute*5, true, updatedPods) } func WaitForEnvoyDaemonSetUpdated(daemonset *apps_v1.DaemonSet, cli client.Client, image string) error { @@ -64,7 +64,7 @@ func WaitForEnvoyDaemonSetUpdated(daemonset *apps_v1.DaemonSet, cli client.Clien return updatedPods == int(ds.Status.DesiredNumberScheduled) && ds.Status.NumberReady > 0, nil } - return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*50, time.Minute*3, true, updatedPods) + return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*200, time.Minute*5, true, updatedPods) } func WaitForEnvoyDeploymentUpdated(deployment *apps_v1.Deployment, cli client.Client, image string) error { @@ -83,7 +83,7 @@ func WaitForEnvoyDeploymentUpdated(deployment *apps_v1.Deployment, cli client.Cl int(dp.Status.ReadyReplicas) == updatedPods && int(dp.Status.UnavailableReplicas) == 0, nil } - return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*50, time.Minute*3, true, updatedPods) + return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*200, time.Minute*5, true, updatedPods) } func getPodsUpdatedWithContourImage(ctx context.Context, labelSelector labels.Selector, namespace, image string, cli client.Client) int { @@ -115,3 +115,23 @@ func getPodsUpdatedWithContourImage(ctx context.Context, labelSelector labels.Se } return updatedPods } + +func WaitForDeployment(deployment *apps_v1.Deployment, cli client.Client) error { + return wait.PollUntilContextTimeout(context.Background(), time.Millisecond*200, time.Minute*5, true, func(ctx context.Context) (bool, error) { + dp := new(apps_v1.Deployment) + if err := cli.Get(ctx, client.ObjectKeyFromObject(deployment), dp); err != nil { + return false, nil + } + + if dp.Spec.Replicas == nil { + return false, nil + } + + replicas := *dp.Spec.Replicas + return dp.Status.ObservedGeneration == dp.Generation && + dp.Status.Replicas == replicas && + dp.Status.ReadyReplicas == replicas && + dp.Status.UpdatedReplicas == replicas && + dp.Status.AvailableReplicas == replicas, nil + }) +}