Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
KUBERNETES_VERSION = 1.33.0
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to use 1.33.1, but setup-envtest cannot find the 1.33.1 archive, so the version is pinned to 1.33.0 for now.


# Tool versions
MYSQLSH_VERSION = 8.4.6-1
OS_VERSION := $(shell . /etc/os-release; echo $$VERSION_ID)
Expand Down Expand Up @@ -91,16 +93,16 @@ check-generate:

.PHONY: envtest
envtest: aqua-install
source <($(SETUP_ENVTEST) use -p env); \
source <($(SETUP_ENVTEST) use -p env ${KUBERNETES_VERSION}); \
export MOCO_CHECK_INTERVAL=100ms; \
export MOCO_CLONE_WAIT_DURATION=100ms; \
go test -v -count 1 -race ./clustering -ginkgo.randomize-all -ginkgo.v -ginkgo.fail-fast
source <($(SETUP_ENVTEST) use -p env); \
source <($(SETUP_ENVTEST) use -p env ${KUBERNETES_VERSION}); \
export DEBUG_CONTROLLER=1; \
go test -v -count 1 -race ./controllers -ginkgo.randomize-all -ginkgo.v -ginkgo.fail-fast
source <($(SETUP_ENVTEST) use -p env); \
source <($(SETUP_ENVTEST) use -p env ${KUBERNETES_VERSION}); \
go test -v -count 1 -race ./api/... -ginkgo.randomize-all -ginkgo.v
source <($(SETUP_ENVTEST) use -p env); \
source <($(SETUP_ENVTEST) use -p env ${KUBERNETES_VERSION}); \
go test -v -count 1 -race ./backup -ginkgo.randomize-all -ginkgo.v -ginkgo.fail-fast

.PHONY: test-dbop
Expand Down
38 changes: 36 additions & 2 deletions clustering/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,47 @@ func (p *managerProcess) switchover(ctx context.Context, ss *StatusSet) error {
log.Info("begin switchover the primary", "current", ss.Primary, "next", ss.Candidate)

pdb := ss.DBOps[ss.Primary]
if err := pdb.SetReadOnly(ctx, true); err != nil {
return fmt.Errorf("failed to make instance %d read-only: %w", ss.Primary, err)

// Determine the switchover timeout based on `PreStopSeconds`
// If the switchover takes longer than PreStopSeconds, the switchover will fail and failover will occur.
preStopSeconds, err := strconv.Atoi(constants.PreStopSeconds)
if err != nil {
return err
}
switchOverTimeoutSeconds := preStopSeconds / 2

// SetReadOnly waits for a running DML.
// Therefore, if it waits for a long time, deleteGracePeriodSeconds may be reached.
// To avoid this, execute killConnections after a certain period of time.
done := make(chan error, 1)
go func() {
done <- pdb.SetReadOnly(ctx, true)
}()
select {
case err := <-done:
if err != nil {
// If SetReadOnly fails, kill connections and retry switchover.
if kerr := pdb.KillConnections(ctx); kerr != nil {
return fmt.Errorf("failed to make instance %d read-only: %w, and failed to kill connections: %w", ss.Primary, err, kerr)
}
return fmt.Errorf("failed to make instance %d read-only: %w", ss.Primary, err)
}
case <-time.After(time.Duration(switchOverTimeoutSeconds) * time.Second):
log.Info("setReadOnly is taking too long, kill connections", "instance", ss.Primary)
if err := pdb.KillConnections(ctx); err != nil {
return fmt.Errorf("failed to kill connections in instance %d: %w", ss.Primary, err)
}
err := <-done
if err != nil {
return fmt.Errorf("failed to make instance %d read-only: %w", ss.Primary, err)
}
}

time.Sleep(100 * time.Millisecond)
if err := pdb.KillConnections(ctx); err != nil {
return fmt.Errorf("failed to kill connections in instance %d: %w", ss.Primary, err)
}

pst, err := pdb.GetStatus(ctx)
if err != nil {
return fmt.Errorf("failed to get the primary status: %w", err)
Expand Down
147 changes: 147 additions & 0 deletions e2e/switchover_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package e2e

import (
_ "embed"
"errors"
"fmt"
mocov1beta2 "github.com/cybozu-go/moco/api/v1beta2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// switchoverYAML holds the contents of testdata/switchover.yaml, embedded at
// build time. The switchover specs pass it through fillTemplate and pipe the
// result to `kubectl apply -f -`.
//
//go:embed testdata/switchover.yaml
var switchoverYAML string

// The "switchover" context checks that `kubectl moco switchover` moves the
// primary to another instance even while a long-running UPDATE is executing on
// the current primary. The specs are Ordered: the first one creates the cluster
// and test data, the following ones each request a switchover, and the last one
// deletes the cluster. The whole context is skipped when doUpgrade is set.
var _ = Context("switchover", Ordered, func() {
	if doUpgrade {
		return
	}

	// clusterHealthy returns nil once the "test" MySQLCluster in the
	// "switchover" namespace has a Healthy condition whose status is True.
	// Any other outcome is reported as an error so Eventually keeps polling.
	clusterHealthy := func() error {
		cluster, err := getCluster("switchover", "test")
		if err != nil {
			return err
		}
		for _, cond := range cluster.Status.Conditions {
			if cond.Type != mocov1beta2.ConditionHealthy {
				continue
			}
			if cond.Status == metav1.ConditionTrue {
				return nil
			}
			return fmt.Errorf("cluster is not healthy: %s", cond.Status)
		}
		return errors.New("no health condition")
	}

	// switchoverWhileRunning starts the given SQL statement against the primary
	// service in the background, requests a switchover, and then waits until
	// the primary index has changed and the cluster is healthy again.
	switchoverWhileRunning := func(query string) {
		cluster, err := getCluster("switchover", "test")
		Expect(err).NotTo(HaveOccurred())

		beforePrimaryIndex := cluster.Status.CurrentPrimaryIndex

		go func() {
			// The result is intentionally ignored: the statement only exists to
			// keep a DML running on the primary while the switchover is requested.
			runInPod("mysql", "-u", "user", "-pabc",
				"-h", "moco-test-primary.switchover.svc.cluster.local", "test",
				"-e", query)
		}()

		kubectlSafe(nil, "moco", "-n", "switchover", "switchover", "test")
		Eventually(func() int {
			cluster, err := getCluster("switchover", "test")
			if err != nil {
				// Report the old index on a failed lookup so that the assertion
				// keeps polling. Returning a fixed value such as 0 would make
				// ShouldNot(Equal(...)) succeed spuriously whenever the previous
				// primary index is not 0.
				return beforePrimaryIndex
			}
			return cluster.Status.CurrentPrimaryIndex
		}).ShouldNot(Equal(beforePrimaryIndex))

		Eventually(clusterHealthy).Should(Succeed())
	}

	It("should construct a 3-instance cluster", func() {
		kubectlSafe(fillTemplate(switchoverYAML), "apply", "-f", "-")
		Eventually(clusterHealthy).Should(Succeed())

		// Prepare a database, an application user, and a one-row table that the
		// following specs update while a switchover is in progress.
		kubectlSafe(nil, "moco", "mysql", "test",
			"-n", "switchover",
			"-u", "moco-writable",
			"--", "-e", "CREATE DATABASE test;")
		kubectlSafe(nil, "moco", "mysql", "test",
			"-n", "switchover",
			"-u", "moco-admin",
			"--", "-e", "CREATE USER 'user'@'%' IDENTIFIED BY 'abc';")
		kubectlSafe(nil, "moco", "mysql", "test",
			"-n", "switchover",
			"-u", "moco-admin",
			"--", "-e", "GRANT ALL PRIVILEGES ON test.* TO 'user'@'%';")
		kubectlSafe(nil, "moco", "mysql", "test",
			"-n", "switchover",
			"-u", "moco-writable",
			"--", "-e", "CREATE TABLE test.t1 (foo int);")
		kubectlSafe(nil, "moco", "mysql", "test",
			"-n", "switchover",
			"-u", "moco-writable",
			"--", "-e", "INSERT INTO test.t1 (foo) VALUES (1); COMMIT;")
	})

	It("should switch the primary if requested, even when a long global read lock is acquired", func() {
		// Calling SLEEP within an UPDATE statement keeps a DML running on the primary.
		// The value specified for SLEEP must be less than half the value of `PreStopSeconds`.
		switchoverWhileRunning("UPDATE test.t1 SET foo = SLEEP(5)")
	})

	It("should switch the primary if requested, even when a holding global read lock exceeds timeout", func() {
		// Calling SLEEP within an UPDATE statement keeps a DML running on the primary.
		// The value specified for SLEEP must be more than half the value of `PreStopSeconds`.
		switchoverWhileRunning("UPDATE test.t1 SET foo = SLEEP(15)")
	})

	It("should delete clusters", func() {
		kubectlSafe(nil, "delete", "-n", "switchover", "mysqlclusters", "--all")
		verifyAllPodsDeleted("switchover")
	})
})
25 changes: 25 additions & 0 deletions e2e/testdata/switchover.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Namespace that holds every resource used by the switchover e2e specs.
apiVersion: v1
kind: Namespace
metadata:
name: switchover
---
# MySQLCluster "test" with three instances; the e2e specs request switchovers
# on this cluster.
apiVersion: moco.cybozu.com/v1beta2
kind: MySQLCluster
metadata:
namespace: switchover
name: test
spec:
replicas: 3
podTemplate:
spec:
containers:
- name: mysqld
# The image tag is a template placeholder filled in before the manifest is
# applied (presumably the MySQL version under test; confirm in fillTemplate).
image: ghcr.io/cybozu-go/moco/mysql:{{ . }}
volumeClaimTemplates:
- metadata:
name: mysql-data
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 1Gi