Skip to content

Commit 977b618

Browse files
authored
[misc] fully deprecate predictor (#53)
1 parent e64a54a commit 977b618

17 files changed

Lines changed: 992 additions & 3652 deletions

pkg/controller/v1beta1/inferenceservice/components/predictor.go

Lines changed: 0 additions & 1037 deletions
This file was deleted.

pkg/controller/v1beta1/inferenceservice/components/predictor_v2.go

Lines changed: 0 additions & 803 deletions
This file was deleted.

pkg/controller/v1beta1/inferenceservice/components/predictor_v2_test.go

Lines changed: 0 additions & 989 deletions
This file was deleted.

pkg/controller/v1beta1/inferenceservice/controller.go

Lines changed: 157 additions & 186 deletions
Large diffs are not rendered by default.

pkg/controller/v1beta1/inferenceservice/controller_test.go

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
appsv1 "k8s.io/api/apps/v1"
1717
autoscalingv2 "k8s.io/api/autoscaling/v2"
1818
v1 "k8s.io/api/core/v1"
19+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1920
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2021
"k8s.io/apimachinery/pkg/runtime"
2122
"k8s.io/apimachinery/pkg/types"
@@ -335,6 +336,41 @@ func TestInferenceServiceReconcile(t *testing.T) {
335336
err = c.Create(context.TODO(), baseModel)
336337
g.Expect(err).NotTo(gomega.HaveOccurred())
337338

339+
// Create an old predictor deployment to simulate existing resources
340+
oldDeployment := &appsv1.Deployment{
341+
ObjectMeta: metav1.ObjectMeta{
342+
Name: "test-legacy",
343+
Namespace: "default",
344+
Labels: map[string]string{
345+
constants.InferenceServicePodLabelKey: "test-legacy",
346+
},
347+
},
348+
Spec: appsv1.DeploymentSpec{
349+
Selector: &metav1.LabelSelector{
350+
MatchLabels: map[string]string{
351+
constants.InferenceServicePodLabelKey: "test-legacy",
352+
},
353+
},
354+
Template: v1.PodTemplateSpec{
355+
ObjectMeta: metav1.ObjectMeta{
356+
Labels: map[string]string{
357+
constants.InferenceServicePodLabelKey: "test-legacy",
358+
},
359+
},
360+
Spec: v1.PodSpec{
361+
Containers: []v1.Container{
362+
{
363+
Name: "predictor",
364+
Image: "old-predictor-image:latest",
365+
},
366+
},
367+
},
368+
},
369+
},
370+
}
371+
err = c.Create(context.TODO(), oldDeployment)
372+
g.Expect(err).NotTo(gomega.HaveOccurred())
373+
338374
// Create sklearn runtime
339375
rt := &v1beta1.ServingRuntime{
340376
ObjectMeta: metav1.ObjectMeta{
@@ -371,34 +407,42 @@ func TestInferenceServiceReconcile(t *testing.T) {
371407
g.Expect(err).NotTo(gomega.HaveOccurred())
372408
},
373409
validate: func(t *testing.T, c client.Client, isvc *v1beta1.InferenceService) {
374-
// Check that predictor deployment was created
375-
// The deployment name should be either "test-legacy-predictor-default" or "test-legacy"
376-
deployment := &appsv1.Deployment{}
377-
378-
// Try the default predictor name first
410+
// The migration should have happened, check that the resource was updated
411+
updatedIsvc := &v1beta1.InferenceService{}
379412
err := c.Get(context.TODO(), types.NamespacedName{
380-
Name: "test-legacy-predictor-default",
413+
Name: "test-legacy",
381414
Namespace: "default",
382-
}, deployment)
415+
}, updatedIsvc)
416+
g.Expect(err).NotTo(gomega.HaveOccurred())
383417

384-
if err != nil {
385-
// If not found, try the shorter name
386-
err = c.Get(context.TODO(), types.NamespacedName{
387-
Name: "test-legacy",
388-
Namespace: "default",
389-
}, deployment)
390-
}
418+
// Check that migration happened
419+
g.Expect(updatedIsvc.Spec.Model).NotTo(gomega.BeNil(), "Expected model to be migrated")
420+
g.Expect(updatedIsvc.Spec.Model.Name).To(gomega.Equal("sklearn-model"))
421+
g.Expect(updatedIsvc.Spec.Engine).NotTo(gomega.BeNil(), "Expected engine to be created")
391422

392-
// The deployment should exist
393-
g.Expect(err).NotTo(gomega.HaveOccurred(), "Expected predictor deployment to be created")
423+
// Check that predictor is cleared
424+
g.Expect(updatedIsvc.Spec.Predictor.Model).To(gomega.BeNil(), "Expected predictor.Model to be cleared")
394425

395-
// Verify it's using the sklearn runtime image
396-
g.Expect(deployment.Spec.Template.Spec.Containers).To(gomega.HaveLen(1))
397-
g.Expect(deployment.Spec.Template.Spec.Containers[0].Image).To(gomega.Equal("sklearn-server:v1"))
426+
// Check for deprecation warning
427+
g.Expect(updatedIsvc.ObjectMeta.Annotations).NotTo(gomega.BeNil())
428+
g.Expect(updatedIsvc.ObjectMeta.Annotations[constants.DeprecationWarning]).To(gomega.Equal("The Predictor field is deprecated and will be removed in a future release. Please use Engine and Model fields instead."))
398429

399-
// Verify labels
400-
g.Expect(deployment.Spec.Template.Labels).To(gomega.HaveKey(constants.InferenceServicePodLabelKey))
401-
g.Expect(deployment.Spec.Template.Labels[constants.InferenceServicePodLabelKey]).To(gomega.Equal("test-legacy"))
430+
// Check that old predictor deployment was deleted
431+
deployment := &appsv1.Deployment{}
432+
err = c.Get(context.TODO(), types.NamespacedName{
433+
Name: "test-legacy", // predictor deployment uses inference service name
434+
Namespace: "default",
435+
}, deployment)
436+
g.Expect(err).To(gomega.HaveOccurred(), "Expected old predictor deployment to be deleted")
437+
g.Expect(apierrors.IsNotFound(err)).To(gomega.BeTrue(), "Expected deployment to not be found")
438+
439+
// Check that new engine deployment was created
440+
engineDeployment := &appsv1.Deployment{}
441+
err = c.Get(context.TODO(), types.NamespacedName{
442+
Name: "test-legacy-engine",
443+
Namespace: "default",
444+
}, engineDeployment)
445+
g.Expect(err).NotTo(gomega.HaveOccurred(), "Expected engine deployment to be created")
402446
},
403447
},
404448
{

pkg/controller/v1beta1/inferenceservice/status/status_reconciler.go

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -27,41 +27,41 @@ func NewStatusReconciler() *StatusReconciler {
2727
}
2828

2929
// PropagateRawStatus propagates status from raw Kubernetes deployment
30-
func (sm *StatusReconciler) PropagateRawStatus(
30+
func (sr *StatusReconciler) PropagateRawStatus(
3131
status *v1beta1.InferenceServiceStatus,
3232
component v1beta1.ComponentType,
3333
deployment *appsv1.Deployment,
3434
url *apis.URL) {
3535

36-
statusSpec := sm.initializeComponentStatus(status, component)
36+
statusSpec := sr.initializeComponentStatus(status, component)
3737

3838
statusSpec.LatestCreatedRevision = deployment.GetObjectMeta().GetAnnotations()["deployment.kubernetes.io/revision"]
39-
condition := sm.getDeploymentCondition(deployment, appsv1.DeploymentAvailable)
39+
condition := sr.getDeploymentCondition(deployment, appsv1.DeploymentAvailable)
4040
if condition != nil && condition.Status == v1.ConditionTrue {
4141
statusSpec.URL = url
4242
}
43-
readyCondition := sm.getReadyConditionsMap()[component]
44-
sm.setCondition(status, readyCondition, condition)
43+
readyCondition := sr.getReadyConditionsMap()[component]
44+
sr.setCondition(status, readyCondition, condition)
4545
status.Components[component] = statusSpec
4646
status.ObservedGeneration = deployment.Status.ObservedGeneration
4747
}
4848

4949
// PropagateMultiNodeStatus propagates status from LeaderWorkerSet
50-
func (sm *StatusReconciler) PropagateMultiNodeStatus(
50+
func (sr *StatusReconciler) PropagateMultiNodeStatus(
5151
status *v1beta1.InferenceServiceStatus,
5252
component v1beta1.ComponentType,
5353
lws *lwsspec.LeaderWorkerSet,
5454
url *apis.URL) {
5555

56-
statusSpec := sm.initializeComponentStatus(status, component)
56+
statusSpec := sr.initializeComponentStatus(status, component)
5757

5858
statusSpec.LatestCreatedRevision = lws.GetObjectMeta().GetAnnotations()["resourceVersion"]
59-
lwsCondition := sm.getLWSConditions(lws, lwsspec.LeaderWorkerSetAvailable)
59+
lwsCondition := sr.getLWSConditions(lws, lwsspec.LeaderWorkerSetAvailable)
6060
if lwsCondition != nil && lwsCondition.Status == v1.ConditionTrue {
6161
statusSpec.URL = url
6262
}
6363

64-
readyCondition := sm.getReadyConditionsMap()[component]
64+
readyCondition := sr.getReadyConditionsMap()[component]
6565

6666
// Create a new condition with the correct component ready condition type
6767
// instead of using the LWS condition type directly
@@ -73,27 +73,27 @@ func (sm *StatusReconciler) PropagateMultiNodeStatus(
7373
Reason: lwsCondition.Reason,
7474
LastTransitionTime: lwsCondition.LastTransitionTime,
7575
}
76-
sm.setCondition(status, readyCondition, componentCondition)
76+
sr.setCondition(status, readyCondition, componentCondition)
7777
}
7878

7979
status.Components[component] = statusSpec
8080
status.ObservedGeneration = lws.Generation
8181
}
8282

8383
// PropagateMultiNodeRayVLLMStatus propagates status from multiple deployments
84-
func (sm *StatusReconciler) PropagateMultiNodeRayVLLMStatus(
84+
func (sr *StatusReconciler) PropagateMultiNodeRayVLLMStatus(
8585
status *v1beta1.InferenceServiceStatus,
8686
component v1beta1.ComponentType,
8787
deployments []*appsv1.Deployment,
8888
url *apis.URL) {
8989

90-
statusSpec := sm.initializeComponentStatus(status, component)
90+
statusSpec := sr.initializeComponentStatus(status, component)
9191

92-
firstDeployment, err := sm.getFirstDeployment(deployments)
92+
firstDeployment, err := sr.getFirstDeployment(deployments)
9393
if err != nil {
9494
// Handle error case gracefully - set a default state
95-
sm.setCondition(status, sm.getReadyConditionsMap()[component], &apis.Condition{
96-
Type: sm.getReadyConditionsMap()[component],
95+
sr.setCondition(status, sr.getReadyConditionsMap()[component], &apis.Condition{
96+
Type: sr.getReadyConditionsMap()[component],
9797
Status: v1.ConditionFalse,
9898
Reason: "NoDeployments",
9999
Message: "No deployments available",
@@ -103,23 +103,23 @@ func (sm *StatusReconciler) PropagateMultiNodeRayVLLMStatus(
103103

104104
statusSpec.LatestCreatedRevision = firstDeployment.GetObjectMeta().GetAnnotations()["deployment.kubernetes.io/revision"]
105105

106-
condition := sm.getMultiDeploymentCondition(deployments, appsv1.DeploymentAvailable)
106+
condition := sr.getMultiDeploymentCondition(deployments, appsv1.DeploymentAvailable)
107107
if condition != nil && condition.Status == v1.ConditionTrue {
108108
statusSpec.URL = url
109109
}
110-
readyCondition := sm.getReadyConditionsMap()[component]
111-
sm.setCondition(status, readyCondition, condition)
110+
readyCondition := sr.getReadyConditionsMap()[component]
111+
sr.setCondition(status, readyCondition, condition)
112112
status.Components[component] = statusSpec
113113
status.ObservedGeneration = firstDeployment.Status.ObservedGeneration
114114
}
115115

116116
// PropagateStatus propagates status from Knative Service
117-
func (sm *StatusReconciler) PropagateStatus(
117+
func (sr *StatusReconciler) PropagateStatus(
118118
status *v1beta1.InferenceServiceStatus,
119119
component v1beta1.ComponentType,
120120
serviceStatus *knservingv1.ServiceStatus) {
121121

122-
statusSpec := sm.initializeComponentStatus(status, component)
122+
statusSpec := sr.initializeComponentStatus(status, component)
123123

124124
statusSpec.LatestCreatedRevision = serviceStatus.LatestCreatedRevisionName
125125
revisionTraffic := map[string]int64{}
@@ -130,21 +130,21 @@ func (sm *StatusReconciler) PropagateStatus(
130130
}
131131

132132
// Handle traffic routing logic
133-
sm.handleTrafficRouting(&statusSpec, serviceStatus, revisionTraffic)
133+
sr.handleTrafficRouting(&statusSpec, serviceStatus, revisionTraffic)
134134

135135
if serviceStatus.LatestReadyRevisionName != statusSpec.LatestReadyRevision {
136136
statusSpec.LatestReadyRevision = serviceStatus.LatestReadyRevisionName
137137
}
138138

139139
// Propagate conditions
140-
sm.propagateServiceConditions(status, component, serviceStatus, &statusSpec)
140+
sr.propagateServiceConditions(status, component, serviceStatus, &statusSpec)
141141

142142
status.Components[component] = statusSpec
143143
status.ObservedGeneration = serviceStatus.ObservedGeneration
144144
}
145145

146146
// PropagateModelStatus propagates model status from pod information
147-
func (sm *StatusReconciler) PropagateModelStatus(
147+
func (sr *StatusReconciler) PropagateModelStatus(
148148
status *v1beta1.InferenceServiceStatus,
149149
statusSpec v1beta1.ComponentStatusSpec,
150150
podList *v1.PodList,
@@ -153,34 +153,34 @@ func (sm *StatusReconciler) PropagateModelStatus(
153153
// Check at least one pod is running for the latest revision of inferenceservice
154154
totalCopies := len(podList.Items)
155155
if totalCopies == 0 {
156-
sm.UpdateModelRevisionStates(status, v1beta1.Pending, totalCopies, nil)
156+
sr.UpdateModelRevisionStates(status, v1beta1.Pending, totalCopies, nil)
157157
return
158158
}
159159

160160
// Use helper function to safely get the first pod
161-
firstPod, err := sm.getFirstPod(podList)
161+
firstPod, err := sr.getFirstPod(podList)
162162
if err != nil {
163-
sm.UpdateModelRevisionStates(status, v1beta1.Pending, totalCopies, nil)
163+
sr.UpdateModelRevisionStates(status, v1beta1.Pending, totalCopies, nil)
164164
return
165165
}
166166

167167
// Update model state to 'Loaded' if inferenceservice status is ready.
168168
if status.IsReady() {
169169
if rawDeployment {
170-
sm.UpdateModelRevisionStates(status, v1beta1.Loaded, totalCopies, nil)
170+
sr.UpdateModelRevisionStates(status, v1beta1.Loaded, totalCopies, nil)
171171
return
172172
} else if statusSpec.LatestCreatedRevision == statusSpec.LatestReadyRevision {
173-
sm.UpdateModelRevisionStates(status, v1beta1.Loaded, totalCopies, nil)
173+
sr.UpdateModelRevisionStates(status, v1beta1.Loaded, totalCopies, nil)
174174
return
175175
}
176176
}
177177

178178
// Check container statuses
179-
sm.checkContainerStatuses(status, firstPod, totalCopies)
179+
sr.checkContainerStatuses(status, firstPod, totalCopies)
180180
}
181181

182182
// UpdateModelRevisionStates updates the model revision states
183-
func (sm *StatusReconciler) UpdateModelRevisionStates(
183+
func (sr *StatusReconciler) UpdateModelRevisionStates(
184184
status *v1beta1.InferenceServiceStatus,
185185
modelState v1beta1.ModelState,
186186
totalCopies int,
@@ -205,12 +205,12 @@ func (sm *StatusReconciler) UpdateModelRevisionStates(
205205
}
206206

207207
if info != nil {
208-
sm.SetModelFailureInfo(status, info)
208+
sr.SetModelFailureInfo(status, info)
209209
}
210210
}
211211

212212
// UpdateModelTransitionStatus updates the model transition status
213-
func (sm *StatusReconciler) UpdateModelTransitionStatus(
213+
func (sr *StatusReconciler) UpdateModelTransitionStatus(
214214
status *v1beta1.InferenceServiceStatus,
215215
transitionStatus v1beta1.TransitionStatus,
216216
info *v1beta1.FailureInfo) {
@@ -227,12 +227,12 @@ func (sm *StatusReconciler) UpdateModelTransitionStatus(
227227
}
228228

229229
if info != nil {
230-
sm.SetModelFailureInfo(status, info)
230+
sr.SetModelFailureInfo(status, info)
231231
}
232232
}
233233

234234
// SetModelFailureInfo sets the model failure information
235-
func (sm *StatusReconciler) SetModelFailureInfo(status *v1beta1.InferenceServiceStatus, info *v1beta1.FailureInfo) bool {
235+
func (sr *StatusReconciler) SetModelFailureInfo(status *v1beta1.InferenceServiceStatus, info *v1beta1.FailureInfo) bool {
236236
if reflect.DeepEqual(info, status.ModelStatus.LastFailureInfo) {
237237
return false
238238
}
@@ -241,12 +241,12 @@ func (sm *StatusReconciler) SetModelFailureInfo(status *v1beta1.InferenceService
241241
}
242242

243243
// PropagateCrossComponentStatus aggregates conditions across components
244-
func (sm *StatusReconciler) PropagateCrossComponentStatus(
244+
func (sr *StatusReconciler) PropagateCrossComponentStatus(
245245
status *v1beta1.InferenceServiceStatus,
246246
componentList []v1beta1.ComponentType,
247247
conditionType apis.ConditionType) {
248248

249-
conditionsMap, ok := sm.getConditionsMapIndex()[conditionType]
249+
conditionsMap, ok := sr.getConditionsMapIndex()[conditionType]
250250
if !ok {
251251
return
252252
}
@@ -266,5 +266,5 @@ func (sm *StatusReconciler) PropagateCrossComponentStatus(
266266
}
267267
}
268268

269-
sm.setCondition(status, conditionType, crossComponentCondition)
269+
sr.setCondition(status, conditionType, crossComponentCondition)
270270
}

pkg/controller/v1beta1/inferenceservice/utils/annotations.go

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package utils
33
import (
44
"strings"
55

6-
"github.com/sgl-project/ome/pkg/apis/ome/v1beta1"
76
"github.com/sgl-project/ome/pkg/constants"
87
"github.com/sgl-project/ome/pkg/controller/v1beta1/controllerconfig"
98
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -34,25 +33,6 @@ func IsOriginalModelVolumeMountNecessary(annotations map[string]string) bool {
3433
annotations[constants.FTServingWithMergedWeightsAnnotationKey] != "true"
3534
}
3635

37-
// IsEntrypointRouter checks if the InferenceService has the
38-
// entrypoint-component annotation set to router.
39-
// Returns true if the annotation is set to router, false otherwise.
40-
func IsEntrypointRouter(annotations map[string]string) bool {
41-
componentValue, hasAnnotation := annotations[constants.EntrypointComponent]
42-
if !hasAnnotation {
43-
return false
44-
}
45-
46-
// Validate against known component types
47-
switch v1beta1.ComponentType(componentValue) {
48-
case v1beta1.RouterComponent:
49-
return true
50-
default:
51-
// Invalid component type
52-
return false
53-
}
54-
}
55-
5636
func IsEmptyModelDirVolumeRequired(annotations map[string]string) bool {
5737
modelInitInject := annotations[constants.ModelInitInjectionKey]
5838
fineTunedAdapterInject := annotations[constants.FineTunedAdapterInjectionKey]

0 commit comments

Comments
 (0)