From ae1e108e5f71943fe329b34b758374221f970df7 Mon Sep 17 00:00:00 2001 From: suryans-commit <80422715+suryans-commit@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:45:38 -0700 Subject: [PATCH] Support InferenceComponent CRD. (#260) Description of changes: - Add new `InferenceComponent` CRD - Add unit and e2e tests By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- apis/v1alpha1/ack-generate-metadata.yaml | 6 +- apis/v1alpha1/enums.go | 12 +- apis/v1alpha1/generator.yaml | 70 +- apis/v1alpha1/inference_component.go | 103 +++ apis/v1alpha1/types.go | 56 +- apis/v1alpha1/zz_generated.deepcopy.go | 294 ++++++++ cmd/controller/main.go | 1 + ....services.k8s.aws_inferencecomponents.yaml | 249 +++++++ config/crd/kustomization.yaml | 1 + config/rbac/cluster-role-controller.yaml | 20 + config/rbac/role-reader.yaml | 1 + config/rbac/role-writer.yaml | 2 + generator.yaml | 70 +- ....services.k8s.aws_inferencecomponents.yaml | 249 +++++++ helm/templates/_helpers.tpl | 20 + helm/templates/role-reader.yaml | 1 + helm/templates/role-writer.yaml | 2 + pkg/resource/endpoint_config/manager.go | 13 +- .../sdkapi/describe/success_describe.json | 1 + .../endpoint_config/testdata/test_suite.yaml | 4 +- .../readone/desired/late_initialize.yaml | 45 ++ .../readone/observed/right_after_create.yaml | 40 ++ .../custom_update_conditions.go | 55 ++ pkg/resource/inference_component/delta.go | 166 +++++ .../inference_component/descriptor.go | 155 ++++ pkg/resource/inference_component/hooks.go | 166 +++++ .../inference_component/identifiers.go | 55 ++ pkg/resource/inference_component/manager.go | 360 ++++++++++ .../inference_component/manager_factory.go | 96 +++ .../manager_test_suite_test.go | 139 ++++ .../inference_component/references.go | 56 ++ pkg/resource/inference_component/resource.go | 100 +++ pkg/resource/inference_component/sdk.go | 661 ++++++++++++++++++ pkg/resource/inference_component/tags.go | 63 ++ 
.../sdkapi/create/create_success.json | 3 + .../describe/creating_after_create.json | 23 + .../testdata/sdkapi/describe/deleting.json | 23 + .../sdkapi/describe/failed_after_create.json | 24 + .../describe/inservice_no_failure_reason.json | 23 + .../testdata/sdkapi/describe/updating.json | 22 + .../sdkapi/update/update_success.json | 3 + .../testdata/test_suite.yaml | 195 ++++++ .../create/desired/invalid_before_create.yaml | 23 + .../create/desired/success_before_create.yaml | 23 + .../observed/invalid_create_attempted.yaml | 32 + .../create/observed/success_after_create.yaml | 31 + .../desired/creating_before_delete.yaml | 36 + .../desired/inservice_before_delete.yaml | 36 + .../delete/observed/deleting_no_error.yaml | 39 ++ .../delete/observed/error_on_delete.yaml | 39 ++ .../desired/after_update_apicall_success.yaml | 36 + .../desired/failed_right_after_create.yaml | 32 + .../desired/missing_required_field.yaml | 6 + .../readone/desired/right_after_create.yaml | 32 + .../observed/creating_after_describe.yaml | 36 + .../readone/observed/error_on_describe.yaml | 35 + .../observed/failed_status_on_describe.yaml | 41 ++ .../inservice_no_failure_after_describe.yaml | 36 + .../observed/updating_on_describe.yaml | 36 + .../update/desired/latest_failed.yaml | 34 + .../desired/latest_inservice_pre_update.yaml | 29 + .../update/desired/latest_updating.yaml | 29 + .../update/desired/update_common.yaml | 29 + .../update/observed/error_on_update.yaml | 32 + .../update/observed/no_update_on_failed.yaml | 34 + .../observed/update_attempted_success.yaml | 29 + .../sdk_read_one_post_set_output.go.tpl | 6 + .../sdk_update_post_set_output.go.tpl | 3 + .../sdk_update_pre_build_request.go.tpl | 7 + test/e2e/__init__.py | 60 ++ test/e2e/common/config.py | 8 + .../endpoint_config_inference_component.yaml | 18 + test/e2e/resources/inference_component.yaml | 22 + .../xgboost_model_inference_component.yaml | 20 + ...th_model_location_inference_component.yaml | 20 + 
test/e2e/tests/test_inference_component.py | 341 +++++++++ 76 files changed, 4896 insertions(+), 22 deletions(-) create mode 100644 apis/v1alpha1/inference_component.go create mode 100644 config/crd/bases/sagemaker.services.k8s.aws_inferencecomponents.yaml create mode 100644 helm/crds/sagemaker.services.k8s.aws_inferencecomponents.yaml create mode 100644 pkg/resource/endpoint_config/testdata/v1alpha1/readone/desired/late_initialize.yaml create mode 100644 pkg/resource/endpoint_config/testdata/v1alpha1/readone/observed/right_after_create.yaml create mode 100644 pkg/resource/inference_component/custom_update_conditions.go create mode 100644 pkg/resource/inference_component/delta.go create mode 100644 pkg/resource/inference_component/descriptor.go create mode 100644 pkg/resource/inference_component/hooks.go create mode 100644 pkg/resource/inference_component/identifiers.go create mode 100644 pkg/resource/inference_component/manager.go create mode 100644 pkg/resource/inference_component/manager_factory.go create mode 100644 pkg/resource/inference_component/manager_test_suite_test.go create mode 100644 pkg/resource/inference_component/references.go create mode 100644 pkg/resource/inference_component/resource.go create mode 100644 pkg/resource/inference_component/sdk.go create mode 100644 pkg/resource/inference_component/tags.go create mode 100644 pkg/resource/inference_component/testdata/sdkapi/create/create_success.json create mode 100644 pkg/resource/inference_component/testdata/sdkapi/describe/creating_after_create.json create mode 100644 pkg/resource/inference_component/testdata/sdkapi/describe/deleting.json create mode 100644 pkg/resource/inference_component/testdata/sdkapi/describe/failed_after_create.json create mode 100644 pkg/resource/inference_component/testdata/sdkapi/describe/inservice_no_failure_reason.json create mode 100644 pkg/resource/inference_component/testdata/sdkapi/describe/updating.json create mode 100644 
pkg/resource/inference_component/testdata/sdkapi/update/update_success.json create mode 100644 pkg/resource/inference_component/testdata/test_suite.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/create/desired/invalid_before_create.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/create/desired/success_before_create.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/create/observed/invalid_create_attempted.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/create/observed/success_after_create.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/delete/desired/creating_before_delete.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/delete/desired/inservice_before_delete.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/delete/observed/deleting_no_error.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/delete/observed/error_on_delete.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/desired/after_update_apicall_success.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/desired/failed_right_after_create.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/desired/missing_required_field.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/desired/right_after_create.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/observed/creating_after_describe.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/observed/error_on_describe.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/observed/failed_status_on_describe.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/readone/observed/inservice_no_failure_after_describe.yaml create mode 100644 
pkg/resource/inference_component/testdata/v1alpha1/readone/observed/updating_on_describe.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_failed.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_inservice_pre_update.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_updating.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/desired/update_common.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/observed/error_on_update.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/observed/no_update_on_failed.yaml create mode 100644 pkg/resource/inference_component/testdata/v1alpha1/update/observed/update_attempted_success.yaml create mode 100644 templates/inference_component/sdk_read_one_post_set_output.go.tpl create mode 100644 templates/inference_component/sdk_update_post_set_output.go.tpl create mode 100644 templates/inference_component/sdk_update_pre_build_request.go.tpl create mode 100644 test/e2e/resources/endpoint_config_inference_component.yaml create mode 100644 test/e2e/resources/inference_component.yaml create mode 100644 test/e2e/resources/xgboost_model_inference_component.yaml create mode 100644 test/e2e/resources/xgboost_model_with_model_location_inference_component.yaml create mode 100644 test/e2e/tests/test_inference_component.py diff --git a/apis/v1alpha1/ack-generate-metadata.yaml b/apis/v1alpha1/ack-generate-metadata.yaml index 1fc28856..c6857552 100755 --- a/apis/v1alpha1/ack-generate-metadata.yaml +++ b/apis/v1alpha1/ack-generate-metadata.yaml @@ -1,13 +1,13 @@ ack_generate_info: - build_date: "2024-03-06T21:33:47Z" + build_date: "2024-03-11T20:40:41Z" build_hash: a5ba3c851434263128a1464a2c41e528779eeefa go_version: go1.22.0 version: v0.32.1 -api_directory_checksum: 731faf4c5d6d6f5140b4e0786127df447f773217 
+api_directory_checksum: 476c2a15949ae95e3cefcc07b7eb39de6cd7548b api_version: v1alpha1 aws_sdk_go_version: v1.50.15 generator_config_info: - file_checksum: 0d728ab3662c7e538aff6727f087b54c5969fdcf + file_checksum: 708e5a5444203d25538466e7e1914c3869d5a93e original_file_name: generator.yaml last_modification: reason: API generation diff --git a/apis/v1alpha1/enums.go b/apis/v1alpha1/enums.go index 1d6311ce..0146045f 100644 --- a/apis/v1alpha1/enums.go +++ b/apis/v1alpha1/enums.go @@ -1127,14 +1127,14 @@ const ( InferenceComponentSortKey_Status InferenceComponentSortKey = "Status" ) -type InferenceComponentStatus string +type InferenceComponentStatus_SDK string const ( - InferenceComponentStatus_InService InferenceComponentStatus = "InService" - InferenceComponentStatus_Creating InferenceComponentStatus = "Creating" - InferenceComponentStatus_Updating InferenceComponentStatus = "Updating" - InferenceComponentStatus_Failed InferenceComponentStatus = "Failed" - InferenceComponentStatus_Deleting InferenceComponentStatus = "Deleting" + InferenceComponentStatus_SDK_InService InferenceComponentStatus_SDK = "InService" + InferenceComponentStatus_SDK_Creating InferenceComponentStatus_SDK = "Creating" + InferenceComponentStatus_SDK_Updating InferenceComponentStatus_SDK = "Updating" + InferenceComponentStatus_SDK_Failed InferenceComponentStatus_SDK = "Failed" + InferenceComponentStatus_SDK_Deleting InferenceComponentStatus_SDK = "Deleting" ) type InferenceExecutionMode string diff --git a/apis/v1alpha1/generator.yaml b/apis/v1alpha1/generator.yaml index 5ba794e6..aa0a0674 100644 --- a/apis/v1alpha1/generator.yaml +++ b/apis/v1alpha1/generator.yaml @@ -54,12 +54,80 @@ resources: - InvalidParameterValue - MissingParameter fields: + EnableNetworkIsolation: + late_initialize: + min_backoff_seconds: 5 Tags: compare: is_ignored: true hooks: delta_pre_compare: code: customSetDefaults(a, b) + InferenceComponent: + reconcile: + requeue_on_success_seconds: 30 + 
update_conditions_custom_method_name: CustomUpdateConditions + exceptions: + errors: + 404: + code: ValidationException + message_prefix: Could not find inference component + terminal_codes: + - InvalidParameterCombination + - InvalidParameterValue + - MissingParameter + # Custom error + - InferenceComponentUpdateError + hooks: + sdk_read_one_post_set_output: + template_path: inference_component/sdk_read_one_post_set_output.go.tpl + sdk_update_pre_build_request: + template_path: inference_component/sdk_update_pre_build_request.go.tpl + sdk_update_post_set_output: + template_path: inference_component/sdk_update_post_set_output.go.tpl + sdk_delete_pre_build_request: + template_path: common/sdk_delete_pre_build_request.go.tpl + sdk_delete_post_request: + template_path: common/sdk_delete_post_request.go.tpl + fields: + InferenceComponentStatus: + is_read_only: true + print: + name: STATUS + from: + operation: DescribeInferenceComponent + path: InferenceComponentStatus + FailureReason: + is_read_only: true + print: + name: FAILURE-REASON + priority: 1 + from: + operation: DescribeInferenceComponent + path: FailureReason + CreationTime: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: CreationTime + LastModifiedTime: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: LastModifiedTime + EndpointName: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: EndpointName + VariantName: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: VariantName + Tags: + compare: + is_ignored: true Endpoint: reconcile: requeue_on_success_seconds: 30 @@ -1007,7 +1075,7 @@ ignore: - Hub - HumanTaskUi # - HyperParameterTuningJob - - InferenceComponent + # - InferenceComponent - InferenceRecommendationsJob - Image - ImageVersion diff --git a/apis/v1alpha1/inference_component.go b/apis/v1alpha1/inference_component.go new file mode 100644 index 00000000..5d568b67 --- /dev/null +++ 
b/apis/v1alpha1/inference_component.go @@ -0,0 +1,103 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package v1alpha1 + +import ( + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// InferenceComponentSpec defines the desired state of InferenceComponent. +type InferenceComponentSpec struct { + + // The name of an existing endpoint where you host the inference component. + // +kubebuilder:validation:Required + EndpointName *string `json:"endpointName,omitempty"` + // A unique name to assign to the inference component. + // +kubebuilder:validation:Required + InferenceComponentName *string `json:"inferenceComponentName"` + // Runtime settings for a model that is deployed with an inference component. + // +kubebuilder:validation:Required + RuntimeConfig *InferenceComponentRuntimeConfig `json:"runtimeConfig"` + // Details about the resources to deploy with this inference component, including + // the model, container, and compute resources. + // +kubebuilder:validation:Required + Specification *InferenceComponentSpecification `json:"specification"` + // A list of key-value pairs associated with the model. For more information, + // see Tagging Amazon Web Services resources (https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html) + // in the Amazon Web Services General Reference. 
+ Tags []*Tag `json:"tags,omitempty"` + // The name of an existing production variant where you host the inference component. + // +kubebuilder:validation:Required + VariantName *string `json:"variantName,omitempty"` +} + +// InferenceComponentStatus defines the observed state of InferenceComponent +type InferenceComponentStatus struct { + // All CRs managed by ACK have a common `Status.ACKResourceMetadata` member + // that is used to contain resource sync state, account ownership, + // constructed ARN for the resource + // +kubebuilder:validation:Optional + ACKResourceMetadata *ackv1alpha1.ResourceMetadata `json:"ackResourceMetadata"` + // All CRS managed by ACK have a common `Status.Conditions` member that + // contains a collection of `ackv1alpha1.Condition` objects that describe + // the various terminal states of the CR and its backend AWS service API + // resource + // +kubebuilder:validation:Optional + Conditions []*ackv1alpha1.Condition `json:"conditions"` + // The time when the inference component was created. + // +kubebuilder:validation:Optional + CreationTime *metav1.Time `json:"creationTime,omitempty"` + // The name of the endpoint that hosts the inference component. + // +kubebuilder:validation:Optional + EndpointName *string `json:"endpointName,omitempty"` + // If the inference component status is Failed, the reason for the failure. + // +kubebuilder:validation:Optional + FailureReason *string `json:"failureReason,omitempty"` + // The status of the inference component. + // +kubebuilder:validation:Optional + InferenceComponentStatus *string `json:"inferenceComponentStatus,omitempty"` + // The time when the inference component was last updated. + // +kubebuilder:validation:Optional + LastModifiedTime *metav1.Time `json:"lastModifiedTime,omitempty"` + // The name of the production variant that hosts the inference component. 
+ // +kubebuilder:validation:Optional + VariantName *string `json:"variantName,omitempty"` +} + +// InferenceComponent is the Schema for the InferenceComponents API +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="FAILURE-REASON",type=string,priority=1,JSONPath=`.status.failureReason` +// +kubebuilder:printcolumn:name="STATUS",type=string,priority=0,JSONPath=`.status.inferenceComponentStatus` +type InferenceComponent struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec InferenceComponentSpec `json:"spec,omitempty"` + Status InferenceComponentStatus `json:"status,omitempty"` +} + +// InferenceComponentList contains a list of InferenceComponent +// +kubebuilder:object:root=true +type InferenceComponentList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []InferenceComponent `json:"items"` +} + +func init() { + SchemeBuilder.Register(&InferenceComponent{}, &InferenceComponentList{}) +} diff --git a/apis/v1alpha1/types.go b/apis/v1alpha1/types.go index acb9dbea..56eccb42 100644 --- a/apis/v1alpha1/types.go +++ b/apis/v1alpha1/types.go @@ -1785,6 +1785,16 @@ type ImageVersion struct { Version *int64 `json:"version,omitempty"` } +// Defines the compute resources to allocate to run a model that you assign +// to an inference component. These resources include CPU cores, accelerators, +// and memory. +type InferenceComponentComputeResourceRequirements struct { + MaxMemoryRequiredInMb *int64 `json:"maxMemoryRequiredInMb,omitempty"` + MinMemoryRequiredInMb *int64 `json:"minMemoryRequiredInMb,omitempty"` + NumberOfAcceleratorDevicesRequired *float64 `json:"numberOfAcceleratorDevicesRequired,omitempty"` + NumberOfCPUCoresRequired *float64 `json:"numberOfCPUCoresRequired,omitempty"` +} + // Defines a container that provides the runtime environment for a model that // you deploy with an inference component. 
type InferenceComponentContainerSpecification struct { @@ -1809,15 +1819,44 @@ type InferenceComponentContainerSpecificationSummary struct { Environment map[string]*string `json:"environment,omitempty"` } +// Runtime settings for a model that is deployed with an inference component. +type InferenceComponentRuntimeConfig struct { + CopyCount *int64 `json:"copyCount,omitempty"` +} + +// Details about the runtime settings for the model that is deployed with the +// inference component. +type InferenceComponentRuntimeConfigSummary struct { + CurrentCopyCount *int64 `json:"currentCopyCount,omitempty"` + DesiredCopyCount *int64 `json:"desiredCopyCount,omitempty"` +} + // Details about the resources to deploy with this inference component, including // the model, container, and compute resources. type InferenceComponentSpecification struct { - ModelName *string `json:"modelName,omitempty"` + // Defines the compute resources to allocate to run a model that you assign + // to an inference component. These resources include CPU cores, accelerators, + // and memory. + ComputeResourceRequirements *InferenceComponentComputeResourceRequirements `json:"computeResourceRequirements,omitempty"` + // Defines a container that provides the runtime environment for a model that + // you deploy with an inference component. + Container *InferenceComponentContainerSpecification `json:"container,omitempty"` + ModelName *string `json:"modelName,omitempty"` + // Settings that take effect while the model container starts up. + StartupParameters *InferenceComponentStartupParameters `json:"startupParameters,omitempty"` } // Details about the resources that are deployed with this inference component. type InferenceComponentSpecificationSummary struct { - ModelName *string `json:"modelName,omitempty"` + // Defines the compute resources to allocate to run a model that you assign + // to an inference component. These resources include CPU cores, accelerators, + // and memory. 
+ ComputeResourceRequirements *InferenceComponentComputeResourceRequirements `json:"computeResourceRequirements,omitempty"` + // Details about the resources that are deployed with this inference component. + Container *InferenceComponentContainerSpecificationSummary `json:"container,omitempty"` + ModelName *string `json:"modelName,omitempty"` + // Settings that take effect while the model container starts up. + StartupParameters *InferenceComponentStartupParameters `json:"startupParameters,omitempty"` } // Settings that take effect while the model container starts up. @@ -1828,11 +1867,14 @@ type InferenceComponentStartupParameters struct { // A summary of the properties of an inference component. type InferenceComponentSummary struct { - CreationTime *metav1.Time `json:"creationTime,omitempty"` - EndpointARN *string `json:"endpointARN,omitempty"` - EndpointName *string `json:"endpointName,omitempty"` - LastModifiedTime *metav1.Time `json:"lastModifiedTime,omitempty"` - VariantName *string `json:"variantName,omitempty"` + CreationTime *metav1.Time `json:"creationTime,omitempty"` + EndpointARN *string `json:"endpointARN,omitempty"` + EndpointName *string `json:"endpointName,omitempty"` + InferenceComponentARN *string `json:"inferenceComponentARN,omitempty"` + InferenceComponentName *string `json:"inferenceComponentName,omitempty"` + InferenceComponentStatus *string `json:"inferenceComponentStatus,omitempty"` + LastModifiedTime *metav1.Time `json:"lastModifiedTime,omitempty"` + VariantName *string `json:"variantName,omitempty"` } // Specifies details about how containers in a multi-container endpoint are diff --git a/apis/v1alpha1/zz_generated.deepcopy.go b/apis/v1alpha1/zz_generated.deepcopy.go index 5b2e7c60..39e36b3a 100644 --- a/apis/v1alpha1/zz_generated.deepcopy.go +++ b/apis/v1alpha1/zz_generated.deepcopy.go @@ -6651,6 +6651,68 @@ func (in *ImageVersion) DeepCopy() *ImageVersion { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the 
receiver, writing into out. in must be non-nil. +func (in *InferenceComponent) DeepCopyInto(out *InferenceComponent) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponent. +func (in *InferenceComponent) DeepCopy() *InferenceComponent { + if in == nil { + return nil + } + out := new(InferenceComponent) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferenceComponent) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceComponentComputeResourceRequirements) DeepCopyInto(out *InferenceComponentComputeResourceRequirements) { + *out = *in + if in.MaxMemoryRequiredInMb != nil { + in, out := &in.MaxMemoryRequiredInMb, &out.MaxMemoryRequiredInMb + *out = new(int64) + **out = **in + } + if in.MinMemoryRequiredInMb != nil { + in, out := &in.MinMemoryRequiredInMb, &out.MinMemoryRequiredInMb + *out = new(int64) + **out = **in + } + if in.NumberOfAcceleratorDevicesRequired != nil { + in, out := &in.NumberOfAcceleratorDevicesRequired, &out.NumberOfAcceleratorDevicesRequired + *out = new(float64) + **out = **in + } + if in.NumberOfCPUCoresRequired != nil { + in, out := &in.NumberOfCPUCoresRequired, &out.NumberOfCPUCoresRequired + *out = new(float64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentComputeResourceRequirements. 
+func (in *InferenceComponentComputeResourceRequirements) DeepCopy() *InferenceComponentComputeResourceRequirements { + if in == nil { + return nil + } + out := new(InferenceComponentComputeResourceRequirements) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceComponentContainerSpecification) DeepCopyInto(out *InferenceComponentContainerSpecification) { *out = *in @@ -6733,14 +6795,157 @@ func (in *InferenceComponentContainerSpecificationSummary) DeepCopy() *Inference return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceComponentList) DeepCopyInto(out *InferenceComponentList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]InferenceComponent, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentList. +func (in *InferenceComponentList) DeepCopy() *InferenceComponentList { + if in == nil { + return nil + } + out := new(InferenceComponentList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *InferenceComponentList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InferenceComponentRuntimeConfig) DeepCopyInto(out *InferenceComponentRuntimeConfig) { + *out = *in + if in.CopyCount != nil { + in, out := &in.CopyCount, &out.CopyCount + *out = new(int64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentRuntimeConfig. +func (in *InferenceComponentRuntimeConfig) DeepCopy() *InferenceComponentRuntimeConfig { + if in == nil { + return nil + } + out := new(InferenceComponentRuntimeConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceComponentRuntimeConfigSummary) DeepCopyInto(out *InferenceComponentRuntimeConfigSummary) { + *out = *in + if in.CurrentCopyCount != nil { + in, out := &in.CurrentCopyCount, &out.CurrentCopyCount + *out = new(int64) + **out = **in + } + if in.DesiredCopyCount != nil { + in, out := &in.DesiredCopyCount, &out.DesiredCopyCount + *out = new(int64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentRuntimeConfigSummary. +func (in *InferenceComponentRuntimeConfigSummary) DeepCopy() *InferenceComponentRuntimeConfigSummary { + if in == nil { + return nil + } + out := new(InferenceComponentRuntimeConfigSummary) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InferenceComponentSpec) DeepCopyInto(out *InferenceComponentSpec) { + *out = *in + if in.EndpointName != nil { + in, out := &in.EndpointName, &out.EndpointName + *out = new(string) + **out = **in + } + if in.InferenceComponentName != nil { + in, out := &in.InferenceComponentName, &out.InferenceComponentName + *out = new(string) + **out = **in + } + if in.RuntimeConfig != nil { + in, out := &in.RuntimeConfig, &out.RuntimeConfig + *out = new(InferenceComponentRuntimeConfig) + (*in).DeepCopyInto(*out) + } + if in.Specification != nil { + in, out := &in.Specification, &out.Specification + *out = new(InferenceComponentSpecification) + (*in).DeepCopyInto(*out) + } + if in.Tags != nil { + in, out := &in.Tags, &out.Tags + *out = make([]*Tag, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(Tag) + (*in).DeepCopyInto(*out) + } + } + } + if in.VariantName != nil { + in, out := &in.VariantName, &out.VariantName + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentSpec. +func (in *InferenceComponentSpec) DeepCopy() *InferenceComponentSpec { + if in == nil { + return nil + } + out := new(InferenceComponentSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *InferenceComponentSpecification) DeepCopyInto(out *InferenceComponentSpecification) { *out = *in + if in.ComputeResourceRequirements != nil { + in, out := &in.ComputeResourceRequirements, &out.ComputeResourceRequirements + *out = new(InferenceComponentComputeResourceRequirements) + (*in).DeepCopyInto(*out) + } + if in.Container != nil { + in, out := &in.Container, &out.Container + *out = new(InferenceComponentContainerSpecification) + (*in).DeepCopyInto(*out) + } if in.ModelName != nil { in, out := &in.ModelName, &out.ModelName *out = new(string) **out = **in } + if in.StartupParameters != nil { + in, out := &in.StartupParameters, &out.StartupParameters + *out = new(InferenceComponentStartupParameters) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentSpecification. @@ -6756,11 +6961,26 @@ func (in *InferenceComponentSpecification) DeepCopy() *InferenceComponentSpecifi // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceComponentSpecificationSummary) DeepCopyInto(out *InferenceComponentSpecificationSummary) { *out = *in + if in.ComputeResourceRequirements != nil { + in, out := &in.ComputeResourceRequirements, &out.ComputeResourceRequirements + *out = new(InferenceComponentComputeResourceRequirements) + (*in).DeepCopyInto(*out) + } + if in.Container != nil { + in, out := &in.Container, &out.Container + *out = new(InferenceComponentContainerSpecificationSummary) + (*in).DeepCopyInto(*out) + } if in.ModelName != nil { in, out := &in.ModelName, &out.ModelName *out = new(string) **out = **in } + if in.StartupParameters != nil { + in, out := &in.StartupParameters, &out.StartupParameters + *out = new(InferenceComponentStartupParameters) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentSpecificationSummary. 
@@ -6798,6 +7018,65 @@ func (in *InferenceComponentStartupParameters) DeepCopy() *InferenceComponentSta return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceComponentStatus) DeepCopyInto(out *InferenceComponentStatus) { + *out = *in + if in.ACKResourceMetadata != nil { + in, out := &in.ACKResourceMetadata, &out.ACKResourceMetadata + *out = new(corev1alpha1.ResourceMetadata) + (*in).DeepCopyInto(*out) + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]*corev1alpha1.Condition, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(corev1alpha1.Condition) + (*in).DeepCopyInto(*out) + } + } + } + if in.CreationTime != nil { + in, out := &in.CreationTime, &out.CreationTime + *out = (*in).DeepCopy() + } + if in.EndpointName != nil { + in, out := &in.EndpointName, &out.EndpointName + *out = new(string) + **out = **in + } + if in.FailureReason != nil { + in, out := &in.FailureReason, &out.FailureReason + *out = new(string) + **out = **in + } + if in.InferenceComponentStatus != nil { + in, out := &in.InferenceComponentStatus, &out.InferenceComponentStatus + *out = new(string) + **out = **in + } + if in.LastModifiedTime != nil { + in, out := &in.LastModifiedTime, &out.LastModifiedTime + *out = (*in).DeepCopy() + } + if in.VariantName != nil { + in, out := &in.VariantName, &out.VariantName + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceComponentStatus. +func (in *InferenceComponentStatus) DeepCopy() *InferenceComponentStatus { + if in == nil { + return nil + } + out := new(InferenceComponentStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *InferenceComponentSummary) DeepCopyInto(out *InferenceComponentSummary) { *out = *in @@ -6815,6 +7094,21 @@ func (in *InferenceComponentSummary) DeepCopyInto(out *InferenceComponentSummary *out = new(string) **out = **in } + if in.InferenceComponentARN != nil { + in, out := &in.InferenceComponentARN, &out.InferenceComponentARN + *out = new(string) + **out = **in + } + if in.InferenceComponentName != nil { + in, out := &in.InferenceComponentName, &out.InferenceComponentName + *out = new(string) + **out = **in + } + if in.InferenceComponentStatus != nil { + in, out := &in.InferenceComponentStatus, &out.InferenceComponentStatus + *out = new(string) + **out = **in + } if in.LastModifiedTime != nil { in, out := &in.LastModifiedTime, &out.LastModifiedTime *out = (*in).DeepCopy() diff --git a/cmd/controller/main.go b/cmd/controller/main.go index c73e7f02..5b37d3c6 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -46,6 +46,7 @@ import ( _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/endpoint_config" _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/feature_group" _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/hyper_parameter_tuning_job" + _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/inference_component" _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/model" _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/model_bias_job_definition" _ "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource/model_explainability_job_definition" diff --git a/config/crd/bases/sagemaker.services.k8s.aws_inferencecomponents.yaml b/config/crd/bases/sagemaker.services.k8s.aws_inferencecomponents.yaml new file mode 100644 index 00000000..ee016da6 --- /dev/null +++ b/config/crd/bases/sagemaker.services.k8s.aws_inferencecomponents.yaml @@ -0,0 +1,249 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + 
annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: inferencecomponents.sagemaker.services.k8s.aws +spec: + group: sagemaker.services.k8s.aws + names: + kind: InferenceComponent + listKind: InferenceComponentList + plural: inferencecomponents + singular: inferencecomponent + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.failureReason + name: FAILURE-REASON + priority: 1 + type: string + - jsonPath: .status.inferenceComponentStatus + name: STATUS + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: InferenceComponent is the Schema for the InferenceComponents + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: InferenceComponentSpec defines the desired state of InferenceComponent. + properties: + endpointName: + description: The name of an existing endpoint where you host the inference + component. + type: string + inferenceComponentName: + description: A unique name to assign to the inference component. + type: string + runtimeConfig: + description: Runtime settings for a model that is deployed with an + inference component. 
+ properties: + copyCount: + format: int64 + type: integer + type: object + specification: + description: |- + Details about the resources to deploy with this inference component, including + the model, container, and compute resources. + properties: + computeResourceRequirements: + description: |- + Defines the compute resources to allocate to run a model that you assign + to an inference component. These resources include CPU cores, accelerators, + and memory. + properties: + maxMemoryRequiredInMb: + format: int64 + type: integer + minMemoryRequiredInMb: + format: int64 + type: integer + numberOfAcceleratorDevicesRequired: + type: number + numberOfCPUCoresRequired: + type: number + type: object + container: + description: |- + Defines a container that provides the runtime environment for a model that + you deploy with an inference component. + properties: + artifactURL: + type: string + environment: + additionalProperties: + type: string + type: object + image: + type: string + type: object + modelName: + type: string + startupParameters: + description: Settings that take effect while the model container + starts up. + properties: + containerStartupHealthCheckTimeoutInSeconds: + format: int64 + type: integer + modelDataDownloadTimeoutInSeconds: + format: int64 + type: integer + type: object + type: object + tags: + description: |- + A list of key-value pairs associated with the model. For more information, + see Tagging Amazon Web Services resources (https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html) + in the Amazon Web Services General Reference. + items: + description: |- + A tag object that consists of a key and an optional value, used to manage + metadata for SageMaker Amazon Web Services resources. + + + You can add tags to notebook instances, training jobs, hyperparameter tuning + jobs, batch transform jobs, models, labeling jobs, work teams, endpoint configurations, + and endpoints. 
For more information on adding tags to SageMaker resources, + see AddTags (https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AddTags.html). + + + For more information on adding metadata to your Amazon Web Services resources + with tagging, see Tagging Amazon Web Services resources (https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html). + For advice on best practices for managing Amazon Web Services resources with + tagging, see Tagging Best Practices: Implement an Effective Amazon Web Services + Resource Tagging Strategy (https://d1.awsstatic.com/whitepapers/aws-tagging-best-practices.pdf). + properties: + key: + type: string + value: + type: string + type: object + type: array + variantName: + description: The name of an existing production variant where you + host the inference component. + type: string + required: + - inferenceComponentName + - runtimeConfig + - specification + type: object + status: + description: InferenceComponentStatus defines the observed state of InferenceComponent + properties: + ackResourceMetadata: + description: |- + All CRs managed by ACK have a common `Status.ACKResourceMetadata` member + that is used to contain resource sync state, account ownership, + constructed ARN for the resource + properties: + arn: + description: |- + ARN is the Amazon Resource Name for the resource. This is a + globally-unique identifier and is set only by the ACK service controller + once the controller has orchestrated the creation of the resource OR + when it has verified that an "adopted" resource (a resource where the + ARN annotation was set by the Kubernetes user on the CR) exists and + matches the supplied CR's Spec field values. 
+ TODO(vijat@): Find a better strategy for resources that do not have ARN in CreateOutputResponse + https://github.com/aws/aws-controllers-k8s/issues/270 + type: string + ownerAccountID: + description: |- + OwnerAccountID is the AWS Account ID of the account that owns the + backend AWS service API resource. + type: string + region: + description: Region is the AWS region in which the resource exists + or will exist. + type: string + required: + - ownerAccountID + - region + type: object + conditions: + description: |- + All CRS managed by ACK have a common `Status.Conditions` member that + contains a collection of `ackv1alpha1.Condition` objects that describe + the various terminal states of the CR and its backend AWS service API + resource + items: + description: |- + Condition is the common struct used by all CRDs managed by ACK service + controllers to indicate terminal states of the CR and its backend AWS + service API resource + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type is the type of the Condition + type: string + required: + - status + - type + type: object + type: array + creationTime: + description: The time when the inference component was created. + format: date-time + type: string + endpointName: + description: The name of the endpoint that hosts the inference component. + type: string + failureReason: + description: If the inference component status is Failed, the reason + for the failure. + type: string + inferenceComponentStatus: + description: The status of the inference component. 
+ type: string + lastModifiedTime: + description: The time when the inference component was last updated. + format: date-time + type: string + variantName: + description: The name of the production variant that hosts the inference + component. + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 113f72e7..42faa663 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -9,6 +9,7 @@ resources: - bases/sagemaker.services.k8s.aws_endpointconfigs.yaml - bases/sagemaker.services.k8s.aws_featuregroups.yaml - bases/sagemaker.services.k8s.aws_hyperparametertuningjobs.yaml + - bases/sagemaker.services.k8s.aws_inferencecomponents.yaml - bases/sagemaker.services.k8s.aws_models.yaml - bases/sagemaker.services.k8s.aws_modelbiasjobdefinitions.yaml - bases/sagemaker.services.k8s.aws_modelexplainabilityjobdefinitions.yaml diff --git a/config/rbac/cluster-role-controller.yaml b/config/rbac/cluster-role-controller.yaml index 243c6966..77015544 100644 --- a/config/rbac/cluster-role-controller.yaml +++ b/config/rbac/cluster-role-controller.yaml @@ -170,6 +170,26 @@ rules: - get - patch - update +- apiGroups: + - sagemaker.services.k8s.aws + resources: + - inferencecomponents + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - sagemaker.services.k8s.aws + resources: + - inferencecomponents/status + verbs: + - get + - patch + - update - apiGroups: - sagemaker.services.k8s.aws resources: diff --git a/config/rbac/role-reader.yaml b/config/rbac/role-reader.yaml index 2ffb60a9..3213508a 100644 --- a/config/rbac/role-reader.yaml +++ b/config/rbac/role-reader.yaml @@ -16,6 +16,7 @@ rules: - endpointconfigs - featuregroups - hyperparametertuningjobs + - inferencecomponents - models - modelbiasjobdefinitions - modelexplainabilityjobdefinitions diff --git a/config/rbac/role-writer.yaml 
b/config/rbac/role-writer.yaml index 747d6b0d..7a0c5044 100644 --- a/config/rbac/role-writer.yaml +++ b/config/rbac/role-writer.yaml @@ -16,6 +16,7 @@ rules: - endpointconfigs - featuregroups - hyperparametertuningjobs + - inferencecomponents - models - modelbiasjobdefinitions - modelexplainabilityjobdefinitions @@ -49,6 +50,7 @@ rules: - endpointconfigs - featuregroups - hyperparametertuningjobs + - inferencecomponents - models - modelbiasjobdefinitions - modelexplainabilityjobdefinitions diff --git a/generator.yaml b/generator.yaml index 5ba794e6..aa0a0674 100644 --- a/generator.yaml +++ b/generator.yaml @@ -54,12 +54,80 @@ resources: - InvalidParameterValue - MissingParameter fields: + EnableNetworkIsolation: + late_initialize: + min_backoff_seconds: 5 Tags: compare: is_ignored: true hooks: delta_pre_compare: code: customSetDefaults(a, b) + InferenceComponent: + reconcile: + requeue_on_success_seconds: 30 + update_conditions_custom_method_name: CustomUpdateConditions + exceptions: + errors: + 404: + code: ValidationException + message_prefix: Could not find inference component + terminal_codes: + - InvalidParameterCombination + - InvalidParameterValue + - MissingParameter + # Custom error + - InferenceComponentUpdateError + hooks: + sdk_read_one_post_set_output: + template_path: inference_component/sdk_read_one_post_set_output.go.tpl + sdk_update_pre_build_request: + template_path: inference_component/sdk_update_pre_build_request.go.tpl + sdk_update_post_set_output: + template_path: inference_component/sdk_update_post_set_output.go.tpl + sdk_delete_pre_build_request: + template_path: common/sdk_delete_pre_build_request.go.tpl + sdk_delete_post_request: + template_path: common/sdk_delete_post_request.go.tpl + fields: + InferenceComponentStatus: + is_read_only: true + print: + name: STATUS + from: + operation: DescribeInferenceComponent + path: InferenceComponentStatus + FailureReason: + is_read_only: true + print: + name: FAILURE-REASON + priority: 1 + from: + 
operation: DescribeInferenceComponent + path: FailureReason + CreationTime: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: CreationTime + LastModifiedTime: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: LastModifiedTime + EndpointName: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: EndpointName + VariantName: + is_read_only: true + from: + operation: DescribeInferenceComponent + path: VariantName + Tags: + compare: + is_ignored: true Endpoint: reconcile: requeue_on_success_seconds: 30 @@ -1007,7 +1075,7 @@ ignore: - Hub - HumanTaskUi # - HyperParameterTuningJob - - InferenceComponent + # - InferenceComponent - InferenceRecommendationsJob - Image - ImageVersion diff --git a/helm/crds/sagemaker.services.k8s.aws_inferencecomponents.yaml b/helm/crds/sagemaker.services.k8s.aws_inferencecomponents.yaml new file mode 100644 index 00000000..ee016da6 --- /dev/null +++ b/helm/crds/sagemaker.services.k8s.aws_inferencecomponents.yaml @@ -0,0 +1,249 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: inferencecomponents.sagemaker.services.k8s.aws +spec: + group: sagemaker.services.k8s.aws + names: + kind: InferenceComponent + listKind: InferenceComponentList + plural: inferencecomponents + singular: inferencecomponent + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.failureReason + name: FAILURE-REASON + priority: 1 + type: string + - jsonPath: .status.inferenceComponentStatus + name: STATUS + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: InferenceComponent is the Schema for the InferenceComponents + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: InferenceComponentSpec defines the desired state of InferenceComponent. + properties: + endpointName: + description: The name of an existing endpoint where you host the inference + component. + type: string + inferenceComponentName: + description: A unique name to assign to the inference component. + type: string + runtimeConfig: + description: Runtime settings for a model that is deployed with an + inference component. + properties: + copyCount: + format: int64 + type: integer + type: object + specification: + description: |- + Details about the resources to deploy with this inference component, including + the model, container, and compute resources. + properties: + computeResourceRequirements: + description: |- + Defines the compute resources to allocate to run a model that you assign + to an inference component. These resources include CPU cores, accelerators, + and memory. + properties: + maxMemoryRequiredInMb: + format: int64 + type: integer + minMemoryRequiredInMb: + format: int64 + type: integer + numberOfAcceleratorDevicesRequired: + type: number + numberOfCPUCoresRequired: + type: number + type: object + container: + description: |- + Defines a container that provides the runtime environment for a model that + you deploy with an inference component. 
+ properties: + artifactURL: + type: string + environment: + additionalProperties: + type: string + type: object + image: + type: string + type: object + modelName: + type: string + startupParameters: + description: Settings that take effect while the model container + starts up. + properties: + containerStartupHealthCheckTimeoutInSeconds: + format: int64 + type: integer + modelDataDownloadTimeoutInSeconds: + format: int64 + type: integer + type: object + type: object + tags: + description: |- + A list of key-value pairs associated with the model. For more information, + see Tagging Amazon Web Services resources (https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html) + in the Amazon Web Services General Reference. + items: + description: |- + A tag object that consists of a key and an optional value, used to manage + metadata for SageMaker Amazon Web Services resources. + + + You can add tags to notebook instances, training jobs, hyperparameter tuning + jobs, batch transform jobs, models, labeling jobs, work teams, endpoint configurations, + and endpoints. For more information on adding tags to SageMaker resources, + see AddTags (https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AddTags.html). + + + For more information on adding metadata to your Amazon Web Services resources + with tagging, see Tagging Amazon Web Services resources (https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html). + For advice on best practices for managing Amazon Web Services resources with + tagging, see Tagging Best Practices: Implement an Effective Amazon Web Services + Resource Tagging Strategy (https://d1.awsstatic.com/whitepapers/aws-tagging-best-practices.pdf). + properties: + key: + type: string + value: + type: string + type: object + type: array + variantName: + description: The name of an existing production variant where you + host the inference component. 
+ type: string + required: + - inferenceComponentName + - runtimeConfig + - specification + type: object + status: + description: InferenceComponentStatus defines the observed state of InferenceComponent + properties: + ackResourceMetadata: + description: |- + All CRs managed by ACK have a common `Status.ACKResourceMetadata` member + that is used to contain resource sync state, account ownership, + constructed ARN for the resource + properties: + arn: + description: |- + ARN is the Amazon Resource Name for the resource. This is a + globally-unique identifier and is set only by the ACK service controller + once the controller has orchestrated the creation of the resource OR + when it has verified that an "adopted" resource (a resource where the + ARN annotation was set by the Kubernetes user on the CR) exists and + matches the supplied CR's Spec field values. + TODO(vijat@): Find a better strategy for resources that do not have ARN in CreateOutputResponse + https://github.com/aws/aws-controllers-k8s/issues/270 + type: string + ownerAccountID: + description: |- + OwnerAccountID is the AWS Account ID of the account that owns the + backend AWS service API resource. + type: string + region: + description: Region is the AWS region in which the resource exists + or will exist. + type: string + required: + - ownerAccountID + - region + type: object + conditions: + description: |- + All CRS managed by ACK have a common `Status.Conditions` member that + contains a collection of `ackv1alpha1.Condition` objects that describe + the various terminal states of the CR and its backend AWS service API + resource + items: + description: |- + Condition is the common struct used by all CRDs managed by ACK service + controllers to indicate terminal states of the CR and its backend AWS + service API resource + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. 
+ format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type is the type of the Condition + type: string + required: + - status + - type + type: object + type: array + creationTime: + description: The time when the inference component was created. + format: date-time + type: string + endpointName: + description: The name of the endpoint that hosts the inference component. + type: string + failureReason: + description: If the inference component status is Failed, the reason + for the failure. + type: string + inferenceComponentStatus: + description: The status of the inference component. + type: string + lastModifiedTime: + description: The time when the inference component was last updated. + format: date-time + type: string + variantName: + description: The name of the production variant that hosts the inference + component. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index ec750d62..3286a235 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -217,6 +217,26 @@ rules: - get - patch - update +- apiGroups: + - sagemaker.services.k8s.aws + resources: + - inferencecomponents + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - sagemaker.services.k8s.aws + resources: + - inferencecomponents/status + verbs: + - get + - patch + - update - apiGroups: - sagemaker.services.k8s.aws resources: diff --git a/helm/templates/role-reader.yaml b/helm/templates/role-reader.yaml index 990ac13a..b741731a 100644 --- a/helm/templates/role-reader.yaml +++ b/helm/templates/role-reader.yaml @@ -16,6 +16,7 @@ rules: - endpointconfigs - featuregroups - hyperparametertuningjobs + - inferencecomponents - models - modelbiasjobdefinitions - modelexplainabilityjobdefinitions diff --git a/helm/templates/role-writer.yaml b/helm/templates/role-writer.yaml index 5c0ed697..3218b2d5 100644 --- a/helm/templates/role-writer.yaml +++ b/helm/templates/role-writer.yaml @@ -16,6 +16,7 @@ rules: - endpointconfigs - featuregroups - hyperparametertuningjobs + - inferencecomponents - models - modelbiasjobdefinitions - modelexplainabilityjobdefinitions @@ -49,6 +50,7 @@ rules: - endpointconfigs - featuregroups - hyperparametertuningjobs + - inferencecomponents - models - modelbiasjobdefinitions - modelexplainabilityjobdefinitions diff --git a/pkg/resource/endpoint_config/manager.go b/pkg/resource/endpoint_config/manager.go index 5b1e86b4..91b05218 100644 --- a/pkg/resource/endpoint_config/manager.go +++ b/pkg/resource/endpoint_config/manager.go @@ -51,7 +51,7 @@ var ( // +kubebuilder:rbac:groups=sagemaker.services.k8s.aws,resources=endpointconfigs,verbs=get;list;watch;create;update;patch;delete // 
+kubebuilder:rbac:groups=sagemaker.services.k8s.aws,resources=endpointconfigs/status,verbs=get;update;patch -var lateInitializeFieldNames = []string{} +var lateInitializeFieldNames = []string{"EnableNetworkIsolation"} // resourceManager is responsible for providing a consistent way to perform // CRUD operations in a backend AWS service API for Book custom resources. @@ -248,6 +248,10 @@ func (rm *resourceManager) LateInitialize( func (rm *resourceManager) incompleteLateInitialization( res acktypes.AWSResource, ) bool { + ko := rm.concreteResource(res).ko.DeepCopy() + if ko.Spec.EnableNetworkIsolation == nil { + return true + } return false } @@ -257,7 +261,12 @@ func (rm *resourceManager) lateInitializeFromReadOneOutput( observed acktypes.AWSResource, latest acktypes.AWSResource, ) acktypes.AWSResource { - return latest + observedKo := rm.concreteResource(observed).ko.DeepCopy() + latestKo := rm.concreteResource(latest).ko.DeepCopy() + if observedKo.Spec.EnableNetworkIsolation != nil && latestKo.Spec.EnableNetworkIsolation == nil { + latestKo.Spec.EnableNetworkIsolation = observedKo.Spec.EnableNetworkIsolation + } + return &resource{latestKo} } // IsSynced returns true if the resource is synced. 
diff --git a/pkg/resource/endpoint_config/testdata/sdkapi/describe/success_describe.json b/pkg/resource/endpoint_config/testdata/sdkapi/describe/success_describe.json index 36dacf9b..23552d0f 100644 --- a/pkg/resource/endpoint_config/testdata/sdkapi/describe/success_describe.json +++ b/pkg/resource/endpoint_config/testdata/sdkapi/describe/success_describe.json @@ -25,6 +25,7 @@ "EndpointConfigArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint-config/single-variant-config", "EndpointConfigName": "single-variant-config", "KmsKeyId": null, + "EnableNetworkIsolation": false, "ProductionVariants": [ { "AcceleratorType": null, diff --git a/pkg/resource/endpoint_config/testdata/test_suite.yaml b/pkg/resource/endpoint_config/testdata/test_suite.yaml index f99da594..e5720e0b 100644 --- a/pkg/resource/endpoint_config/testdata/test_suite.yaml +++ b/pkg/resource/endpoint_config/testdata/test_suite.yaml @@ -69,7 +69,7 @@ output_fixture: "sdkapi/describe/success_describe.json" invoke: ReadOne expect: - latest_state: "v1alpha1/readone/desired/right_after_create.yaml" + latest_state: "v1alpha1/readone/observed/right_after_create.yaml" - name: "ReadOne=LateInitialize" description: "Testing late initialize after created, should expect no diff since there is nothing to late initialize" given: @@ -79,7 +79,7 @@ output_fixture: "sdkapi/describe/success_describe.json" invoke: LateInitialize expect: - latest_state: "v1alpha1/readone/desired/right_after_create.yaml" + latest_state: "v1alpha1/readone/desired/late_initialize.yaml" - name: "ReadOne=SuccessClearsConditions" description: "Testing a successful reconciliation clears conditions if terminal/recoverable condition were already set to true" given: diff --git a/pkg/resource/endpoint_config/testdata/v1alpha1/readone/desired/late_initialize.yaml b/pkg/resource/endpoint_config/testdata/v1alpha1/readone/desired/late_initialize.yaml new file mode 100644 index 00000000..7b6e687d --- /dev/null +++ 
b/pkg/resource/endpoint_config/testdata/v1alpha1/readone/desired/late_initialize.yaml @@ -0,0 +1,45 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: EndpointConfig +metadata: + creationTimestamp: null + name: single-variant-config +spec: + dataCaptureConfig: + captureContentTypeHeader: + csvContentTypes: + - text/csv + jsonContentTypes: + - application/json + captureOptions: + - captureMode: Input + - captureMode: Output + destinationS3URI: s3://source-data-bucket-592697580195-us-west-2/sagemaker/endpoint_config/datacapture + enableCapture: true + initialSamplingPercentage: 100 + endpointConfigName: single-variant-config + enableNetworkIsolation: false + productionVariants: + - initialInstanceCount: 1 + initialVariantWeight: 1 + instanceType: ml.c5.large + modelName: single-variant-config-model + variantName: AllTraffic + enableSSMAccess: true + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:endpoint-config/single-variant-config + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: Late initialization successful + reason: Late initialization successful + status: "True" + type: ACK.LateInitialized diff --git a/pkg/resource/endpoint_config/testdata/v1alpha1/readone/observed/right_after_create.yaml b/pkg/resource/endpoint_config/testdata/v1alpha1/readone/observed/right_after_create.yaml new file mode 100644 index 00000000..dd9560a9 --- /dev/null +++ b/pkg/resource/endpoint_config/testdata/v1alpha1/readone/observed/right_after_create.yaml @@ -0,0 +1,40 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: EndpointConfig +metadata: + creationTimestamp: null + name: single-variant-config +spec: + dataCaptureConfig: + captureContentTypeHeader: + csvContentTypes: + - text/csv + jsonContentTypes: + - application/json + captureOptions: + - captureMode: 
Input + - captureMode: Output + destinationS3URI: s3://source-data-bucket-592697580195-us-west-2/sagemaker/endpoint_config/datacapture + enableCapture: true + initialSamplingPercentage: 100 + endpointConfigName: single-variant-config + enableNetworkIsolation: false + productionVariants: + - initialInstanceCount: 1 + initialVariantWeight: 1 + instanceType: ml.c5.large + modelName: single-variant-config-model + variantName: AllTraffic + enableSSMAccess: true + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:endpoint-config/single-variant-config + ownerAccountID: "" + region: "" + conditions: [] diff --git a/pkg/resource/inference_component/custom_update_conditions.go b/pkg/resource/inference_component/custom_update_conditions.go new file mode 100644 index 00000000..bc5e5c84 --- /dev/null +++ b/pkg/resource/inference_component/custom_update_conditions.go @@ -0,0 +1,55 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. 
+
+package inference_component
+
+import (
+	ackcondition "github.com/aws-controllers-k8s/runtime/pkg/condition"
+	ackerr "github.com/aws-controllers-k8s/runtime/pkg/errors"
+	svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1"
+	svccommon "github.com/aws-controllers-k8s/sagemaker-controller/pkg/common"
+	"github.com/aws/aws-sdk-go/aws"
+	svcsdk "github.com/aws/aws-sdk-go/service/sagemaker"
+	corev1 "k8s.io/api/core/v1"
+)
+
+// CustomUpdateConditions sets conditions (terminal) on the supplied inference component ko.
+// It examines the status stored in r (not in ko) and the supplied err to determine conditions.
+// It returns true if conditions are updated (always true when the update-error branch below fires).
+func (rm *resourceManager) CustomUpdateConditions(
+	ko *svcapitypes.InferenceComponent,
+	r *resource,
+	err error,
+) bool {
+	latestStatus := r.ko.Status.InferenceComponentStatus
+	terminalStatus := svcsdk.InferenceComponentStatusFailed
+	conditionManager := &resource{ko} // conditions are written to ko, wrapped as a resource
+	resourceName := GroupKind.Kind // local copy; its address is handed to SetTerminalState
+	// If the latestStatus == terminalStatus we will set
+	// the terminal condition and terminal message.
+	updated := svccommon.SetTerminalState(conditionManager, latestStatus, &resourceName, terminalStatus)
+
+	// Continue setting ResourceSynced condition to false in case of failed update
+	// since desired and latest will be different until the issue is fixed.
+	// Customer can use this condition state and FailureReason to determine
+	// the correct course of action in case the update to InferenceComponent fails.
+	if err != nil {
+		awsErr, ok := ackerr.AWSError(err)
+		if ok && awsErr.Code() == "InferenceComponentUpdateError" {
+			ackcondition.SetSynced(conditionManager, corev1.ConditionFalse, aws.String(awsErr.Error()), nil)
+			return true
+		}
+	}
+
+	return updated
+}
diff --git a/pkg/resource/inference_component/delta.go b/pkg/resource/inference_component/delta.go
new file mode 100644
index 00000000..01041f51
--- /dev/null
+++ b/pkg/resource/inference_component/delta.go
@@ -0,0 +1,166 @@
+// Copyright Amazon.com Inc.
or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package inference_component + +import ( + "bytes" + "reflect" + + ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare" + acktags "github.com/aws-controllers-k8s/runtime/pkg/tags" +) + +// Hack to avoid import errors during build... +var ( + _ = &bytes.Buffer{} + _ = &reflect.Method{} + _ = &acktags.Tags{} +) + +// newResourceDelta returns a new `ackcompare.Delta` used to compare two +// resources +func newResourceDelta( + a *resource, + b *resource, +) *ackcompare.Delta { + delta := ackcompare.NewDelta() + if (a == nil && b != nil) || + (a != nil && b == nil) { + delta.Add("", a, b) + return delta + } + + if ackcompare.HasNilDifference(a.ko.Spec.EndpointName, b.ko.Spec.EndpointName) { + delta.Add("Spec.EndpointName", a.ko.Spec.EndpointName, b.ko.Spec.EndpointName) + } else if a.ko.Spec.EndpointName != nil && b.ko.Spec.EndpointName != nil { + if *a.ko.Spec.EndpointName != *b.ko.Spec.EndpointName { + delta.Add("Spec.EndpointName", a.ko.Spec.EndpointName, b.ko.Spec.EndpointName) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.InferenceComponentName, b.ko.Spec.InferenceComponentName) { + delta.Add("Spec.InferenceComponentName", a.ko.Spec.InferenceComponentName, b.ko.Spec.InferenceComponentName) + } else if a.ko.Spec.InferenceComponentName != nil && b.ko.Spec.InferenceComponentName != nil { + if *a.ko.Spec.InferenceComponentName != *b.ko.Spec.InferenceComponentName 
{ + delta.Add("Spec.InferenceComponentName", a.ko.Spec.InferenceComponentName, b.ko.Spec.InferenceComponentName) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.RuntimeConfig, b.ko.Spec.RuntimeConfig) { + delta.Add("Spec.RuntimeConfig", a.ko.Spec.RuntimeConfig, b.ko.Spec.RuntimeConfig) + } else if a.ko.Spec.RuntimeConfig != nil && b.ko.Spec.RuntimeConfig != nil { + if ackcompare.HasNilDifference(a.ko.Spec.RuntimeConfig.CopyCount, b.ko.Spec.RuntimeConfig.CopyCount) { + delta.Add("Spec.RuntimeConfig.CopyCount", a.ko.Spec.RuntimeConfig.CopyCount, b.ko.Spec.RuntimeConfig.CopyCount) + } else if a.ko.Spec.RuntimeConfig.CopyCount != nil && b.ko.Spec.RuntimeConfig.CopyCount != nil { + if *a.ko.Spec.RuntimeConfig.CopyCount != *b.ko.Spec.RuntimeConfig.CopyCount { + delta.Add("Spec.RuntimeConfig.CopyCount", a.ko.Spec.RuntimeConfig.CopyCount, b.ko.Spec.RuntimeConfig.CopyCount) + } + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification, b.ko.Spec.Specification) { + delta.Add("Spec.Specification", a.ko.Spec.Specification, b.ko.Spec.Specification) + } else if a.ko.Spec.Specification != nil && b.ko.Spec.Specification != nil { + if ackcompare.HasNilDifference(a.ko.Spec.Specification.ComputeResourceRequirements, b.ko.Spec.Specification.ComputeResourceRequirements) { + delta.Add("Spec.Specification.ComputeResourceRequirements", a.ko.Spec.Specification.ComputeResourceRequirements, b.ko.Spec.Specification.ComputeResourceRequirements) + } else if a.ko.Spec.Specification.ComputeResourceRequirements != nil && b.ko.Spec.Specification.ComputeResourceRequirements != nil { + if ackcompare.HasNilDifference(a.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb, b.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb) { + delta.Add("Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb", a.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb, 
b.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb) + } else if a.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb != nil && b.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb != nil { + if *a.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb != *b.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb { + delta.Add("Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb", a.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb, b.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb, b.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb) { + delta.Add("Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb", a.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb, b.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb) + } else if a.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb != nil && b.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb != nil { + if *a.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb != *b.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb { + delta.Add("Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb", a.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb, b.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired, b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired) { + delta.Add("Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired", 
a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired, b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired) + } else if a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired != nil && b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired != nil { + if *a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired != *b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired { + delta.Add("Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired", a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired, b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired, b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired) { + delta.Add("Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired", a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired, b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired) + } else if a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired != nil && b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired != nil { + if *a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired != *b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired { + delta.Add("Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired", a.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired, b.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired) + } + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.Container, b.ko.Spec.Specification.Container) { + 
delta.Add("Spec.Specification.Container", a.ko.Spec.Specification.Container, b.ko.Spec.Specification.Container) + } else if a.ko.Spec.Specification.Container != nil && b.ko.Spec.Specification.Container != nil { + if ackcompare.HasNilDifference(a.ko.Spec.Specification.Container.ArtifactURL, b.ko.Spec.Specification.Container.ArtifactURL) { + delta.Add("Spec.Specification.Container.ArtifactURL", a.ko.Spec.Specification.Container.ArtifactURL, b.ko.Spec.Specification.Container.ArtifactURL) + } else if a.ko.Spec.Specification.Container.ArtifactURL != nil && b.ko.Spec.Specification.Container.ArtifactURL != nil { + if *a.ko.Spec.Specification.Container.ArtifactURL != *b.ko.Spec.Specification.Container.ArtifactURL { + delta.Add("Spec.Specification.Container.ArtifactURL", a.ko.Spec.Specification.Container.ArtifactURL, b.ko.Spec.Specification.Container.ArtifactURL) + } + } + if len(a.ko.Spec.Specification.Container.Environment) != len(b.ko.Spec.Specification.Container.Environment) { + delta.Add("Spec.Specification.Container.Environment", a.ko.Spec.Specification.Container.Environment, b.ko.Spec.Specification.Container.Environment) + } else if len(a.ko.Spec.Specification.Container.Environment) > 0 { + if !ackcompare.MapStringStringPEqual(a.ko.Spec.Specification.Container.Environment, b.ko.Spec.Specification.Container.Environment) { + delta.Add("Spec.Specification.Container.Environment", a.ko.Spec.Specification.Container.Environment, b.ko.Spec.Specification.Container.Environment) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.Container.Image, b.ko.Spec.Specification.Container.Image) { + delta.Add("Spec.Specification.Container.Image", a.ko.Spec.Specification.Container.Image, b.ko.Spec.Specification.Container.Image) + } else if a.ko.Spec.Specification.Container.Image != nil && b.ko.Spec.Specification.Container.Image != nil { + if *a.ko.Spec.Specification.Container.Image != *b.ko.Spec.Specification.Container.Image { + 
delta.Add("Spec.Specification.Container.Image", a.ko.Spec.Specification.Container.Image, b.ko.Spec.Specification.Container.Image) + } + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.ModelName, b.ko.Spec.Specification.ModelName) { + delta.Add("Spec.Specification.ModelName", a.ko.Spec.Specification.ModelName, b.ko.Spec.Specification.ModelName) + } else if a.ko.Spec.Specification.ModelName != nil && b.ko.Spec.Specification.ModelName != nil { + if *a.ko.Spec.Specification.ModelName != *b.ko.Spec.Specification.ModelName { + delta.Add("Spec.Specification.ModelName", a.ko.Spec.Specification.ModelName, b.ko.Spec.Specification.ModelName) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.StartupParameters, b.ko.Spec.Specification.StartupParameters) { + delta.Add("Spec.Specification.StartupParameters", a.ko.Spec.Specification.StartupParameters, b.ko.Spec.Specification.StartupParameters) + } else if a.ko.Spec.Specification.StartupParameters != nil && b.ko.Spec.Specification.StartupParameters != nil { + if ackcompare.HasNilDifference(a.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds, b.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds) { + delta.Add("Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds", a.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds, b.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds) + } else if a.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds != nil && b.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds != nil { + if *a.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds != *b.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds { + delta.Add("Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds", 
a.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds, b.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds) + } + } + if ackcompare.HasNilDifference(a.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds, b.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds) { + delta.Add("Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds", a.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds, b.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds) + } else if a.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds != nil && b.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds != nil { + if *a.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds != *b.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds { + delta.Add("Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds", a.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds, b.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds) + } + } + } + } + if ackcompare.HasNilDifference(a.ko.Spec.VariantName, b.ko.Spec.VariantName) { + delta.Add("Spec.VariantName", a.ko.Spec.VariantName, b.ko.Spec.VariantName) + } else if a.ko.Spec.VariantName != nil && b.ko.Spec.VariantName != nil { + if *a.ko.Spec.VariantName != *b.ko.Spec.VariantName { + delta.Add("Spec.VariantName", a.ko.Spec.VariantName, b.ko.Spec.VariantName) + } + } + + return delta +} diff --git a/pkg/resource/inference_component/descriptor.go b/pkg/resource/inference_component/descriptor.go new file mode 100644 index 00000000..fe250c28 --- /dev/null +++ b/pkg/resource/inference_component/descriptor.go @@ -0,0 +1,155 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). 
You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package inference_component + +import ( + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare" + acktypes "github.com/aws-controllers-k8s/runtime/pkg/types" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + rtclient "sigs.k8s.io/controller-runtime/pkg/client" + k8sctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1" +) + +const ( + finalizerString = "finalizers.sagemaker.services.k8s.aws/InferenceComponent" +) + +var ( + GroupVersionResource = svcapitypes.GroupVersion.WithResource("inferencecomponents") + GroupKind = metav1.GroupKind{ + Group: "sagemaker.services.k8s.aws", + Kind: "InferenceComponent", + } +) + +// resourceDescriptor implements the +// `aws-service-operator-k8s/pkg/types.AWSResourceDescriptor` interface +type resourceDescriptor struct { +} + +// GroupVersionKind returns a Kubernetes schema.GroupVersionKind struct that +// describes the API Group, Version and Kind of CRs described by the descriptor +func (d *resourceDescriptor) GroupVersionKind() schema.GroupVersionKind { + return svcapitypes.GroupVersion.WithKind(GroupKind.Kind) +} + +// EmptyRuntimeObject returns an empty object prototype that may be used in +// apimachinery and k8s client operations +func (d *resourceDescriptor) EmptyRuntimeObject() rtclient.Object { + return 
&svcapitypes.InferenceComponent{} +} + +// ResourceFromRuntimeObject returns an AWSResource that has been initialized +// with the supplied runtime.Object +func (d *resourceDescriptor) ResourceFromRuntimeObject( + obj rtclient.Object, +) acktypes.AWSResource { + return &resource{ + ko: obj.(*svcapitypes.InferenceComponent), + } +} + +// Delta returns an `ackcompare.Delta` object containing the difference between +// one `AWSResource` and another. +func (d *resourceDescriptor) Delta(a, b acktypes.AWSResource) *ackcompare.Delta { + return newResourceDelta(a.(*resource), b.(*resource)) +} + +// IsManaged returns true if the supplied AWSResource is under the management +// of an ACK service controller. What this means in practice is that the +// underlying custom resource (CR) in the AWSResource has had a +// resource-specific finalizer associated with it. +func (d *resourceDescriptor) IsManaged( + res acktypes.AWSResource, +) bool { + obj := res.RuntimeObject() + if obj == nil { + // Should not happen. If it does, there is a bug in the code + panic("nil RuntimeMetaObject in AWSResource") + } + // Remove use of custom code once + // https://github.com/kubernetes-sigs/controller-runtime/issues/994 is + // fixed. This should be able to be: + // + // return k8sctrlutil.ContainsFinalizer(obj, finalizerString) + return containsFinalizer(obj, finalizerString) +} + +// Remove once https://github.com/kubernetes-sigs/controller-runtime/issues/994 +// is fixed. +func containsFinalizer(obj rtclient.Object, finalizer string) bool { + f := obj.GetFinalizers() + for _, e := range f { + if e == finalizer { + return true + } + } + return false +} + +// MarkManaged places the supplied resource under the management of ACK. 
What +// this typically means is that the resource manager will decorate the +// underlying custom resource (CR) with a finalizer that indicates ACK is +// managing the resource and the underlying CR may not be deleted until ACK is +// finished cleaning up any backend AWS service resources associated with the +// CR. +func (d *resourceDescriptor) MarkManaged( + res acktypes.AWSResource, +) { + obj := res.RuntimeObject() + if obj == nil { + // Should not happen. If it does, there is a bug in the code + panic("nil RuntimeMetaObject in AWSResource") + } + k8sctrlutil.AddFinalizer(obj, finalizerString) +} + +// MarkUnmanaged removes the supplied resource from management by ACK. What +// this typically means is that the resource manager will remove a finalizer +// underlying custom resource (CR) that indicates ACK is managing the resource. +// This will allow the Kubernetes API server to delete the underlying CR. +func (d *resourceDescriptor) MarkUnmanaged( + res acktypes.AWSResource, +) { + obj := res.RuntimeObject() + if obj == nil { + // Should not happen. If it does, there is a bug in the code + panic("nil RuntimeMetaObject in AWSResource") + } + k8sctrlutil.RemoveFinalizer(obj, finalizerString) +} + +// MarkAdopted places descriptors on the custom resource that indicate the +// resource was not created from within ACK. +func (d *resourceDescriptor) MarkAdopted( + res acktypes.AWSResource, +) { + obj := res.RuntimeObject() + if obj == nil { + // Should not happen. If it does, there is a bug in the code + panic("nil RuntimeObject in AWSResource") + } + curr := obj.GetAnnotations() + if curr == nil { + curr = make(map[string]string) + } + curr[ackv1alpha1.AnnotationAdopted] = "true" + obj.SetAnnotations(curr) +} diff --git a/pkg/resource/inference_component/hooks.go b/pkg/resource/inference_component/hooks.go new file mode 100644 index 00000000..561efb38 --- /dev/null +++ b/pkg/resource/inference_component/hooks.go @@ -0,0 +1,166 @@ +// Copyright Amazon.com Inc. 
or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+//     http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package inference_component
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare"
+	ackrequeue "github.com/aws-controllers-k8s/runtime/pkg/requeue"
+	svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1"
+	svccommon "github.com/aws-controllers-k8s/sagemaker-controller/pkg/common"
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/awserr"
+	svcsdk "github.com/aws/aws-sdk-go/service/sagemaker"
+	"reflect"
+)
+
+var (
+	modifyingStatuses = []string{ // statuses passed to SetSyncedCondition / RequeueIfModifying
+		svcsdk.InferenceComponentStatusCreating,
+		svcsdk.InferenceComponentStatusUpdating,
+		svcsdk.InferenceComponentStatusDeleting,
+	}
+
+	resourceName = GroupKind.Kind // Kind name handed to the svccommon helpers and used in messages
+
+	lastSpecForUpdateAnnotation = fmt.Sprintf("%s/last-spec-for-update", GroupKind.Group) // annotation key holding the JSON spec of the last update call
+
+	requeueWaitWhileDeleting = ackrequeue.NeededAfter( // requeue error using the default requeue delay
+		errors.New(resourceName+" is Deleting."),
+		ackrequeue.DefaultRequeueAfterDuration,
+	)
+)
+
+// customDescribeInferenceComponentSetOutput sets the resource ResourceSynced condition to False if
+// InferenceComponent is being modified by AWS (status is one of modifyingStatuses).
+func (rm *resourceManager) customDescribeInferenceComponentSetOutput(ko *svcapitypes.InferenceComponent) {
+	latestStatus := ko.Status.InferenceComponentStatus
+	svccommon.SetSyncedCondition(&resource{ko}, latestStatus, &resourceName, &modifyingStatuses)
+}
+
+// customUpdateInferenceComponentSetOutput records the spec used for this update (Tags cleared) in the
+// last-spec-for-update annotation, then sets Status.InferenceComponentStatus to Updating and derives the
+// ResourceSynced condition from it so the reconciler can determine if a requeue is needed
+func (rm *resourceManager) customUpdateInferenceComponentSetOutput(ko *svcapitypes.InferenceComponent) error {
+	// set last inference component spec used for update in annotations
+	annotations := ko.ObjectMeta.GetAnnotations()
+	if annotations == nil {
+		annotations = make(map[string]string)
+	}
+	spec := ko.Spec.DeepCopy()
+	spec.Tags = nil
+	serializedSpec, err := json.Marshal(spec)
+	if err != nil {
+		return err
+	}
+	annotations[lastSpecForUpdateAnnotation] = string(serializedSpec)
+	ko.ObjectMeta.SetAnnotations(annotations)
+
+	// injecting Updating status to keep the Sync condition message and status.InferenceComponentStatus in sync
+	ko.Status.InferenceComponentStatus = aws.String(svcsdk.InferenceComponentStatusUpdating)
+
+	latestStatus := ko.Status.InferenceComponentStatus
+	svccommon.SetSyncedCondition(&resource{ko}, latestStatus, &resourceName, &modifyingStatuses)
+
+	return nil
+}
+
+// customUpdateInferenceComponentPreChecks adds specialized logic to check if controller should
+// proceed with UpdateInferenceComponent call.
+// Update is blocked in the following cases:
+// 1. while InferenceComponentStatus != InService (handled by requeueUntilCanModify method).
+// 2. InferenceComponentStatus == Failed.
+// 3. A previous update to the InferenceComponent with same spec failed.
+//
+// Method returns nil if InferenceComponent can be updated or its latest status is unknown (nil),
+// otherwise InferenceComponentUpdateError for cases 2 and 3, or the JSON error if the annotation cannot be parsed.
+func (rm *resourceManager) customUpdateInferenceComponentPreChecks(
+	ctx context.Context,
+	desired *resource,
+	latest *resource,
+	delta *ackcompare.Delta,
+) error {
+	latestStatus := latest.ko.Status.InferenceComponentStatus
+	if latestStatus == nil {
+		return nil
+	}
+
+	failureReason := latest.ko.Status.FailureReason
+
+	desiredSpec := desired.ko.Spec.DeepCopy()
+	desiredSpec.Tags = nil
+
+	var lastSpecForUpdateString *string
+	// Look the annotation up by key instead of ranging over the map: storing &v from a
+	// range loop aliases the shared loop variable before Go 1.22, so the pointer could
+	// end up referring to another annotation's value. Indexing a nil map is safe.
+	annotations := desired.ko.ObjectMeta.GetAnnotations()
+	if v, ok := annotations[lastSpecForUpdateAnnotation]; ok {
+		lastSpecForUpdateString = &v
+	}
+
+	var lastSpecForUpdate *svcapitypes.InferenceComponentSpec
+
+	if lastSpecForUpdateString != nil {
+		err := json.Unmarshal([]byte(*lastSpecForUpdateString), &lastSpecForUpdate)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Case 2 - InferenceComponentStatus == Failed
+	if *latestStatus == svcsdk.InferenceComponentStatusFailed ||
+		// Case 3 - A previous update to the InferenceComponent with same spec failed
+		// Following checks indicate FailureReason is related to a failed update
+		(failureReason != nil && lastSpecForUpdateString != nil &&
+			EqualInferenceComponentSpec(desiredSpec, lastSpecForUpdate)) {
+		// 1. FailureReason alone doesn't mean an update failed; it can appear because of other
+		// reasons (patching/scaling failed).
+		// 2. desiredSpec == lastSpecForUpdate only tells us an update was tried with lastSpecForUpdate
+		// but does not tell us anything if the update was successful or not in the past because
+		// it is set if updateInferenceComponent returns 200 (async operation).
+		// 3. Now, sdkUpdate can execute because of change in any field in Spec.
+
+		// Neither 1 nor 2 alone guarantees an update failed, so `lastSpecForUpdate` is consulted as well:
+		// `desiredSpec != latestSpec` + `desiredSpec == lastSpecForUpdate` + `FailureReason != nil`
+		// indicate that an update is needed, has already been tried and failed.
+		return awserr.New("InferenceComponentUpdateError", "Unable to update inference component."+
+			" Check FailureReason.", nil)
+	}
+
+	return nil
+}
+
+// EqualInferenceComponentSpec checks if two InferenceComponentSpec instances are equal
+func EqualInferenceComponentSpec(desiredSpec *svcapitypes.InferenceComponentSpec,
+	lastSpec *svcapitypes.InferenceComponentSpec) bool {
+	if desiredSpec == nil || lastSpec == nil {
+		return desiredSpec == lastSpec
+	}
+	return reflect.DeepEqual(desiredSpec, lastSpec)
+}
+
+// requeueUntilCanModify creates and returns an ackrequeue error
+// if a resource's latest status matches any of the defined modifying statuses.
+// This is so the controller requeues until the resource can be modified
+func (rm *resourceManager) requeueUntilCanModify(
+	ctx context.Context,
+	r *resource,
+) error {
+	latestStatus := r.ko.Status.InferenceComponentStatus
+	return svccommon.RequeueIfModifying(latestStatus, &resourceName, &modifyingStatuses)
+}
diff --git a/pkg/resource/inference_component/identifiers.go b/pkg/resource/inference_component/identifiers.go
new file mode 100644
index 00000000..7c88f887
--- /dev/null
+++ b/pkg/resource/inference_component/identifiers.go
@@ -0,0 +1,55 @@
+// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+//     http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied.
See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package inference_component + +import ( + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" +) + +// resourceIdentifiers implements the +// `aws-service-operator-k8s/pkg/types.AWSResourceIdentifiers` interface +type resourceIdentifiers struct { + meta *ackv1alpha1.ResourceMetadata +} + +// ARN returns the AWS Resource Name for the backend AWS resource. If nil, +// this means the resource has not yet been created in the backend AWS +// service. +func (ri *resourceIdentifiers) ARN() *ackv1alpha1.AWSResourceName { + if ri.meta != nil { + return ri.meta.ARN + } + return nil +} + +// OwnerAccountID returns the AWS account identifier in which the +// backend AWS resource resides, or nil if this information is not known +// for the resource +func (ri *resourceIdentifiers) OwnerAccountID() *ackv1alpha1.AWSAccountID { + if ri.meta != nil { + return ri.meta.OwnerAccountID + } + return nil +} + +// Region returns the AWS region in which the resource exists, or +// nil if this information is not known. +func (ri *resourceIdentifiers) Region() *ackv1alpha1.AWSRegion { + if ri.meta != nil { + return ri.meta.Region + } + return nil +} diff --git a/pkg/resource/inference_component/manager.go b/pkg/resource/inference_component/manager.go new file mode 100644 index 00000000..943ba2fc --- /dev/null +++ b/pkg/resource/inference_component/manager.go @@ -0,0 +1,360 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. 
This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package inference_component + +import ( + "context" + "fmt" + "time" + + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare" + ackcondition "github.com/aws-controllers-k8s/runtime/pkg/condition" + ackcfg "github.com/aws-controllers-k8s/runtime/pkg/config" + ackerr "github.com/aws-controllers-k8s/runtime/pkg/errors" + ackmetrics "github.com/aws-controllers-k8s/runtime/pkg/metrics" + ackrequeue "github.com/aws-controllers-k8s/runtime/pkg/requeue" + ackrt "github.com/aws-controllers-k8s/runtime/pkg/runtime" + ackrtlog "github.com/aws-controllers-k8s/runtime/pkg/runtime/log" + acktags "github.com/aws-controllers-k8s/runtime/pkg/tags" + acktypes "github.com/aws-controllers-k8s/runtime/pkg/types" + ackutil "github.com/aws-controllers-k8s/runtime/pkg/util" + "github.com/aws/aws-sdk-go/aws/session" + svcsdk "github.com/aws/aws-sdk-go/service/sagemaker" + svcsdkapi "github.com/aws/aws-sdk-go/service/sagemaker/sagemakeriface" + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + + svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1" +) + +var ( + _ = ackutil.InStrings + _ = acktags.NewTags() + _ = ackrt.MissingImageTagValue + _ = svcapitypes.InferenceComponent{} +) + +// +kubebuilder:rbac:groups=sagemaker.services.k8s.aws,resources=inferencecomponents,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=sagemaker.services.k8s.aws,resources=inferencecomponents/status,verbs=get;update;patch + +var lateInitializeFieldNames = []string{} + +// resourceManager is responsible for providing a consistent way to perform +// CRUD operations in a backend AWS service 
API for InferenceComponent custom resources. +type resourceManager struct { + // cfg is a copy of the ackcfg.Config object passed on start of the service + // controller + cfg ackcfg.Config + // log refers to the logr.Logger object handling logging for the service + // controller + log logr.Logger + // metrics contains a collection of Prometheus metric objects that the + // service controller and its reconcilers track + metrics *ackmetrics.Metrics + // rr is the Reconciler which can be used for various utility + // functions such as querying for Secret values given a SecretReference + rr acktypes.Reconciler + // awsAccountID is the AWS account identifier that contains the resources + // managed by this resource manager + awsAccountID ackv1alpha1.AWSAccountID + // The AWS Region that this resource manager targets + awsRegion ackv1alpha1.AWSRegion + // sess is the AWS SDK Session object used to communicate with the backend + // AWS service API + sess *session.Session + // sdkapi is a pointer to the AWS service API interface exposed by the + // aws-sdk-go/services/{alias}/{alias}iface package. + sdkapi svcsdkapi.SageMakerAPI +} + +// concreteResource returns a pointer to a resource from the supplied +// generic AWSResource interface +func (rm *resourceManager) concreteResource( + res acktypes.AWSResource, +) *resource { + // cast the generic interface into a pointer type specific to the concrete + // implementing resource type managed by this resource manager + return res.(*resource) +} + +// ReadOne returns the currently-observed state of the supplied AWSResource in +// the backend AWS service API. +func (rm *resourceManager) ReadOne( + ctx context.Context, + res acktypes.AWSResource, +) (acktypes.AWSResource, error) { + r := rm.concreteResource(res) + if r.ko == nil { + // Should never happen... if it does, it's buggy code.
+ panic("resource manager's ReadOne() method received resource with nil CR object") + } + observed, err := rm.sdkFind(ctx, r) + if err != nil { + if observed != nil { + return rm.onError(observed, err) + } + return rm.onError(r, err) + } + return rm.onSuccess(observed) +} + +// Create attempts to create the supplied AWSResource in the backend AWS +// service API, returning an AWSResource representing the newly-created +// resource +func (rm *resourceManager) Create( + ctx context.Context, + res acktypes.AWSResource, +) (acktypes.AWSResource, error) { + r := rm.concreteResource(res) + if r.ko == nil { + // Should never happen... if it does, it's buggy code. + panic("resource manager's Create() method received resource with nil CR object") + } + created, err := rm.sdkCreate(ctx, r) + if err != nil { + if created != nil { + return rm.onError(created, err) + } + return rm.onError(r, err) + } + return rm.onSuccess(created) +} + +// Update attempts to mutate the supplied desired AWSResource in the backend AWS +// service API, returning an AWSResource representing the newly-mutated +// resource. +// Note: implementers of specialized logic can check to see how the latest +// observed resource differs from the supplied desired state. The +// higher-level reconciler determines whether or not the desired differs +// from the latest observed and decides whether to call the resource +// manager's Update method +func (rm *resourceManager) Update( + ctx context.Context, + resDesired acktypes.AWSResource, + resLatest acktypes.AWSResource, + delta *ackcompare.Delta, +) (acktypes.AWSResource, error) { + desired := rm.concreteResource(resDesired) + latest := rm.concreteResource(resLatest) + if desired.ko == nil || latest.ko == nil { + // Should never happen... if it does, it's buggy code.
+ panic("resource manager's Update() method received resource with nil CR object") + } + updated, err := rm.sdkUpdate(ctx, desired, latest, delta) + if err != nil { + if updated != nil { + return rm.onError(updated, err) + } + return rm.onError(latest, err) + } + return rm.onSuccess(updated) +} + +// Delete attempts to destroy the supplied AWSResource in the backend AWS +// service API, returning an AWSResource representing the +// resource being deleted (if delete is asynchronous and takes time) +func (rm *resourceManager) Delete( + ctx context.Context, + res acktypes.AWSResource, +) (acktypes.AWSResource, error) { + r := rm.concreteResource(res) + if r.ko == nil { + // Should never happen... if it does, it's buggy code. + panic("resource manager's Delete() method received resource with nil CR object") + } + observed, err := rm.sdkDelete(ctx, r) + if err != nil { + if observed != nil { + return rm.onError(observed, err) + } + return rm.onError(r, err) + } + + return rm.onSuccess(observed) +} + +// ARNFromName returns an AWS Resource Name from a given string name. This +// is useful for constructing ARNs for APIs that require ARNs in their +// GetAttributes operations but all we have (for new CRs at least) is a +// name for the resource +func (rm *resourceManager) ARNFromName(name string) string { + return fmt.Sprintf( + "arn:aws:sagemaker:%s:%s:%s", + rm.awsRegion, + rm.awsAccountID, + name, + ) +} + +// LateInitialize returns an acktypes.AWSResource after setting the late initialized +// fields from the readOne call. This method will initialize the optional fields +// which were not provided by the k8s user but were defaulted by the AWS service. +// If there are no such fields to be initialized, the returned object is similar to +// object passed in the parameter.
+func (rm *resourceManager) LateInitialize( + ctx context.Context, + latest acktypes.AWSResource, +) (acktypes.AWSResource, error) { + rlog := ackrtlog.FromContext(ctx) + // If there are no fields to late initialize, do nothing + if len(lateInitializeFieldNames) == 0 { + rlog.Debug("no late initialization required.") + return latest, nil + } + latestCopy := latest.DeepCopy() + lateInitConditionReason := "" + lateInitConditionMessage := "" + observed, err := rm.ReadOne(ctx, latestCopy) + if err != nil { + lateInitConditionMessage = "Unable to complete Read operation required for late initialization" + lateInitConditionReason = "Late Initialization Failure" + ackcondition.SetLateInitialized(latestCopy, corev1.ConditionFalse, &lateInitConditionMessage, &lateInitConditionReason) + ackcondition.SetSynced(latestCopy, corev1.ConditionFalse, nil, nil) + return latestCopy, err + } + lateInitializedRes := rm.lateInitializeFromReadOneOutput(observed, latestCopy) + incompleteInitialization := rm.incompleteLateInitialization(lateInitializedRes) + if incompleteInitialization { + // Add the condition with LateInitialized=False + lateInitConditionMessage = "Late initialization did not complete, requeuing with delay of 5 seconds" + lateInitConditionReason = "Delayed Late Initialization" + ackcondition.SetLateInitialized(lateInitializedRes, corev1.ConditionFalse, &lateInitConditionMessage, &lateInitConditionReason) + ackcondition.SetSynced(lateInitializedRes, corev1.ConditionFalse, nil, nil) + return lateInitializedRes, ackrequeue.NeededAfter(nil, time.Duration(5)*time.Second) + } + // Set LateInitialized condition to True + lateInitConditionMessage = "Late initialization successful" + lateInitConditionReason = "Late initialization successful" + ackcondition.SetLateInitialized(lateInitializedRes, corev1.ConditionTrue, &lateInitConditionMessage, &lateInitConditionReason) + return lateInitializedRes, nil +} + +// incompleteLateInitialization returns true if there are fields which were
supposed to be +// late initialized but are not. If all the fields are late initialized, false is returned +func (rm *resourceManager) incompleteLateInitialization( + res acktypes.AWSResource, +) bool { + return false +} + +// lateInitializeFromReadOneOutput late initializes the 'latest' resource from the 'observed' +// resource and returns 'latest' resource +func (rm *resourceManager) lateInitializeFromReadOneOutput( + observed acktypes.AWSResource, + latest acktypes.AWSResource, +) acktypes.AWSResource { + return latest +} + +// IsSynced returns true if the resource is synced; this implementation always reports true. +func (rm *resourceManager) IsSynced(ctx context.Context, res acktypes.AWSResource) (bool, error) { + r := rm.concreteResource(res) + if r.ko == nil { + // Should never happen... if it does, it's buggy code. + panic("resource manager's IsSynced() method received resource with nil CR object") + } + + return true, nil +} + +// EnsureTags ensures that tags are present inside the AWSResource. +// If the AWSResource does not have any existing resource tags, the 'tags' +// field is initialized and the controller tags are added. +// If the AWSResource has existing resource tags, then controller tags are +// added to the existing resource tags without overriding them. +// If the AWSResource does not support tags, only then the controller tags +// will not be added to the AWSResource. +func (rm *resourceManager) EnsureTags( + ctx context.Context, + res acktypes.AWSResource, + md acktypes.ServiceControllerMetadata, +) error { + r := rm.concreteResource(res) + if r.ko == nil { + // Should never happen... if it does, it's buggy code.
+ panic("resource manager's EnsureTags method received resource with nil CR object") + } + defaultTags := ackrt.GetDefaultTags(&rm.cfg, r.ko, md) + var existingTags []*svcapitypes.Tag + existingTags = r.ko.Spec.Tags + resourceTags := ToACKTags(existingTags) + tags := acktags.Merge(resourceTags, defaultTags) + r.ko.Spec.Tags = FromACKTags(tags) + return nil +} + +// newResourceManager returns a new struct implementing +// acktypes.AWSResourceManager +func newResourceManager( + cfg ackcfg.Config, + log logr.Logger, + metrics *ackmetrics.Metrics, + rr acktypes.Reconciler, + sess *session.Session, + id ackv1alpha1.AWSAccountID, + region ackv1alpha1.AWSRegion, +) (*resourceManager, error) { + return &resourceManager{ + cfg: cfg, + log: log, + metrics: metrics, + rr: rr, + awsAccountID: id, + awsRegion: region, + sess: sess, + sdkapi: svcsdk.New(sess), + }, nil +} + +// onError updates resource conditions and returns updated resource +// it returns the supplied resource if no condition is updated. +func (rm *resourceManager) onError( + r *resource, + err error, +) (acktypes.AWSResource, error) { + if r == nil { + return nil, err + } + r1, updated := rm.updateConditions(r, false, err) + if !updated { + return r, err + } + for _, condition := range r1.Conditions() { + if condition.Type == ackv1alpha1.ConditionTypeTerminal && + condition.Status == corev1.ConditionTrue { + // resource is in Terminal condition + // return Terminal error + return r1, ackerr.Terminal + } + } + return r1, err +} + +// onSuccess updates resource conditions and returns updated resource +// it returns the supplied resource if no condition is updated.
+func (rm *resourceManager) onSuccess( + r *resource, +) (acktypes.AWSResource, error) { + if r == nil { + return nil, nil + } + r1, updated := rm.updateConditions(r, true, nil) + if !updated { + return r, nil + } + return r1, nil +} diff --git a/pkg/resource/inference_component/manager_factory.go b/pkg/resource/inference_component/manager_factory.go new file mode 100644 index 00000000..0163c105 --- /dev/null +++ b/pkg/resource/inference_component/manager_factory.go @@ -0,0 +1,96 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package inference_component + +import ( + "fmt" + "sync" + + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + ackcfg "github.com/aws-controllers-k8s/runtime/pkg/config" + ackmetrics "github.com/aws-controllers-k8s/runtime/pkg/metrics" + acktypes "github.com/aws-controllers-k8s/runtime/pkg/types" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/go-logr/logr" + + svcresource "github.com/aws-controllers-k8s/sagemaker-controller/pkg/resource" +) + +// resourceManagerFactory produces resourceManager objects. It implements the +// `types.AWSResourceManagerFactory` interface. 
+type resourceManagerFactory struct { + sync.RWMutex + // rmCache contains resource managers keyed by "<AWS account ID>/<region>" + rmCache map[string]*resourceManager +} + +// ResourceDescriptor returns an AWSResourceDescriptor for the resources that +// resource managers produced by this factory will handle +func (f *resourceManagerFactory) ResourceDescriptor() acktypes.AWSResourceDescriptor { + return &resourceDescriptor{} +} + +// ManagerFor returns a resource manager object that can manage resources for a +// supplied AWS account and region +func (f *resourceManagerFactory) ManagerFor( + cfg ackcfg.Config, + log logr.Logger, + metrics *ackmetrics.Metrics, + rr acktypes.Reconciler, + sess *session.Session, + id ackv1alpha1.AWSAccountID, + region ackv1alpha1.AWSRegion, +) (acktypes.AWSResourceManager, error) { + rmId := fmt.Sprintf("%s/%s", id, region) + f.RLock() + rm, found := f.rmCache[rmId] + f.RUnlock() + + if found { + return rm, nil + } + + f.Lock() + defer f.Unlock() + + rm, err := newResourceManager(cfg, log, metrics, rr, sess, id, region) + if err != nil { + return nil, err + } + f.rmCache[rmId] = rm + return rm, nil +} + +// IsAdoptable returns true if the resource is able to be adopted +func (f *resourceManagerFactory) IsAdoptable() bool { + return true +} + +// RequeueOnSuccessSeconds returns the delay, in seconds, after which the resource +// should be requeued following a successful reconciliation.
+func (f *resourceManagerFactory) RequeueOnSuccessSeconds() int { + return 30 +} + +func newResourceManagerFactory() *resourceManagerFactory { + return &resourceManagerFactory{ + rmCache: map[string]*resourceManager{}, + } +} + +func init() { + svcresource.RegisterManagerFactory(newResourceManagerFactory()) +} diff --git a/pkg/resource/inference_component/manager_test_suite_test.go b/pkg/resource/inference_component/manager_test_suite_test.go new file mode 100644 index 00000000..1efe749a --- /dev/null +++ b/pkg/resource/inference_component/manager_test_suite_test.go @@ -0,0 +1,139 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package inference_component + +import ( + "errors" + "fmt" + + "path/filepath" + "testing" + + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + ackmetrics "github.com/aws-controllers-k8s/runtime/pkg/metrics" + acktypes "github.com/aws-controllers-k8s/runtime/pkg/types" + svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1" + "github.com/aws-controllers-k8s/sagemaker-controller/pkg/testutil" + mocksvcsdkapi "github.com/aws-controllers-k8s/sagemaker-controller/test/mocks/aws-sdk-go/sagemaker" + svcsdk "github.com/aws/aws-sdk-go/service/sagemaker" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "go.uber.org/zap/zapcore" + ctrlrtzap "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +// provideResourceManagerWithMockSDKAPI accepts a mock SageMakerAPI and returns a pointer to a +// resourceManager that is configured to use that mock API. +func provideResourceManagerWithMockSDKAPI(mockSageMakerAPI *mocksvcsdkapi.SageMakerAPI) *resourceManager { + zapOptions := ctrlrtzap.Options{ + Development: true, + Level: zapcore.InfoLevel, + } + fakeLogger := ctrlrtzap.New(ctrlrtzap.UseFlagOptions(&zapOptions)) + return &resourceManager{ + rr: nil, + awsAccountID: "", + awsRegion: "", + sess: nil, + sdkapi: mockSageMakerAPI, + log: fakeLogger, + metrics: ackmetrics.NewMetrics("sagemaker"), + } +} + +// TestInferenceComponentTestSuite runs the test suite for InferenceComponent +func TestInferenceComponentTestSuite(t *testing.T) { + defer func() { + if r := recover(); r != nil { + fmt.Println(testutil.RecoverPanicString, r) + t.Fail() + } + }() + var ts = testutil.TestSuite{} + testutil.LoadFromFixture(filepath.Join("testdata", "test_suite.yaml"), &ts) + var delegate = testRunnerDelegate{t: t} + var runner = testutil.TestSuiteRunner{TestSuite: &ts, Delegate: &delegate} + runner.RunTests() +} + +// testRunnerDelegate implements testutil.TestRunnerDelegate +type testRunnerDelegate struct { +
t *testing.T +} + +func (d *testRunnerDelegate) ResourceDescriptor() acktypes.AWSResourceDescriptor { + return &resourceDescriptor{} +} + +func (d *testRunnerDelegate) ResourceManager(mocksdkapi *mocksvcsdkapi.SageMakerAPI) acktypes.AWSResourceManager { + return provideResourceManagerWithMockSDKAPI(mocksdkapi) +} + +func (d *testRunnerDelegate) GoTestRunner() *testing.T { + return d.t +} + +func (d *testRunnerDelegate) EmptyServiceAPIOutput(apiName string) (interface{}, error) { + if apiName == "" { + return nil, errors.New("no API name specified") + } + //TODO: use reflection, template to auto generate this block/method. + switch apiName { + case "CreateInferenceComponentWithContext": + var output svcsdk.CreateInferenceComponentOutput + return &output, nil + case "DeleteInferenceComponentWithContext": + var output svcsdk.DeleteInferenceComponentOutput + return &output, nil + case "DescribeInferenceComponentWithContext": + var output svcsdk.DescribeInferenceComponentOutput + return &output, nil + case "UpdateInferenceComponentWithContext": + var output svcsdk.UpdateInferenceComponentOutput + return &output, nil + } + return nil, fmt.Errorf("no matching API name found for: %s", apiName) +} + +func (d *testRunnerDelegate) Equal(a acktypes.AWSResource, b acktypes.AWSResource) bool { + ac := a.(*resource) + bc := b.(*resource) + // Ignore LastTransitionTime since it gets updated each run. + opts := []cmp.Option{ + cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(ackv1alpha1.Condition{}, "LastTransitionTime"), + cmpopts.IgnoreFields(svcapitypes.InferenceComponentStatus{}, "CreationTime", "LastModifiedTime"), + cmpopts.IgnoreFields(svcapitypes.DeployedImage{}, "ResolutionTime"), + } + + var specMatch = false + if cmp.Equal(ac.ko.Spec, bc.ko.Spec, opts...)
{ + specMatch = true + } else { + fmt.Printf("Difference ko.Spec (-expected +actual):\n\n") + fmt.Println(cmp.Diff(ac.ko.Spec, bc.ko.Spec, opts...)) + specMatch = false + } + + var statusMatch = false + if cmp.Equal(ac.ko.Status, bc.ko.Status, opts...) { + statusMatch = true + } else { + fmt.Printf("Difference ko.Status (-expected +actual):\n\n") + fmt.Println(cmp.Diff(ac.ko.Status, bc.ko.Status, opts...)) + statusMatch = false + } + + return statusMatch && specMatch +} diff --git a/pkg/resource/inference_component/references.go b/pkg/resource/inference_component/references.go new file mode 100644 index 00000000..84e08502 --- /dev/null +++ b/pkg/resource/inference_component/references.go @@ -0,0 +1,56 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. + +package inference_component + +import ( + "context" + "sigs.k8s.io/controller-runtime/pkg/client" + + acktypes "github.com/aws-controllers-k8s/runtime/pkg/types" + + svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1" +) + +// ClearResolvedReferences removes any reference values that were made +// concrete in the spec. It returns a copy of the input AWSResource which +// contains the original *Ref values, but none of their respective concrete +// values.
+func (rm *resourceManager) ClearResolvedReferences(res acktypes.AWSResource) acktypes.AWSResource { + ko := rm.concreteResource(res).ko.DeepCopy() + + return &resource{ko} +} + +// ResolveReferences finds if there are any Reference field(s) present +// inside AWSResource passed in the parameter and attempts to resolve those +// reference field(s) into their respective target field(s). It returns a +// copy of the input AWSResource with resolved reference(s), a boolean which +// is set to true if the resource contains any references (regardless of if +// they are resolved successfully) and an error if the passed AWSResource's +// reference field(s) could not be resolved. +func (rm *resourceManager) ResolveReferences( + ctx context.Context, + apiReader client.Reader, + res acktypes.AWSResource, +) (acktypes.AWSResource, bool, error) { + return res, false, nil +} + +// validateReferenceFields validates the reference field and corresponding +// identifier field. +func validateReferenceFields(ko *svcapitypes.InferenceComponent) error { + return nil +} diff --git a/pkg/resource/inference_component/resource.go b/pkg/resource/inference_component/resource.go new file mode 100644 index 00000000..2a9a473e --- /dev/null +++ b/pkg/resource/inference_component/resource.go @@ -0,0 +1,100 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. 
+ +package inference_component + +import ( + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + ackerrors "github.com/aws-controllers-k8s/runtime/pkg/errors" + acktypes "github.com/aws-controllers-k8s/runtime/pkg/types" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + rtclient "sigs.k8s.io/controller-runtime/pkg/client" + + svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1" +) + +// Hack to avoid import errors during build... +var ( + _ = &ackerrors.MissingNameIdentifier +) + +// resource implements the `aws-controller-k8s/runtime/pkg/types.AWSResource` +// interface +type resource struct { + // The Kubernetes-native CR representing the resource + ko *svcapitypes.InferenceComponent +} + +// Identifiers returns an AWSResourceIdentifiers object containing various +// identifying information, including the AWS account ID that owns the +// resource, the resource's AWS Resource Name (ARN) +func (r *resource) Identifiers() acktypes.AWSResourceIdentifiers { + return &resourceIdentifiers{r.ko.Status.ACKResourceMetadata} +} + +// IsBeingDeleted returns true if the Kubernetes resource has a non-zero +// deletion timestamp +func (r *resource) IsBeingDeleted() bool { + return !r.ko.DeletionTimestamp.IsZero() +} + +// RuntimeObject returns the Kubernetes apimachinery/runtime representation of +// the AWSResource +func (r *resource) RuntimeObject() rtclient.Object { + return r.ko +} + +// MetaObject returns the Kubernetes apimachinery/apis/meta/v1.Object +// representation of the AWSResource +func (r *resource) MetaObject() metav1.Object { + return r.ko.GetObjectMeta() +} + +// Conditions returns the ACK Conditions collection for the AWSResource +func (r *resource) Conditions() []*ackv1alpha1.Condition { + return r.ko.Status.Conditions +} + +// ReplaceConditions sets the Conditions status field for the resource +func (r *resource) ReplaceConditions(conditions []*ackv1alpha1.Condition) { + r.ko.Status.Conditions = conditions +} 
+ +// SetObjectMeta sets the ObjectMeta field for the resource +func (r *resource) SetObjectMeta(meta metav1.ObjectMeta) { + r.ko.ObjectMeta = meta +} + +// SetStatus will set the Status field for the resource +func (r *resource) SetStatus(desired acktypes.AWSResource) { + r.ko.Status = desired.(*resource).ko.Status +} + +// SetIdentifiers sets the Spec or Status field that is referenced as the unique +// resource identifier +func (r *resource) SetIdentifiers(identifier *ackv1alpha1.AWSIdentifiers) error { + if identifier.NameOrID == "" { + return ackerrors.MissingNameIdentifier + } + r.ko.Spec.InferenceComponentName = &identifier.NameOrID + + return nil +} + +// DeepCopy will return a copy of the resource +func (r *resource) DeepCopy() acktypes.AWSResource { + koCopy := r.ko.DeepCopy() + return &resource{koCopy} +} diff --git a/pkg/resource/inference_component/sdk.go b/pkg/resource/inference_component/sdk.go new file mode 100644 index 00000000..e248eae9 --- /dev/null +++ b/pkg/resource/inference_component/sdk.go @@ -0,0 +1,661 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +// Code generated by ack-generate. DO NOT EDIT. 
+ +package inference_component + +import ( + "context" + "errors" + "fmt" + "reflect" + "strings" + + ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1" + ackcompare "github.com/aws-controllers-k8s/runtime/pkg/compare" + ackcondition "github.com/aws-controllers-k8s/runtime/pkg/condition" + ackerr "github.com/aws-controllers-k8s/runtime/pkg/errors" + ackrequeue "github.com/aws-controllers-k8s/runtime/pkg/requeue" + ackrtlog "github.com/aws-controllers-k8s/runtime/pkg/runtime/log" + "github.com/aws/aws-sdk-go/aws" + svcsdk "github.com/aws/aws-sdk-go/service/sagemaker" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1" +) + +// Hack to avoid import errors during build... +var ( + _ = &metav1.Time{} + _ = strings.ToLower("") + _ = &aws.JSONValue{} + _ = &svcsdk.SageMaker{} + _ = &svcapitypes.InferenceComponent{} + _ = ackv1alpha1.AWSAccountID("") + _ = &ackerr.NotFound + _ = &ackcondition.NotManagedMessage + _ = &reflect.Value{} + _ = fmt.Sprintf("") + _ = &ackrequeue.NoRequeue{} +) + +// sdkFind returns SDK-specific information about a supplied resource +func (rm *resourceManager) sdkFind( + ctx context.Context, + r *resource, +) (latest *resource, err error) { + rlog := ackrtlog.FromContext(ctx) + exit := rlog.Trace("rm.sdkFind") + defer func() { + exit(err) + }() + // If any required fields in the input shape are missing, AWS resource is + // not created yet. Return NotFound here to indicate to callers that the + // resource isn't yet created. 
+ if rm.requiredFieldsMissingFromReadOneInput(r) { + return nil, ackerr.NotFound + } + + input, err := rm.newDescribeRequestPayload(r) + if err != nil { + return nil, err + } + + var resp *svcsdk.DescribeInferenceComponentOutput + resp, err = rm.sdkapi.DescribeInferenceComponentWithContext(ctx, input) + rm.metrics.RecordAPICall("READ_ONE", "DescribeInferenceComponent", err) + if err != nil { + if reqErr, ok := ackerr.AWSRequestFailure(err); ok && reqErr.StatusCode() == 404 { + return nil, ackerr.NotFound + } + if awsErr, ok := ackerr.AWSError(err); ok && awsErr.Code() == "ValidationException" && strings.HasPrefix(awsErr.Message(), "Could not find inference component") { + return nil, ackerr.NotFound + } + return nil, err + } + + // Merge in the information we read from the API call above to the copy of + // the original Kubernetes object we passed to the function + ko := r.ko.DeepCopy() + + if resp.CreationTime != nil { + ko.Status.CreationTime = &metav1.Time{*resp.CreationTime} + } else { + ko.Status.CreationTime = nil + } + if resp.EndpointName != nil { + ko.Spec.EndpointName = resp.EndpointName + } else { + ko.Spec.EndpointName = nil + } + if resp.FailureReason != nil { + ko.Status.FailureReason = resp.FailureReason + } else { + ko.Status.FailureReason = nil + } + if ko.Status.ACKResourceMetadata == nil { + ko.Status.ACKResourceMetadata = &ackv1alpha1.ResourceMetadata{} + } + if resp.InferenceComponentArn != nil { + arn := ackv1alpha1.AWSResourceName(*resp.InferenceComponentArn) + ko.Status.ACKResourceMetadata.ARN = &arn + } + if resp.InferenceComponentName != nil { + ko.Spec.InferenceComponentName = resp.InferenceComponentName + } else { + ko.Spec.InferenceComponentName = nil + } + if resp.InferenceComponentStatus != nil { + ko.Status.InferenceComponentStatus = resp.InferenceComponentStatus + } else { + ko.Status.InferenceComponentStatus = nil + } + if resp.LastModifiedTime != nil { + ko.Status.LastModifiedTime = &metav1.Time{*resp.LastModifiedTime} + } else { 
+ ko.Status.LastModifiedTime = nil + } + if resp.RuntimeConfig != nil { + f8 := &svcapitypes.InferenceComponentRuntimeConfig{} + ko.Spec.RuntimeConfig = f8 + } else { + ko.Spec.RuntimeConfig = nil + } + if resp.Specification != nil { + f9 := &svcapitypes.InferenceComponentSpecification{} + if resp.Specification.ComputeResourceRequirements != nil { + f9f0 := &svcapitypes.InferenceComponentComputeResourceRequirements{} + if resp.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb != nil { + f9f0.MaxMemoryRequiredInMb = resp.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb + } + if resp.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb != nil { + f9f0.MinMemoryRequiredInMb = resp.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb + } + if resp.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired != nil { + f9f0.NumberOfAcceleratorDevicesRequired = resp.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired + } + if resp.Specification.ComputeResourceRequirements.NumberOfCpuCoresRequired != nil { + f9f0.NumberOfCPUCoresRequired = resp.Specification.ComputeResourceRequirements.NumberOfCpuCoresRequired + } + f9.ComputeResourceRequirements = f9f0 + } + if resp.Specification.Container != nil { + f9f1 := &svcapitypes.InferenceComponentContainerSpecification{} + if resp.Specification.Container.ArtifactUrl != nil { + f9f1.ArtifactURL = resp.Specification.Container.ArtifactUrl + } + if resp.Specification.Container.Environment != nil { + f9f1f2 := map[string]*string{} + for f9f1f2key, f9f1f2valiter := range resp.Specification.Container.Environment { + var f9f1f2val string + f9f1f2val = *f9f1f2valiter + f9f1f2[f9f1f2key] = &f9f1f2val + } + f9f1.Environment = f9f1f2 + } + f9.Container = f9f1 + } + if resp.Specification.ModelName != nil { + f9.ModelName = resp.Specification.ModelName + } + if resp.Specification.StartupParameters != nil { + f9f3 := 
&svcapitypes.InferenceComponentStartupParameters{} + if resp.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds != nil { + f9f3.ContainerStartupHealthCheckTimeoutInSeconds = resp.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds + } + if resp.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds != nil { + f9f3.ModelDataDownloadTimeoutInSeconds = resp.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds + } + f9.StartupParameters = f9f3 + } + ko.Spec.Specification = f9 + } else { + ko.Spec.Specification = nil + } + if resp.VariantName != nil { + ko.Spec.VariantName = resp.VariantName + } else { + ko.Spec.VariantName = nil + } + + rm.setStatusDefaults(ko) + // Manually set the RuntimeConfig.CopyCount from read response RuntimeConfig.DesiredCopyCount + if resp.RuntimeConfig != nil && ko.Spec.RuntimeConfig != nil { + ko.Spec.RuntimeConfig.CopyCount = resp.RuntimeConfig.DesiredCopyCount + } + + rm.customDescribeInferenceComponentSetOutput(ko) + + return &resource{ko}, nil +} + +// requiredFieldsMissingFromReadOneInput returns true if there are any fields +// for the ReadOne Input shape that are required but not present in the +// resource's Spec or Status +func (rm *resourceManager) requiredFieldsMissingFromReadOneInput( + r *resource, +) bool { + return r.ko.Spec.InferenceComponentName == nil + +} + +// newDescribeRequestPayload returns SDK-specific struct for the HTTP request +// payload of the Describe API call for the resource +func (rm *resourceManager) newDescribeRequestPayload( + r *resource, +) (*svcsdk.DescribeInferenceComponentInput, error) { + res := &svcsdk.DescribeInferenceComponentInput{} + + if r.ko.Spec.InferenceComponentName != nil { + res.SetInferenceComponentName(*r.ko.Spec.InferenceComponentName) + } + + return res, nil +} + +// sdkCreate creates the supplied resource in the backend AWS service API and +// returns a copy of the resource with resource fields (in both Spec and 
+// Status) filled in with values from the CREATE API operation's Output shape.
+func (rm *resourceManager) sdkCreate(
+	ctx context.Context,
+	desired *resource,
+) (created *resource, err error) {
+	rlog := ackrtlog.FromContext(ctx)
+	exit := rlog.Trace("rm.sdkCreate")
+	defer func() {
+		exit(err) // runs on every return path with the final value of the named result err
+	}()
+	input, err := rm.newCreateRequestPayload(ctx, desired)
+	if err != nil {
+		return nil, err
+	}
+
+	var resp *svcsdk.CreateInferenceComponentOutput
+	_ = resp // no-op; resp is populated by the call on the next line
+	resp, err = rm.sdkapi.CreateInferenceComponentWithContext(ctx, input)
+	rm.metrics.RecordAPICall("CREATE", "CreateInferenceComponent", err) // recorded before the error check, so failures are counted too
+	if err != nil {
+		return nil, err
+	}
+	// Work on a deep copy of the desired object so the caller's resource is
+	// left untouched; only the ARN from the Create response is merged in.
+	ko := desired.ko.DeepCopy()
+
+	if ko.Status.ACKResourceMetadata == nil {
+		ko.Status.ACKResourceMetadata = &ackv1alpha1.ResourceMetadata{}
+	}
+	if resp.InferenceComponentArn != nil {
+		arn := ackv1alpha1.AWSResourceName(*resp.InferenceComponentArn)
+		ko.Status.ACKResourceMetadata.ARN = &arn
+	}
+
+	rm.setStatusDefaults(ko)
+	return &resource{ko}, nil
+}
+
+// newCreateRequestPayload builds the CreateInferenceComponentInput for the
+// Create API call from r's Spec; Spec fields that are nil are left unset.
+func (rm *resourceManager) newCreateRequestPayload(
+	ctx context.Context,
+	r *resource,
+) (*svcsdk.CreateInferenceComponentInput, error) {
+	res := &svcsdk.CreateInferenceComponentInput{}
+
+	if r.ko.Spec.EndpointName != nil {
+		res.SetEndpointName(*r.ko.Spec.EndpointName)
+	}
+	if r.ko.Spec.InferenceComponentName != nil {
+		res.SetInferenceComponentName(*r.ko.Spec.InferenceComponentName)
+	}
+	if r.ko.Spec.RuntimeConfig != nil {
+		f2 := &svcsdk.InferenceComponentRuntimeConfig{}
+		if r.ko.Spec.RuntimeConfig.CopyCount != nil {
+			f2.SetCopyCount(*r.ko.Spec.RuntimeConfig.CopyCount)
+		}
+		res.SetRuntimeConfig(f2) // set even when CopyCount is nil, as long as RuntimeConfig itself is present
+	}
+	if r.ko.Spec.Specification != nil {
+		f3 := &svcsdk.InferenceComponentSpecification{}
+		if r.ko.Spec.Specification.ComputeResourceRequirements != nil {
+			f3f0 := &svcsdk.InferenceComponentComputeResourceRequirements{}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb != nil {
+				f3f0.SetMaxMemoryRequiredInMb(*r.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb)
+			}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb != nil {
+				f3f0.SetMinMemoryRequiredInMb(*r.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb)
+			}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired != nil {
+				f3f0.SetNumberOfAcceleratorDevicesRequired(*r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired)
+			}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired != nil {
+				f3f0.SetNumberOfCpuCoresRequired(*r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired)
+			}
+			f3.SetComputeResourceRequirements(f3f0)
+		}
+		if r.ko.Spec.Specification.Container != nil {
+			f3f1 := &svcsdk.InferenceComponentContainerSpecification{}
+			if r.ko.Spec.Specification.Container.ArtifactURL != nil {
+				f3f1.SetArtifactUrl(*r.ko.Spec.Specification.Container.ArtifactURL)
+			}
+			if r.ko.Spec.Specification.Container.Environment != nil {
+				f3f1f1 := map[string]*string{}
+				for f3f1f1key, f3f1f1valiter := range r.ko.Spec.Specification.Container.Environment {
+					var f3f1f1val string
+					f3f1f1val = *f3f1f1valiter // NOTE(review): dereferenced without a nil check; a nil map value would panic here
+					f3f1f1[f3f1f1key] = &f3f1f1val // points at the per-iteration copy, not at the Spec's own string
+				}
+				f3f1.SetEnvironment(f3f1f1)
+			}
+			if r.ko.Spec.Specification.Container.Image != nil {
+				f3f1.SetImage(*r.ko.Spec.Specification.Container.Image)
+			}
+			f3.SetContainer(f3f1)
+		}
+		if r.ko.Spec.Specification.ModelName != nil {
+			f3.SetModelName(*r.ko.Spec.Specification.ModelName)
+		}
+		if r.ko.Spec.Specification.StartupParameters != nil {
+			f3f3 := &svcsdk.InferenceComponentStartupParameters{}
+			if r.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds != nil {
+				f3f3.SetContainerStartupHealthCheckTimeoutInSeconds(*r.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds)
+			}
+			if r.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds != nil {
+				f3f3.SetModelDataDownloadTimeoutInSeconds(*r.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds)
+			}
+			f3.SetStartupParameters(f3f3)
+		}
+		res.SetSpecification(f3)
+	}
+	if r.ko.Spec.Tags != nil {
+		f4 := []*svcsdk.Tag{}
+		for _, f4iter := range r.ko.Spec.Tags {
+			f4elem := &svcsdk.Tag{}
+			if f4iter.Key != nil { // NOTE(review): a nil element in Spec.Tags would panic on this field access
+				f4elem.SetKey(*f4iter.Key)
+			}
+			if f4iter.Value != nil {
+				f4elem.SetValue(*f4iter.Value)
+			}
+			f4 = append(f4, f4elem)
+		}
+		res.SetTags(f4)
+	}
+	if r.ko.Spec.VariantName != nil {
+		res.SetVariantName(*r.ko.Spec.VariantName)
+	}
+
+	return res, nil // the error result is always nil in this function
+}
+
+// sdkUpdate patches the supplied resource in the backend AWS service API and
+// returns a new resource with updated fields.
+func (rm *resourceManager) sdkUpdate(
+	ctx context.Context,
+	desired *resource,
+	latest *resource,
+	delta *ackcompare.Delta,
+) (updated *resource, err error) {
+	rlog := ackrtlog.FromContext(ctx)
+	exit := rlog.Trace("rm.sdkUpdate")
+	defer func() {
+		exit(err) // runs on every return path with the final value of the named result err
+	}()
+	if err = rm.requeueUntilCanModify(ctx, latest); err != nil { // helper defined elsewhere; any error aborts before an API call is made
+		return nil, err
+	}
+
+	if err = rm.customUpdateInferenceComponentPreChecks(ctx, desired, latest, delta); err != nil { // custom hook defined elsewhere
+		return nil, err
+	}
+
+	input, err := rm.newUpdateRequestPayload(ctx, desired, delta)
+	if err != nil {
+		return nil, err
+	}
+
+	var resp *svcsdk.UpdateInferenceComponentOutput
+	_ = resp // no-op; resp is populated by the call on the next line
+	resp, err = rm.sdkapi.UpdateInferenceComponentWithContext(ctx, input)
+	rm.metrics.RecordAPICall("UPDATE", "UpdateInferenceComponent", err) // recorded before the error check, so failures are counted too
+	if err != nil {
+		return nil, err
+	}
+	// Work on a deep copy of the desired object so the caller's resource is
+	// left untouched; only the ARN from the Update response is merged in.
+	ko := desired.ko.DeepCopy()
+
+	if ko.Status.ACKResourceMetadata == nil {
+		ko.Status.ACKResourceMetadata = &ackv1alpha1.ResourceMetadata{}
+	}
+	if resp.InferenceComponentArn != nil {
+		arn := ackv1alpha1.AWSResourceName(*resp.InferenceComponentArn)
+		ko.Status.ACKResourceMetadata.ARN = &arn
+	}
+
+	rm.setStatusDefaults(ko)
+	if err = rm.customUpdateInferenceComponentSetOutput(ko); err != nil { // custom hook defined elsewhere; may mutate ko
+		return nil, err
+	}
+	return &resource{ko}, nil
+}
+
+// newUpdateRequestPayload builds the UpdateInferenceComponentInput from r's
+// Spec; the ctx and delta arguments are not read in this function body.
+func (rm *resourceManager) newUpdateRequestPayload(
+	ctx context.Context,
+	r *resource,
+	delta *ackcompare.Delta,
+) (*svcsdk.UpdateInferenceComponentInput, error) {
+	res := &svcsdk.UpdateInferenceComponentInput{}
+
+	if r.ko.Spec.InferenceComponentName != nil {
+		res.SetInferenceComponentName(*r.ko.Spec.InferenceComponentName)
+	}
+	if r.ko.Spec.RuntimeConfig != nil {
+		f1 := &svcsdk.InferenceComponentRuntimeConfig{}
+		if r.ko.Spec.RuntimeConfig.CopyCount != nil {
+			f1.SetCopyCount(*r.ko.Spec.RuntimeConfig.CopyCount)
+		}
+		res.SetRuntimeConfig(f1) // set even when CopyCount is nil, as long as RuntimeConfig itself is present
+	}
+	if r.ko.Spec.Specification != nil {
+		f2 := &svcsdk.InferenceComponentSpecification{}
+		if r.ko.Spec.Specification.ComputeResourceRequirements != nil {
+			f2f0 := &svcsdk.InferenceComponentComputeResourceRequirements{}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb != nil {
+				f2f0.SetMaxMemoryRequiredInMb(*r.ko.Spec.Specification.ComputeResourceRequirements.MaxMemoryRequiredInMb)
+			}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb != nil {
+				f2f0.SetMinMemoryRequiredInMb(*r.ko.Spec.Specification.ComputeResourceRequirements.MinMemoryRequiredInMb)
+			}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired != nil {
+				f2f0.SetNumberOfAcceleratorDevicesRequired(*r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfAcceleratorDevicesRequired)
+			}
+			if r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired != nil {
+				f2f0.SetNumberOfCpuCoresRequired(*r.ko.Spec.Specification.ComputeResourceRequirements.NumberOfCPUCoresRequired)
+			}
+			f2.SetComputeResourceRequirements(f2f0)
+		}
+		if r.ko.Spec.Specification.Container != nil {
+			f2f1 := &svcsdk.InferenceComponentContainerSpecification{}
+			if r.ko.Spec.Specification.Container.ArtifactURL != nil {
+				f2f1.SetArtifactUrl(*r.ko.Spec.Specification.Container.ArtifactURL)
+			}
+			if r.ko.Spec.Specification.Container.Environment != nil {
+				f2f1f1 := map[string]*string{}
+				for f2f1f1key, f2f1f1valiter := range r.ko.Spec.Specification.Container.Environment {
+					var f2f1f1val string
+					f2f1f1val = *f2f1f1valiter // NOTE(review): dereferenced without a nil check; a nil map value would panic here
+					f2f1f1[f2f1f1key] = &f2f1f1val // points at the per-iteration copy, not at the Spec's own string
+				}
+				f2f1.SetEnvironment(f2f1f1)
+			}
+			if r.ko.Spec.Specification.Container.Image != nil {
+				f2f1.SetImage(*r.ko.Spec.Specification.Container.Image)
+			}
+			f2.SetContainer(f2f1)
+		}
+		if r.ko.Spec.Specification.ModelName != nil {
+			f2.SetModelName(*r.ko.Spec.Specification.ModelName)
+		}
+		if r.ko.Spec.Specification.StartupParameters != nil {
+			f2f3 := &svcsdk.InferenceComponentStartupParameters{}
+			if r.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds != nil {
+				f2f3.SetContainerStartupHealthCheckTimeoutInSeconds(*r.ko.Spec.Specification.StartupParameters.ContainerStartupHealthCheckTimeoutInSeconds)
+			}
+			if r.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds != nil {
+				f2f3.SetModelDataDownloadTimeoutInSeconds(*r.ko.Spec.Specification.StartupParameters.ModelDataDownloadTimeoutInSeconds)
+			}
+			f2.SetStartupParameters(f2f3)
+		}
+		res.SetSpecification(f2)
+	}
+
+	return res, nil // the error result is always nil in this function
+}
+
+// sdkDelete deletes the supplied resource in the backend AWS service API
+func (rm *resourceManager) sdkDelete(
+	ctx context.Context,
+	r *resource,
+) (latest *resource, err error) {
+	rlog := ackrtlog.FromContext(ctx)
+	exit := rlog.Trace("rm.sdkDelete")
+	defer func() {
+		exit(err) // runs on every return path with the final value of the named result err
+	}()
+	if err = rm.requeueUntilCanModify(ctx, r); err != nil {
+		return r, err // unlike the other early exits, this one hands r back alongside the error
+	}
+
+	input, err := rm.newDeleteRequestPayload(r)
+	if err != nil {
+		return nil, err
+	}
+	var resp *svcsdk.DeleteInferenceComponentOutput
+	_ = resp // no-op; resp is assigned below and not read afterwards in this function
+	resp, err = rm.sdkapi.DeleteInferenceComponentWithContext(ctx, input)
+	rm.metrics.RecordAPICall("DELETE", "DeleteInferenceComponent", err) // recorded for success and failure alike
+
+	if err == nil {
+		if observed, err := rm.sdkFind(ctx, r); err != ackerr.NotFound { // this err is scoped to the if statement and shadows the function's err
+			if err != nil {
+				return nil, err
+			}
+			r.SetStatus(observed)
+			return r, requeueWaitWhileDeleting // resource is still found after the delete call; value defined elsewhere
+		}
+	}
+
+	return nil, err // the Delete call's error, or nil when sdkFind reported NotFound
+}
+
+// newDeleteRequestPayload builds the DeleteInferenceComponentInput; only
+// Spec.InferenceComponentName is copied, and only when it is non-nil
+func (rm *resourceManager) newDeleteRequestPayload(
+	r *resource,
+) (*svcsdk.DeleteInferenceComponentInput, error) {
+	res := &svcsdk.DeleteInferenceComponentInput{}
+
+	if r.ko.Spec.InferenceComponentName != nil {
+		res.SetInferenceComponentName(*r.ko.Spec.InferenceComponentName)
+	}
+
+	return res, nil // the error result is always nil in this function
+}
+
+// setStatusDefaults fills nil metadata, region, owner account and conditions on ko
+func (rm *resourceManager) setStatusDefaults(
+	ko *svcapitypes.InferenceComponent,
+) {
+	if ko.Status.ACKResourceMetadata == nil {
+		ko.Status.ACKResourceMetadata = &ackv1alpha1.ResourceMetadata{}
+	}
+	if ko.Status.ACKResourceMetadata.Region == nil {
+		ko.Status.ACKResourceMetadata.Region = &rm.awsRegion // stores a pointer to the manager's own field, not a copy
+	}
+	if ko.Status.ACKResourceMetadata.OwnerAccountID == nil {
+		ko.Status.ACKResourceMetadata.OwnerAccountID = &rm.awsAccountID // stores a pointer to the manager's own field, not a copy
+	}
+	if ko.Status.Conditions == nil {
+		ko.Status.Conditions = []*ackv1alpha1.Condition{}
+	}
+}
+
+// updateConditions returns (updated copy, true) when a Terminal, Recoverable or
+// ResourceSynced condition exists or the custom hook reports a change; else (nil, false)
+func (rm *resourceManager) updateConditions(
+	r *resource,
+	onSuccess bool,
+	err error,
+) (*resource, bool) {
+	ko := r.ko.DeepCopy() // all edits below are made on the copy; r itself is only passed to the custom hook
+	rm.setStatusDefaults(ko)
+
+	// Locate any existing Terminal, Recoverable and ResourceSynced conditions
+	var terminalCondition *ackv1alpha1.Condition = nil
+	var recoverableCondition *ackv1alpha1.Condition = nil
+	var syncCondition *ackv1alpha1.Condition = nil
+	for _, condition := range ko.Status.Conditions {
+		if condition.Type == ackv1alpha1.ConditionTypeTerminal {
+			terminalCondition = condition
+		}
+		if condition.Type == ackv1alpha1.ConditionTypeRecoverable {
+			recoverableCondition = condition
+		}
+		if condition.Type == ackv1alpha1.ConditionTypeResourceSynced {
+			syncCondition = condition
+		}
+	}
+	var termError *ackerr.TerminalError
+	if rm.terminalAWSError(err) || err == ackerr.SecretTypeNotSupported || err == ackerr.SecretNotFound || errors.As(err, &termError) {
+		if terminalCondition == nil {
+			terminalCondition = &ackv1alpha1.Condition{
+				Type: ackv1alpha1.ConditionTypeTerminal,
+			}
+			ko.Status.Conditions = append(ko.Status.Conditions, terminalCondition)
+		}
+		var errorMessage = ""
+		if err == ackerr.SecretTypeNotSupported || err == ackerr.SecretNotFound || errors.As(err, &termError) {
+			errorMessage = err.Error()
+		} else {
+			awsErr, _ := ackerr.AWSError(err) // this branch is only reached when terminalAWSError(err) was true, i.e. AWSError(err) succeeded
+			errorMessage = awsErr.Error()
+		}
+		terminalCondition.Status = corev1.ConditionTrue
+		terminalCondition.Message = &errorMessage
+	} else {
+		// Clear the terminal condition if no longer present
+		if terminalCondition != nil {
+			terminalCondition.Status = corev1.ConditionFalse
+			terminalCondition.Message = nil
+		}
+		// Handling Recoverable Conditions
+		if err != nil {
+			if recoverableCondition == nil {
+				// Add a new Condition containing a non-terminal error
+				recoverableCondition = &ackv1alpha1.Condition{
+					Type: ackv1alpha1.ConditionTypeRecoverable,
+				}
+				ko.Status.Conditions = append(ko.Status.Conditions, recoverableCondition)
+			}
+			recoverableCondition.Status = corev1.ConditionTrue
+			awsErr, _ := ackerr.AWSError(err)
+			errorMessage := err.Error() // default message; replaced below when err is an AWS error
+			if awsErr != nil {
+				errorMessage = awsErr.Error()
+			}
+			recoverableCondition.Message = &errorMessage
+		} else if recoverableCondition != nil {
+			recoverableCondition.Status = corev1.ConditionFalse
+			recoverableCondition.Message = nil
+		}
+	}
+	if syncCondition == nil && onSuccess { // an existing ResourceSynced condition is left as found here
+		syncCondition = &ackv1alpha1.Condition{
+			Type:   ackv1alpha1.ConditionTypeResourceSynced,
+			Status: corev1.ConditionTrue,
+		}
+		ko.Status.Conditions = append(ko.Status.Conditions, syncCondition)
+	}
+	// custom update conditions
+	customUpdate := rm.CustomUpdateConditions(ko, r, err)
+	if terminalCondition != nil || recoverableCondition != nil || syncCondition != nil || customUpdate {
+		return &resource{ko}, true // updated
+	}
+	return nil, false // not updated
+}
+
+// terminalAWSError returns true if the supplied error is an aws Error type
+// and its error code is one of the terminal codes listed in the switch below.
+// 'Terminal' exceptions are specified in generator configuration
+func (rm *resourceManager) terminalAWSError(err error) bool {
+	if err == nil {
+		return false
+	}
+	awsErr, ok := ackerr.AWSError(err)
+	if !ok {
+		return false
+	}
+	switch awsErr.Code() {
+	case "InvalidParameterCombination",
+		"InvalidParameterValue",
+		"MissingParameter",
+		"InferenceComponentUpdateError":
+		return true
+	default:
+		return false
+	}
+}
diff --git a/pkg/resource/inference_component/tags.go b/pkg/resource/inference_component/tags.go
new file mode 100644
index 00000000..3a26853d
--- /dev/null
+++ b/pkg/resource/inference_component/tags.go
@@ -0,0 +1,63 @@
+// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+//     http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+// Code generated by ack-generate. DO NOT EDIT.
+
+package inference_component
+
+import (
+	acktags "github.com/aws-controllers-k8s/runtime/pkg/tags"
+
+	svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1"
+)
+
+var (
+	_ = svcapitypes.InferenceComponent{}
+	_ = acktags.NewTags()
+)
+
+// ToACKTags converts the tags parameter into 'acktags.Tags' shape.
+// This method helps in creating the hub(acktags.Tags) for merging
+// default controller tags with existing resource tags.
+func ToACKTags(tags []*svcapitypes.Tag) acktags.Tags {
+	result := acktags.NewTags()
+	if tags == nil || len(tags) == 0 { // len of a nil slice is 0, so the nil test is redundant but harmless
+		return result
+	}
+
+	for _, t := range tags {
+		if t.Key != nil { // tags without a key are skipped; NOTE(review): a nil element t would panic on this access
+			if t.Value == nil {
+				result[*t.Key] = "" // a missing value is stored as the empty string
+			} else {
+				result[*t.Key] = *t.Value // a later tag with the same key overwrites an earlier one
+			}
+		}
+	}
+
+	return result
+}
+
+// FromACKTags converts the tags parameter into []*svcapitypes.Tag shape.
+// This method helps in setting the tags back inside AWSResource after merging
+// default controller tags with existing resource tags.
+func FromACKTags(tags acktags.Tags) []*svcapitypes.Tag {
+	result := []*svcapitypes.Tag{} // non-nil even when tags is empty
+	for k, v := range tags { // NOTE(review): if acktags.Tags is a map, the order of the result is unspecified — confirm callers do not rely on it
+		kCopy := k // per-iteration copies, so each Tag points at its own storage rather than at k and v
+		vCopy := v
+		tag := svcapitypes.Tag{Key: &kCopy, Value: &vCopy}
+		result = append(result, &tag)
+	}
+	return result
+}
diff --git a/pkg/resource/inference_component/testdata/sdkapi/create/create_success.json b/pkg/resource/inference_component/testdata/sdkapi/create/create_success.json
new file mode 100644
index 00000000..fc0eb836
--- /dev/null
+++ b/pkg/resource/inference_component/testdata/sdkapi/create/create_success.json
@@ -0,0 +1,3 @@
+{
+    "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component"
+}
\ No newline at end of file
diff --git a/pkg/resource/inference_component/testdata/sdkapi/describe/creating_after_create.json b/pkg/resource/inference_component/testdata/sdkapi/describe/creating_after_create.json
new file mode 100644
index 00000000..2a626040
--- /dev/null
+++ b/pkg/resource/inference_component/testdata/sdkapi/describe/creating_after_create.json
@@ -0,0 +1,23 @@
+{
+    "CreationTime": "0001-01-01T00:00:00.109Z",
+    "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component",
+    "InferenceComponentName": "xgboost-inference-component",
+    "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/xgboost-endpoint",
+    "EndpointName": "xgboost-endpoint",
+    "VariantName": "variant-1",
+    "Specification": {
+        "ModelName": "model-1",
+        "ComputeResourceRequirements": {
+            "NumberOfCpuCoresRequired": 4.0,
+            "NumberOfAcceleratorDevicesRequired": 4.0,
+            "MinMemoryRequiredInMb": 1024
+        }
+    },
+    "RuntimeConfig": {
+        "DesiredCopyCount": 1,
+        "CurrentCopyCount": 1
+    },
+    "LastModifiedTime": "0001-01-01T00:00:00.109Z",
+    "InferenceComponentStatus": "Creating"
+}
+ 
\ No newline at end of
file diff --git a/pkg/resource/inference_component/testdata/sdkapi/describe/deleting.json b/pkg/resource/inference_component/testdata/sdkapi/describe/deleting.json new file mode 100644 index 00000000..c39fb6a8 --- /dev/null +++ b/pkg/resource/inference_component/testdata/sdkapi/describe/deleting.json @@ -0,0 +1,23 @@ +{ + "CreationTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component", + "InferenceComponentName": "xgboost-inference-component", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/xgboost-endpoint", + "EndpointName": "xgboost-endpoint", + "VariantName": "variant-1", + "Specification": { + "ModelName": "model-1", + "ComputeResourceRequirements": { + "NumberOfCpuCoresRequired": 4.0, + "NumberOfAcceleratorDevicesRequired": 4.0, + "MinMemoryRequiredInMb": 1024 + } + }, + "RuntimeConfig": { + "DesiredCopyCount": 1, + "CurrentCopyCount": 1 + }, + "LastModifiedTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentStatus": "Deleting" +} + \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/sdkapi/describe/failed_after_create.json b/pkg/resource/inference_component/testdata/sdkapi/describe/failed_after_create.json new file mode 100644 index 00000000..0c888a20 --- /dev/null +++ b/pkg/resource/inference_component/testdata/sdkapi/describe/failed_after_create.json @@ -0,0 +1,24 @@ +{ + "CreationTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component", + "InferenceComponentName": "xgboost-inference-component", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/xgboost-endpoint", + "EndpointName": "xgboost-endpoint", + "VariantName": "variant-1", + "Specification": { + "ModelName": "model-1", + "ComputeResourceRequirements": { + "NumberOfCpuCoresRequired": 4.0, + "NumberOfAcceleratorDevicesRequired": 4.0, + 
"MinMemoryRequiredInMb": 1024 + } + }, + "RuntimeConfig": { + "DesiredCopyCount": 1, + "CurrentCopyCount": 1 + }, + "LastModifiedTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentStatus": "Failed", + "FailureReason": "Random failure - Failed to download the model from the container URL." +} + \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/sdkapi/describe/inservice_no_failure_reason.json b/pkg/resource/inference_component/testdata/sdkapi/describe/inservice_no_failure_reason.json new file mode 100644 index 00000000..f5d1af9b --- /dev/null +++ b/pkg/resource/inference_component/testdata/sdkapi/describe/inservice_no_failure_reason.json @@ -0,0 +1,23 @@ +{ + "CreationTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component", + "InferenceComponentName": "xgboost-inference-component", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/xgboost-endpoint", + "EndpointName": "xgboost-endpoint", + "VariantName": "variant-1", + "Specification": { + "ModelName": "model-1", + "ComputeResourceRequirements": { + "NumberOfCpuCoresRequired": 4.0, + "NumberOfAcceleratorDevicesRequired": 4.0, + "MinMemoryRequiredInMb": 1024 + } + }, + "RuntimeConfig": { + "DesiredCopyCount": 1, + "CurrentCopyCount": 1 + }, + "LastModifiedTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentStatus": "InService" +} + \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/sdkapi/describe/updating.json b/pkg/resource/inference_component/testdata/sdkapi/describe/updating.json new file mode 100644 index 00000000..d5360745 --- /dev/null +++ b/pkg/resource/inference_component/testdata/sdkapi/describe/updating.json @@ -0,0 +1,22 @@ +{ + "CreationTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component", + "InferenceComponentName": 
"xgboost-inference-component", + "EndpointArn": "arn:aws:sagemaker:us-west-2:123456789012:endpoint/xgboost-endpoint", + "EndpointName": "xgboost-endpoint", + "VariantName": "variant-1", + "Specification": { + "ModelName": "model-1", + "ComputeResourceRequirements": { + "NumberOfCpuCoresRequired": 4.0, + "NumberOfAcceleratorDevicesRequired": 4.0, + "MinMemoryRequiredInMb": 1024 + } + }, + "RuntimeConfig": { + "DesiredCopyCount": 1, + "CurrentCopyCount": 1 + }, + "LastModifiedTime": "0001-01-01T00:00:00.109Z", + "InferenceComponentStatus": "Updating" +} \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/sdkapi/update/update_success.json b/pkg/resource/inference_component/testdata/sdkapi/update/update_success.json new file mode 100644 index 00000000..b6b39997 --- /dev/null +++ b/pkg/resource/inference_component/testdata/sdkapi/update/update_success.json @@ -0,0 +1,3 @@ +{ + "InferenceComponentArn": "arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component" +} \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/test_suite.yaml b/pkg/resource/inference_component/testdata/test_suite.yaml new file mode 100644 index 00000000..fbdb2be2 --- /dev/null +++ b/pkg/resource/inference_component/testdata/test_suite.yaml @@ -0,0 +1,195 @@ +tests: + - name: "Inference component create tests" + description: "Testing create operation" + scenarios: + - name: "Create=InvalidInput" + description: "Given one of the parameters is invalid, ko.Status shows a terminal condition" + given: + desired_state: "v1alpha1/create/desired/invalid_before_create.yaml" + svc_api: + - operation: CreateInferenceComponentWithContext + error: + code: InvalidParameterValue + message: "The inference component name must not include a special character." 
+ invoke: Create + expect: + latest_state: "v1alpha1/create/observed/invalid_create_attempted.yaml" + error: resource is in terminal condition + - name: "Create=Valid" + description: "Create a new inference component successfully (ARN in status, inferenceComponentStatus: InProgress)." + given: + desired_state: "v1alpha1/create/desired/success_before_create.yaml" + svc_api: + - operation: CreateInferenceComponentWithContext + output_fixture: "sdkapi/create/create_success.json" + invoke: Create + expect: + latest_state: "v1alpha1/create/observed/success_after_create.yaml" + - name: "Inference component readOne tests" + description: "Testing the readOne operation" + scenarios: + - name: "ReadOne=MissingRequiredField" + description: "Testing readOne when required field is missing. No API call is made and returns error." + given: + desired_state: "v1alpha1/readone/desired/missing_required_field.yaml" + invoke: ReadOne + expect: + error: "resource not found" + - name: "ReadOne=NotFound" + description: "Testing readOne when Describe fails to find the resource on SageMaker" + given: + desired_state: "v1alpha1/readone/desired/right_after_create.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + error: + code: ValidationException + message: "Could not find inference component arn:aws:sagemaker:us-west-2:123456789012:endpoint/xgboost-endpoint" + invoke: ReadOne + expect: + error: "resource not found" + - name: "ReadOne=Fail" + description: "This test checks if the condition is updated if describe fails and readOne returns error" + given: + desired_state: "v1alpha1/readone/desired/right_after_create.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + error: + code: ServiceUnavailable + message: "Server is down" + invoke: ReadOne + expect: + latest_state: "v1alpha1/readone/observed/error_on_describe.yaml" + error: "ServiceUnavailable: Server is down\n\tstatus code: 0, request id: " + - name: "ReadOne=AfterCreate" + description: "Testing 
readOne right after create, the status should be in Creating with resource synced being false." + given: + desired_state: "v1alpha1/readone/desired/right_after_create.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + output_fixture: "sdkapi/describe/creating_after_create.json" + invoke: ReadOne + expect: + latest_state: "v1alpha1/readone/observed/creating_after_describe.yaml" + - name: "ReadOne=LateInitialize" + description: "Testing late initialize after created, should expect no diff since there is nothing to late initialize" + given: + desired_state: "v1alpha1/readone/observed/creating_after_describe.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + output_fixture: "sdkapi/describe/creating_after_create.json" + invoke: LateInitialize + expect: + latest_state: "v1alpha1/readone/observed/creating_after_describe.yaml" + - name: "ReadOne=InService" + description: "Testing readOne when InService, resource synced should be true." + given: + desired_state: "v1alpha1/readone/observed/creating_after_describe.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + output_fixture: "sdkapi/describe/inservice_no_failure_reason.json" + invoke: ReadOne + expect: + latest_state: "v1alpha1/readone/observed/inservice_no_failure_after_describe.yaml" + - name: "ReadOne=AfterUpdate" + description: "Testing readOne after update, the status should be in Updating with resource synced being false." + given: + desired_state: "v1alpha1/readone/desired/after_update_apicall_success.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + output_fixture: "sdkapi/describe/updating.json" + invoke: ReadOne + expect: + latest_state: "v1alpha1/readone/observed/updating_on_describe.yaml" + - name: "ReadOne=FailedStatus" + description: "Testing how readOne handles the Failed Status, it should set a terminal condition." 
+ given: + desired_state: "v1alpha1/readone/desired/failed_right_after_create.yaml" + svc_api: + - operation: DescribeInferenceComponentWithContext + output_fixture: "sdkapi/describe/failed_after_create.json" + invoke: ReadOne + expect: + latest_state: "v1alpha1/readone/observed/failed_status_on_describe.yaml" +# TODO: +# - name: "ReadOne=UpdateDeploymentConfig" +# description: "Testing how readOne handles the Update with the deployment Config." +# given: +# desired_state: "v1alpha1/readone/desired/updating_deployment.yaml" +# svc_api: +# - operation: DescribeInferenceComponentWithContext +# output_fixture: "sdkapi/describe/update_deployment_config.json" +# invoke: ReadOne +# expect: +# latest_state: "v1alpha1/readone/observed/updating_deployment.yaml" + - name: "Inference Component update tests" + description: "Testing the Update operation" + scenarios: + - name: "Update=StatusUpdating" + description: "This test checks if the Inference Component requeues while in Updating state." + given: + desired_state: "v1alpha1/update/desired/update_common.yaml" + latest_state: "v1alpha1/update/desired/latest_updating.yaml" + invoke: Update + expect: + error: "InferenceComponent in Updating state cannot be modified or deleted." 
+ - name: "Update=Fail" + description: "This test checks if the recoverable condition is updated if update fails and returns error" + given: + desired_state: "v1alpha1/update/desired/update_common.yaml" + latest_state: "v1alpha1/update/desired/latest_inservice_pre_update.yaml" + svc_api: + - operation: UpdateInferenceComponentWithContext + error: + code: ServiceUnavailable + message: "Server is down" + invoke: Update + expect: + latest_state: "v1alpha1/update/observed/error_on_update.yaml" + error: "ServiceUnavailable: Server is down\n\tstatus code: 0, request id: " + - name: "Update=InService" + description: "This test checks if update Inference Component is called" + given: + desired_state: "v1alpha1/update/desired/update_common.yaml" + latest_state: "v1alpha1/update/desired/latest_inservice_pre_update.yaml" + svc_api: + - operation: UpdateInferenceComponentWithContext + output_fixture: "sdkapi/update/update_success.json" + invoke: Update + expect: + latest_state: "v1alpha1/update/observed/update_attempted_success.yaml" + - name: "Inference component delete tests" + description: "Testing the delete operation" + scenarios: + - name: "Delete=RequeueOnCreating" + description: "This test checks if the controller requeues if inference component is in + creating status" + given: + desired_state: "v1alpha1/delete/desired/creating_before_delete.yaml" + invoke: Delete + expect: + error: "InferenceComponent in Creating state cannot be modified or deleted." 
+ - name: "Delete=Fail" + description: "This test checks if the condition is updated if delete fails and returns error" + given: + desired_state: "v1alpha1/delete/desired/inservice_before_delete.yaml" + svc_api: + - operation: DeleteInferenceComponentWithContext + error: + code: ServiceUnavailable + message: "Server is down" + invoke: Delete + expect: + latest_state: "v1alpha1/delete/observed/error_on_delete.yaml" + error: "ServiceUnavailable: Server is down\n\tstatus code: 0, request id: " + - name: "Delete=Successful" + description: "This test checks if the Inference Component is deleted successfully if its InService" + given: + desired_state: "v1alpha1/delete/desired/inservice_before_delete.yaml" + svc_api: + - operation: DeleteInferenceComponentWithContext + - operation: DescribeInferenceComponentWithContext + output_fixture: "sdkapi/describe/deleting.json" + invoke: Delete + expect: + latest_state: "v1alpha1/delete/observed/deleting_no_error.yaml" + error: "InferenceComponent is Deleting." 
\ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/v1alpha1/create/desired/invalid_before_create.yaml b/pkg/resource/inference_component/testdata/v1alpha1/create/desired/invalid_before_create.yaml new file mode 100644 index 00000000..775641c5 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/create/desired/invalid_before_create.yaml @@ -0,0 +1,23 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: intentionally@invalid-name + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/v1alpha1/create/desired/success_before_create.yaml b/pkg/resource/inference_component/testdata/v1alpha1/create/desired/success_before_create.yaml new file mode 100644 index 00000000..19d1375f --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/create/desired/success_before_create.yaml @@ -0,0 +1,23 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user \ No newline at end of file diff --git 
a/pkg/resource/inference_component/testdata/v1alpha1/create/observed/invalid_create_attempted.yaml b/pkg/resource/inference_component/testdata/v1alpha1/create/observed/invalid_create_attempted.yaml new file mode 100644 index 00000000..b0ea1366 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/create/observed/invalid_create_attempted.yaml @@ -0,0 +1,32 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: intentionally@invalid-name + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + ownerAccountID: "" + region: "" + conditions: + - message: "InvalidParameterValue: The inference component name must not include a special + character.\n\tstatus code: 0, request id: " + status: "True" + type: ACK.Terminal diff --git a/pkg/resource/inference_component/testdata/v1alpha1/create/observed/success_after_create.yaml b/pkg/resource/inference_component/testdata/v1alpha1/create/observed/success_after_create.yaml new file mode 100644 index 00000000..0b80de95 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/create/observed/success_after_create.yaml @@ -0,0 +1,31 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + 
runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - status: "True" + type: ACK.ResourceSynced diff --git a/pkg/resource/inference_component/testdata/v1alpha1/delete/desired/creating_before_delete.yaml b/pkg/resource/inference_component/testdata/v1alpha1/delete/desired/creating_before_delete.yaml new file mode 100644 index 00000000..20c3a168 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/delete/desired/creating_before_delete.yaml @@ -0,0 +1,36 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Creating status. 
+ status: "False" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Creating + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/delete/desired/inservice_before_delete.yaml b/pkg/resource/inference_component/testdata/v1alpha1/delete/desired/inservice_before_delete.yaml new file mode 100644 index 00000000..00983b44 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/delete/desired/inservice_before_delete.yaml @@ -0,0 +1,36 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/delete/observed/deleting_no_error.yaml b/pkg/resource/inference_component/testdata/v1alpha1/delete/observed/deleting_no_error.yaml new file mode 100644 index 00000000..a66f434d --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/delete/observed/deleting_no_error.yaml @@ -0,0 +1,39 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Deleting status. + status: "False" + type: ACK.ResourceSynced + - message: InferenceComponent is Deleting. 
+ status: "True" + type: ACK.Recoverable + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Deleting + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/delete/observed/error_on_delete.yaml b/pkg/resource/inference_component/testdata/v1alpha1/delete/observed/error_on_delete.yaml new file mode 100644 index 00000000..94394642 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/delete/observed/error_on_delete.yaml @@ -0,0 +1,39 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: "ServiceUnavailable: Server is down\n\tstatus code: 0, request id: " + status: "True" + type: ACK.Recoverable + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/after_update_apicall_success.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/after_update_apicall_success.yaml new file mode 100644 index 00000000..6756e9c0 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/after_update_apicall_success.yaml @@ -0,0 +1,36 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00Z" \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/failed_right_after_create.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/failed_right_after_create.yaml new file mode 100644 index 00000000..40aad532 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/failed_right_after_create.yaml @@ -0,0 +1,32 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/missing_required_field.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/missing_required_field.yaml new file mode 100644 index 00000000..a94b99c0 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/missing_required_field.yaml @@ -0,0 +1,6 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + endpointName: xgboost-endpoint diff --git 
a/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/right_after_create.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/right_after_create.yaml new file mode 100644 index 00000000..40aad532 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/desired/right_after_create.yaml @@ -0,0 +1,32 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" \ No newline at end of file diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/creating_after_describe.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/creating_after_describe.yaml new file mode 100644 index 00000000..20c3a168 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/creating_after_describe.yaml @@ -0,0 +1,36 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + 
minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Creating status. + status: "False" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Creating + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/error_on_describe.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/error_on_describe.yaml new file mode 100644 index 00000000..a860de63 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/error_on_describe.yaml @@ -0,0 +1,35 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - status: "True" + type: ACK.ResourceSynced + - message: "ServiceUnavailable: Server is down\n\tstatus code: 0, request id: " + status: "True" + type: ACK.Recoverable + creationTime: null diff --git 
a/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/failed_status_on_describe.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/failed_status_on_describe.yaml new file mode 100644 index 00000000..8bfe7e25 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/failed_status_on_describe.yaml @@ -0,0 +1,41 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Failed status. + status: "True" + type: ACK.ResourceSynced + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: 'InferenceComponent status reached terminal state: Failed. Check the FailureReason.' + status: "True" + type: ACK.Terminal + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Failed + failureReason: 'Random failure - Failed to download the model from the container URL.' 
+ lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/inservice_no_failure_after_describe.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/inservice_no_failure_after_describe.yaml new file mode 100644 index 00000000..00983b44 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/inservice_no_failure_after_describe.yaml @@ -0,0 +1,36 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/updating_on_describe.yaml b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/updating_on_describe.yaml new file mode 100644 index 00000000..9779fdcd --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/readone/observed/updating_on_describe.yaml @@ -0,0 +1,36 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Updating status. 
+ status: "False" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Updating + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_failed.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_failed.yaml new file mode 100644 index 00000000..a82b1095 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_failed.yaml @@ -0,0 +1,34 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00.109Z" + message: InferenceComponent is in Failed status. + status: "True" + type: ACK.ResourceSynced + - lastTransitionTime: "0001-01-01T00:00:00.109Z" + message: 'InferenceComponent status reached terminal state: Failed. Check the FailureReason.' + status: "True" + type: ACK.Terminal + creationTime: "0001-01-01T00:00:00.109Z" + inferenceComponentStatus: Failed + failureReason: 'Random failure - Failed to download the model from the container URL.' 
+ lastModifiedTime: "0001-01-01T00:00:00.109Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_inservice_pre_update.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_inservice_pre_update.yaml new file mode 100644 index 00000000..6d454cb1 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_inservice_pre_update.yaml @@ -0,0 +1,29 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00.109Z" + message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00.109Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00.109Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_updating.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_updating.yaml new file mode 100644 index 00000000..d6bb29b4 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/latest_updating.yaml @@ -0,0 +1,29 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Updating status. 
+ status: "False" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Updating + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/desired/update_common.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/update_common.yaml new file mode 100644 index 00000000..66be0c58 --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/desired/update_common.yaml @@ -0,0 +1,29 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1-updated + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00.109Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00.109Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/observed/error_on_update.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/observed/error_on_update.yaml new file mode 100644 index 00000000..601394eb --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/observed/error_on_update.yaml @@ -0,0 +1,32 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in InService status. 
+ status: "True" + type: ACK.ResourceSynced + - message: "ServiceUnavailable: Server is down\n\tstatus code: 0, request id: " + status: "True" + type: ACK.Recoverable + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: InService + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/observed/no_update_on_failed.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/observed/no_update_on_failed.yaml new file mode 100644 index 00000000..5ebcb9da --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/observed/no_update_on_failed.yaml @@ -0,0 +1,34 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1 + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: 'Unable to update inference component. check FailureReason.' + status: "False" + type: ACK.ResourceSynced + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: 'Unable to update inference component. check FailureReason.' + status: "True" + type: ACK.Terminal + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Failed + failureReason: 'Random failure - Failed to download the model from the container URL.' 
+ lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/pkg/resource/inference_component/testdata/v1alpha1/update/observed/update_attempted_success.yaml b/pkg/resource/inference_component/testdata/v1alpha1/update/observed/update_attempted_success.yaml new file mode 100644 index 00000000..925bfdbf --- /dev/null +++ b/pkg/resource/inference_component/testdata/v1alpha1/update/observed/update_attempted_success.yaml @@ -0,0 +1,29 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: xgboost-inference-component +spec: + inferenceComponentName: xgboost-inference-component + endpointName: xgboost-endpoint + variantName: variant-1 + specification: + modelName: model-1-updated + computeResourceRequirements: + numberOfAcceleratorDevicesRequired: 4 + numberOfCPUCoresRequired: 4 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 +status: + ackResourceMetadata: + arn: arn:aws:sagemaker:us-west-2:123456789012:inference-component/xgboost-inference-component + ownerAccountID: "" + region: "" + conditions: + - lastTransitionTime: "0001-01-01T00:00:00Z" + message: InferenceComponent is in Updating status. 
+ status: "False" + type: ACK.ResourceSynced + creationTime: "0001-01-01T00:00:00Z" + inferenceComponentStatus: Updating + lastModifiedTime: "0001-01-01T00:00:00Z" diff --git a/templates/inference_component/sdk_read_one_post_set_output.go.tpl b/templates/inference_component/sdk_read_one_post_set_output.go.tpl new file mode 100644 index 00000000..45d6029d --- /dev/null +++ b/templates/inference_component/sdk_read_one_post_set_output.go.tpl @@ -0,0 +1,6 @@ + // Manually set the RuntimeConfig.CopyCount from read response RuntimeConfig.DesiredCopyCount + if resp.RuntimeConfig != nil && ko.Spec.RuntimeConfig != nil { + ko.Spec.RuntimeConfig.CopyCount = resp.RuntimeConfig.DesiredCopyCount + } + + rm.customDescribeInferenceComponentSetOutput(ko) diff --git a/templates/inference_component/sdk_update_post_set_output.go.tpl b/templates/inference_component/sdk_update_post_set_output.go.tpl new file mode 100644 index 00000000..ed5c51dd --- /dev/null +++ b/templates/inference_component/sdk_update_post_set_output.go.tpl @@ -0,0 +1,3 @@ + if err = rm.customUpdateInferenceComponentSetOutput(ko); err != nil { + return nil, err + } \ No newline at end of file diff --git a/templates/inference_component/sdk_update_pre_build_request.go.tpl b/templates/inference_component/sdk_update_pre_build_request.go.tpl new file mode 100644 index 00000000..82cf2c57 --- /dev/null +++ b/templates/inference_component/sdk_update_pre_build_request.go.tpl @@ -0,0 +1,7 @@ + if err = rm.requeueUntilCanModify(ctx, latest); err != nil { + return nil, err + } + + if err = rm.customUpdateInferenceComponentPreChecks(ctx, desired, latest, delta); err != nil { + return nil, err + } diff --git a/test/e2e/__init__.py b/test/e2e/__init__.py index 08942cba..5b221db2 100644 --- a/test/e2e/__init__.py +++ b/test/e2e/__init__.py @@ -167,6 +167,56 @@ def assert_endpoint_status_in_sync(endpoint_name, reference, expected_status): ) +def get_inference_component_sagemaker_status(inference_component_name): + response = 
sagemaker_client().describe_inference_component( + InferenceComponentName=inference_component_name) + return response["InferenceComponentStatus"] + + +def get_inference_component_resource_status(reference: k8s.CustomResourceReference): + resource = k8s.get_resource(reference) + assert "inferenceComponentStatus" in resource["status"] + return resource["status"]["inferenceComponentStatus"] + + +def wait_sagemaker_inference_component_status( + endpoint_name, + expected_status: str, + wait_periods: int = 60, + period_length: int = 30, +): + return wait_for_status( + expected_status, + wait_periods, + period_length, + get_inference_component_sagemaker_status, + endpoint_name, + ) + + +def wait_resource_inference_component_status( + reference: k8s.CustomResourceReference, + expected_status: str, + wait_periods: int = 30, + period_length: int = 30, +): + return wait_for_status( + expected_status, + wait_periods, + period_length, + get_inference_component_resource_status, + reference, + ) + + +def assert_inference_component_status_in_sync(inference_component_name, reference, expected_status): + assert ( + wait_sagemaker_inference_component_status(inference_component_name, expected_status) + == wait_resource_inference_component_status(reference, expected_status, 2) + == expected_status + ) + + def get_model_package_sagemaker_status(model_package_arn): response = sagemaker_client().describe_model_package( ModelPackageName=model_package_arn @@ -330,6 +380,16 @@ def get_sagemaker_endpoint(endpoint_name: str): return None +def get_sagemaker_inference_component(inference_component_name: str): + try: + return sagemaker_client().describe_inference_component(InferenceComponentName=inference_component_name) + except botocore.exceptions.ClientError as error: + logging.error( + f"SageMaker could not find an inference component_name with the name {inference_component_name}. 
Error {error}" + ) + return None + + def get_sagemaker_model(model_name: str, sm_client=None): sm_client = sm_client or sagemaker_client() try: diff --git a/test/e2e/common/config.py b/test/e2e/common/config.py index 05d2f1d1..975b3887 100644 --- a/test/e2e/common/config.py +++ b/test/e2e/common/config.py @@ -14,6 +14,7 @@ """ ENDPOINT_CONFIG_RESOURCE_PLURAL = "endpointconfigs" +INFERENCE_COMPONENT_RESOURCE_PLURAL = "inferencecomponents" MODEL_RESOURCE_PLURAL = "models" ENDPOINT_RESOURCE_PLURAL = "endpoints" DATA_QUALITY_JOB_DEFINITION_RESOURCE_PLURAL = "dataqualityjobdefinitions" @@ -33,10 +34,17 @@ ENDPOINT_STATUS_CREATING = "Creating" ENDPOINT_STATUS_UPDATING = "Updating" +INFERENCE_COMPONENT_STATUS_INSERVICE = "InService" +INFERENCE_COMPONENT_STATUS_CREATING = "Creating" +INFERENCE_COMPONENT_STATUS_UPDATING = "Updating" + DELETE_WAIT_PERIOD = 4 DELETE_WAIT_LENGTH = 30 JOB_DELETE_WAIT_PERIODS = 18 JOB_DELETE_WAIT_LENGTH = 30 +INFERENCE_COMPONENT_DELETE_WAIT_PERIODS = 60 +INFERENCE_COMPONENT_DELETE_WAIT_LENGTH = 30 + TAG_DELAY_SLEEP = 20 diff --git a/test/e2e/resources/endpoint_config_inference_component.yaml b/test/e2e/resources/endpoint_config_inference_component.yaml new file mode 100644 index 00000000..6395575d --- /dev/null +++ b/test/e2e/resources/endpoint_config_inference_component.yaml @@ -0,0 +1,18 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: EndpointConfig +metadata: + name: $ENDPOINT_CONFIG_NAME +spec: + endpointConfigName: $ENDPOINT_CONFIG_NAME + executionRoleARN: $SAGEMAKER_EXECUTION_ROLE_ARN + productionVariants: + - variantName: variant-1 + initialInstanceCount: 2 + instanceType: $ENDPOINT_INSTANCE_TYPE + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user \ No newline at end of file diff --git a/test/e2e/resources/inference_component.yaml b/test/e2e/resources/inference_component.yaml new file mode 100644 index 00000000..af95dd69 --- /dev/null +++ 
b/test/e2e/resources/inference_component.yaml @@ -0,0 +1,22 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: InferenceComponent +metadata: + name: $INFERENCE_COMPONENT_NAME +spec: + inferenceComponentName: $INFERENCE_COMPONENT_NAME + endpointName: $ENDPOINT_NAME + variantName: variant-1 + specification: + modelName: $MODEL_NAME + computeResourceRequirements: + numberOfCPUCoresRequired: 1 + minMemoryRequiredInMb: 1024 + runtimeConfig: + copyCount: 1 + tags: + - key: confidentiality + value: public + - key: environment + value: testing + - key: customer + value: test-user \ No newline at end of file diff --git a/test/e2e/resources/xgboost_model_inference_component.yaml b/test/e2e/resources/xgboost_model_inference_component.yaml new file mode 100644 index 00000000..a8291b7a --- /dev/null +++ b/test/e2e/resources/xgboost_model_inference_component.yaml @@ -0,0 +1,20 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: Model +metadata: + name: $MODEL_NAME +spec: + modelName: $MODEL_NAME + primaryContainer: + modelDataURL: s3://$SAGEMAKER_DATA_BUCKET/sagemaker/model/xgboost-mnist-model.tar.gz + image: $XGBOOST_V1_IMAGE_URI + environment: + my_var: my_value + my_var2: my_value2 + executionRoleARN: $SAGEMAKER_EXECUTION_ROLE_ARN + tags: + - key: algorithm + value: xgboost + - key: environment + value: testing + - key: customer + value: test-user \ No newline at end of file diff --git a/test/e2e/resources/xgboost_model_with_model_location_inference_component.yaml b/test/e2e/resources/xgboost_model_with_model_location_inference_component.yaml new file mode 100644 index 00000000..1eadb626 --- /dev/null +++ b/test/e2e/resources/xgboost_model_with_model_location_inference_component.yaml @@ -0,0 +1,20 @@ +apiVersion: sagemaker.services.k8s.aws/v1alpha1 +kind: Model +metadata: + name: $MODEL_NAME +spec: + modelName: $MODEL_NAME + primaryContainer: + modelDataURL: $MODEL_LOCATION + image: $XGBOOST_V1_IMAGE_URI + environment: + my_var: my_value + my_var2: my_value2 + 
executionRoleARN: $SAGEMAKER_EXECUTION_ROLE_ARN + tags: + - key: algorithm + value: xgboost + - key: environment + value: testing + - key: customer + value: test-user \ No newline at end of file diff --git a/test/e2e/tests/test_inference_component.py b/test/e2e/tests/test_inference_component.py new file mode 100644 index 00000000..c051a248 --- /dev/null +++ b/test/e2e/tests/test_inference_component.py @@ -0,0 +1,341 @@ +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may +# not use this file except in compliance with the License. A copy of the +# License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Integration tests for the SageMaker InferenceComponent API. +""" + +import pytest +import logging + +from acktest.aws import s3 +from acktest.resources import random_suffix_name +from acktest.k8s import resource as k8s + +from e2e import ( + service_marker, + create_sagemaker_resource, + delete_custom_resource, + assert_inference_component_status_in_sync, + assert_endpoint_status_in_sync, + assert_tags_in_sync, + get_sagemaker_inference_component, + get_sagemaker_endpoint +) +from e2e.replacement_values import REPLACEMENT_VALUES +from e2e.common import config as cfg + +FAIL_UPDATE_ERROR_MESSAGE = ("InferenceComponentUpdateError: Unable to update inference component. 
" + "Check FailureReason.") + +@pytest.fixture(scope="module") +def name_suffix(): + return random_suffix_name("ic-xgboost", 32) + + +@pytest.fixture(scope="module") +def xgboost_model(name_suffix): + model_resource_name = name_suffix + "-model" + replacements = REPLACEMENT_VALUES.copy() + replacements["MODEL_NAME"] = model_resource_name + + model_reference, model_spec, model_resource = create_sagemaker_resource( + resource_plural=cfg.MODEL_RESOURCE_PLURAL, + resource_name=model_resource_name, + spec_file="xgboost_model_inference_component", + replacements=replacements, + ) + assert model_resource is not None + if k8s.get_resource_arn(model_resource) is None: + logging.error( + f"ARN for this resource is None, resource status is: {model_resource['status']}" + ) + assert k8s.get_resource_arn(model_resource) is not None + + yield (model_reference, model_resource) + + _, deleted = k8s.delete_custom_resource( + model_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH + ) + assert deleted + + +@pytest.fixture(scope="module") +def endpoint_config(name_suffix): + config_resource_name = name_suffix + "-endpoint-config" + replacements = REPLACEMENT_VALUES.copy() + replacements["ENDPOINT_CONFIG_NAME"] = config_resource_name + + config_reference, config_spec, config_resource = create_sagemaker_resource( + resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL, + resource_name=config_resource_name, + spec_file="endpoint_config_inference_component", + replacements=replacements, + ) + assert config_resource is not None + if k8s.get_resource_arn(config_resource) is None: + logging.error( + f"ARN for this resource is None, resource status is: {config_resource['status']}" + ) + assert k8s.get_resource_arn(config_resource) is not None + + yield (config_reference, config_resource) + + _, deleted = k8s.delete_custom_resource( + config_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH + ) + assert deleted + + +@pytest.fixture(scope="module") +def endpoint(name_suffix, 
endpoint_config): + endpoint_resource_name = name_suffix + "-endpoint" + (_, config_resource) = endpoint_config + config_resource_name = config_resource["spec"].get("endpointConfigName", None) + + replacements = REPLACEMENT_VALUES.copy() + replacements["ENDPOINT_NAME"] = endpoint_resource_name + replacements["ENDPOINT_CONFIG_NAME"] = config_resource_name + + endpoint_reference, endpoint_spec, endpoint_resource = create_sagemaker_resource( + resource_plural=cfg.ENDPOINT_RESOURCE_PLURAL, + resource_name=endpoint_resource_name, + spec_file="endpoint_base", + replacements=replacements, + ) + + assert endpoint_resource is not None + + # endpoint has correct arn and status + endpoint_name = endpoint_resource["spec"].get("endpointName", None) + assert endpoint_name is not None + assert endpoint_name == endpoint_resource_name + + endpoint_desc = get_sagemaker_endpoint(endpoint_name) + endpoint_arn = endpoint_desc["EndpointArn"] + assert k8s.get_resource_arn(endpoint_resource) == endpoint_arn + + # endpoint transitions Creating -> InService state + assert_endpoint_status_in_sync( + endpoint_name, endpoint_reference, cfg.ENDPOINT_STATUS_CREATING + ) + assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False") + + assert_endpoint_status_in_sync( + endpoint_name, endpoint_reference, cfg.ENDPOINT_STATUS_INSERVICE + ) + assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "True") + + yield (endpoint_reference, endpoint_resource) + + assert delete_custom_resource(endpoint_reference, 40, cfg.DELETE_WAIT_LENGTH) + + +@pytest.fixture(scope="module") +def inference_component(name_suffix, endpoint, xgboost_model): + inference_component_resource_name = name_suffix + "-inference-component" + (_, endpoint_resource) = endpoint + (_, model_resource) = xgboost_model + endpoint_resource_name = endpoint_resource["spec"].get("endpointName", None) + model_resource_name = model_resource["spec"].get("modelName", None) + + replacements = 
REPLACEMENT_VALUES.copy() + replacements["INFERENCE_COMPONENT_NAME"] = inference_component_resource_name + replacements["ENDPOINT_NAME"] = endpoint_resource_name + replacements["MODEL_NAME"] = model_resource_name + + reference, spec, resource = create_sagemaker_resource( + resource_plural=cfg.INFERENCE_COMPONENT_RESOURCE_PLURAL, + resource_name=inference_component_resource_name, + spec_file="inference_component", + replacements=replacements, + ) + + assert resource is not None + + yield (reference, resource, spec) + + # Delete the k8s resource if not already deleted by tests + assert delete_custom_resource(reference, 40, cfg.DELETE_WAIT_LENGTH) + + +@pytest.fixture(scope="module") +def faulty_model(name_suffix, xgboost_model): + replacements = REPLACEMENT_VALUES.copy() + + # copy model data to a temp S3 location and delete it after model is created on SageMaker + model_bucket = replacements["SAGEMAKER_DATA_BUCKET"] + copy_source = { + "Bucket": model_bucket, + "Key": "sagemaker/model/xgboost-mnist-model.tar.gz", + } + model_destination_key = "sagemaker/model/delete/xgboost-mnist-model.tar.gz" + s3.copy_object(model_bucket, copy_source, model_destination_key) + + model_resource_name = name_suffix + "-faulty-model" + replacements["MODEL_NAME"] = model_resource_name + replacements["MODEL_LOCATION"] = f"s3://{model_bucket}/{model_destination_key}" + model_reference, model_spec, model_resource = create_sagemaker_resource( + resource_plural=cfg.MODEL_RESOURCE_PLURAL, + resource_name=model_resource_name, + spec_file="xgboost_model_with_model_location_inference_component", + replacements=replacements, + ) + assert model_resource is not None + if k8s.get_resource_arn(model_resource) is None: + logging.error( + f"ARN for this resource is None, resource status is: {model_resource['status']}" + ) + assert k8s.get_resource_arn(model_resource) is not None + s3.delete_object(model_bucket, model_destination_key) + + yield (model_reference, model_resource) + + _, deleted = 
k8s.delete_custom_resource( + model_reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH + ) + assert deleted + + +@service_marker +@pytest.mark.shallow_canary +@pytest.mark.canary +class TestInferenceComponent: + def create_inference_component_test(self, inference_component): + (reference, resource, _) = inference_component + assert k8s.get_resource_exists(reference) + + # inference component has correct arn and status + inference_component_name = resource["spec"].get("inferenceComponentName", None) + assert inference_component_name is not None + + inference_component_desc = get_sagemaker_inference_component(inference_component_name) + inference_component_arn = inference_component_desc["InferenceComponentArn"] + assert k8s.get_resource_arn(resource) == inference_component_arn + + # inference_component transitions Creating -> InService state + assert_inference_component_status_in_sync( + inference_component_name, reference, cfg.INFERENCE_COMPONENT_STATUS_CREATING + ) + assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False") + + assert_inference_component_status_in_sync( + inference_component_name, reference, cfg.INFERENCE_COMPONENT_STATUS_INSERVICE + ) + assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True") + + resource_tags = resource["spec"].get("tags", None) + assert_tags_in_sync(inference_component_arn, resource_tags) + + def update_inference_component_failed_test(self, inference_component, faulty_model): + (reference, _, spec) = inference_component + (_, faulty_model_resource) = faulty_model + faulty_model_name = faulty_model_resource["spec"].get( + "modelName", None + ) + spec["spec"]["specification"]["modelName"] = faulty_model_name + resource = k8s.patch_custom_resource(reference, spec) + resource = k8s.wait_resource_consumed_by_controller(reference) + assert resource is not None + + # inference component transitions Updating -> InService state + assert_inference_component_status_in_sync( + reference.name, + reference, + 
cfg.INFERENCE_COMPONENT_STATUS_UPDATING, + ) + + assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False") + assert k8s.get_resource_condition(reference, "ACK.Terminal") is None + resource = k8s.get_resource(reference) + + assert_inference_component_status_in_sync( + reference.name, + reference, + cfg.INFERENCE_COMPONENT_STATUS_INSERVICE, + ) + + assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False") + + assert k8s.assert_condition_state_message( + reference, + "ACK.Terminal", + "True", + FAIL_UPDATE_ERROR_MESSAGE, + ) + + resource = k8s.get_resource(reference) + assert resource["status"].get("failureReason", None) is not None + + def update_inference_component_successful_test(self, inference_component, xgboost_model): + (reference, resource, spec) = inference_component + inference_component_name = resource["spec"].get("inferenceComponentName", None) + (_, model_resource) = xgboost_model + model_name = model_resource["spec"].get( + "modelName", None + ) + spec["spec"]["specification"]["modelName"] = model_name + + desired_memory_required = 2024 + spec["spec"]["specification"]["computeResourceRequirements"]["minMemoryRequiredInMb"] = desired_memory_required + + resource = k8s.patch_custom_resource(reference, spec) + resource = k8s.wait_resource_consumed_by_controller(reference) + assert resource is not None + + # inference component transitions Updating -> InService state + assert_inference_component_status_in_sync( + reference.name, + reference, + cfg.INFERENCE_COMPONENT_STATUS_UPDATING, + ) + + assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False") + assert k8s.get_resource_condition(reference, "ACK.Terminal") is None + resource = k8s.get_resource(reference) + + assert_inference_component_status_in_sync( + reference.name, + reference, + cfg.INFERENCE_COMPONENT_STATUS_INSERVICE, + ) + assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True") + assert k8s.get_resource_condition(reference, "ACK.Terminal") is 
None + resource = k8s.get_resource(reference) + # We do not assert that failureReason is None: in testing, the InferenceComponent has + # consistently updated successfully while failureReason was still present. + # Instead, we rely on the resource being synced and having no terminal condition. + # assert resource["status"].get("failureReason", None) is None + new_memory_required = get_sagemaker_inference_component(inference_component_name)[ + "Specification"]["ComputeResourceRequirements"]["MinMemoryRequiredInMb"] + + assert desired_memory_required == new_memory_required + + def delete_inference_component_test(self, inference_component): + (reference, resource, _) = inference_component + inference_component_name = resource["spec"].get("inferenceComponentName", None) + + assert delete_custom_resource( + reference, cfg.INFERENCE_COMPONENT_DELETE_WAIT_PERIODS, cfg.INFERENCE_COMPONENT_DELETE_WAIT_LENGTH + ) + + assert get_sagemaker_inference_component(inference_component_name) is None + + def test_driver( + self, + inference_component, + faulty_model, + xgboost_model + ): + self.create_inference_component_test(inference_component) + self.update_inference_component_failed_test(inference_component, faulty_model) + self.update_inference_component_successful_test(inference_component, xgboost_model) + self.delete_inference_component_test(inference_component)