diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index ea4b21d86..4a1fb7290 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -94,6 +94,8 @@ type ClusterPolicySpec struct { CCManager CCManagerSpec `json:"ccManager,omitempty"` // HostPaths defines various paths on the host needed by GPU Operator components HostPaths HostPathsSpec `json:"hostPaths,omitempty"` + // FabricManager component spec + FabricManager FabricManagerSpec `json:"fabricManager,omitempty"` } // Runtime defines container runtime type @@ -1724,6 +1726,38 @@ type CDIConfigSpec struct { Default *bool `json:"default,omitempty"` } +// FabricMode defines the Fabric Manager mode +type FabricMode string + +const ( + // FabricModeFullPassthrough indicates Full-passthrough mode (FABRIC_MODE=0) + FabricModeFullPassthrough FabricMode = "full-passthrough" + // FabricModeSharedNVSwitch indicates Shared NVSwitch Virtualization mode (FABRIC_MODE=1) + FabricModeSharedNVSwitch FabricMode = "shared-nvswitch" +) + +func (f FabricMode) String() string { + switch f { + case FabricModeFullPassthrough: + return "full-passthrough" + case FabricModeSharedNVSwitch: + return "shared-nvswitch" + default: + return "" + } +} + +// FabricManagerSpec defines the properties for NVIDIA Fabric Manager configuration +type FabricManagerSpec struct { + // Mode indicates the Fabric Manager mode + // +kubebuilder:validation:Enum=full-passthrough;shared-nvswitch + // +kubebuilder:default=full-passthrough + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Fabric Manager Mode" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:select:full-passthrough,urn:alm:descriptor:com.tectonic.ui:select:shared-nvswitch" + Mode FabricMode `json:"mode,omitempty"` +} + // MIGStrategy indicates MIG mode type MIGStrategy string @@ -2218,3 +2252,18 @@ func (c *MIGPartedConfigSpec) GetName() string { func (c *VGPUDevicesConfigSpec) GetName() string { return ptr.Deref(c, VGPUDevicesConfigSpec{}).Name } + +// IsSharedNVSwitchMode returns true if Fabric Manager is configured for Shared NVSwitch mode +func (f *FabricManagerSpec) IsSharedNVSwitchMode() bool { + return f.Mode == FabricModeSharedNVSwitch +} + +// ValidateFabricManagerConfig validates the Fabric Manager configuration +func (c *ClusterPolicySpec) ValidateFabricManagerConfig() error { + if c.SandboxWorkloads.DefaultWorkload == "vm-passthrough" && + c.FabricManager.IsSharedNVSwitchMode() && + !c.Driver.IsEnabled() { + return fmt.Errorf("driver must be enabled when using vm-passthrough with Fabric Manager Shared NVSwitch mode") + } + return nil +} diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index 9e68fdb37..5b86cd8e2 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -209,6 +209,7 @@ func (in *ClusterPolicySpec) DeepCopyInto(out *ClusterPolicySpec) { in.KataManager.DeepCopyInto(&out.KataManager) in.CCManager.DeepCopyInto(&out.CCManager) out.HostPaths = in.HostPaths + out.FabricManager = in.FabricManager } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicySpec. @@ -788,6 +789,21 @@ func (in *EnvVar) DeepCopy() *EnvVar { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil.
+func (in *FabricManagerSpec) DeepCopyInto(out *FabricManagerSpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FabricManagerSpec.
+func (in *FabricManagerSpec) DeepCopy() *FabricManagerSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(FabricManagerSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *GDRCopySpec) DeepCopyInto(out *GDRCopySpec) {
 	*out = *in
diff --git a/assets/state-driver/0400_configmap.yaml b/assets/state-driver/0400_configmap.yaml
index 67aa1e2ca..b96e0bd65 100644
--- a/assets/state-driver/0400_configmap.yaml
+++ b/assets/state-driver/0400_configmap.yaml
@@ -22,8 +22,14 @@ data:
       fi
 
       if ! nvidia-smi; then
-        echo "nvidia-smi failed"
-        exit 1
+        # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices
+        # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1
+        if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then
+          echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)"
+        else
+          echo "nvidia-smi failed"
+          exit 1
+        fi
       fi
 
       GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}"
diff --git a/assets/state-sandbox-validation/0200_role.yaml b/assets/state-sandbox-validation/0200_role.yaml
index 79da66ff7..e1f616acb 100644
--- a/assets/state-sandbox-validation/0200_role.yaml
+++ b/assets/state-sandbox-validation/0200_role.yaml
@@ -12,3 +12,10 @@ rules:
   - use
   resourceNames:
   - privileged
+- apiGroups:
+  - apps
+  resources:
+  - daemonsets
+  verbs:
+  - get
+  - list
diff --git a/assets/state-sandbox-validation/0500_daemonset.yaml b/assets/state-sandbox-validation/0500_daemonset.yaml
index fcc2aa12a..982f64b53 100644
--- a/assets/state-sandbox-validation/0500_daemonset.yaml
+++ b/assets/state-sandbox-validation/0500_daemonset.yaml
@@ -26,6 +26,36 @@ spec:
       priorityClassName: system-node-critical
       serviceAccountName: nvidia-sandbox-validator
       initContainers:
+      - name: driver-validation
+        image: "FILLED BY THE OPERATOR"
+        command: ["sh", "-c"]
+        args: ["nvidia-validator"]
+        env:
+        - name: WITH_WAIT
+          value: "true"
+        - name: COMPONENT
+          value: driver
+        - name: OPERATOR_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        securityContext:
+          privileged: true
+          seLinuxOptions:
+            level: "s0"
+        volumeMounts:
+        - name: host-root
+          mountPath: /host
+          readOnly: true
+          mountPropagation: HostToContainer
+        - name: driver-install-path
+          mountPath: /run/nvidia/driver
+          mountPropagation: HostToContainer
+        - name: run-nvidia-validations
+          mountPath: /run/nvidia/validations
+          mountPropagation: Bidirectional
+        - name: host-dev-char
+          mountPath: /host-dev-char
       - name: cc-manager-validation
         image: "FILLED BY THE OPERATOR"
         command: ['sh', '-c']
@@ -145,3 +175,6 @@ spec:
       - name: host-root
         hostPath:
           path: /
+      - name: host-dev-char
+        hostPath:
+          path: /dev/char
diff --git a/assets/state-vfio-manager/0500_daemonset.yaml b/assets/state-vfio-manager/0500_daemonset.yaml
index 1039cc874..ed867a70a 100644
--- a/assets/state-vfio-manager/0500_daemonset.yaml
+++ b/assets/state-vfio-manager/0500_daemonset.yaml
@@ -80,6 +80,9 @@ spec:
           readOnly: true
         - name: host-root
           mountPath: /host
+        - name: run-nvidia-validations
+          mountPath: /run/nvidia/validations
+          mountPropagation: Bidirectional
         securityContext:
           privileged: true
           seLinuxOptions:
@@ -102,6 +105,10 @@ spec:
        hostPath:
          path: /run/nvidia
          type: DirectoryOrCreate
+      - name: run-nvidia-validations
+        hostPath:
+          path: /run/nvidia/validations
+          type: DirectoryOrCreate
       - name: host-root
         hostPath:
           path: "/"
diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml
index 379e98d87..b8a6ad74a 100644
--- a/bundle/manifests/nvidia.com_clusterpolicies.yaml
+++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml
@@ -1057,6 +1057,17 @@ spec:
                     type: string
                 type: object
             type: object
+          fabricManager:
+            description: FabricManager component spec
+            properties:
+              mode:
+                default: full-passthrough
+                description: Mode indicates the Fabric Manager mode
+                enum:
+                - full-passthrough
+                - shared-nvswitch
+                type: string
+            type: object
           gdrcopy:
             description: GDRCopy component spec
             properties:
diff --git a/cmd/nvidia-validator/main.go b/cmd/nvidia-validator/main.go
index ea6cb5d5d..cb235ae72 100644
--- a/cmd/nvidia-validator/main.go
+++ b/cmd/nvidia-validator/main.go
@@ -1624,18 +1624,22 @@ func (v *VfioPCI) validate() error {
 		return err
 	}
 
-	err = v.runValidation()
-	if err != nil {
-		return err
-	}
-	log.Info("Validation completed successfully - all devices are bound to vfio-pci")
+	for {
+		log.Info("Attempting to validate that all devices are bound to vfio-pci")
+		err := v.runValidation()
+		if err != nil {
+			if !withWaitFlag {
+				return fmt.Errorf("error validating vfio-pci: %w", err)
+			}
+			log.Warningf("failed to validate vfio-pci, retrying after %d seconds", sleepIntervalSecondsFlag)
+			time.Sleep(time.Duration(sleepIntervalSecondsFlag) * time.Second)
+			continue
+		}
 
-	// delete status file is already present
-	err = createStatusFile(outputDirFlag + "/" + vfioPCIStatusFile)
-	if err != nil {
-		return err
+		log.Info("Validation completed successfully - all devices are bound to vfio-pci")
+
+		return createStatusFile(outputDirFlag + "/" + vfioPCIStatusFile)
 	}
-	return nil
 }
 
 func (v *VfioPCI) runValidation() error {
diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml
index 379e98d87..b8a6ad74a 100644
--- a/config/crd/bases/nvidia.com_clusterpolicies.yaml
+++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml
@@ -1057,6 +1057,17 @@ spec:
                     type: string
                 type: object
             type: object
+          fabricManager:
+            description: FabricManager component spec
+            properties:
+              mode:
+                default: full-passthrough
+                description: Mode indicates the Fabric Manager mode
+                enum:
+                - full-passthrough
+                - shared-nvswitch
+                type: string
+            type: object
           gdrcopy:
             description: GDRCopy component spec
             properties:
diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index a2a862bb0..d093ca1d4 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -23,13 +23,12 @@ import (
 	"fmt"
 	"os"
 	"path"
+	"path/filepath"
 	"regexp"
 	"sort"
 	"strconv"
 	"strings"
 
-	"path/filepath"
-
 	apiconfigv1 "github.com/openshift/api/config/v1"
 	apiimagev1 "github.com/openshift/api/image/v1"
 	secv1 "github.com/openshift/api/security/v1"
@@ -1648,6 +1647,31 @@ func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPo
 			setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), env.Name, env.Value)
 		}
 	}
+
+	// Set ENABLE_FABRIC_MANAGER environment variable if shared-nvswitch mode is configured
+	if config.FabricManager.IsSharedNVSwitchMode() {
+		setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "ENABLE_FABRIC_MANAGER", "true")
+
+		// Add fabric manager volume mount to the container
+		fabricManagerVolMount := corev1.VolumeMount{
"run-nvidia-fabricmanager", + MountPath: "/run/nvidia-fabricmanager", + } + obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, fabricManagerVolMount) + + // Add fabric manager volume to the pod spec + fabricManagerVol := corev1.Volume{ + Name: "run-nvidia-fabricmanager", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/run/nvidia-fabricmanager", + Type: ptr.To(corev1.HostPathDirectoryOrCreate), + }, + }, + } + obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, fabricManagerVol) + } + return nil } @@ -2013,10 +2037,65 @@ func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec // TransformVFIOManager transforms VFIO-PCI Manager daemonset with required config as per ClusterPolicy func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error { - // update k8s-driver-manager initContainer - err := transformDriverManagerInitContainer(obj, &config.VFIOManager.DriverManager, nil) - if err != nil { - return fmt.Errorf("failed to transform k8s-driver-manager initContainer for VFIO Manager: %v", err) + // Check if we're in shared-nvswitch mode + if config.FabricManager.IsSharedNVSwitchMode() { + // In shared-nvswitch mode, replace driver uninstall with device unbind + // Find the k8s-driver-manager init container and replace it with vfio-manage unbind + container := findContainerByName(obj.Spec.Template.Spec.InitContainers, "k8s-driver-manager") + + // Get the main container image for consistency + mainImage, err := gpuv1.ImagePath(&config.VFIOManager) + if err != nil { + return err + } + + // Replace with synchronized vfio-manage unbind init container + container.Name = "vfio-device-unbind" + container.Image = mainImage + container.ImagePullPolicy = gpuv1.ImagePullPolicy(config.VFIOManager.ImagePullPolicy) + container.Command = []string{"/bin/sh"} + container.Args = []string{"-c", ` +# For shared-nvswitch mode, wait for driver to be ready before unbinding +echo "Shared NVSwitch mode detected, waiting for driver readiness..." +until [ -f /run/nvidia/validations/driver-ready ] +do + echo "waiting for the driver validations to be ready..." + sleep 5 +done + +set -o allexport +cat /run/nvidia/validations/driver-ready +. 
/run/nvidia/validations/driver-ready + +echo "Driver is ready, proceeding with device unbind" +exec vfio-manage unbind --all`} + + // Add HOST_ROOT env var needed by vfio-manage + setContainerEnv(container, "HOST_ROOT", "/host") + + // Add nvidia-validations volume mount for driver-ready file + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "nvidia-validations", + MountPath: "/run/nvidia/validations", + ReadOnly: true, + }) + + // Add nvidia-validations volume + obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: "nvidia-validations", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/run/nvidia/validations", + Type: &[]corev1.HostPathType{corev1.HostPathDirectoryOrCreate}[0], + }, + }, + }) + } else { + // Default behavior: update k8s-driver-manager initContainer + err := transformDriverManagerInitContainer(obj, &config.VFIOManager.DriverManager, nil) + if err != nil { + return fmt.Errorf("failed to transform k8s-driver-manager initContainer for VFIO Manager: %v", err) + } } // update image @@ -2232,12 +2311,27 @@ func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic "vgpu-devices", } + // Add driver validation when FabricManager.Mode is shared-nvswitch + if config.FabricManager.IsSharedNVSwitchMode() { + components = append(components, "driver") + } + for _, component := range components { if err := TransformValidatorComponent(config, &obj.Spec.Template.Spec, component); err != nil { validatorErr = errors.Join(validatorErr, err) } } + // Remove driver validation init container if NOT in shared-nvswitch mode + if !config.FabricManager.IsSharedNVSwitchMode() { + for i, initContainer := range obj.Spec.Template.Spec.InitContainers { + if initContainer.Name == "driver-validation" { + obj.Spec.Template.Spec.InitContainers = append(obj.Spec.Template.Spec.InitContainers[:i], obj.Spec.Template.Spec.InitContainers[i+1:]...) + break + } + } + } + if validatorErr != nil { n.logger.Info("WARN: errors transforming the validator containers: %v", validatorErr) } @@ -3492,6 +3586,13 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy } } + // Set Fabric Manager environment variable if configured + if config.FabricManager.IsSharedNVSwitchMode() { + setContainerEnv(driverContainer, "FABRIC_MANAGER_FABRIC_MODE", "1") + } else if config.FabricManager.Mode == gpuv1.FabricModeFullPassthrough { + setContainerEnv(driverContainer, "FABRIC_MANAGER_FABRIC_MODE", "0") + } + // no further repo configuration required when using pre-compiled drivers, return here. 
if config.Driver.UsePrecompiledDrivers() { return nil diff --git a/controllers/object_controls_test.go b/controllers/object_controls_test.go index 2d84b2fc5..5c86f1e36 100644 --- a/controllers/object_controls_test.go +++ b/controllers/object_controls_test.go @@ -882,6 +882,8 @@ func getSandboxDevicePluginTestInput(testCase string) *gpuv1.ClusterPolicy { switch testCase { case "default": // Do nothing + case "fabric-manager-shared-nvswitch": + cp.Spec.FabricManager.Mode = gpuv1.FabricModeSharedNVSwitch default: return nil } @@ -897,11 +899,16 @@ func getSandboxDevicePluginTestOutput(testCase string) map[string]interface{} { "numDaemonsets": 1, "image": "nvcr.io/nvidia/kubevirt-device-plugin:v1.1.0", "imagePullSecret": "ngc-secret", + "env": map[string]string{}, } switch testCase { case "default": // Do nothing + case "fabric-manager-shared-nvswitch": + output["env"] = map[string]string{ + "ENABLE_FABRIC_MANAGER": "true", + } default: return nil } @@ -922,6 +929,11 @@ func TestSandboxDevicePlugin(t *testing.T) { getSandboxDevicePluginTestInput("default"), getSandboxDevicePluginTestOutput("default"), }, + { + "FabricManagerSharedNVSwitch", + getSandboxDevicePluginTestInput("fabric-manager-shared-nvswitch"), + getSandboxDevicePluginTestOutput("fabric-manager-shared-nvswitch"), + }, } for _, tc := range testCases { @@ -935,14 +947,26 @@ func TestSandboxDevicePlugin(t *testing.T) { } image := "" + containerEnv := make(map[string]string) for _, container := range ds.Spec.Template.Spec.Containers { if strings.Contains(container.Name, "nvidia-sandbox-device-plugin-ctr") { image = container.Image + for _, env := range container.Env { + containerEnv[env.Name] = env.Value + } continue } } require.Equal(t, tc.output["image"], image, "Unexpected configuration for nvidia-sandbox-device-plugin-ctr image") + + // Check environment variables + expectedEnv := tc.output["env"].(map[string]string) + for envName, expectedValue := range expectedEnv { + actualValue, found := containerEnv[envName] + require.True(t, found, "Expected environment variable %s not found", envName) + require.Equal(t, expectedValue, actualValue, "Unexpected value for environment variable %s", envName) + } // cleanup by deleting all kubernetes objects err = removeState(&clusterPolicyController, clusterPolicyController.idx-1) diff --git a/controllers/state_manager.go b/controllers/state_manager.go index 4ea634ebe..badd54afc 100644 --- a/controllers/state_manager.go +++ b/controllers/state_manager.go @@ -42,6 +42,7 @@ const ( commonGPULabelValue = "true" commonOperandsLabelKey = "nvidia.com/gpu.deploy.operands" commonOperandsLabelValue = "true" + driverLabelKey = "nvidia.com/gpu.deploy.driver" migManagerLabelKey = "nvidia.com/gpu.deploy.mig-manager" migManagerLabelValue = "true" migCapableLabelKey = "nvidia.com/mig.capable" @@ -116,9 +117,10 @@ var gpuNodeLabels = map[string]string{ } type gpuWorkloadConfiguration struct { - config string - node string - log logr.Logger + config string + node string + log logr.Logger + clusterPolicy *gpuv1.ClusterPolicy } // OpenShiftDriverToolkit contains the values required to deploy @@ -322,6 +324,15 @@ func isValidWorkloadConfig(workloadConfig string) bool { return ok } +// shouldDeployDriverForVMPassthrough returns true if driver should be deployed for vm-passthrough workload +// based on Fabric Manager configuration +func (w *gpuWorkloadConfiguration) shouldDeployDriverForVMPassthrough() bool { + if w.config != gpuWorkloadConfigVMPassthrough || w.clusterPolicy == nil { + return false + } + return 
w.clusterPolicy.Spec.FabricManager.IsSharedNVSwitchMode() +} + // getWorkloadConfig returns the GPU workload configured for the node. // If an error occurs when searching for the workload config, // return defaultGPUWorkloadConfig. @@ -382,6 +393,16 @@ func (w *gpuWorkloadConfiguration) addGPUStateLabels(labels map[string]string) b modified = true } } + + // Add conditional driver deployment for vm-passthrough workload + if w.shouldDeployDriverForVMPassthrough() { + if _, ok := labels[driverLabelKey]; !ok { + w.log.Info("Setting node label for driver deployment in vm-passthrough with Fabric Manager shared-nvswitch mode", "NodeName", w.node, "Label", driverLabelKey, "Value", "true") + labels[driverLabelKey] = "true" + modified = true + } + } + if w.config == gpuWorkloadConfigContainer && hasMIGCapableGPU(labels) && !hasMIGManagerLabel(labels) { w.log.Info("Setting node label", "NodeName", w.node, "Label", migManagerLabelKey, "Value", migManagerLabelValue) labels[migManagerLabelKey] = migManagerLabelValue @@ -506,7 +527,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) { "Error", err, "defaultGPUWorkloadConfig", defaultGPUWorkloadConfig) } n.logger.Info("GPU workload configuration", "NodeName", node.Name, "GpuWorkloadConfig", config) - gpuWorkloadConfig := &gpuWorkloadConfiguration{config, node.Name, n.logger} + gpuWorkloadConfig := &gpuWorkloadConfiguration{config, node.Name, n.logger, n.singleton} if !hasCommonGPULabel(labels) && hasGPULabels(labels) { n.logger.Info("Node has GPU(s)", "NodeName", node.Name) // label the node with common Nvidia GPU label diff --git a/controllers/state_manager_test.go b/controllers/state_manager_test.go index bd1641e94..584e51902 100644 --- a/controllers/state_manager_test.go +++ b/controllers/state_manager_test.go @@ -19,6 +19,8 @@ package controllers import ( "testing" + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" @@ -186,3 +188,323 @@ func TestHasMIGCapableGPU(t *testing.T) { } } } + +func TestGpuWorkloadConfiguration_ShouldDeployDriverForVMPassthrough(t *testing.T) { + tests := []struct { + name string + config string + clusterPolicy *gpuv1.ClusterPolicy + expected bool + }{ + { + name: "non-vm-passthrough workload", + config: gpuWorkloadConfigContainer, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + }, + expected: false, + }, + { + name: "vm-passthrough with nil cluster policy", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: nil, + expected: false, + }, + { + name: "vm-passthrough with shared-nvswitch mode", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + }, + expected: true, + }, + { + name: "vm-passthrough with full-passthrough mode", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeFullPassthrough, + }, + }, + }, + expected: false, + }, + { + name: "vm-passthrough with default (empty) fabric manager mode", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: "", // empty defaults to full-passthrough + }, 
+ }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + workloadConfig := &gpuWorkloadConfiguration{ + config: tt.config, + node: "test-node", + log: logr.Discard(), + clusterPolicy: tt.clusterPolicy, + } + + result := workloadConfig.shouldDeployDriverForVMPassthrough() + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestGpuWorkloadConfiguration_AddGPUStateLabels(t *testing.T) { + tests := []struct { + name string + config string + clusterPolicy *gpuv1.ClusterPolicy + inputLabels map[string]string + expectedLabels map[string]string + expectModified bool + }{ + { + name: "vm-passthrough with shared-nvswitch adds driver label", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + }, + inputLabels: map[string]string{}, + expectedLabels: map[string]string{ + "nvidia.com/gpu.deploy.sandbox-device-plugin": "true", + "nvidia.com/gpu.deploy.sandbox-validator": "true", + "nvidia.com/gpu.deploy.vfio-manager": "true", + "nvidia.com/gpu.deploy.kata-manager": "true", + "nvidia.com/gpu.deploy.cc-manager": "true", + "nvidia.com/gpu.deploy.driver": "true", + }, + expectModified: true, + }, + { + name: "vm-passthrough with full-passthrough does not add driver label", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeFullPassthrough, + }, + }, + }, + inputLabels: map[string]string{}, + expectedLabels: map[string]string{ + "nvidia.com/gpu.deploy.sandbox-device-plugin": "true", + "nvidia.com/gpu.deploy.sandbox-validator": "true", + "nvidia.com/gpu.deploy.vfio-manager": "true", + "nvidia.com/gpu.deploy.kata-manager": "true", + "nvidia.com/gpu.deploy.cc-manager": "true", + }, + expectModified: true, + }, + { + name: "container workload is not affected", + config: gpuWorkloadConfigContainer, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + }, + inputLabels: map[string]string{ + "existing-label": "value", + }, + expectedLabels: map[string]string{ + "existing-label": "value", + "nvidia.com/gpu.deploy.driver": "true", + "nvidia.com/gpu.deploy.gpu-feature-discovery": "true", + "nvidia.com/gpu.deploy.container-toolkit": "true", + "nvidia.com/gpu.deploy.device-plugin": "true", + "nvidia.com/gpu.deploy.dcgm": "true", + "nvidia.com/gpu.deploy.dcgm-exporter": "true", + "nvidia.com/gpu.deploy.node-status-exporter": "true", + "nvidia.com/gpu.deploy.operator-validator": "true", + }, + expectModified: true, + }, + { + name: "vm-passthrough with nil cluster policy does not add driver label", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: nil, + inputLabels: map[string]string{}, + expectedLabels: map[string]string{ + "nvidia.com/gpu.deploy.sandbox-device-plugin": "true", + "nvidia.com/gpu.deploy.sandbox-validator": "true", + "nvidia.com/gpu.deploy.vfio-manager": "true", + "nvidia.com/gpu.deploy.kata-manager": "true", + "nvidia.com/gpu.deploy.cc-manager": "true", + }, + expectModified: true, + }, + { + name: "driver label already exists - no modification", + config: gpuWorkloadConfigVMPassthrough, + clusterPolicy: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, 
+ }, + inputLabels: map[string]string{ + "nvidia.com/gpu.deploy.sandbox-device-plugin": "true", + "nvidia.com/gpu.deploy.sandbox-validator": "true", + "nvidia.com/gpu.deploy.vfio-manager": "true", + "nvidia.com/gpu.deploy.kata-manager": "true", + "nvidia.com/gpu.deploy.cc-manager": "true", + "nvidia.com/gpu.deploy.driver": "true", + }, + expectedLabels: map[string]string{ + "nvidia.com/gpu.deploy.sandbox-device-plugin": "true", + "nvidia.com/gpu.deploy.sandbox-validator": "true", + "nvidia.com/gpu.deploy.vfio-manager": "true", + "nvidia.com/gpu.deploy.kata-manager": "true", + "nvidia.com/gpu.deploy.cc-manager": "true", + "nvidia.com/gpu.deploy.driver": "true", + }, + expectModified: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + workloadConfig := &gpuWorkloadConfiguration{ + config: tt.config, + node: "test-node", + log: logr.Discard(), + clusterPolicy: tt.clusterPolicy, + } + + // Make a copy of input labels to avoid modifying the test data + labels := make(map[string]string) + for k, v := range tt.inputLabels { + labels[k] = v + } + + modified := workloadConfig.addGPUStateLabels(labels) + + assert.Equal(t, tt.expectModified, modified) + assert.Equal(t, tt.expectedLabels, labels) + }) + } +} + +func TestClusterPolicyValidateFabricManagerConfig(t *testing.T) { + tests := []struct { + name string + clusterPolicy *gpuv1.ClusterPolicySpec + expectError bool + errorMessage string + }{ + { + name: "valid configuration - vm-passthrough with shared-nvswitch and driver enabled", + clusterPolicy: &gpuv1.ClusterPolicySpec{ + SandboxWorkloads: gpuv1.SandboxWorkloadsSpec{ + DefaultWorkload: "vm-passthrough", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + Driver: gpuv1.DriverSpec{ + Enabled: newBoolPtr(true), + }, + }, + expectError: false, + }, + { + name: "valid configuration - vm-passthrough with full-passthrough mode", + clusterPolicy: &gpuv1.ClusterPolicySpec{ + SandboxWorkloads: gpuv1.SandboxWorkloadsSpec{ + DefaultWorkload: "vm-passthrough", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeFullPassthrough, + }, + Driver: gpuv1.DriverSpec{ + Enabled: newBoolPtr(false), + }, + }, + expectError: false, + }, + { + name: "valid configuration - container workload with any fabric manager mode", + clusterPolicy: &gpuv1.ClusterPolicySpec{ + SandboxWorkloads: gpuv1.SandboxWorkloadsSpec{ + DefaultWorkload: "container", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + Driver: gpuv1.DriverSpec{ + Enabled: newBoolPtr(false), + }, + }, + expectError: false, + }, + { + name: "invalid configuration - vm-passthrough with shared-nvswitch but driver disabled", + clusterPolicy: &gpuv1.ClusterPolicySpec{ + SandboxWorkloads: gpuv1.SandboxWorkloadsSpec{ + DefaultWorkload: "vm-passthrough", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + Driver: gpuv1.DriverSpec{ + Enabled: newBoolPtr(false), + }, + }, + expectError: true, + errorMessage: "driver must be enabled when using vm-passthrough with Fabric Manager Shared NVSwitch mode", + }, + { + name: "valid configuration - vm-passthrough with shared-nvswitch and driver not specified (defaults to enabled)", + clusterPolicy: &gpuv1.ClusterPolicySpec{ + SandboxWorkloads: gpuv1.SandboxWorkloadsSpec{ + DefaultWorkload: "vm-passthrough", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + Driver: gpuv1.DriverSpec{ + // Enabled not specified, 
defaults to true + }, + }, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.clusterPolicy.ValidateFabricManagerConfig() + + if tt.expectError { + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.errorMessage) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index cfab7da49..298fc32b6 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -1810,7 +1810,7 @@ func TestTransformVFIOManager(t *testing.T) { expectedDaemonset Daemonset }{ { - description: "transform vfio manager", + description: "transform vfio manager - normal mode", daemonset: NewDaemonset(). WithContainer(corev1.Container{Name: "nvidia-vfio-manager"}). WithContainer(corev1.Container{Name: "sidecar"}). @@ -1833,6 +1833,9 @@ func TestTransformVFIOManager(t *testing.T) { Env: mockEnv, }, }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeFullPassthrough, + }, }, expectedDaemonset: NewDaemonset(). WithContainer(corev1.Container{ @@ -1855,6 +1858,80 @@ func TestTransformVFIOManager(t *testing.T) { }). WithPullSecret(secret), }, + { + description: "transform vfio manager - shared-nvswitch mode", + daemonset: NewDaemonset(). + WithContainer(corev1.Container{Name: "nvidia-vfio-manager"}). + WithContainer(corev1.Container{Name: "sidecar"}). + WithInitContainer(corev1.Container{Name: "k8s-driver-manager"}), + clusterPolicySpec: &gpuv1.ClusterPolicySpec{ + VFIOManager: gpuv1.VFIOManagerSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "vfio-pci-manager", + Version: "v1.0.0", + ImagePullPolicy: "IfNotPresent", + ImagePullSecrets: []string{secret}, + Resources: &gpuv1.ResourceRequirements{Limits: resources.Limits, Requests: resources.Requests}, + Args: []string{"--test-flag"}, + Env: mockEnv, + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + expectedDaemonset: NewDaemonset(). + WithContainer(corev1.Container{ + Name: "nvidia-vfio-manager", + Image: "nvcr.io/nvidia/cloud-native/vfio-pci-manager:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + Args: []string{"--test-flag"}, + Env: mockEnvCore, + Resources: resources, + }). + WithContainer(corev1.Container{ + Name: "sidecar", + Resources: resources, + }). + WithInitContainer(corev1.Container{ + Name: "vfio-device-unbind", + Image: "nvcr.io/nvidia/cloud-native/vfio-pci-manager:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + Command: []string{"/bin/sh"}, + Args: []string{"-c", ` +# For shared-nvswitch mode, wait for driver to be ready before unbinding +echo "Shared NVSwitch mode detected, waiting for driver readiness..." +until [ -f /run/nvidia/validations/driver-ready ] +do + echo "waiting for the driver validations to be ready..." + sleep 5 +done + +set -o allexport +cat /run/nvidia/validations/driver-ready +. /run/nvidia/validations/driver-ready + +echo "Driver is ready, proceeding with device unbind" +exec vfio-manage unbind --all`}, + Env: []corev1.EnvVar{{Name: "HOST_ROOT", Value: "/host"}}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "nvidia-validations", + MountPath: "/run/nvidia/validations", + ReadOnly: true, + }, + }, + }). + WithVolume(corev1.Volume{ + Name: "nvidia-validations", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/run/nvidia/validations", + Type: &[]corev1.HostPathType{corev1.HostPathDirectoryOrCreate}[0], + }, + }, + }). 
+ WithPullSecret(secret), + }, } for _, tc := range testCases { @@ -2612,6 +2689,104 @@ func TestTransformSandboxValidator(t *testing.T) { WithPullSecret("pull-secret"). WithRuntimeClassName("nvidia"), }, + { + description: "fabric manager shared-nvswitch mode - driver validation should be preserved", + ds: NewDaemonset(). + WithInitContainer(corev1.Container{Name: "driver-validation", Image: "old-image"}). + WithContainer(corev1.Container{ + Name: "dummy", + Image: "old-image", + }), + cpSpec: &gpuv1.ClusterPolicySpec{ + Validator: gpuv1.ValidatorSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "gpu-operator-validator", + Version: "v1.0.0", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + expectedDs: NewDaemonset(). + WithInitContainer(corev1.Container{ + Name: "driver-validation", + Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: rootUID, + }, + }). + WithContainer(corev1.Container{ + Name: "dummy", + Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + SecurityContext: &corev1.SecurityContext{ + RunAsUser: rootUID, + }, + }), + }, + { + description: "fabric manager full-passthrough mode - driver validation should be removed", + ds: NewDaemonset(). + WithInitContainer(corev1.Container{Name: "driver-validation", Image: "old-image"}). + WithContainer(corev1.Container{ + Name: "dummy", + Image: "old-image", + }), + cpSpec: &gpuv1.ClusterPolicySpec{ + Validator: gpuv1.ValidatorSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "gpu-operator-validator", + Version: "v1.0.0", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeFullPassthrough, + }, + }, + expectedDs: func() Daemonset { + ds := NewDaemonset(). + WithContainer(corev1.Container{ + Name: "dummy", + Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + SecurityContext: &corev1.SecurityContext{ + RunAsUser: rootUID, + }, + }) + // Set an empty InitContainers slice to match what happens after removal + ds.Spec.Template.Spec.InitContainers = []corev1.Container{} + return ds + }(), + }, + { + description: "no fabric manager mode specified - driver validation should be removed", + ds: NewDaemonset(). + WithInitContainer(corev1.Container{Name: "driver-validation", Image: "old-image"}). + WithContainer(corev1.Container{ + Name: "dummy", + Image: "old-image", + }), + cpSpec: &gpuv1.ClusterPolicySpec{ + Validator: gpuv1.ValidatorSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "gpu-operator-validator", + Version: "v1.0.0", + }, + }, + expectedDs: func() Daemonset { + ds := NewDaemonset(). + WithContainer(corev1.Container{ + Name: "dummy", + Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + SecurityContext: &corev1.SecurityContext{ + RunAsUser: rootUID, + }, + }) + // Set an empty InitContainers slice to match what happens after removal + ds.Spec.Template.Spec.InitContainers = []corev1.Container{} + return ds + }(), + }, } for _, tc := range testCases { @@ -2771,6 +2946,78 @@ func TestTransformDriver(t *testing.T) { }), errorExpected: false, }, + { + description: "driver with fabric manager shared-nvswitch mode", + ds: NewDaemonset().WithContainer(corev1.Container{Name: "nvidia-driver-ctr"}). 
+ WithInitContainer(corev1.Container{Name: "k8s-driver-manager"}), + cpSpec: &gpuv1.ClusterPolicySpec{ + Driver: gpuv1.DriverSpec{ + Repository: "nvcr.io/nvidia", + Image: "driver", + Version: "570.172.08", + Manager: gpuv1.DriverManagerSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "k8s-driver-manager", + Version: "v0.8.0", + }, + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + }, + client: mockClientMap["secret-env-client"], + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "nvidia-driver-ctr", + Image: "nvcr.io/nvidia/driver:570.172.08-", + ImagePullPolicy: corev1.PullIfNotPresent, + Env: []corev1.EnvVar{ + { + Name: "FABRIC_MANAGER_FABRIC_MODE", + Value: "1", + }, + }, + }).WithInitContainer(corev1.Container{ + Name: "k8s-driver-manager", + Image: "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v0.8.0", + }), + errorExpected: false, + }, + { + description: "driver with fabric manager full-passthrough mode", + ds: NewDaemonset().WithContainer(corev1.Container{Name: "nvidia-driver-ctr"}). + WithInitContainer(corev1.Container{Name: "k8s-driver-manager"}), + cpSpec: &gpuv1.ClusterPolicySpec{ + Driver: gpuv1.DriverSpec{ + Repository: "nvcr.io/nvidia", + Image: "driver", + Version: "570.172.08", + Manager: gpuv1.DriverManagerSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "k8s-driver-manager", + Version: "v0.8.0", + }, + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeFullPassthrough, + }, + }, + client: mockClientMap["secret-env-client"], + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "nvidia-driver-ctr", + Image: "nvcr.io/nvidia/driver:570.172.08-", + ImagePullPolicy: corev1.PullIfNotPresent, + Env: []corev1.EnvVar{ + { + Name: "FABRIC_MANAGER_FABRIC_MODE", + Value: "0", + }, + }, + }).WithInitContainer(corev1.Container{ + Name: "k8s-driver-manager", + Image: "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v0.8.0", + }), + errorExpected: false, + }, } for _, tc := range testCases { @@ -3418,3 +3665,109 @@ func TestTransformDriverVGPUTopologyConfig(t *testing.T) { require.NoError(t, err) require.EqualValues(t, expectedDs, ds) } + +func TestTransformSandboxDevicePlugin(t *testing.T) { + initMockK8sClients() + testCases := []struct { + description string + ds Daemonset + cpSpec *gpuv1.ClusterPolicySpec + expectedDs Daemonset + errorExpected bool + }{ + { + description: "sandbox device plugin with fabric manager shared-nvswitch mode", + ds: NewDaemonset().WithContainer(corev1.Container{Name: "nvidia-sandbox-device-plugin-ctr"}). 
+ WithInitContainer(corev1.Container{Name: "toolkit-validation"}), + cpSpec: &gpuv1.ClusterPolicySpec{ + SandboxDevicePlugin: gpuv1.SandboxDevicePluginSpec{ + Repository: "nvcr.io/nvidia", + Image: "kubevirt-device-plugin", + Version: "v1.2.0", + }, + FabricManager: gpuv1.FabricManagerSpec{ + Mode: gpuv1.FabricModeSharedNVSwitch, + }, + Validator: gpuv1.ValidatorSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "gpu-operator-validator", + Version: "v1.0.0", + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "nvidia-sandbox-device-plugin-ctr", + Image: "nvcr.io/nvidia/kubevirt-device-plugin:v1.2.0", + ImagePullPolicy: corev1.PullIfNotPresent, + Env: []corev1.EnvVar{ + { + Name: "ENABLE_FABRIC_MANAGER", + Value: "true", + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "run-nvidia-fabricmanager", + MountPath: "/run/nvidia-fabricmanager", + }, + }, + }).WithInitContainer(corev1.Container{ + Name: "toolkit-validation", + Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: rootUID, + }, + }).WithVolume(corev1.Volume{ + Name: "run-nvidia-fabricmanager", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/run/nvidia-fabricmanager", + Type: ptr.To(corev1.HostPathDirectoryOrCreate), + }, + }, + }), + errorExpected: false, + }, + { + description: "sandbox device plugin without fabric manager shared-nvswitch mode", + ds: NewDaemonset().WithContainer(corev1.Container{Name: "nvidia-sandbox-device-plugin-ctr"}). + WithInitContainer(corev1.Container{Name: "toolkit-validation"}), + cpSpec: &gpuv1.ClusterPolicySpec{ + SandboxDevicePlugin: gpuv1.SandboxDevicePluginSpec{ + Repository: "nvcr.io/nvidia", + Image: "kubevirt-device-plugin", + Version: "v1.2.0", + }, + Validator: gpuv1.ValidatorSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "gpu-operator-validator", + Version: "v1.0.0", + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "nvidia-sandbox-device-plugin-ctr", + Image: "nvcr.io/nvidia/kubevirt-device-plugin:v1.2.0", + ImagePullPolicy: corev1.PullIfNotPresent, + }).WithInitContainer(corev1.Container{ + Name: "toolkit-validation", + Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: rootUID, + }, + }), + errorExpected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + err := TransformSandboxDevicePlugin(tc.ds.DaemonSet, tc.cpSpec, + ClusterPolicyController{operatorNamespace: "test-ns", logger: ctrl.Log.WithName("test")}) + if tc.errorExpected { + require.Error(t, err) + return + } + require.NoError(t, err) + require.EqualValues(t, tc.expectedDs, tc.ds) + }) + } +} diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index 379e98d87..b8a6ad74a 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -1057,6 +1057,17 @@ spec: type: string type: object type: object + fabricManager: + description: FabricManager component spec + properties: + mode: + default: full-passthrough + description: Mode indicates the Fabric Manager mode + enum: + - full-passthrough + - shared-nvswitch + type: string + type: object gdrcopy: description: GDRCopy component spec properties: diff --git 
a/internal/state/testdata/golden/driver-additional-configs.yaml b/internal/state/testdata/golden/driver-additional-configs.yaml index 88d0e7a09..f150861d2 100644 --- a/internal/state/testdata/golden/driver-additional-configs.yaml +++ b/internal/state/testdata/golden/driver-additional-configs.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-full-spec.yaml b/internal/state/testdata/golden/driver-full-spec.yaml index 2397f42fb..0e9a01b81 100644 --- a/internal/state/testdata/golden/driver-full-spec.yaml +++ b/internal/state/testdata/golden/driver-full-spec.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml b/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml index 4b1a6f85b..fda787c54 100644 --- a/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml +++ b/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-gdrcopy.yaml b/internal/state/testdata/golden/driver-gdrcopy.yaml index cd56e8a93..0e1232f16 100644 --- a/internal/state/testdata/golden/driver-gdrcopy.yaml +++ b/internal/state/testdata/golden/driver-gdrcopy.yaml @@ -106,8 +106,14 @@ data: fi if ! 
nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-gds.yaml b/internal/state/testdata/golden/driver-gds.yaml index b14b03af3..6d7b523b6 100644 --- a/internal/state/testdata/golden/driver-gds.yaml +++ b/internal/state/testdata/golden/driver-gds.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-minimal.yaml b/internal/state/testdata/golden/driver-minimal.yaml index 890a40cee..265ce4d4b 100644 --- a/internal/state/testdata/golden/driver-minimal.yaml +++ b/internal/state/testdata/golden/driver-minimal.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml b/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml index f979ac36e..1009c10d9 100644 --- a/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml +++ b/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-precompiled.yaml b/internal/state/testdata/golden/driver-precompiled.yaml index e6a37c48d..85c126f7e 100644 --- a/internal/state/testdata/golden/driver-precompiled.yaml +++ b/internal/state/testdata/golden/driver-precompiled.yaml @@ -106,8 +106,14 @@ data: fi if ! 
nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-rdma-hostmofed.yaml b/internal/state/testdata/golden/driver-rdma-hostmofed.yaml index e29367438..292808d84 100644 --- a/internal/state/testdata/golden/driver-rdma-hostmofed.yaml +++ b/internal/state/testdata/golden/driver-rdma-hostmofed.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-rdma.yaml b/internal/state/testdata/golden/driver-rdma.yaml index 2efe95107..36ebfb36c 100644 --- a/internal/state/testdata/golden/driver-rdma.yaml +++ b/internal/state/testdata/golden/driver-rdma.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-secret-env.yaml b/internal/state/testdata/golden/driver-secret-env.yaml index 8b2c277a5..ab767d741 100644 --- a/internal/state/testdata/golden/driver-secret-env.yaml +++ b/internal/state/testdata/golden/driver-secret-env.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml b/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml index 7e61f189f..1bc4ccd25 100644 --- a/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml +++ b/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml @@ -106,8 +106,14 @@ data: fi if ! 
nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-vgpu-host-manager.yaml b/internal/state/testdata/golden/driver-vgpu-host-manager.yaml index 6df18ad31..072052627 100644 --- a/internal/state/testdata/golden/driver-vgpu-host-manager.yaml +++ b/internal/state/testdata/golden/driver-vgpu-host-manager.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-vgpu-licensing-secret.yaml b/internal/state/testdata/golden/driver-vgpu-licensing-secret.yaml index 66e04a502..ec0e41a5b 100644 --- a/internal/state/testdata/golden/driver-vgpu-licensing-secret.yaml +++ b/internal/state/testdata/golden/driver-vgpu-licensing-secret.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/internal/state/testdata/golden/driver-vgpu-licensing.yaml b/internal/state/testdata/golden/driver-vgpu-licensing.yaml index 6d95d1c09..dbb26b457 100644 --- a/internal/state/testdata/golden/driver-vgpu-licensing.yaml +++ b/internal/state/testdata/golden/driver-vgpu-licensing.yaml @@ -106,8 +106,14 @@ data: fi if ! nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" diff --git a/manifests/state-driver/0400_configmap.yaml b/manifests/state-driver/0400_configmap.yaml index 55ba3df55..34802a6d5 100644 --- a/manifests/state-driver/0400_configmap.yaml +++ b/manifests/state-driver/0400_configmap.yaml @@ -26,8 +26,14 @@ data: fi if ! 
nvidia-smi; then - echo "nvidia-smi failed" - exit 1 + # For vm-passthrough with shared-nvswitch mode, nvidia-smi may fail due to unbound devices + # Fall back to checking if nvidia module is loaded when FABRIC_MANAGER_FABRIC_MODE=1 + if [ "${FABRIC_MANAGER_FABRIC_MODE:-}" = "1" ] && lsmod | grep -q "^nvidia "; then + echo "nvidia-smi failed but nvidia module is loaded (vm-passthrough with shared-nvswitch mode)" + else + echo "nvidia-smi failed" + exit 1 + fi fi GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}"
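
Note for reviewers: a minimal ClusterPolicy snippet exercising the new field is sketched below. The fabricManager.mode values come from the CRD schema in this diff; the surrounding sandboxWorkloads and driver fields are illustrative of the combination that ValidateFabricManagerConfig accepts, and are not part of this change.

apiVersion: nvidia.com/v1
kind: ClusterPolicy
metadata:
  name: cluster-policy
spec:
  sandboxWorkloads:
    enabled: true
    defaultWorkload: vm-passthrough
  fabricManager:
    # shared-nvswitch requires the driver on vm-passthrough nodes;
    # ValidateFabricManagerConfig rejects this combination when driver.enabled is false.
    mode: shared-nvswitch
  driver:
    enabled: true

With this configuration, the operator labels vm-passthrough nodes with nvidia.com/gpu.deploy.driver=true, the driver container is started with FABRIC_MANAGER_FABRIC_MODE=1, and the VFIO manager waits for the driver-ready validation file before unbinding devices, as implemented above.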