Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion assets/state-cc-manager/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ spec:
effect: NoSchedule
nodeSelector:
nvidia.com/gpu.deploy.cc-manager: "true"
nvidia.com/cc.capable: "true"
priorityClassName: system-node-critical
serviceAccountName: nvidia-cc-manager
containers:
Expand Down
105 changes: 0 additions & 105 deletions deployments/gpu-operator/templates/nodefeaturerules.yaml
Original file line number Diff line number Diff line change
@@ -1,111 +1,6 @@
{{- if .Values.nfd.nodefeaturerules }}
# NodeFeatureRule consumed by Node Feature Discovery (NFD). It derives node
# labels and extended resources from the CPU security features and PCI devices
# that NFD discovers, and finally marks nodes that pair a Hopper GPU with
# SEV-SNP or TDX as "nvidia.com/cc.capable".
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
  name: nvidia-nfd-nodefeaturerules
spec:
  rules:
    # ---- CPU confidential-computing features (source: cpu.security) ----

    # Label the node when cpu.security reports tdx.enabled as true.
    - name: 'TDX rule'
      labels:
        tdx.enabled: 'true'
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            tdx.enabled:
              op: IsTrue

    # Same match as above, but publishes the discovered tdx.total_keys value
    # as an extended resource ("@..." references a discovered attribute).
    - name: 'TDX total keys rule'
      extendedResources:
        tdx.total_keys: '@cpu.security.tdx.total_keys'
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            tdx.enabled:
              op: IsTrue

    # Label the node when cpu.security reports sev.snp.enabled as true.
    - name: 'SEV-SNP rule'
      labels:
        sev.snp.enabled: 'true'
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            sev.snp.enabled:
              op: IsTrue

    # Label the node when cpu.security reports sev.es.enabled as true.
    - name: 'SEV-ES rule'
      labels:
        sev.es.enabled: 'true'
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            sev.es.enabled:
              op: IsTrue

    # Publish the SEV ASID / encrypted-state ID values as extended resources
    # whenever the sev.enabled attribute exists (any value).
    - name: 'SEV system capacities'
      extendedResources:
        sev_asids: '@cpu.security.sev.asids'
        sev_es: '@cpu.security.sev.encrypted_state_ids'
      matchFeatures:
        - feature: cpu.security
          matchExpressions:
            sev.enabled:
              op: Exists

    # ---- GPU models (source: pci.device) ----
    # Each rule matches PCI vendor 10de plus one device ID and sets a
    # model-specific label together with nvidia.com/gpu.family=hopper.

    - name: 'NVIDIA H100'
      labels:
        nvidia.com/gpu.H100: 'true'
        nvidia.com/gpu.family: 'hopper'
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor:
              op: In
              value: ['10de']
            device:
              op: In
              value: ['2339']

    - name: 'NVIDIA H100 PCIe'
      labels:
        nvidia.com/gpu.H100.pcie: 'true'
        nvidia.com/gpu.family: 'hopper'
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor:
              op: In
              value: ['10de']
            device:
              op: In
              value: ['2331']

    - name: 'NVIDIA H100 80GB HBM3'
      labels:
        nvidia.com/gpu.H100.HBM3: 'true'
        nvidia.com/gpu.family: 'hopper'
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor:
              op: In
              value: ['10de']
            device:
              op: In
              value: ['2330']

    - name: 'NVIDIA H800'
      labels:
        nvidia.com/gpu.H800: 'true'
        nvidia.com/gpu.family: 'hopper'
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor:
              op: In
              value: ['10de']
            device:
              op: In
              value: ['2324']

    - name: 'NVIDIA H800 PCIE'
      labels:
        nvidia.com/gpu.H800.pcie: 'true'
        nvidia.com/gpu.family: 'hopper'
      matchFeatures:
        - feature: pci.device
          matchExpressions:
            vendor:
              op: In
              value: ['10de']
            device:
              op: In
              value: ['2322']

    # ---- Confidential-computing capability ----
    # Built on the outputs of the rules above via the rule.matched feature, so
    # it must stay after them. Either alternative under matchAny is enough.
    - name: 'NVIDIA CC Enabled'
      labels:
        nvidia.com/cc.capable: 'true'
      matchAny:  # TDX/SEV + Hopper GPU
        # Hopper GPU on a node with SEV-SNP enabled.
        - matchFeatures:
            - feature: rule.matched
              matchExpressions:
                nvidia.com/gpu.family:
                  op: In
                  value: ['hopper']
                sev.snp.enabled:
                  op: IsTrue
        # Hopper GPU on a node with TDX enabled.
        - matchFeatures:
            - feature: rule.matched
              matchExpressions:
                nvidia.com/gpu.family:
                  op: In
                  value: ['hopper']
                tdx.enabled:
                  op: IsTrue
---
apiVersion: nfd.k8s-sigs.io/v1alpha1
kind: NodeFeatureRule
metadata:
name: nvidia-kernel-modules
spec:
Expand Down
4 changes: 2 additions & 2 deletions deployments/gpu-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,8 @@ sandboxDevicePlugin:
resources: {}

ccManager:
enabled: false
defaultMode: "off"
enabled: true
defaultMode: "on"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question -- does this mean that by default when cc-manager runs on a CC-capable system it will enable CC mode on the GPUs?

repository: nvcr.io/nvidia/cloud-native
image: k8s-cc-manager
version: v0.1.1
Expand Down