Skip to content

Commit

Permalink
fix: nvidia-dra-plugin Config
Browse files Browse the repository at this point in the history
  • Loading branch information
CoderTH committed Dec 14, 2023
1 parent a5f2b34 commit 1eb01b4
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 28 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
/set-nas-status
/nvidia-dra-controller
/nvidia-dra-plugin
.idea
[._]*.sw[a-p]
2 changes: 1 addition & 1 deletion cmd/nvidia-dra-plugin/device_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
nvdevlib: nvdevlib,
}

err = state.syncPreparedDevicesFromCRDSpec(ctx, &config.nascrd.Spec)
err = state.syncPreparedDevicesFromCRDSpec(ctx, &config.nascr.Spec)
if err != nil {
return nil, fmt.Errorf("unable to sync prepared devices from CRD: %w", err)
}
Expand Down
21 changes: 10 additions & 11 deletions cmd/nvidia-dra-plugin/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ const (

type driver struct {
sync.Mutex
// TODO: Rename to nascr
nascrd *nascrd.NodeAllocationState
nascr *nascrd.NodeAllocationState
nasclient *nasclient.Client
state *DeviceState
}

func NewDriver(ctx context.Context, config *Config) (*driver, error) {
var d *driver
client := nasclient.New(config.nascrd, config.clientset.Nvidia.NasV1alpha1())
client := nasclient.New(config.nascr, config.clientsets.Nvidia.NasV1alpha1())
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
err := client.GetOrCreate(ctx)
if err != nil {
Expand All @@ -63,7 +62,7 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
return err
}

err = client.Update(ctx, state.GetUpdatedSpec(&config.nascrd.Spec))
err = client.Update(ctx, state.GetUpdatedSpec(&config.nascr.Spec))
if err != nil {
return err
}
Expand All @@ -74,7 +73,7 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
}

d = &driver{
nascrd: config.nascrd,
nascr: config.nascr,
nasclient: client,
state: state,
}
Expand Down Expand Up @@ -137,7 +136,7 @@ func (d *driver) IsPrepared(ctx context.Context, claimUID string) (bool, []strin
if err != nil {
return false, nil, err
}
if _, exists := d.nascrd.Spec.PreparedClaims[claimUID]; exists {
if _, exists := d.nascr.Spec.PreparedClaims[claimUID]; exists {
return true, d.state.cdi.GetClaimDevices(claimUID), nil
}
return false, nil, nil
Expand All @@ -152,12 +151,12 @@ func (d *driver) Prepare(ctx context.Context, claimUID string) ([]string, error)
return err
}

prepared, err = d.state.Prepare(ctx, claimUID, d.nascrd.Spec.AllocatedClaims[claimUID])
prepared, err = d.state.Prepare(ctx, claimUID, d.nascr.Spec.AllocatedClaims[claimUID])
if err != nil {
return err
}

err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascrd.Spec))
err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascr.Spec))
if err != nil {
return err
}
Expand All @@ -182,7 +181,7 @@ func (d *driver) Unprepare(ctx context.Context, claimUID string) error {
return err
}

err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascrd.Spec))
err = d.nasclient.Update(ctx, d.state.GetUpdatedSpec(&d.nascr.Spec))
if err != nil {
return err
}
Expand Down Expand Up @@ -212,7 +211,7 @@ func (d *driver) CleanupStaleStateContinuously(ctx context.Context) {

func (d *driver) cleanupStaleStateOnce(ctx context.Context) (string, error) {
listOptions := metav1.ListOptions{
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascrd.Name),
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascr.Name),
}

list, err := d.nasclient.List(ctx, listOptions)
Expand All @@ -237,7 +236,7 @@ func (d *driver) cleanupStaleStateContinuously(ctx context.Context, resourceVers
watchOptions := metav1.ListOptions{
Watch: true,
ResourceVersion: resourceVersion,
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascrd.Name),
FieldSelector: fmt.Sprintf("metadata.name=%s", d.nascr.Name),
}

if previousError != nil {
Expand Down
14 changes: 6 additions & 8 deletions cmd/nvidia-dra-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,9 @@ type Flags struct {
}

type Config struct {
flags *Flags
// TODO: Rename to nascr
nascrd *nascrd.NodeAllocationState
// TODO: Rename to clientsets
clientset flags.ClientSets
flags *Flags
nascr *nascrd.NodeAllocationState
clientsets flags.ClientSets
}

func main() {
Expand Down Expand Up @@ -132,9 +130,9 @@ func newApp() *cli.App {
}

config := &Config{
flags: flags,
nascrd: nascr,
clientset: clientSets,
flags: flags,
nascr: nascr,
clientsets: clientSets,
}

return StartPlugin(ctx, config)
Expand Down
16 changes: 8 additions & 8 deletions cmd/nvidia-dra-plugin/sharing.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ func NewMpsManager(config *Config, deviceLib *deviceLib, controlFilesRoot, hostD

func (m *MpsManager) NewMpsControlDaemon(claim *nascrd.ClaimInfo, devices *PreparedDevices, config *nascrd.MpsConfig) *MpsControlDaemon {
return &MpsControlDaemon{
nodeName: m.config.nascrd.Name,
namespace: m.config.nascrd.Namespace,
nodeName: m.config.nascr.Name,
namespace: m.config.nascr.Namespace,
name: fmt.Sprintf(MpsControlDaemonNameFmt, claim.UID),
claim: claim,
rootDir: fmt.Sprintf("%s/%s", m.controlFilesRoot, claim.UID),
Expand All @@ -147,7 +147,7 @@ func (m *MpsManager) NewMpsControlDaemon(claim *nascrd.ClaimInfo, devices *Prepa

func (m *MpsManager) IsControlDaemonStarted(ctx context.Context, claim *nascrd.ClaimInfo) (bool, error) {
name := fmt.Sprintf(MpsControlDaemonNameFmt, claim.UID)
_, err := m.config.clientset.Core.AppsV1().Deployments(m.config.nascrd.Namespace).Get(ctx, name, metav1.GetOptions{})
_, err := m.config.clientsets.Core.AppsV1().Deployments(m.config.nascr.Namespace).Get(ctx, name, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
Expand All @@ -159,7 +159,7 @@ func (m *MpsManager) IsControlDaemonStarted(ctx context.Context, claim *nascrd.C

func (m *MpsManager) IsControlDaemonStopped(ctx context.Context, claim *nascrd.ClaimInfo) (bool, error) {
name := fmt.Sprintf(MpsControlDaemonNameFmt, claim.UID)
_, err := m.config.clientset.Core.AppsV1().Deployments(m.config.nascrd.Namespace).Get(ctx, name, metav1.GetOptions{})
_, err := m.config.clientsets.Core.AppsV1().Deployments(m.config.nascr.Namespace).Get(ctx, name, metav1.GetOptions{})
if errors.IsNotFound(err) {
return true, nil
}
Expand Down Expand Up @@ -263,7 +263,7 @@ func (m *MpsControlDaemon) Start(ctx context.Context) error {
}
}

_, err = m.manager.config.clientset.Core.AppsV1().Deployments(m.namespace).Create(ctx, &deployment, metav1.CreateOptions{})
_, err = m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Create(ctx, &deployment, metav1.CreateOptions{})
if errors.IsAlreadyExists(err) {
return nil
}
Expand All @@ -289,7 +289,7 @@ func (m *MpsControlDaemon) AssertReady(ctx context.Context) error {
return true
},
func() error {
deployment, err := m.manager.config.clientset.Core.AppsV1().Deployments(m.namespace).Get(
deployment, err := m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Get(
ctx,
m.name,
metav1.GetOptions{},
Expand All @@ -304,7 +304,7 @@ func (m *MpsControlDaemon) AssertReady(ctx context.Context) error {

selector := deployment.Spec.Selector.MatchLabels

pods, err := m.manager.config.clientset.Core.CoreV1().Pods(m.namespace).List(
pods, err := m.manager.config.clientsets.Core.CoreV1().Pods(m.namespace).List(
ctx,
metav1.ListOptions{
LabelSelector: labels.Set(selector).AsSelector().String(),
Expand Down Expand Up @@ -366,7 +366,7 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {
PropagationPolicy: &deletePolicy,
}

err = m.manager.config.clientset.Core.AppsV1().Deployments(m.namespace).Delete(ctx, m.name, deleteOptions)
err = m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Delete(ctx, m.name, deleteOptions)
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("failed to delete deployment: %w", err)
}
Expand Down

0 comments on commit 1eb01b4

Please sign in to comment.