Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Resolve MCAD is in crash loop backoff mode after deploying it in quota management mode #409
  • Loading branch information
z103cb committed Jun 14, 2023
commit e32caa0972b53e4ca82c89d54b8796692a7f0b86
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func NewQuotaManager(dispatchedAWDemands map[string]*clusterstateapi.Resource, d

// Set the name of the forest in the backend
qm.quotaManagerBackend.AddForest(QuotaManagerForestName)
klog.V(10).Infof("[NewQuotaManager] Before initialization QuotaSubtree informer - %s", qm.quotaManagerBackend.String())
klog.V(4).Infof("[NewQuotaManager] Before initialization QuotaSubtree informer - %s", qm.quotaManagerBackend.String())

// Create a resource plan manager
qm.quotaSubtreeManager, err = qstmanager.NewQuotaSubtreeManager(config, qm.quotaManagerBackend)
Expand Down Expand Up @@ -171,19 +171,47 @@ func (qm *QuotaManager) loadDispatchedAWs(dispatchedAWDemands map[string]*cluste
klog.V(4).Infof("[loadDispatchedAWs] No dispatched AppWrappers found to preload.")
return nil
}
allTrees := qm.GetValidQuotaLabels()
klog.V(4).Infof("[loadDispatchedAWs] valid quota labels:%v", allTrees)
if len(allTrees) == 0 && len(dispatchedAWs) > 0 {
klog.Warning("[loadDispatchedAWs] No quota trees are defined in the cluster.")
klog.Warning("[loadDispatchedAWs] The resources for the following app wrappers will not be counted in the quota tree:")
for k := range dispatchedAWDemands {
aw := getDispatchedAppWrapper(dispatchedAWs, k)
if aw != nil {
klog.Warningf("[loadDispatchedAWs] app wrapper %s/%s not counted. AW labels: %v", aw.Namespace, aw.Name, aw.GetLabels())
}
}
return nil
}

// Process list of AppWrappers that are already dispatched
var result *multierror.Error

for k, v := range dispatchedAWDemands {
aw := getDispatchedAppWrapper(dispatchedAWs, k)
if aw != nil {
klog.V(4).Infof("[loadDispatchedAWs] Dispatched AppWrappers %s/%s found to preload.", aw.Namespace, aw.Name)
newLabels := make(map[string]string)
for key, value := range aw.Labels {
newLabels[key] = value
}
for _, treeName := range allTrees {
if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW {
newLabels[treeName] = "default"
klog.V(4).Infof("[loadDispatchedAWs] Dispatched AppWrappers %s/%s adding default quota labels.", aw.Namespace, aw.Name)
}

}
aw.SetLabels(newLabels)

doesFit, preemptionIds, errorMessage := qm.Fits(aw, v, nil)
if !doesFit {
klog.Errorf("[loadDispatchedAWs] Loading of AppWrapper %s/%s failed.",
aw.Namespace, aw.Name)
result = multierror.Append(result, fmt.Errorf("loading of AppWrapper %s/%s failed, %s",
aw.Namespace, aw.Name, errorMessage))
qm.Release(aw)
}

if len(preemptionIds) > 0 {
Expand All @@ -192,7 +220,6 @@ func (qm *QuotaManager) loadDispatchedAWs(dispatchedAWDemands map[string]*cluste
result = multierror.Append(result, fmt.Errorf("loading of AppWrapper %s/%s caused invalid preemptions: %v. Quota Manager is in inconsistent state",
aw.Namespace, aw.Name, preemptionIds))
}
klog.V(4).Infof("[loadDispatchedAWs] Dispatched AppWrappers %s/%s found to preload.", aw.Namespace, aw.Name)
} else {
klog.Warningf("[loadDispatchedAWs] Unable to obtain AppWrapper from key: %s. Loading of AppWrapper will be skipped.", k)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func (qstm *QuotaSubtreeManager) addQST(obj interface{}) {
qstm.qstMap[qst.Namespace+"/"+qst.Name] = qst
qstm.setQuotasubtreeChanged()
qstm.qstMutex.Unlock()
klog.V(10).Infof("[addQST] Add complete for: %s/%s", qst.Name, qst.Namespace)
klog.V(4).Infof("[addQST] Add complete for: %s/%s", qst.Name, qst.Namespace)
}

func (qstm *QuotaSubtreeManager) updateQST(oldObj, newObj interface{}) {
Expand Down Expand Up @@ -65,7 +65,7 @@ func (qstm *QuotaSubtreeManager) updateQST(oldObj, newObj interface{}) {
qstm.setQuotasubtreeChanged()
qstm.qstMutex.Unlock()
}
klog.V(10).Infof("[updateQST] Update complete for: %s/%s", newQST.Name, newQST.Namespace)
klog.V(4).Infof("[updateQST] Update complete for: %s/%s", newQST.Name, newQST.Namespace)
}

func (qstm *QuotaSubtreeManager) deleteQST(obj interface{}) {
Expand All @@ -79,5 +79,5 @@ func (qstm *QuotaSubtreeManager) deleteQST(obj interface{}) {

delete(qstm.qstMap, string(qst.UID))
delete(qstm.qstMap, qst.Namespace+"/"+qst.Name)
klog.V(10).Infof("[deleteQST] Delete complete for: %s/%s", qst.Name, qst.Namespace)
klog.V(4).Infof("[deleteQST] Delete complete for: %s/%s", qst.Name, qst.Namespace)
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func newQuotaSubtreeManager(config *rest.Config, quotaManagerBackend *qmlib.Mana
qstm := &QuotaSubtreeManager{
quotaManagerBackend: quotaManagerBackend,
qstMap: make(map[string]*qstv1.QuotaSubtree),
qstChanged: true,
}
// QuotaSubtree informer setup
qstClient, err := qst.NewForConfigOrDie(config)
Expand All @@ -83,19 +84,19 @@ func newQuotaSubtreeManager(config *rest.Config, quotaManagerBackend *qmlib.Mana

// Start resource plan informers
neverStop := make(chan struct{})
klog.V(10).Infof("[newQuotaSubtreeManager] Starting QuotaSubtree Informer.")
klog.V(4).Infof("[newQuotaSubtreeManager] Starting QuotaSubtree Informer.")
go qstm.quotaSubtreeInformer.Informer().Run(neverStop)

// Wait for cache sync
klog.V(10).Infof("[newQuotaSubtreeManager] Waiting for QuotaSubtree informer cache sync. to complete.")
klog.V(4).Infof("[newQuotaSubtreeManager] Waiting for QuotaSubtree informer cache sync. to complete.")
qstm.qstSynced = qstm.quotaSubtreeInformer.Informer().HasSynced
if !cache.WaitForCacheSync(neverStop, qstm.qstSynced) {
return nil, errors.New("failed to wait for the quota sub tree informer to synch")
}

// Initialize Quota Trees
qstm.initializeQuotaTreeBackend()
klog.V(10).Infof("[newQuotaSubtreeManager] QuotaSubtree Manager initialization complete.")
klog.V(4).Infof("[newQuotaSubtreeManager] QuotaSubtree Manager initialization complete.")
return qstm, nil
}

Expand Down Expand Up @@ -129,7 +130,7 @@ func (qstm *QuotaSubtreeManager) clearQuotasubtreeChanged() {
// IsQuotasubtreeChanged returns the current value of the manager's
// qstChanged flag. The flag is read while holding the read side of
// qstMutex, and its value is logged at verbosity level 4 before returning.
func (qstm *QuotaSubtreeManager) IsQuotasubtreeChanged() bool {
	qstm.qstMutex.RLock()
	defer qstm.qstMutex.RUnlock()

	// Read the flag once so the logged value and the returned value match.
	changed := qstm.qstChanged
	klog.V(4).Infof("[IsQuotasubtreeChanged] QuotaSubtree Manager changed %t.", changed)
	return changed
}

Expand Down Expand Up @@ -178,7 +179,7 @@ func (qstm *QuotaSubtreeManager) createTreeNodesFromQST(qst *qstv1.QuotaSubtree)
Quota: quota,
Hard: strconv.FormatBool(qstChild.Quotas.HardLimit),
}
klog.V(10).Infof("[createTreeNodesFromQST] Created node: %s=%#v for QuotaSubtree %s completed.",
klog.V(4).Infof("[createTreeNodesFromQST] Created node: %s=%#v for QuotaSubtree %s completed.",
child_key, *node, qst.Name)

//Add to the list of nodes from this quotasubtree
Expand Down