Skip to content
Closed

Doc #181

Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
285 commits
Select commit Hold shift + click to select a range
2a4ae7e
Adjusted logging levels in cleanup to debugging level.
dmatch01 Nov 16, 2020
e03d4c1
Added additional time for initial test case to account for new condit…
dmatch01 Nov 16, 2020
3ef8fc7
Add logic to remove initial deletion of AW resources.
dmatch01 Nov 16, 2020
61026b3
Backout changes of added conditions for backoff.
dmatch01 Nov 16, 2020
b4007a3
Revert changes to test for 100 AWs.
dmatch01 Nov 16, 2020
d8b333e
Removed additional query to cache for updates to etcd.
dmatch01 Nov 16, 2020
cd2a1fc
Pad more time before cleanup of test case 100 AWs.
dmatch01 Nov 16, 2020
f8a26bc
Added non-cache status update to conditions.
dmatch01 Nov 16, 2020
66802c0
Added refresh of AW from cache before updating initial backoff condit…
dmatch01 Nov 16, 2020
c9379d0
Cleanup for add conditions branch.
dmatch01 Nov 16, 2020
0a04bac
Merge pull request #83 from dmatch01/quota-management-add-conditions
dmatch01 Nov 16, 2020
5404a4c
Upgrade testing and helm deployment to k8s 1.17.0.
dmatch01 Dec 11, 2020
dff36da
Update kubectl version and modify kind config.
dmatch01 Dec 11, 2020
db49662
Start of change to add branch name to image tag.
dmatch01 Dec 11, 2020
75f8a62
Updated kind apiVersion for kind config file.
dmatch01 Dec 11, 2020
92cadd4
Changed generic deployment test to valid apiVersion for k8s v1.17.0
dmatch01 Dec 11, 2020
cf009b9
Fix syntax in Makefile.
dmatch01 Dec 11, 2020
9a8da8a
Merge pull request #84 from dmatch01/quota-management-k8s-1.17-support
dmatch01 Dec 11, 2020
307c4ab
Merge branch 'master' into quota-management
dmatch01 Dec 20, 2020
7a9585e
Resolved merge conflicts with master branch.
dmatch01 Dec 22, 2020
0e6b243
Merge from master.
dmatch01 Dec 23, 2020
ea02a53
Merge pull request #87 from dmatch01/quota-management
dmatch01 Dec 23, 2020
0fbeb47
Migrate Ubuntu to UBI8 minimal image.
tripathysa Apr 6, 2021
5af2b0a
Merge pull request #92 from tripathysa/patch-1
dmatch01 Apr 6, 2021
09b8760
Update controller version mod number after base image change to contr…
dmatch01 Apr 21, 2021
a4db18b
Merge pull request #98 from dmatch01/quota-management-update-version-mod
dmatch01 Apr 21, 2021
6c46fd2
Removed generated code from repo.
dmatch01 Apr 21, 2021
ad53572
Merge pull request #99 from dmatch01/quota-management-cp-remove-gener…
dmatch01 Apr 22, 2021
bb1c63b
Fix golang version for controller build.
dmatch01 Apr 15, 2021
50fb36a
Merge pull request #100 from dmatch01/quota-management-fix-golang-bui…
dmatch01 Apr 23, 2021
11accce
Bug fix to only include quota label from job in quota request (#101)
dmatch01 Apr 29, 2021
eb0b3e4
Cleanup of old dead code from scheduler component that has since been…
dmatch01 Apr 30, 2021
c00ee62
Removed extraneous backup file.
dmatch01 May 3, 2021
846561b
Merge pull request #105 from dmatch01/quota-management-cherry-pick-PR…
dmatch01 May 3, 2021
4122503
Increment version mod. for building new image.
dmatch01 May 4, 2021
5abee69
Merge pull request #106 from dmatch01/quota-management-increment-vers…
dmatch01 May 4, 2021
9a806a9
Partial cleanup of vendor folder 3rd set.
dmatch01 Jun 26, 2021
4bfcaa1
build: use go modules
kyleschlosser Apr 21, 2021
b9450fb
Suppress flag
Nhan-T-Hoang Apr 22, 2021
3db3c22
Resolve context argument missing
Nhan-T-Hoang Apr 23, 2021
886bee0
Update k8s version, api name change, syntax update
Nhan-T-Hoang Apr 26, 2021
29631cc
Fix InstrumentRouteFunc
Apr 27, 2021
bc01843
errors
Apr 27, 2021
1fb2fa3
Final fixes
Nhan-T-Hoang Apr 27, 2021
b58861b
Address 2 review comments
Nhan-T-Hoang Apr 28, 2021
c31a193
Resolve build test
Nhan-T-Hoang Apr 30, 2021
db639dd
Cleanup of old dead code from scheduler component that has since been…
dmatch01 Apr 30, 2021
c8220a1
Removed extraneous backup file.
dmatch01 May 3, 2021
1e62ac1
Resolve the scheduler dependency
Nhan-T-Hoang May 3, 2021
1c60d6c
Testing at 300sec
Nhan-T-Hoang May 4, 2021
f4bb277
upgrade go version to 1.16 in travis
May 4, 2021
287122f
update golang docker version
Nhan-T-Hoang May 4, 2021
a7c9190
Debug -i option removed
Nhan-T-Hoang May 5, 2021
cef5bbe
updating fake discovery
May 5, 2021
4f54fab
Removing the api server test to pass the tests
May 12, 2021
4a0fa24
updating go sum
May 12, 2021
15281eb
adding -mod=mod
May 12, 2021
171dd82
removing -mod
May 12, 2021
3fcc954
test
May 12, 2021
f404467
commenting out tests to check logs
May 13, 2021
8b53fda
debug message
May 13, 2021
3369508
Enable helm debug
May 13, 2021
4a21430
REVERTING TRAVIS GO VERSION
May 14, 2021
132d42e
Convert glog to klog
May 17, 2021
ca73f55
updating go version in Travis
May 18, 2021
65f9d08
remove go mod tidy
May 18, 2021
df52202
Update .travis.yml
dmatch01 May 19, 2021
b47dcdf
Adding deepcopy gen file
May 19, 2021
53f70d4
Adding a non generic test
May 21, 2021
ed60539
Updating kind config
Jun 11, 2021
ad95c32
updating kind cluster
Jun 16, 2021
5c5cef6
updating kind cluster
Jun 16, 2021
5a887dd
kind version
Jun 16, 2021
151364a
test7
Jun 17, 2021
0471fdf
updating generic resource
Jun 17, 2021
1b571a0
remove debug flag
Jun 18, 2021
93b511e
set log level
Jun 18, 2021
f6970af
running limited tests
Jun 18, 2021
03f98d8
running limited tests
Jun 18, 2021
124c48a
running limited tests
Jun 18, 2021
bb32ab0
Adding new cleanup routine
Jun 21, 2021
498de89
Adding new cleanup routine
Jun 21, 2021
ca837e4
Adding new cleanup routine
Jun 21, 2021
1d1a901
Fixing bugs #165 and #166
Jun 22, 2021
0d50839
Cleanup debug statements
Jun 22, 2021
2b63382
Cleaning
Jun 24, 2021
d2dc056
Cleaning
Jun 24, 2021
c991ab7
Cleaning
Jun 24, 2021
9910a9e
Partial 4 cleanup of vendor folder 2nd set (mostly .go files) restore…
dmatch01 Jun 27, 2021
941b7c0
Partial 4 cleanup of vendor folder 3rd set (mostly .go files) restore…
dmatch01 Jun 27, 2021
75dd99e
Partial 4 cleanup of vendor folder 4th set which restored files neede…
dmatch01 Jun 27, 2021
2bb772a
Partial 4 cleanup of vendor folder 5th set (mostly doc.go files).
dmatch01 Jun 28, 2021
0796321
Addressing PR comments
Jun 28, 2021
c129303
addressing PR comments
Jun 28, 2021
75f756e
addressing PR comments
Jun 28, 2021
21cf7e5
Addressing PR comments
Jun 29, 2021
fca3660
Cherry-pick of master branch go-mod PR.
dmatch01 Jul 6, 2021
db3f720
Update controller build version and controller pod name for helm depl…
dmatch01 Jul 6, 2021
639a445
Fix to e2e script to pickup mcad pod in the correct variable.
dmatch01 Jul 6, 2021
70e67c1
Fix for missing kind for generic resource.
dmatch01 Jul 7, 2021
2f8d1c3
Merge pull request #118 from dmatch01/quota-management-go-mod-adds-an…
dmatch01 Jul 7, 2021
bedaa7d
Added GOPRIVATE build.
dmatch01 Jul 8, 2021
e50bbb0
feat: add health probe http listener
kyleschlosser Jul 13, 2021
c948fe7
Merge pull request #119 from dmatch01/quota-management-alternative-build
dmatch01 Jul 13, 2021
1ea21d6
Increment version mod. number.
dmatch01 Jul 14, 2021
b1587b3
Enabled klog loglevel logging.
dmatch01 Jul 14, 2021
0c4b72a
Merge pull request #122 from dmatch01/quota-management-inc-mod-number
dmatch01 Jul 14, 2021
c81d6de
Merge branch 'quota-management' into quota-management-enable-klog-level
dmatch01 Jul 14, 2021
da3396d
Update CONTROLLER_VERSION
dmatch01 Jul 14, 2021
0b48e5a
Merge pull request #123 from dmatch01/quota-management-enable-klog-level
dmatch01 Jul 14, 2021
af844f6
Added a comment to explain by-product of call to health probe.
dmatch01 Jul 14, 2021
4458e6a
Merge pull request #121 from kyleschlosser/health
dmatch01 Jul 14, 2021
359dc49
Updated klog and flags version library
dmatch01 Jul 15, 2021
da89e0f
Replaced flag library.
dmatch01 Jul 15, 2021
0d71d28
Update CONTROLLER_VERSION
dmatch01 Jul 15, 2021
056c43d
Merge pull request #124 from dmatch01/quota-management-ver-mod-inc
dmatch01 Jul 15, 2021
fc29347
Merge branch 'quota-management' into quota-management-options-flag-lib
dmatch01 Jul 15, 2021
5ff7533
Update CONTROLLER_VERSION
dmatch01 Jul 20, 2021
c012491
Merge pull request #125 from dmatch01/quota-management-options-flag-lib
dmatch01 Jul 20, 2021
9b27085
Moved CRDs from apiextensions v1beta1 to v1 and minor cleanup
dmatch01 Jul 22, 2021
25c77f5
Synced config yaml files read in by controller and operator yaml, mer…
dmatch01 Aug 2, 2021
6014ad9
Merge pull request #130 from dmatch01/quota-management-crd-api-ext-v1
dmatch01 Aug 2, 2021
960a7a6
Increment version minor number.
dmatch01 Aug 2, 2021
96945fb
Increment version minor number beyond latest master.
dmatch01 Aug 2, 2021
6f90f89
Merge pull request #131 from dmatch01/quota-management-inc-version-minor
dmatch01 Aug 3, 2021
651cc47
Part 1 of quota library integration. Initial movement of existing fi…
dmatch01 Aug 3, 2021
bdc0038
Additional prep work for QM integration.
dmatch01 Aug 3, 2021
da300c9
Added check for quota enablement during appwrapper cleanup.
dmatch01 Aug 4, 2021
48bc414
Add waiting for cleanup of pods before starting the next test.
dmatch01 Aug 4, 2021
1a42db0
feat: move from apiVersion v1alpha1 to v1beta1
kyleschlosser Aug 4, 2021
f5fde6c
Added additional check of pods for cleanup.
dmatch01 Aug 4, 2021
a8dfc1b
Corrected definitions to reflect correct names and labels.
dmatch01 Aug 4, 2021
04abbd2
remove PDB field
kyleschlosser Aug 4, 2021
e7a3062
Merge pull request #135 from dmatch01/quota-management-e2e-test-fixes
dmatch01 Aug 4, 2021
d85974a
Merge branch 'quota-management' into quota-management-quota-lib-int-0…
dmatch01 Aug 4, 2021
aff063b
Additional cleanup of PDB.
dmatch01 Aug 5, 2021
0c5447a
Merge pull request #134 from kyleschlosser/v1beta1
dmatch01 Aug 5, 2021
bd28e5f
feat: update sources to match generated schema
kyleschlosser Aug 6, 2021
c104898
build breaks
kyleschlosser Aug 6, 2021
c6d35ee
Review comments
kyleschlosser Aug 6, 2021
0e513c9
Cherry pick e2e annotation changes from master.
dmatch01 Aug 4, 2021
bc74b6b
Merge and resolve conflict of PR 141.
dmatch01 Aug 7, 2021
6f99765
Adjust controller cpu/mem limits in e2e testing.
dmatch01 Aug 8, 2021
9c785ae
Cherry pick PR 142 b89314c83bf100cc2f7b51d3b1376340b447c8d2
dmatch01 Aug 9, 2021
9cc8508
Fix QM bug generating errors in log causing e2e failure due to large …
dmatch01 Aug 9, 2021
0a83873
Reduce logging due to e2e Travis limits.
dmatch01 Aug 9, 2021
f8a0726
Removed crd queries from e2e testing to reduce logging due to Travis …
dmatch01 Aug 9, 2021
1fdaa09
Merge pull request #139 from dmatch01/quota-management-add-e2e-pod-an…
dmatch01 Aug 9, 2021
e3e3fee
Merge and resolve conflict to latest base branch quota-management.
dmatch01 Aug 9, 2021
67578a1
Fix to QM interface to use betav1 API.
dmatch01 Aug 9, 2021
89db368
Merge pull request #132 from dmatch01/quota-management-quota-lib-int-…
dmatch01 Aug 9, 2021
f8fa6cf
Removed unneeded requirements from schemas.
dmatch01 Aug 9, 2021
9f13176
Merge branch 'quota-management' into quota-management-remove-schema-req
dmatch01 Aug 9, 2021
3d0b393
Increment controller minor ver.
dmatch01 Aug 9, 2021
b55711f
Merge pull request #144 from dmatch01/quota-management-inc-minor-ver
dmatch01 Aug 9, 2021
367849e
Added resource plan integration.
dmatch01 Aug 9, 2021
b96a78a
Update CONTROLLER_VERSION
dmatch01 Aug 10, 2021
25bf3c5
Update CONTROLLER_VERSION
dmatch01 Aug 10, 2021
563b8f7
Update CONTROLLER_VERSION
dmatch01 Aug 10, 2021
bbc73d5
Merge commit to quota-management branch HEAD.
dmatch01 Aug 10, 2021
9accd7b
Merge pull request #140 from kyleschlosser/gen
dmatch01 Aug 10, 2021
c559c1f
Merge commit to quota-management branch HEAD.
dmatch01 Aug 10, 2021
80c894d
Merge pull request #143 from dmatch01/quota-management-remove-schema-req
dmatch01 Aug 10, 2021
1b84123
Update description of State in schema.
dmatch01 Aug 10, 2021
af3a4d4
Added resource plan integration. (#145)
dmatch01 Aug 10, 2021
379dc7d
Added quota integration calls to controller.
dmatch01 Aug 11, 2021
8ee0743
Merge branch 'quota-management' into quota-management-quota-lib-int-0…
dmatch01 Aug 11, 2021
d0e9532
Changed condition of values for multi-cluster from bool to string to …
dmatch01 Aug 11, 2021
84fd76e
Added additional test case
dmatch01 Aug 11, 2021
0c76ad8
Remove verbose output.
dmatch01 Aug 11, 2021
a79a329
Resolve merge conflicts.
dmatch01 Aug 11, 2021
e981231
Add additional scripting to orchestrate e2e test cluster.
dmatch01 Aug 11, 2021
5c6c0df
Fix syntax.
dmatch01 Aug 11, 2021
e38e739
Fix grammar.
dmatch01 Aug 11, 2021
ea149f2
Merge pull request #148 from dmatch01/quota-management-orchestrate-e2…
dmatch01 Aug 11, 2021
fa0a3f4
Merge branch 'quota-management' into quota-management-schema-updates
dmatch01 Aug 11, 2021
2a5a0e9
Increment minor ver. number.
dmatch01 Aug 11, 2021
8eec3ad
Merge pull request #146 from dmatch01/quota-management-schema-updates
dmatch01 Aug 11, 2021
31adfc2
Merge branch 'quota-management' into quota-management-quota-lib-int-0…
dmatch01 Aug 11, 2021
ac156d0
Updated logging to klog v2 for consistency.
dmatch01 Aug 12, 2021
90155c6
Reduce logging due to Travis limits.
dmatch01 Aug 12, 2021
ac501bf
Increment minor ver.
dmatch01 Aug 12, 2021
a30ca1f
Merge pull request #149 from dmatch01/quota-management-logging-fix
dmatch01 Aug 12, 2021
98f580f
Fix to allocation resource calculation.
dmatch01 Aug 12, 2021
9163eab
Increment minor ver. number
dmatch01 Aug 12, 2021
17df129
Merge pull request #150 from dmatch01/quota-management-fix-to-resourc…
dmatch01 Aug 12, 2021
f9bd74f
Merge to latest commit to branch quota-management.
dmatch01 Aug 12, 2021
96f4158
Merge pull request #147 from dmatch01/quota-management-quota-lib-int-…
dmatch01 Aug 12, 2021
6c7bf2d
Updated CRDs to remove fields related to status. Causing status upda…
dmatch01 Aug 12, 2021
e3496cf
Increment minor ver.
dmatch01 Aug 12, 2021
ac77e54
Merge pull request #151 from dmatch01/quota-management-update-schema
dmatch01 Aug 12, 2021
cbfdc5d
Modified queuing accounting test for debugging.
dmatch01 Aug 16, 2021
1271da5
Increment minor ver. num.
dmatch01 Aug 16, 2021
742b960
Merge pull request #152 from dmatch01/quota-management-fix-queuing-e2…
dmatch01 Aug 16, 2021
54b42b3
Added additional messaging to quota failure.
dmatch01 Aug 18, 2021
b5f3581
Update WML specific copyrights.
dmatch01 Aug 19, 2021
70c256e
Added new e2e test for generic pod resource calculation.
dmatch01 Aug 19, 2021
4a7d4e4
Fix to handle generic resource with pod not podtemplate.
dmatch01 Aug 19, 2021
2679835
Increment minor ver. number.
dmatch01 Aug 19, 2021
5956e4c
Merge pull request #153 from dmatch01/quota-management-generic-resour…
dmatch01 Aug 19, 2021
8610d0d
Enabled additional messaging in conditions regarding quota management.
dmatch01 Aug 19, 2021
ee3f723
Merge branch 'quota-management' into quota-management-add-missing-quo…
dmatch01 Aug 19, 2021
cf0ab63
Added additional logging to generic resource compute resource allocat…
dmatch01 Aug 19, 2021
40bc55b
Incremented minor ver.
dmatch01 Aug 19, 2021
1aa8563
Merge pull request #154 from dmatch01/quota-management-add-missing-qu…
dmatch01 Aug 19, 2021
0b1055f
Fix server option preempt default.
dmatch01 Aug 19, 2021
3687f88
Incremented minor ver.
dmatch01 Aug 19, 2021
d720b6f
Merge pull request #155 from dmatch01/quota-management-match-preempt-…
dmatch01 Aug 19, 2021
f1bc826
Added node histogram check for GPU requests.
dmatch01 Aug 20, 2021
3c62212
Increment minor ver. number.
dmatch01 Aug 20, 2021
efbf500
Merge pull request #156 from dmatch01/quota-management-add-gpu-histgram
dmatch01 Aug 20, 2021
ff65867
Merge branch 'quota-management' into quota-management-copyright-01
dmatch01 Aug 23, 2021
1026462
Added updated copyright for quota manager.
dmatch01 Aug 23, 2021
72203a5
Increment minor ver. number.
dmatch01 Aug 24, 2021
182c48a
Merge pull request #157 from dmatch01/quota-management-copyright-01
dmatch01 Aug 24, 2021
b3852d0
Additional copyright for WML Core source.
dmatch01 Aug 24, 2021
bf2f54d
Merge branch 'quota-management' into quota-management-copyright-02
dmatch01 Aug 24, 2021
2280dc2
Copyright updates part 3.
dmatch01 Aug 24, 2021
d8421e0
Update copyright part 4.
dmatch01 Aug 24, 2021
921c10d
Merge pull request #158 from dmatch01/quota-management-copyright-02
dmatch01 Aug 24, 2021
fd81693
Merge branch 'quota-management' into quota-management-copyright-03
dmatch01 Aug 24, 2021
e05b027
Merge branch 'quota-management' into quota-management-copyright-04
dmatch01 Aug 24, 2021
0323b4f
Copyright updates part 5.
dmatch01 Aug 24, 2021
946efc8
Merge pull request #159 from dmatch01/quota-management-copyright-03
dmatch01 Aug 24, 2021
a56d247
Merge branch 'quota-management' into quota-management-copyright-04
dmatch01 Aug 24, 2021
354af0a
Copyright updates part 6.
dmatch01 Aug 24, 2021
9e76277
Fix syntax.
dmatch01 Aug 24, 2021
1b43715
Update copyrights part 7.
dmatch01 Aug 24, 2021
7670626
Merge pull request #160 from dmatch01/quota-management-copyright-04
dmatch01 Aug 24, 2021
9d278ca
Merge branch 'quota-management' into quota-management-copyright-05
dmatch01 Aug 24, 2021
7affebd
Copyright updates part 8.
dmatch01 Aug 24, 2021
0d56144
Merge pull request #161 from dmatch01/quota-management-copyright-05
dmatch01 Aug 24, 2021
599ff19
Merge branch 'quota-management' into quota-management-copyright-06
dmatch01 Aug 24, 2021
21f2eab
Updated copyrights part 9.
dmatch01 Aug 24, 2021
eb6e54a
Merge pull request #163 from dmatch01/quota-management-copyright-07
dmatch01 Aug 24, 2021
8d3d50b
Merge pull request #162 from dmatch01/quota-management-copyright-06
dmatch01 Aug 24, 2021
e0e4a4e
Merge branch 'quota-management' into quota-management-copyright-08
dmatch01 Aug 24, 2021
4f52c59
Update copyright source part 10.
dmatch01 Aug 24, 2021
799c77d
Update copyright part 11 of 11 total.
dmatch01 Aug 25, 2021
c33e5bc
Merge pull request #164 from dmatch01/quota-management-copyright-08
dmatch01 Aug 25, 2021
f55f4eb
Merge pull request #167 from dmatch01/quota-management-copyright-11
dmatch01 Aug 25, 2021
c2af337
Merge pull request #165 from dmatch01/quota-management-copyright-09
dmatch01 Aug 25, 2021
f55ac2c
Merge pull request #166 from dmatch01/quota-management-copyright-10
dmatch01 Aug 25, 2021
46b2c91
Fix to recovery of AppWrappers in Queue with pending state.
dmatch01 Aug 26, 2021
d391780
Increment minor ver. number.
dmatch01 Aug 26, 2021
b46dfce
Merge pull request #168 from dmatch01/quota-management-fix-recover-pe…
dmatch01 Aug 26, 2021
cce904e
Fix to recovery for QM.
dmatch01 Aug 26, 2021
c68353c
Update queuejob_controller_ex.go
dmatch01 Aug 26, 2021
7cefaab
Merge pull request #169 from dmatch01/quota-management-fix-recover-pe…
dmatch01 Aug 26, 2021
27f093f
Update genericresource.go
May 19, 2022
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CONTROLLER_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.29.35
1.29.36
8 changes: 4 additions & 4 deletions pkg/controller/clusterstate/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,21 +305,21 @@ func (sc *ClusterStateCache) updateState() error {
}

klog.V(8).Infof("Total capacity %+v, used %+v, free space %+v", total, used, idle)
if klog.V(10).Enabled() {
if klog.V(12).Enabled() {
// CPU histogram
metricCPU := &dto.Metric{}
(*newIdleHistogram.MilliCPU).Write(metricCPU)
klog.V(10).Infof("[updateState] CPU histogram:\n%s", proto.MarshalTextString(metricCPU))
klog.V(12).Infof("[updateState] CPU histogram:\n%s", proto.MarshalTextString(metricCPU))

// Memory histogram
metricMem := &dto.Metric{}
(*newIdleHistogram.Memory).Write(metricMem)
klog.V(10).Infof("[updateState] Memory histogram:\n%s", proto.MarshalTextString(metricMem))
klog.V(12).Infof("[updateState] Memory histogram:\n%s", proto.MarshalTextString(metricMem))

// GPU histogram
metricGPU := &dto.Metric{}
(*newIdleHistogram.GPU).Write(metricGPU)
klog.V(10).Infof("[updateState] GPU histogram:\n%s", proto.MarshalTextString(metricGPU))
klog.V(12).Infof("[updateState] GPU histogram:\n%s", proto.MarshalTextString(metricGPU))
}

err := sc.saveState(idle, total, newIdleHistogram)
Expand Down
10 changes: 10 additions & 0 deletions pkg/controller/queuejob/queuejob_controller_ex.go
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,16 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool
return nil
} // End of first execution of qj to add to qjqueue for ScheduleNext

//Handle recovery condition
if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued &&
!cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj){
cc.qjqueue.AddIfNotPresent(qj)
klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v",
qj.Namespace, qj.Name, qj.Status)

return nil
}

// add qj to Etcd for dispatch
if qj.Status.CanRun && qj.Status.State != arbv1.AppWrapperStateActive {
qj.Status.State = arbv1.AppWrapperStateActive
Expand Down