Skip to content

Commit 5e92f61

Browse files
authored
Fix nightly functional tests failures for datascience (microsoft#6591)
* Fix stack trace issues * Attempt to fix restart issues with restart kernel. * Add async dump for long running test * Fix loop for session * Try waiting longer for shutdown * Put async dump back the way it was before. * Use pypi to install ptvsd * Add more logging * More logging * More logging * Just run functional tests * Remote ready session work. Only wait for ready on restart. * Fix debugger tests. Try not shutting down during restart * More logging * Put off restart session * Add some more logging * More logging * More logging * Fix restart bug * More logging * Linter errors * Loop for idle for restart * Fix linter errors * Let ptvsd be installed * Update package-lock * Remove some logging * Put ptvsd back in the functional requirements. Installer isn't working * Remove some logging and put back full nightly * Remove some logging * Up timeout to 90 minutes
1 parent 9a4c279 commit 5e92f61

File tree

9 files changed

+141
-51
lines changed

9 files changed

+141
-51
lines changed

build/ci/templates/test_phases.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ steps:
203203
python -m pip install -U pip
204204
python -m pip install numpy
205205
python -m pip install --upgrade -r ./build/functional-test-requirements.txt
206+
python -m pip install --upgrade --pre ptvsd
206207
displayName: 'pip install functional requirements'
207208
condition: and(succeeded(), eq(variables['NeedsPythonFunctionalReqs'], 'true'))
208209

build/ci/vscode-python-nightly-ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
# ignorePythonVersions: "3.6,3.5"
3535

3636
- job: 'Nightly'
37-
37+
timeoutInMinutes: 90
3838
strategy:
3939
matrix:
4040
# Each member of this list must contain these values:

build/functional-test-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ jupyter
44
numpy
55
matplotlib
66
pandas
7-
livelossplot
7+
livelossplot

src/client/datascience/jupyter/jupyterServer.ts

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import { CancellationToken } from 'vscode-jsonrpc';
1414

1515
import { ILiveShareApi } from '../../common/application/types';
1616
import { Cancellation, CancellationError } from '../../common/cancellation';
17-
import { traceInfo, traceWarning } from '../../common/logger';
17+
import { traceError, traceInfo, traceWarning } from '../../common/logger';
1818
import { IAsyncDisposableRegistry, IConfigurationService, IDisposableRegistry, ILogger } from '../../common/types';
1919
import { createDeferred, Deferred, waitForPromise } from '../../common/utils/async';
2020
import * as localize from '../../common/utils/localize';
@@ -44,17 +44,21 @@ class CellSubscriber {
4444
private cellRef: ICell;
4545
private subscriber: Subscriber<ICell>;
4646
private promiseComplete: (self: CellSubscriber) => void;
47-
private startTime: number;
47+
private _startTime: number;
4848

4949
constructor(cell: ICell, subscriber: Subscriber<ICell>, promiseComplete: (self: CellSubscriber) => void) {
5050
this.cellRef = cell;
5151
this.subscriber = subscriber;
5252
this.promiseComplete = promiseComplete;
53-
this.startTime = Date.now();
53+
this._startTime = Date.now();
54+
}
55+
56+
public get startTime(): number {
57+
return this._startTime;
5458
}
5559

5660
public isValid(sessionStartTime: number | undefined) {
57-
return sessionStartTime && this.startTime > sessionStartTime;
61+
return sessionStartTime && this.startTime >= sessionStartTime;
5862
}
5963

6064
public next(sessionStartTime: number | undefined) {
@@ -305,15 +309,19 @@ export class JupyterServerBase implements INotebookServer {
305309
// Update our start time so we don't keep sending responses
306310
this.sessionStartTime = Date.now();
307311

312+
traceInfo('restartKernel - finishing cells that are outstanding');
308313
// Complete all pending as an error. We're restarting
309314
this.finishUncompletedCells();
315+
traceInfo('restartKernel - restarting kernel');
310316

311317
// Restart our kernel
312318
await this.session.restart(timeoutMs);
313319

314320
// Rerun our initial setup for the notebook
315321
this.ranInitialSetup = false;
322+
traceInfo('restartKernel - initialSetup');
316323
await this.initialNotebookSetup();
324+
traceInfo('restartKernel - initialSetup completed');
317325

318326
return;
319327
}
@@ -531,6 +539,8 @@ export class JupyterServerBase implements INotebookServer {
531539
}
532540
}
533541

542+
traceError('No session during execute observable');
543+
534544
// Can't run because no session
535545
return new Observable<ICell[]>(subscriber => {
536546
subscriber.error(this.getDisposedError());
@@ -539,7 +549,7 @@ export class JupyterServerBase implements INotebookServer {
539549
}
540550

541551
private generateRequest = (code: string, silent?: boolean): Kernel.IFuture | undefined => {
542-
//this.logger.logInformation(`Executing code in jupyter : ${code}`)
552+
//traceInfo(`Executing code in jupyter : ${code}`);
543553
try {
544554
const cellMatcher = new CellMatcher(this.configService.getSettings().datascience);
545555
return this.session ? this.session.requestExecute(
@@ -563,25 +573,29 @@ export class JupyterServerBase implements INotebookServer {
563573
// Set up our initial plotting and imports
564574
private async initialNotebookSetup(cancelToken?: CancellationToken): Promise<void> {
565575
if (this.ranInitialSetup) {
576+
traceInfo(`Already ran setup for ${this.id}`);
566577
return;
567578
}
568579
this.ranInitialSetup = true;
569580

570581
try {
571582
// When we start our notebook initial, change to our workspace or user specified root directory
572583
if (this.launchInfo && this.launchInfo.workingDir && this.launchInfo.connectionInfo.localLaunch) {
584+
traceInfo(`Changing directory for ${this.id}`);
573585
await this.changeDirectoryIfPossible(this.launchInfo.workingDir);
574586
}
575587

576588
const settings = this.configService.getSettings().datascience;
577589
const matplobInit = !settings || settings.enablePlotViewer ? CodeSnippits.MatplotLibInitSvg : CodeSnippits.MatplotLibInitPng;
578590

591+
traceInfo(`Initialize matplotlib for ${this.id}`);
579592
// Force matplotlib to inline and save the default style. We'll use this later if we
580593
// get a request to update style
581594
await this.executeSilently(
582595
matplobInit,
583596
cancelToken
584597
);
598+
traceInfo(`Initial setup complete for ${this.id}`);
585599
} catch (e) {
586600
traceWarning(e);
587601
}
@@ -640,6 +654,7 @@ export class JupyterServerBase implements INotebookServer {
640654
if (this.launchInfo && this.launchInfo.connectionInfo && this.launchInfo.connectionInfo.localProcExitCode) {
641655
// Not running, just exit
642656
const exitCode = this.launchInfo.connectionInfo.localProcExitCode;
657+
traceError(`Jupyter crashed with code ${exitCode}`);
643658
subscriber.error(this.sessionStartTime, new Error(localize.DataScience.jupyterServerCrashed().format(exitCode.toString())));
644659
subscriber.complete(this.sessionStartTime);
645660
} else {
@@ -716,6 +731,10 @@ export class JupyterServerBase implements INotebookServer {
716731
}
717732
}
718733
} else {
734+
const sessionDate = new Date(this.sessionStartTime!);
735+
const cellDate = new Date(subscriber.startTime);
736+
traceInfo(`Session start time is newer than cell : \r\n${sessionDate.toTimeString()}\r\n${cellDate.toTimeString()}`);
737+
719738
// Otherwise just set to an error
720739
this.handleInterrupted(subscriber.cell);
721740
subscriber.cell.state = CellState.error;

src/client/datascience/jupyter/jupyterSession.ts

Lines changed: 93 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,14 @@ export class JupyterSession implements IJupyterSession {
3939
private kernelSpec: IJupyterKernelSpec | undefined;
4040
private sessionManager: SessionManager | undefined;
4141
private session: Session.ISession | undefined;
42-
private restartSessionPromise: Promise<Session.ISession> | undefined;
42+
private restartSessionPromise: Promise<Session.ISession | undefined> | undefined;
4343
private contentsManager: ContentsManager | undefined;
4444
private notebookFiles: Contents.IModel[] = [];
4545
private onRestartedEvent: EventEmitter<void> | undefined;
4646
private statusHandler: Slot<Session.ISession, Kernel.Status> | undefined;
4747
private connected: boolean = false;
4848
private jupyterPasswordConnect: IJupyterPasswordConnect;
49+
private oldSessions: Session.ISession[] = [];
4950

5051
constructor(
5152
connInfo: IConnection,
@@ -87,44 +88,38 @@ export class JupyterSession implements IJupyterSession {
8788
return this.onRestartedEvent.event;
8889
}
8990

90-
public async waitForIdle(timeout: number): Promise<void> {
91-
if (this.session && this.session.kernel) {
92-
// This function seems to cause CI builds to timeout randomly on
93-
// different tests. Waiting for status to go idle doesn't seem to work and
94-
// in the past, waiting on the ready promise doesn't work either. Check status with a maximum of 5 seconds
95-
const startTime = Date.now();
96-
while (this.session &&
97-
this.session.kernel &&
98-
this.session.kernel.status !== 'idle' &&
99-
(Date.now() - startTime < timeout)) {
100-
traceInfo(`Waiting for idle: ${this.session.kernel.status}`);
101-
await sleep(100);
102-
}
103-
104-
// If we didn't make it out in ten seconds, indicate an error
105-
if (!this.session || !this.session.kernel || this.session.kernel.status !== 'idle') {
106-
throw new JupyterWaitForIdleError(localize.DataScience.jupyterLaunchTimedOut());
107-
}
108-
}
91+
public waitForIdle(timeout: number): Promise<void> {
92+
return this.waitForIdleOnSession(this.session, timeout);
10993
}
11094

11195
public async restart(_timeout: number): Promise<void> {
11296
// Just kill the current session and switch to the other
11397
if (this.restartSessionPromise && this.session && this.sessionManager && this.contentsManager) {
98+
traceInfo(`Restarting ${this.session.kernel.id}`);
99+
114100
// Save old state for shutdown
115101
const oldSession = this.session;
116102
const oldStatusHandler = this.statusHandler;
117103

118-
// Just switch to the other session.
104+
// Just switch to the other session. It should already be ready
119105
this.session = await this.restartSessionPromise;
106+
if (!this.session) {
107+
throw new Error(localize.DataScience.sessionDisposed());
108+
}
109+
traceInfo(`Got new session ${this.session.kernel.id}`);
120110

121111
// Rewire our status changed event.
122112
this.statusHandler = this.onStatusChanged.bind(this.onStatusChanged);
123113
this.session.statusChanged.connect(this.statusHandler);
124114

125115
// After switching, start another in case we restart again.
126-
this.restartSessionPromise = this.createSession(oldSession.serverSettings, this.contentsManager);
127-
this.shutdownSession(oldSession, oldStatusHandler).ignoreErrors();
116+
this.restartSessionPromise = this.createRestartSession(oldSession.serverSettings, this.contentsManager);
117+
traceInfo('Started new restart session');
118+
if (oldStatusHandler) {
119+
oldSession.statusChanged.disconnect(oldStatusHandler);
120+
}
121+
// Don't shutdown old sessions yet. This seems to hang tests.
122+
this.oldSessions.push(oldSession);
128123
} else {
129124
throw new Error(localize.DataScience.sessionDisposed());
130125
}
@@ -157,7 +152,7 @@ export class JupyterSession implements IJupyterSession {
157152
this.session = await this.createSession(serverSettings, this.contentsManager, cancelToken);
158153

159154
// Start another session to handle restarts
160-
this.restartSessionPromise = this.createSession(serverSettings, this.contentsManager, cancelToken);
155+
this.restartSessionPromise = this.createRestartSession(serverSettings, this.contentsManager, cancelToken);
161156

162157
// Listen for session status changes
163158
this.statusHandler = this.onStatusChanged.bind(this.onStatusChanged);
@@ -171,6 +166,54 @@ export class JupyterSession implements IJupyterSession {
171166
return this.connected;
172167
}
173168

169+
private async waitForIdleOnSession(session: Session.ISession | undefined, timeout: number): Promise<void> {
170+
if (session && session.kernel) {
171+
traceInfo(`Waiting for idle on: ${session.kernel.id}${session.kernel.status}`);
172+
173+
// This function seems to cause CI builds to timeout randomly on
174+
// different tests. Waiting for status to go idle doesn't seem to work and
175+
// in the past, waiting on the ready promise doesn't work either. Check status with a maximum of 5 seconds
176+
const startTime = Date.now();
177+
while (session &&
178+
session.kernel &&
179+
session.kernel.status !== 'idle' &&
180+
(Date.now() - startTime < timeout)) {
181+
await sleep(100);
182+
}
183+
184+
traceInfo(`Finished waiting for idle on: ${session.kernel.id}${session.kernel.status}`);
185+
186+
// If we didn't make it out in ten seconds, indicate an error
187+
if (!session || !session.kernel || session.kernel.status !== 'idle') {
188+
throw new JupyterWaitForIdleError(localize.DataScience.jupyterLaunchTimedOut());
189+
}
190+
}
191+
}
192+
193+
private async createRestartSession(serverSettings: ServerConnection.ISettings, contentsManager: ContentsManager, cancelToken?: CancellationToken): Promise<Session.ISession> {
194+
let result: Session.ISession | undefined;
195+
let tryCount = 0;
196+
// tslint:disable-next-line: no-any
197+
let exception: any;
198+
while (tryCount < 3) {
199+
try {
200+
result = await this.createSession(serverSettings, contentsManager, cancelToken);
201+
await this.waitForIdleOnSession(result, 30000);
202+
return result;
203+
} catch (exc) {
204+
traceInfo(`Error waiting for restart session: ${exc}`);
205+
tryCount += 1;
206+
if (result) {
207+
// Cleanup later.
208+
this.oldSessions.push(result);
209+
}
210+
result = undefined;
211+
exception = exc;
212+
}
213+
}
214+
throw exception;
215+
}
216+
174217
private async createSession(serverSettings: ServerConnection.ISettings, contentsManager: ContentsManager, cancelToken?: CancellationToken): Promise<Session.ISession> {
175218

176219
// Create a temporary notebook for this session.
@@ -277,7 +320,9 @@ export class JupyterSession implements IJupyterSession {
277320
}
278321

279322
private async shutdownSession(session: Session.ISession | undefined, statusHandler: Slot<Session.ISession, Kernel.Status> | undefined): Promise<void> {
280-
if (session) {
323+
if (session && session.kernel) {
324+
const kernelId = session.kernel.id;
325+
traceInfo(`shutdownSession ${kernelId} - start`);
281326
try {
282327
if (statusHandler) {
283328
session.statusChanged.disconnect(statusHandler);
@@ -288,23 +333,22 @@ export class JupyterSession implements IJupyterSession {
288333
// https://github.com/jupyterlab/jupyterlab/issues/4252
289334
// tslint:disable:no-any
290335
if (isTestExecution()) {
291-
if (session && session.kernel) {
292-
const defaultKernel = session.kernel as any;
293-
if (defaultKernel && defaultKernel._futures) {
294-
const futures = defaultKernel._futures as Map<any, any>;
295-
if (futures) {
296-
futures.forEach(f => {
297-
if (f._status !== undefined) {
298-
f._status |= 4;
299-
}
300-
});
301-
}
336+
const defaultKernel = session.kernel as any;
337+
if (defaultKernel && defaultKernel._futures) {
338+
const futures = defaultKernel._futures as Map<any, any>;
339+
if (futures) {
340+
futures.forEach(f => {
341+
if (f._status !== undefined) {
342+
f._status |= 4;
343+
}
344+
});
302345
}
303346
}
347+
await waitForPromise(session.shutdown(), 1000);
348+
} else {
349+
// Shutdown may fail if the process has been killed
350+
await waitForPromise(session.shutdown(), 1000);
304351
}
305-
306-
// Shutdown may fail if the process has been killed
307-
await waitForPromise(session.shutdown(), 1000);
308352
} catch {
309353
noop();
310354
}
@@ -315,22 +359,30 @@ export class JupyterSession implements IJupyterSession {
315359
// Ignore, just trace.
316360
traceWarning(e);
317361
}
362+
traceInfo(`shutdownSession ${kernelId} - shutdown complete`);
318363
}
319364
}
320365

321366
//tslint:disable:cyclomatic-complexity
322367
private async shutdownSessionAndConnection(): Promise<void> {
323368
if (this.contentsManager) {
369+
traceInfo('ShutdownSessionAndConnection - dispose contents manager');
324370
this.contentsManager.dispose();
325371
this.contentsManager = undefined;
326372
}
327373
if (this.session || this.sessionManager) {
328374
try {
375+
traceInfo('ShutdownSessionAndConnection - old sessions');
376+
await Promise.all(this.oldSessions.map(s => this.shutdownSession(s, undefined)));
377+
traceInfo('ShutdownSessionAndConnection - current session');
329378
await this.shutdownSession(this.session, this.statusHandler);
379+
traceInfo('ShutdownSessionAndConnection - get restart session');
330380
const restartSession = await this.restartSessionPromise;
381+
traceInfo('ShutdownSessionAndConnection - shutdown restart session');
331382
await this.shutdownSession(restartSession, undefined);
332383

333384
if (this.sessionManager && !this.sessionManager.isDisposed) {
385+
traceInfo('ShutdownSessionAndConnection - dispose session manager');
334386
this.sessionManager.dispose();
335387
}
336388
} catch {
@@ -344,9 +396,11 @@ export class JupyterSession implements IJupyterSession {
344396
this.onRestartedEvent.dispose();
345397
}
346398
if (this.connInfo) {
399+
traceInfo('ShutdownSessionAndConnection - dispose conn info');
347400
this.connInfo.dispose(); // This should kill the process that's running
348401
this.connInfo = undefined;
349402
}
403+
traceInfo('ShutdownSessionAndConnection -- complete');
350404
}
351405

352406
}

src/test/common/asyncDump.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ const log = require('why-is-node-running');
55

66
// Call this function to debug async hangs. It should print out stack traces of still running promises.
77
export function asyncDump() {
8-
log();
8+
log();
99
}

0 commit comments

Comments
 (0)