This repository was archived by the owner on Sep 11, 2024. It is now read-only.

Commit d1cfde6

Merge pull request #5989 from matrix-org/travis/voicemessages/safari
Support voice messages on Safari

2 parents: 03bb3cc + 65f591b

7 files changed: +238 additions, -78 deletions

__mocks__/empty.js (2 additions, 0 deletions)

@@ -0,0 +1,2 @@
+// Yes, this is empty.
+module.exports = {};

package.json (4 additions, 1 deletion)

@@ -186,7 +186,10 @@
         ],
         "moduleNameMapper": {
             "\\.(gif|png|svg|ttf|woff2)$": "<rootDir>/__mocks__/imageMock.js",
-            "\\$webapp/i18n/languages.json": "<rootDir>/__mocks__/languages.json"
+            "\\$webapp/i18n/languages.json": "<rootDir>/__mocks__/languages.json",
+            "decoderWorker\\.min\\.js": "<rootDir>/__mocks__/empty.js",
+            "decoderWorker\\.min\\.wasm": "<rootDir>/__mocks__/empty.js",
+            "waveWorker\\.min\\.js": "<rootDir>/__mocks__/empty.js"
        },
        "transformIgnorePatterns": [
            "/node_modules/(?!matrix-js-sdk).+$"

src/@types/global.d.ts (3 additions, 0 deletions)

@@ -52,6 +52,9 @@ declare global {
             init: () => Promise<void>;
         };

+        // Needed for Safari, unknown to TypeScript
+        webkitAudioContext: typeof AudioContext;
+
         mxContentMessages: ContentMessages;
         mxToastStore: ToastStore;
         mxDeviceListener: DeviceListener;
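The webkitAudioContext declaration exists so a compat helper can construct an audio context on Safari. The compat module itself (src/voice/compat.ts, imported by the files below) is not included in this view; the following is only a sketch of what a createAudioContext fallback could look like, assuming it simply prefers the standard constructor:

// Sketch only - the real src/voice/compat.ts is not shown in this diff view.
// Assumes the helper tries the unprefixed constructor first and falls back to
// Safari's prefixed webkitAudioContext (declared on Window above).
export function createAudioContext(opts?: AudioContextOptions): AudioContext {
    if (window.AudioContext) {
        return new AudioContext(opts);
    } else if (window.webkitAudioContext) {
        // Older Safari: the prefixed constructor may not accept an options
        // object, so the options are dropped here (assumption).
        return new window.webkitAudioContext();
    } else {
        throw new Error("Unsupported browser: no AudioContext available");
    }
}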

src/rageshake/rageshake.js (3 additions, 1 deletion)

@@ -73,7 +73,9 @@ class ConsoleLogger {

         // Convert objects and errors to helpful things
         args = args.map((arg) => {
-            if (arg instanceof Error) {
+            if (arg instanceof DOMException) {
+                return arg.message + ` (${arg.name} | ${arg.code}) ` + (arg.stack ? `\n${arg.stack}` : '');
+            } else if (arg instanceof Error) {
                 return arg.message + (arg.stack ? `\n${arg.stack}` : '');
             } else if (typeof (arg) === 'object') {
                 try {
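The new DOMException branch makes rageshake logs more useful when Safari's media APIs fail, since those failures surface as DOMExceptions rather than plain Errors. For illustration only (the values here are hypothetical), a microphone permission failure would be serialised roughly like this:

// Illustration of the branch above, not part of the commit.
const err = new DOMException("Permission denied", "NotAllowedError");
const line = err.message + ` (${err.name} | ${err.code}) ` + (err.stack ? `\n${err.stack}` : '');
// => "Permission denied (NotAllowedError | 0) " plus a stack trace where available
console.log(line);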

src/voice/Playback.ts (19 additions, 2 deletions)

@@ -21,6 +21,7 @@ import {SimpleObservable} from "matrix-widget-api";
 import {IDestroyable} from "../utils/IDestroyable";
 import {PlaybackClock} from "./PlaybackClock";
 import {clamp} from "../utils/numbers";
+import {createAudioContext, decodeOgg} from "./compat";

 export enum PlaybackState {
     Decoding = "decoding",
@@ -49,7 +50,7 @@ export class Playback extends EventEmitter implements IDestroyable {
      */
     constructor(private buf: ArrayBuffer, seedWaveform = DEFAULT_WAVEFORM) {
         super();
-        this.context = new AudioContext();
+        this.context = createAudioContext();
         this.resampledWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES);
         this.waveformObservable.update(this.resampledWaveform);
         this.clock = new PlaybackClock(this.context);
@@ -91,7 +92,23 @@ export class Playback extends EventEmitter implements IDestroyable {
     }

     public async prepare() {
-        this.audioBuf = await this.context.decodeAudioData(this.buf);
+        // Safari compat: promise API not supported on this function
+        this.audioBuf = await new Promise((resolve, reject) => {
+            this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
+                // This error handler is largely for Safari as well, which doesn't support Opus/Ogg
+                // very well.
+                console.error("Error decoding recording: ", e);
+                console.warn("Trying to re-encode to WAV instead...");
+
+                const wav = await decodeOgg(this.buf);
+
+                // noinspection ES6MissingAwait - not needed when using callbacks
+                this.context.decodeAudioData(wav, b => resolve(b), e => {
+                    console.error("Still failed to decode recording: ", e);
+                    reject(e);
+                });
+            });
+        });

         // Update the waveform to the real waveform once we have channel data to use. We don't
         // exactly trust the user-provided waveform to be accurate...
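Safari's decodeAudioData historically exposes only the callback signature, which is why prepare() wraps it in a Promise by hand. The general pattern, shown here as a standalone sketch rather than the commit's exact code, is:

// Sketch: promisify the callback form of decodeAudioData for engines
// (notably older Safari) whose implementation does not return a Promise.
function decodeAudioDataCompat(ctx: AudioContext, buf: ArrayBuffer): Promise<AudioBuffer> {
    return new Promise((resolve, reject) => {
        ctx.decodeAudioData(buf, resolve, reject);
    });
}

The commit goes one step further: if the first decode fails (Safari cannot decode Ogg/Opus), it converts the recording to WAV via decodeOgg() from the compat module and retries before giving up.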

src/voice/VoiceRecording.ts (125 additions, 74 deletions)

@@ -19,16 +19,17 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js';
 import {MatrixClient} from "matrix-js-sdk/src/client";
 import CallMediaHandler from "../CallMediaHandler";
 import {SimpleObservable} from "matrix-widget-api";
-import {clamp} from "../utils/numbers";
+import {clamp, percentageOf, percentageWithin} from "../utils/numbers";
 import EventEmitter from "events";
 import {IDestroyable} from "../utils/IDestroyable";
 import {Singleflight} from "../utils/Singleflight";
 import {PayloadEvent, WORKLET_NAME} from "./consts";
 import {UPDATE_EVENT} from "../stores/AsyncStore";
 import {Playback} from "./Playback";
+import {createAudioContext} from "./compat";

 const CHANNELS = 1; // stereo isn't important
-const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
+export const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const TARGET_MAX_LENGTH = 120; // 2 minutes in seconds. Somewhat arbitrary, though longer == larger files.
 const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary.
@@ -55,6 +56,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
     private recorderStream: MediaStream;
     private recorderFFT: AnalyserNode;
     private recorderWorklet: AudioWorkletNode;
+    private recorderProcessor: ScriptProcessorNode;
     private buffer = new Uint8Array(0); // use this.audioBuffer to access
     private mxc: string;
     private recording = false;
@@ -90,78 +92,107 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
     }

     private async makeRecorder() {
-        this.recorderStream = await navigator.mediaDevices.getUserMedia({
-            audio: {
-                channelCount: CHANNELS,
-                noiseSuppression: true, // browsers ignore constraints they can't honour
-                deviceId: CallMediaHandler.getAudioInput(),
-            },
-        });
-        this.recorderContext = new AudioContext({
-            // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
-        });
-        this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
-        this.recorderFFT = this.recorderContext.createAnalyser();
-
-        // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
-        // of two. We use 64 points because we happen to know down the line we need less than
-        // that, but 32 would be too few. Large numbers are not helpful here and do not add
-        // precision: they introduce higher precision outputs of the FFT (frequency data), but
-        // it makes the time domain less than helpful.
-        this.recorderFFT.fftSize = 64;
-
-        // Set up our worklet. We use this for timing information and waveform analysis: the
-        // web audio API prefers this be done async to avoid holding the main thread with math.
-        const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
-        if (!mxRecorderWorkletPath) {
-            throw new Error("Unable to create recorder: no worklet script registered");
-        }
-        await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
-        this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
-
-        // Connect our inputs and outputs
-        this.recorderSource.connect(this.recorderFFT);
-        this.recorderSource.connect(this.recorderWorklet);
-        this.recorderWorklet.connect(this.recorderContext.destination);
-
-        // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
-        this.recorderWorklet.port.onmessage = (ev) => {
-            switch (ev.data['ev']) {
-                case PayloadEvent.Timekeep:
-                    this.processAudioUpdate(ev.data['timeSeconds']);
-                    break;
-                case PayloadEvent.AmplitudeMark:
-                    // Sanity check to make sure we're adding about one sample per second
-                    if (ev.data['forSecond'] === this.amplitudes.length) {
-                        this.amplitudes.push(ev.data['amplitude']);
+        try {
+            this.recorderStream = await navigator.mediaDevices.getUserMedia({
+                audio: {
+                    channelCount: CHANNELS,
+                    noiseSuppression: true, // browsers ignore constraints they can't honour
+                    deviceId: CallMediaHandler.getAudioInput(),
+                },
+            });
+            this.recorderContext = createAudioContext({
+                // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
+            });
+            this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
+            this.recorderFFT = this.recorderContext.createAnalyser();
+
+            // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
+            // of two. We use 64 points because we happen to know down the line we need less than
+            // that, but 32 would be too few. Large numbers are not helpful here and do not add
+            // precision: they introduce higher precision outputs of the FFT (frequency data), but
+            // it makes the time domain less than helpful.
+            this.recorderFFT.fftSize = 64;
+
+            // Set up our worklet. We use this for timing information and waveform analysis: the
+            // web audio API prefers this be done async to avoid holding the main thread with math.
+            const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
+            if (!mxRecorderWorkletPath) {
+                // noinspection ExceptionCaughtLocallyJS
+                throw new Error("Unable to create recorder: no worklet script registered");
+            }
+
+            // Connect our inputs and outputs
+            this.recorderSource.connect(this.recorderFFT);
+
+            if (this.recorderContext.audioWorklet) {
+                await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
+                this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
+                this.recorderSource.connect(this.recorderWorklet);
+                this.recorderWorklet.connect(this.recorderContext.destination);
+
+                // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
+                this.recorderWorklet.port.onmessage = (ev) => {
+                    switch (ev.data['ev']) {
+                        case PayloadEvent.Timekeep:
+                            this.processAudioUpdate(ev.data['timeSeconds']);
+                            break;
+                        case PayloadEvent.AmplitudeMark:
+                            // Sanity check to make sure we're adding about one sample per second
+                            if (ev.data['forSecond'] === this.amplitudes.length) {
+                                this.amplitudes.push(ev.data['amplitude']);
+                            }
+                            break;
                     }
-                    break;
+                };
+            } else {
+                // Safari fallback: use a processor node instead, buffered to 1024 bytes of data
+                // like the worklet is.
+                this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
+                this.recorderSource.connect(this.recorderProcessor);
+                this.recorderProcessor.connect(this.recorderContext.destination);
+                this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess);
             }
-        };
-
-        this.recorder = new Recorder({
-            encoderPath, // magic from webpack
-            encoderSampleRate: SAMPLE_RATE,
-            encoderApplication: 2048, // voice (default is "audio")
-            streamPages: true, // this speeds up the encoding process by using CPU over time
-            encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
-            numberOfChannels: CHANNELS,
-            sourceNode: this.recorderSource,
-            encoderBitRate: BITRATE,
-
-            // We use low values for the following to ease CPU usage - the resulting waveform
-            // is indistinguishable for a voice message. Note that the underlying library will
-            // pick defaults which prefer the highest possible quality, CPU be damned.
-            encoderComplexity: 3, // 0-10, 10 is slow and high quality.
-            resampleQuality: 3, // 0-10, 10 is slow and high quality
-        });
-        this.recorder.ondataavailable = (a: ArrayBuffer) => {
-            const buf = new Uint8Array(a);
-            const newBuf = new Uint8Array(this.buffer.length + buf.length);
-            newBuf.set(this.buffer, 0);
-            newBuf.set(buf, this.buffer.length);
-            this.buffer = newBuf;
-        };
+
+            this.recorder = new Recorder({
+                encoderPath, // magic from webpack
+                encoderSampleRate: SAMPLE_RATE,
+                encoderApplication: 2048, // voice (default is "audio")
+                streamPages: true, // this speeds up the encoding process by using CPU over time
+                encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
+                numberOfChannels: CHANNELS,
+                sourceNode: this.recorderSource,
+                encoderBitRate: BITRATE,
+
+                // We use low values for the following to ease CPU usage - the resulting waveform
+                // is indistinguishable for a voice message. Note that the underlying library will
+                // pick defaults which prefer the highest possible quality, CPU be damned.
+                encoderComplexity: 3, // 0-10, 10 is slow and high quality.
+                resampleQuality: 3, // 0-10, 10 is slow and high quality
+            });
+            this.recorder.ondataavailable = (a: ArrayBuffer) => {
+                const buf = new Uint8Array(a);
+                const newBuf = new Uint8Array(this.buffer.length + buf.length);
+                newBuf.set(this.buffer, 0);
+                newBuf.set(buf, this.buffer.length);
+                this.buffer = newBuf;
+            };
+        } catch (e) {
+            console.error("Error starting recording: ", e);
+            if (e instanceof DOMException) { // Unhelpful DOMExceptions are common - parse them sanely
+                console.error(`${e.name} (${e.code}): ${e.message}`);
+            }
+
+            // Clean up as best as possible
+            if (this.recorderStream) this.recorderStream.getTracks().forEach(t => t.stop());
+            if (this.recorderSource) this.recorderSource.disconnect();
+            if (this.recorder) this.recorder.close();
+            if (this.recorderContext) {
+                // noinspection ES6MissingAwait - not important that we wait
+                this.recorderContext.close();
+            }

+            throw e; // rethrow so upstream can handle it
+        }
     }

     private get audioBuffer(): Uint8Array {
@@ -190,14 +221,30 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
         return this.mxc;
     }

+    private onAudioProcess = (ev: AudioProcessingEvent) => {
+        this.processAudioUpdate(ev.playbackTime);
+
+        // We skip the functionality of the worklet regarding waveform calculations: we
+        // should get that information pretty quick during the playback info.
+    };
+
     private processAudioUpdate = (timeSeconds: number) => {
         if (!this.recording) return;

         // The time domain is the input to the FFT, which means we use an array of the same
         // size. The time domain is also known as the audio waveform. We're ignoring the
         // output of the FFT here (frequency data) because we're not interested in it.
         const data = new Float32Array(this.recorderFFT.fftSize);
-        this.recorderFFT.getFloatTimeDomainData(data);
+        if (!this.recorderFFT.getFloatTimeDomainData) {
+            // Safari compat
+            const data2 = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data2);
+            for (let i = 0; i < data2.length; i++) {
+                data[i] = percentageWithin(percentageOf(data2[i], 0, 256), -1, 1);
+            }
+        } else {
+            this.recorderFFT.getFloatTimeDomainData(data);
+        }

         // We can't just `Array.from()` the array because we're dealing with 32bit floats
         // and the built-in function won't consider that when converting between numbers.
@@ -268,7 +315,11 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
             // Disconnect the source early to start shutting down resources
             await this.recorder.stop(); // stop first to flush the last frame
             this.recorderSource.disconnect();
-            this.recorderWorklet.disconnect();
+            if (this.recorderWorklet) this.recorderWorklet.disconnect();
+            if (this.recorderProcessor) {
+                this.recorderProcessor.disconnect();
+                this.recorderProcessor.removeEventListener("audioprocess", this.onAudioProcess);
+            }

             // close the context after the recorder so the recorder doesn't try to
             // connect anything to the context (this would generate a warning)
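Two of the Safari fallbacks above are worth noting. When AudioWorklet is unavailable, a ScriptProcessorNode drives the timing updates instead; and when getFloatTimeDomainData is missing, the byte-valued time-domain data is rescaled into the [-1, 1] float range. Assuming the usual semantics for the helpers in ../utils/numbers (percentageOf(val, min, max) roughly (val - min) / (max - min), percentageWithin(pct, min, max) roughly min + pct * (max - min), presumably clamped), the rescaling loop is equivalent to this small sketch:

// Sketch of the Safari byte-to-float conversion above (helper semantics assumed).
// getByteTimeDomainData fills bytes in [0, 255] centred on 128;
// getFloatTimeDomainData would have returned floats in [-1, 1].
function byteToFloatTimeDomain(bytes: Uint8Array): Float32Array {
    const floats = new Float32Array(bytes.length);
    for (let i = 0; i < bytes.length; i++) {
        floats[i] = (bytes[i] / 256) * 2 - 1; // same as percentageWithin(percentageOf(b, 0, 256), -1, 1)
    }
    return floats;
}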
