Skip to content

Commit 06726d3

Browse files
authored
Merge pull request matrix-org#5888 from matrix-org/travis/voice/event_type
Expand upon voice message event & include overall waveform
2 parents 21e7847 + 14809df commit 06726d3

File tree

6 files changed

+214
-20
lines changed

6 files changed

+214
-20
lines changed

src/@types/global.d.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,4 +129,30 @@ declare global {
129129
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/columnNumber
130130
columnNumber?: number;
131131
}
132+
133+
// Ambient typings for the processor side of the AudioWorklet API, adapted from
// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278

/**
 * Instance shape of a processor class registered through `registerProcessor`.
 */
interface AudioWorkletProcessor {
    /** Message port exposed on the processor instance. */
    readonly port: MessagePort;

    /**
     * Audio callback implemented by each processor.
     *
     * @param inputs Sample buffers, indexed as `inputs[input][channel]`.
     * @param outputs Sample buffers to write to, indexed the same way.
     * @param parameters Parameter values keyed by parameter name.
     * @returns A boolean; see the Web Audio specification for how the
     *     return value affects the processor's lifetime.
     */
    process(
        inputs: Float32Array[][],
        outputs: Float32Array[][],
        parameters: Record<string, Float32Array>
    ): boolean;
}

// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
/** Constructor side of `AudioWorkletProcessor`, so that classes can `extend` it. */
const AudioWorkletProcessor: {
    prototype: AudioWorkletProcessor;
    new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor;
};

// https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
/**
 * Registers a processor class under the given name.
 *
 * The return type is spelled out explicitly: an ambient function without one
 * is implicitly `any`, which `noImplicitAny` rejects.
 *
 * @param name Name to register the processor under.
 * @param processorCtor Processor class, optionally carrying static
 *     `parameterDescriptors`.
 */
function registerProcessor(
    name: string,
    processorCtor: (new (
        options?: AudioWorkletNodeOptions
    ) => AudioWorkletProcessor) & {
        parameterDescriptors?: AudioParamDescriptor[];
    }
): void;
132158
}

src/components/views/rooms/VoiceRecordComposerTile.tsx

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,38 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
5353
await this.state.recorder.stop();
5454
const mxc = await this.state.recorder.upload();
5555
MatrixClientPeg.get().sendMessage(this.props.room.roomId, {
56-
body: "Voice message",
57-
msgtype: "org.matrix.msc2516.voice",
58-
url: mxc,
56+
"body": "Voice message",
57+
"msgtype": "org.matrix.msc2516.voice",
58+
//"msgtype": MsgType.Audio,
59+
"url": mxc,
60+
"info": {
61+
duration: Math.round(this.state.recorder.durationSeconds * 1000),
62+
mimetype: this.state.recorder.contentType,
63+
size: this.state.recorder.contentLength,
64+
},
65+
66+
// MSC1767 experiment
67+
"org.matrix.msc1767.text": "Voice message",
68+
"org.matrix.msc1767.file": {
69+
url: mxc,
70+
name: "Voice message.ogg",
71+
mimetype: this.state.recorder.contentType,
72+
size: this.state.recorder.contentLength,
73+
},
74+
"org.matrix.msc1767.audio": {
75+
duration: Math.round(this.state.recorder.durationSeconds * 1000),
76+
// TODO: @@ TravisR: Waveform? (MSC1767 decision)
77+
},
78+
"org.matrix.experimental.msc2516.voice": { // MSC2516+MSC1767 experiment
79+
duration: Math.round(this.state.recorder.durationSeconds * 1000),
80+
81+
// Events can't have floats, so we try to maintain resolution by using 1024
82+
// as a maximum value. The waveform contains values between zero and 1, so this
83+
// should come out largely sane.
84+
//
85+
// We're expecting about one data point per second of audio.
86+
waveform: this.state.recorder.finalWaveform.map(v => Math.round(v * 1024)),
87+
},
5988
});
6089
await VoiceRecordingStore.instance.disposeRecording();
6190
this.setState({recorder: null});

src/utils/arrays.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {
5454
* @param a The array to clone. Must be defined.
5555
* @returns A copy of the array.
5656
*/
57-
export function arrayFastClone<T>(a: readonly T[]): T[] {
    // Shallow copy: the returned array is new, but its elements are shared
    // with the input. `readonly T[]` lets callers pass frozen or readonly
    // arrays while still accepting every plain `T[]`.
    return a.slice(0, a.length);
}
6060

src/voice/RecorderWorklet.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
Copyright 2021 The Matrix.org Foundation C.I.C.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
18+
import {percentageOf} from "../utils/numbers";
19+
20+
// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
21+
declare const currentTime: number;
22+
// declare const currentFrame: number;
23+
// declare const sampleRate: number;
24+
25+
/**
 * Audio worklet processor backing the voice recorder.
 *
 * On every `process` call it posts a timekeeping message carrying the audio
 * clock, and at most once per second it also posts the amplitude of the
 * current frame so the recorder can build up an overall waveform.
 */
class MxVoiceWorklet extends AudioWorkletProcessor {
    // The next whole second for which an amplitude mark still has to be sent.
    private nextAmplitudeSecond = 0;

    /**
     * Handles one frame of audio.
     *
     * @param inputs Sample buffers indexed as `inputs[input][channel]`; only the
     *     first channel of the first input is analysed.
     * @param outputs Unused; this processor does not produce audio.
     * @param parameters Unused.
     * @returns Always `true`, as this processor never considers itself done.
     */
    process(
        inputs: Float32Array[][],
        outputs: Float32Array[][],
        parameters: Record<string, Float32Array>,
    ): boolean {
        // We only fire amplitude updates once a second to avoid flooding the recording
        // instance with data it would mostly discard.
        const currentSecond = Math.round(currentTime);
        if (currentSecond === this.nextAmplitudeSecond) {
            // We're expecting exactly one mono input source, so just grab the very first
            // frame of samples for the analysis. The input can have no channels at all
            // (for example once the source has been disconnected), so guard the lookup
            // instead of letting the spread of `undefined` throw inside the audio callback.
            const firstInput = inputs[0];
            const monoChan = firstInput ? firstInput[0] : undefined;

            // The amplitude of the frame's samples is effectively the loudness of the frame,
            // translated down to 0-1. A missing or empty frame is reported as silence so the
            // once-per-second cadence is kept.
            let amplitude = 0;
            if (monoChan && monoChan.length > 0) {
                let minVal = Infinity;
                let maxVal = -Infinity;
                for (let i = 0; i < monoChan.length; i++) {
                    minVal = Math.min(minVal, monoChan[i]);
                    maxVal = Math.max(maxVal, monoChan[i]);
                }
                amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
            }

            const mark: IAmplitudePayload = {
                ev: PayloadEvent.AmplitudeMark,
                amplitude: amplitude,
                forSecond: currentSecond,
            };
            this.port.postMessage(mark);
            this.nextAmplitudeSecond++;
        }

        // We mostly use this worklet to fire regular clock updates through to components
        const tick: ITimingPayload = {ev: PayloadEvent.Timekeep, timeSeconds: currentTime};
        this.port.postMessage(tick);

        // We're supposed to return false when we're "done" with the audio clip, but seeing as
        // we are acting as a passive processor we are never truly "done". The browser will
        // clean us up when it is done with us.
        return true;
    }
}
64+
65+
registerProcessor(WORKLET_NAME, MxVoiceWorklet);
66+
67+
export default null; // to appease module loaders (we never use the export)

src/voice/VoiceRecording.ts

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import {clamp} from "../utils/numbers";
2323
import EventEmitter from "events";
2424
import {IDestroyable} from "../utils/IDestroyable";
2525
import {Singleflight} from "../utils/Singleflight";
26+
import {PayloadEvent, WORKLET_NAME} from "./consts";
27+
import {arrayFastClone} from "../utils/arrays";
2628

2729
const CHANNELS = 1; // stereo isn't important
2830
const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
@@ -49,16 +51,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
4951
private recorderSource: MediaStreamAudioSourceNode;
5052
private recorderStream: MediaStream;
5153
private recorderFFT: AnalyserNode;
52-
private recorderProcessor: ScriptProcessorNode;
54+
private recorderWorklet: AudioWorkletNode;
5355
private buffer = new Uint8Array(0);
5456
private mxc: string;
5557
private recording = false;
5658
private observable: SimpleObservable<IRecordingUpdate>;
59+
private amplitudes: number[] = []; // at each second mark, generated
5760

5861
public constructor(private client: MatrixClient) {
5962
super();
6063
}
6164

65+
/**
 * A copy of the amplitude samples collected so far (one entry is pushed per
 * amplitude mark received from the worklet). Mutating the returned array does
 * not affect the recording's own list.
 */
public get finalWaveform(): number[] {
66+
return arrayFastClone(this.amplitudes);
67+
}
68+
69+
/**
 * The MIME type reported for the recording; currently always "audio/ogg".
 */
public get contentType(): string {
70+
return "audio/ogg";
71+
}
72+
73+
/**
 * The current length of the internal recording buffer (a Uint8Array).
 */
public get contentLength(): number {
74+
return this.buffer.length;
75+
}
76+
77+
/**
 * The recording duration, read from the audio context's `currentTime`.
 *
 * @throws Error if no recorder has been created yet.
 */
public get durationSeconds(): number {
78+
if (!this.recorder) throw new Error("Duration not available without a recording");
79+
return this.recorderContext.currentTime;
80+
}
81+
6282
private async makeRecorder() {
6383
this.recorderStream = await navigator.mediaDevices.getUserMedia({
6484
audio: {
@@ -80,18 +100,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
80100
// it makes the time domain less than helpful.
81101
this.recorderFFT.fftSize = 64;
82102

83-
// We use an audio processor to get accurate timing information.
84-
// The size of the audio buffer largely decides how quickly we push timing/waveform data
85-
// out of this class. Smaller buffers mean we update more frequently as we can't hold as
86-
// many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of
87-
// updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime
88-
// as possible. Must be a power of 2.
89-
this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
103+
// Set up our worklet. We use this for timing information and waveform analysis: the
104+
// web audio API prefers this be done async to avoid holding the main thread with math.
105+
const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
106+
if (!mxRecorderWorkletPath) {
107+
throw new Error("Unable to create recorder: no worklet script registered");
108+
}
109+
await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
110+
this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
90111

91112
// Connect our inputs and outputs
92113
this.recorderSource.connect(this.recorderFFT);
93-
this.recorderSource.connect(this.recorderProcessor);
94-
this.recorderProcessor.connect(this.recorderContext.destination);
114+
this.recorderSource.connect(this.recorderWorklet);
115+
this.recorderWorklet.connect(this.recorderContext.destination);
116+
117+
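// Dev note: `addEventListener` on its own does not deliver messages here, most likely because a MessagePort must be explicitly `start()`ed when used that way; assigning `onmessage` starts the port implicitly.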
118+
this.recorderWorklet.port.onmessage = (ev) => {
119+
switch (ev.data['ev']) {
120+
case PayloadEvent.Timekeep:
121+
this.processAudioUpdate(ev.data['timeSeconds']);
122+
break;
123+
case PayloadEvent.AmplitudeMark:
124+
// Sanity check to make sure we're adding about one sample per second
125+
if (ev.data['forSecond'] === this.amplitudes.length) {
126+
this.amplitudes.push(ev.data['amplitude']);
127+
}
128+
break;
129+
}
130+
};
95131

96132
this.recorder = new Recorder({
97133
encoderPath, // magic from webpack
@@ -138,7 +174,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
138174
return this.mxc;
139175
}
140176

141-
private processAudioUpdate = (ev: AudioProcessingEvent) => {
177+
private processAudioUpdate = (timeSeconds: number) => {
142178
if (!this.recording) return;
143179

144180
// The time domain is the input to the FFT, which means we use an array of the same
@@ -162,12 +198,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
162198

163199
this.observable.update({
164200
waveform: translatedData,
165-
timeSeconds: ev.playbackTime,
201+
timeSeconds: timeSeconds,
166202
});
167203

168204
// Now that we've updated the data/waveform, let's do a time check. We don't want to
169205
// go horribly over the limit. We also emit a warning state if needed.
170-
const secondsLeft = TARGET_MAX_LENGTH - ev.playbackTime;
206+
const secondsLeft = TARGET_MAX_LENGTH - timeSeconds;
171207
if (secondsLeft <= 0) {
172208
// noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping
173209
this.stop();
@@ -191,7 +227,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
191227
}
192228
this.observable = new SimpleObservable<IRecordingUpdate>();
193229
await this.makeRecorder();
194-
this.recorderProcessor.addEventListener("audioprocess", this.processAudioUpdate);
195230
await this.recorder.start();
196231
this.recording = true;
197232
this.emit(RecordingState.Started);
@@ -205,6 +240,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
205240

206241
// Disconnect the source early to start shutting down resources
207242
this.recorderSource.disconnect();
243+
this.recorderWorklet.disconnect();
208244
await this.recorder.stop();
209245

210246
// close the context after the recorder so the recorder doesn't try to
@@ -216,7 +252,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
216252

217253
// Finally do our post-processing and clean up
218254
this.recording = false;
219-
this.recorderProcessor.removeEventListener("audioprocess", this.processAudioUpdate);
220255
await this.recorder.close();
221256
this.emit(RecordingState.Ended);
222257

@@ -240,7 +275,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
240275

241276
this.emit(RecordingState.Uploading);
242277
this.mxc = await this.client.uploadContent(new Blob([this.buffer], {
243-
type: "audio/ogg",
278+
type: this.contentType,
244279
}), {
245280
onlyContentUri: false, // to stop the warnings in the console
246281
}).then(r => r['content_uri']);

src/voice/consts.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
Copyright 2021 The Matrix.org Foundation C.I.C.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
/**
 * Name the recorder worklet processor is registered under, and the name used
 * to construct the matching AudioWorkletNode.
 */
export const WORKLET_NAME = "mx-voice-worklet";
18+
19+
/**
 * Discriminator for messages posted from the worklet to the recorder.
 */
export enum PayloadEvent {
20+
// Clock update, posted on every processed frame.
Timekeep = "timekeep",
21+
// Amplitude sample, posted at most once per second.
AmplitudeMark = "amplitude_mark",
22+
}
23+
24+
/**
 * Common shape of every message posted by the worklet.
 */
export interface IPayload {
25+
ev: PayloadEvent;
26+
}
27+
28+
/**
 * Clock update carrying the worklet's current audio time.
 */
export interface ITimingPayload extends IPayload {
29+
ev: PayloadEvent.Timekeep;
30+
// Audio clock time at which the message was posted.
timeSeconds: number;
31+
}
32+
33+
/**
 * Amplitude sample for one second of the recording.
 */
export interface IAmplitudePayload extends IPayload {
34+
ev: PayloadEvent.AmplitudeMark;
35+
// The whole second (rounded audio clock) this sample belongs to.
forSecond: number;
36+
// Loudness of the sampled frame, scaled by the worklet to 0-1.
amplitude: number;
37+
}

0 commit comments

Comments
 (0)