rummatee
diff --git a/src/@types/global.d.ts b/src/@types/global.d.ts
Lines changed: 26 additions & 0 deletions
diff --git a/src/components/views/rooms/VoiceRecordComposerTile.tsx b/src/components/views/rooms/VoiceRecordComposerTile.tsx
Lines changed: 32 additions & 3 deletions
diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts
Lines changed: 1 addition & 1 deletion
diff --git a/src/voice/RecorderWorklet.ts b/src/voice/RecorderWorklet.ts
Lines changed: 67 additions & 0 deletions
diff --git a/src/voice/VoiceRecording.ts b/src/voice/VoiceRecording.ts
Lines changed: 51 additions & 16 deletions
diff --git a/src/voice/consts.ts b/src/voice/consts.ts
Lines changed: 37 additions & 0 deletions
@@ -129,4 +129,30 @@ declare global {
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error/columnNumber
  columnNumber?: number;
  }
+
+ // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
+ // Instance shape of an audio worklet processor, declared globally so worklet code in this
+ // project can extend and type-check against it.
+ interface AudioWorkletProcessor {
+ // Port the processor uses to post messages out of the worklet.
+ readonly port: MessagePort;
+ // Called with the sample buffers to process. Buffers are nested arrays of Float32Array
+ // (presumably indexed by input/output, then channel - confirm against the Web Audio docs).
+ // The boolean result tells the caller whether the processor still has work to do.
+ process(
+ inputs: Float32Array[][],
+ outputs: Float32Array[][],
+ parameters: Record<string, Float32Array>
+ ): boolean;
+ }
+
+ // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
+ // Value (constructor) side of AudioWorkletProcessor: declaring it as a newable global lets
+ // classes use it in an `extends` clause.
+ const AudioWorkletProcessor: {
+ prototype: AudioWorkletProcessor;
+ new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor;
+ };
+
+ // https://github.com/microsoft/TypeScript/issues/28308#issuecomment-650802278
+ /**
+  * Registers a processor class with the worklet scope under the given name.
+  *
+  * The return type is declared explicitly: without it the declaration is implicitly `any`,
+  * which is rejected under `noImplicitAny` and would let callers misuse the (absent) result.
+  *
+  * @param name The name the processor is registered under.
+  * @param processorCtor The processor class, optionally exposing static `parameterDescriptors`.
+  */
+ function registerProcessor(
+     name: string,
+     processorCtor: (new (
+         options?: AudioWorkletNodeOptions
+     ) => AudioWorkletProcessor) & {
+         parameterDescriptors?: AudioParamDescriptor[];
+     }
+ ): void;
 }
@@ -53,9 +53,38 @@ export default class VoiceRecordComposerTile extends React.PureComponent<IProps,
  await this.state.recorder.stop();
  const mxc = await this.state.recorder.upload();
  MatrixClientPeg.get().sendMessage(this.props.room.roomId, {
- body: "Voice message",
- msgtype: "org.matrix.msc2516.voice",
- url: mxc,
+ "body": "Voice message",
+ "msgtype": "org.matrix.msc2516.voice",
+ //"msgtype": MsgType.Audio,
+ "url": mxc,
+ "info": {
+ duration: Math.round(this.state.recorder.durationSeconds * 1000),
+ mimetype: this.state.recorder.contentType,
+ size: this.state.recorder.contentLength,
+ },
+
+ // MSC1767 experiment
+ "org.matrix.msc1767.text": "Voice message",
+ "org.matrix.msc1767.file": {
+ url: mxc,
+ name: "Voice message.ogg",
+ mimetype: this.state.recorder.contentType,
+ size: this.state.recorder.contentLength,
+ },
+ "org.matrix.msc1767.audio": {
+ duration: Math.round(this.state.recorder.durationSeconds * 1000),
+ // TODO: @@ TravisR: Waveform? (MSC1767 decision)
+ },
+ "org.matrix.experimental.msc2516.voice": { // MSC2516+MSC1767 experiment
+ duration: Math.round(this.state.recorder.durationSeconds * 1000),
+
+ // Events can't have floats, so we try to maintain resolution by using 1024
+ // as a maximum value. The waveform contains values between zero and 1, so this
+ // should come out largely sane.
+ //
+ // We're expecting about one data point per second of audio.
+ waveform: this.state.recorder.finalWaveform.map(v => Math.round(v * 1024)),
+ },
  });
  await VoiceRecordingStore.instance.disposeRecording();
  this.setState({recorder: null});
 
@@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {
  * @param a The array to clone. Must be defined.
  * @returns A copy of the array.
  */
-export function arrayFastClone(a: any[]): any[] {
+export function arrayFastClone<T>(a: T[]): T[] {
  return a.slice(0, a.length);
 }
 
 
@@ -0,0 +1,67 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import {IAmplitudePayload, ITimingPayload, PayloadEvent, WORKLET_NAME} from "./consts";
+import {percentageOf} from "../utils/numbers";
+
+// from AudioWorkletGlobalScope: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletGlobalScope
+declare const currentTime: number;
+// declare const currentFrame: number;
+// declare const sampleRate: number;
+
+/**
+ * Audio worklet processor used while recording a voice message. It acts as a passive
+ * observer of the incoming audio and posts two kinds of messages over its port:
+ *  - a timing update on every call to process(), and
+ *  - an amplitude mark roughly once per second of audio context time.
+ */
+class MxVoiceWorklet extends AudioWorkletProcessor {
+    // The (rounded) context second at or after which the next amplitude mark is due.
+    private nextAmplitudeSecond = 0;
+
+    // Zero-based position of the next amplitude mark. This is what gets sent as `forSecond`
+    // so that marks stay contiguous for the receiver even when the first call to process()
+    // happens after the context clock has already moved past zero.
+    private amplitudeIndex = 0;
+
+    /**
+     * Inspects one block of audio and posts timing/amplitude messages for it.
+     *
+     * @param inputs The input buffers; only the first channel of the first input is analysed.
+     * @param outputs The output buffers. Unused: this processor never writes audio.
+     * @param parameters Audio parameters. Unused.
+     * @returns Always true, to keep the processor alive for as long as the browser wants it.
+     */
+    process(
+        inputs: Float32Array[][],
+        outputs: Float32Array[][],
+        parameters: Record<string, Float32Array>
+    ): boolean {
+        // We only fire amplitude updates once a second to avoid flooding the recording instance
+        // with useless data. Much of the data would end up discarded, so we ratelimit ourselves
+        // here.
+        //
+        // We compare with >= rather than ===: there is no guarantee that our first call lands
+        // while the clock still rounds to zero, and a strict equality check would then never
+        // match again, leaving the recording without any waveform.
+        const currentSecond = Math.round(currentTime);
+        if (currentSecond >= this.nextAmplitudeSecond) {
+            // We're expecting exactly one mono input source, so just grab the very first frame
+            // of samples for the analysis. The input can be missing entirely (for example when
+            // nothing is connected to us), in which case there is nothing to measure.
+            const monoChan = inputs[0] && inputs[0][0];
+            if (monoChan && monoChan.length > 0) {
+                // The amplitude of the frame's samples is effectively the loudness of the frame.
+                // This translates into a bar which can be rendered as part of the whole
+                // recording clip's waveform.
+                //
+                // A plain loop avoids spreading the whole frame into function arguments on the
+                // audio thread.
+                let minVal = Infinity;
+                let maxVal = -Infinity;
+                for (let i = 0; i < monoChan.length; i++) {
+                    minVal = Math.min(minVal, monoChan[i]);
+                    maxVal = Math.max(maxVal, monoChan[i]);
+                }
+
+                // We translate the amplitude down to 0-1 for sanity's sake.
+                const amplitude = percentageOf(maxVal, -1, 1) - percentageOf(minVal, -1, 1);
+
+                const mark: IAmplitudePayload = {
+                    ev: PayloadEvent.AmplitudeMark,
+                    amplitude: amplitude,
+                    forSecond: this.amplitudeIndex,
+                };
+                this.port.postMessage(mark);
+                this.amplitudeIndex++;
+            }
+
+            // Schedule the next mark relative to where the clock actually is.
+            this.nextAmplitudeSecond = currentSecond + 1;
+        }
+
+        // We mostly use this worklet to fire regular clock updates through to components
+        const tick: ITimingPayload = {ev: PayloadEvent.Timekeep, timeSeconds: currentTime};
+        this.port.postMessage(tick);
+
+        // We're supposed to return false when we're "done" with the audio clip, but seeing as
+        // we are acting as a passive processor we are never truly "done". The browser will clean
+        // us up when it is done with us.
+        return true;
+    }
+}
+
+registerProcessor(WORKLET_NAME, MxVoiceWorklet);
+
+export default null; // to appease module loaders (we never use the export)
@@ -23,6 +23,8 @@ import {clamp} from "../utils/numbers";
 import EventEmitter from "events";
 import {IDestroyable} from "../utils/IDestroyable";
 import {Singleflight} from "../utils/Singleflight";
+import {PayloadEvent, WORKLET_NAME} from "./consts";
+import {arrayFastClone} from "../utils/arrays";
 
 const CHANNELS = 1; // stereo isn't important
 const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
@@ -49,16 +51,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
  private recorderSource: MediaStreamAudioSourceNode;
  private recorderStream: MediaStream;
  private recorderFFT: AnalyserNode;
- private recorderProcessor: ScriptProcessorNode;
+ private recorderWorklet: AudioWorkletNode;
  private buffer = new Uint8Array(0);
  private mxc: string;
  private recording = false;
  private observable: SimpleObservable<IRecordingUpdate>;
+ private amplitudes: number[] = []; // at each second mark, generated
 
  public constructor(private client: MatrixClient) {
  super();
  }
 
+    /**
+     * A copy of the amplitude marks collected so far. Callers receive their own array, so
+     * changing it does not affect the recording's internal list.
+     */
+    public get finalWaveform(): number[] {
+        const snapshot = arrayFastClone(this.amplitudes);
+        return snapshot;
+    }
+
+    /**
+     * The MIME type reported for the recorded audio.
+     */
+    public get contentType(): string {
+        const mimeType = "audio/ogg";
+        return mimeType;
+    }
+
+    /**
+     * The number of bytes currently held in the recording buffer.
+     */
+    public get contentLength(): number {
+        const {length} = this.buffer;
+        return length;
+    }
+
+    /**
+     * The current time of the recorder's audio context, in seconds.
+     *
+     * @throws If no recorder has been created yet.
+     */
+    public get durationSeconds(): number {
+        if (this.recorder) {
+            return this.recorderContext.currentTime;
+        }
+        throw new Error("Duration not available without a recording");
+    }
+
  private async makeRecorder() {
  this.recorderStream = await navigator.mediaDevices.getUserMedia({
  audio: {
@@ -80,18 +100,34 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
  // it makes the time domain less than helpful.
  this.recorderFFT.fftSize = 64;
 
- // We use an audio processor to get accurate timing information.
- // The size of the audio buffer largely decides how quickly we push timing/waveform data
- // out of this class. Smaller buffers mean we update more frequently as we can't hold as
- // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of
- // updates and 2048 gives us about 20Hz. We use 1024 to get as close to perceived realtime
- // as possible. Must be a power of 2.
- this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
+ // Set up our worklet. We use this for timing information and waveform analysis: the
+ // web audio API prefers this be done async to avoid holding the main thread with math.
+ const mxRecorderWorkletPath = document.body.dataset.vectorRecorderWorkletScript;
+ if (!mxRecorderWorkletPath) {
+ throw new Error("Unable to create recorder: no worklet script registered");
+ }
+ await this.recorderContext.audioWorklet.addModule(mxRecorderWorkletPath);
+ this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
 
  // Connect our inputs and outputs
  this.recorderSource.connect(this.recorderFFT);
- this.recorderSource.connect(this.recorderProcessor);
- this.recorderProcessor.connect(this.recorderContext.destination);
+ this.recorderSource.connect(this.recorderWorklet);
+ this.recorderWorklet.connect(this.recorderContext.destination);
+
+ // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
+ this.recorderWorklet.port.onmessage = (ev) => {
+ switch (ev.data['ev']) {
+ case PayloadEvent.Timekeep:
+ this.processAudioUpdate(ev.data['timeSeconds']);
+ break;
+ case PayloadEvent.AmplitudeMark:
+ // Sanity check to make sure we're adding about one sample per second
+ if (ev.data['forSecond'] === this.amplitudes.length) {
+ this.amplitudes.push(ev.data['amplitude']);
+ }
+ break;
+ }
+ };
 
  this.recorder = new Recorder({
  encoderPath, // magic from webpack
@@ -138,7 +174,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
  return this.mxc;
  }
 
- private processAudioUpdate = (ev: AudioProcessingEvent) => {
+ private processAudioUpdate = (timeSeconds: number) => {
  if (!this.recording) return;
 
  // The time domain is the input to the FFT, which means we use an array of the same
@@ -162,12 +198,12 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
 
  this.observable.update({
  waveform: translatedData,
- timeSeconds: ev.playbackTime,
+ timeSeconds: timeSeconds,
  });
 
  // Now that we've updated the data/waveform, let's do a time check. We don't want to
  // go horribly over the limit. We also emit a warning state if needed.
- const secondsLeft = TARGET_MAX_LENGTH - ev.playbackTime;
+ const secondsLeft = TARGET_MAX_LENGTH - timeSeconds;
  if (secondsLeft <= 0) {
  // noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping
  this.stop();
@@ -191,7 +227,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
  }
  this.observable = new SimpleObservable<IRecordingUpdate>();
  await this.makeRecorder();
- this.recorderProcessor.addEventListener("audioprocess", this.processAudioUpdate);
  await this.recorder.start();
  this.recording = true;
  this.emit(RecordingState.Started);
@@ -205,6 +240,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
 
  // Disconnect the source early to start shutting down resources
  this.recorderSource.disconnect();
+ this.recorderWorklet.disconnect();
  await this.recorder.stop();
 
  // close the context after the recorder so the recorder doesn't try to
@@ -216,7 +252,6 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
 
  // Finally do our post-processing and clean up
  this.recording = false;
- this.recorderProcessor.removeEventListener("audioprocess", this.processAudioUpdate);
  await this.recorder.close();
  this.emit(RecordingState.Ended);
 
@@ -240,7 +275,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
 
  this.emit(RecordingState.Uploading);
  this.mxc = await this.client.uploadContent(new Blob([this.buffer], {
- type: "audio/ogg",
+ type: this.contentType,
  }), {
  onlyContentUri: false, // to stop the warnings in the console
  }).then(r => r['content_uri']);
 
@@ -0,0 +1,37 @@
+/*
+Copyright 2021 The Matrix.org Foundation C.I.C.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Name the recorder worklet's processor is registered under. The same value has to be used
+// when constructing the AudioWorkletNode which talks to that processor.
+export const WORKLET_NAME = "mx-voice-worklet";
+
+// Discriminator (`ev`) for the messages the recorder worklet posts over its message port.
+export enum PayloadEvent {
+ // Clock update carrying the current time of the audio context.
+ Timekeep = "timekeep",
+ // Amplitude sample, emitted roughly once per second of audio.
+ AmplitudeMark = "amplitude_mark",
+}
+
+// Base shape shared by every message posted by the recorder worklet.
+export interface IPayload {
+ // Identifies which kind of message this is.
+ ev: PayloadEvent;
+}
+
+// Clock update message from the recorder worklet.
+export interface ITimingPayload extends IPayload {
+ ev: PayloadEvent.Timekeep;
+ // Time of the audio context, in seconds, when the message was posted.
+ timeSeconds: number;
+}
+
+// Amplitude sample message from the recorder worklet.
+export interface IAmplitudePayload extends IPayload {
+ ev: PayloadEvent.AmplitudeMark;
+ // Which one-second mark this sample is for. The recording only stores the sample when this
+ // equals the number of marks it already holds.
+ forSecond: number;
+ // Loudness of the sampled frame; intended to fall between 0 and 1.
+ amplitude: number;
+}
Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ export function arraySeed<T>(val: T, length: number): T[] {`
`54`	`54`	`* @param a The array to clone. Must be defined.`
`55`	`55`	`* @returns A copy of the array.`
`56`	`56`	`*/`
`57`		`-export function arrayFastClone(a: any[]): any[] {`
	`57`	`+export function arrayFastClone<T>(a: T[]): T[] {`
`58`	`58`	`return a.slice(0, a.length);`
`59`	`59`	`}`
`60`	`60`