import { RecordRTCPromisesHandler } from 'recordrtc';

import { TranscriptionController, TranscriptionMessage, AudioMetadata } from './types';

/** Relative URL the recorded audio is POSTed to for transcription. */
const KYRON_BACKEND_WHISPER_ENDPOINT = '/api/v1/whisper/transcribe';

/**
 * Variance cut-off for the first audio channel. A recording whose variance
 * falls below this value is treated as white noise and is not sent to the
 * backend.
 */
export const WHITE_NOISE_VARIANCE_THRESHOLD = 1e-4;

/**
 * Minimum recording length, in seconds. Anything shorter is not sent to the
 * backend.
 */
export const MINIMUM_AUDIO_DURATION_IN_SECONDS = 1;

/**
 * Transcription controller that records microphone audio with RecordRTC and
 * posts the finished recording to the backend Whisper endpoint.
 *
 * Lifecycle as implemented here:
 *  - `initiate()` acquires the microphone and starts recording.
 *  - `reset()` discards the current recording, starts a new one and begins
 *    reporting audio levels through `handleAudioMetadata`.
 *  - `pause()` stops the audio-level reporting only.
 *  - `awaitTranscription()` stops the recording, validates it, sends it to the
 *    backend and re-arms the recorder.
 *  - `stop()` releases the recorder, the microphone tracks and the level monitor.
 */
export class WhisperTranscriptionController implements TranscriptionController {
  handlePartialTranscript: (message: TranscriptionMessage) => void;

  handleFinalTranscript: (message: TranscriptionMessage) => void;

  handleAudioMetadata: (data: AudioMetadata) => void;

  stream: MediaStream | null = null;

  recorder: RecordRTCPromisesHandler | null = null;

  // If true, don't record audio at this time.
  // NOTE(review): nothing in this class reads or writes this flag.
  paused: boolean = false;

  // Handle of the audio-level polling interval, or null when not monitoring.
  intervalId: number | null = null;

  // AudioContext backing the current audio-level monitor, kept so it can be closed.
  private monitorContext: AudioContext | null = null;

  /**
   * @param handlePartialTranscript Stored on the instance; not invoked by this class.
   * @param handleFinalTranscript Stored on the instance; not invoked by this class.
   * @param handleAudioMetadata Receives the audio level and elapsed time while the
   *   level monitor is running. Defaults to a no-op.
   */
  constructor(
    handlePartialTranscript: (message: TranscriptionMessage) => void,
    handleFinalTranscript: (message: TranscriptionMessage) => void,
    handleAudioMetadata?: (data: AudioMetadata) => void,
  ) {
    this.handlePartialTranscript = handlePartialTranscript;
    this.handleFinalTranscript = handleFinalTranscript;
    this.handleAudioMetadata = handleAudioMetadata || (() => {});
  }

  /**
   * Requests microphone access, creates the recorder and starts recording.
   *
   * @param callback Invoked once recording has been started.
   * @throws Whatever `getUserMedia` or `startRecording` rejects with.
   */
  public async initiate(callback: () => void): Promise<void> {
    // Release anything left over from a previous call so the old microphone
    // tracks and monitor do not leak.
    if (this.recorder || this.stream) {
      this.stop();
    }

    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const recorder = new RecordRTCPromisesHandler(stream, {
      type: 'audio',
    });
    this.stream = stream;
    this.recorder = recorder;

    // Await so that a failure to start surfaces to the caller instead of
    // becoming an unhandled rejection.
    await recorder.startRecording();

    callback();
  }

  /**
   * Stops recording, stops every live microphone track, stops the audio-level
   * monitor and drops the recorder and stream references.
   */
  public stop() {
    this.stopMonitoringAudioLevel();

    const recorder = this.recorder;
    const stream = this.stream;
    this.recorder = null;
    this.stream = null;

    if (recorder) {
      // stop() is synchronous by contract, so the promise cannot be awaited;
      // attach a handler so a failed stop is reported rather than unhandled.
      recorder.stopRecording().catch(error => {
        console.debug('Failed to stop recording', error);
      });
    }

    if (stream) {
      stream.getTracks().forEach(track => {
        if (track.readyState === 'live') {
          track.stop();
        }
      });
    }
  }

  /** Stops reporting audio levels. The recorder itself is left untouched. */
  public pause() {
    this.stopMonitoringAudioLevel();
  }

  /**
   * Stops the current recording, validates it and returns its transcription.
   * Whether it succeeds or fails, the recorder is reset and restarted afterwards
   * so that it is ready for the next utterance.
   *
   * @returns The transcribed text.
   * @throws Error if there is no recorder, the recording is too short, the
   *   recording is considered white noise, or the transcription request fails.
   */
  public async awaitTranscription(): Promise<string> {
    const recorder = this.recorder;
    if (!recorder) {
      throw new Error('Cannot finish recording: no recorder');
    }

    try {
      await recorder.stopRecording();
      const blob: Blob = await recorder.getBlob();

      if (await this.isTooShort(blob)) {
        throw new Error('Recording is too short');
      }
      if (await this.isWhiteNoise(blob)) {
        throw new Error('Recording is white noise');
      }

      // Only need to do this if we want to show the result in the text input before sending
      // this.handleFinalTranscript({ metadata: { start_time: 0, transcript: transcription } });
      return await this.transcribe(blob);
    } finally {
      // Re-arm only if this recorder is still the active one. If stop() ran
      // while we were awaiting, restarting would resurrect a released recorder
      // (and previously crashed on a null dereference, hiding the real error).
      if (this.recorder === recorder) {
        await recorder.reset();
        await recorder.startRecording();
      }
    }
  }

  /**
   * Discards the current recording, starts a new one and (re)starts the
   * audio-level monitor.
   *
   * @throws Error if there is no recorder.
   */
  public async reset(): Promise<void> {
    const recorder = this.recorder;
    if (!recorder) {
      throw new Error('Cannot reset recording: no recorder');
    }
    await recorder.reset();
    await recorder.startRecording();
    this.monitorAudioLevel();
  }

  /**
   * Estimates whether a recording carries no useful signal by measuring the
   * variance of 1000 randomly chosen samples of its first channel.
   *
   * @param audioBlob Encoded audio to decode and inspect.
   * @returns true when the sampled variance is below
   *   WHITE_NOISE_VARIANCE_THRESHOLD, or when the channel holds no samples.
   * @throws Whatever reading or decoding the blob rejects with.
   */
  public isWhiteNoise = async (audioBlob: Blob): Promise<boolean> => {
    const audioContext = new window.AudioContext();

    try {
      const buffer = await audioBlob.arrayBuffer();
      const audioBuffer = await audioContext.decodeAudioData(buffer);

      const rawData = audioBuffer.getChannelData(0); // Assuming mono audio
      if (rawData.length === 0) {
        // Nothing to sample; indexing would yield undefined and a NaN variance.
        return true;
      }

      const samples = 1000; // Number of samples to check
      const picked = new Float64Array(samples);
      let sum = 0;

      // Draw the random samples once and keep them, so the mean and the
      // variance are computed over the same values.
      for (let i = 0; i < samples; i++) {
        const idx = Math.floor(Math.random() * rawData.length);
        picked[i] = rawData[idx];
        sum += picked[i];
      }

      const mean = sum / samples;

      let variance = 0;
      for (let i = 0; i < samples; i++) {
        variance += (picked[i] - mean) ** 2;
      }
      variance /= samples;

      // This will be useful for us to tune the threshold
      console.debug(`Variance: ${variance}`);
      console.debug(`Mean: ${mean}`);

      // If variance is below a certain threshold, it might be white noise
      return variance < WHITE_NOISE_VARIANCE_THRESHOLD;
    } finally {
      // The context is only needed for decoding; release it on every path.
      // A failed close must not mask the result or the original error.
      audioContext.close().catch(() => undefined);
    }
  };

  /**
   * Loads the blob into an audio element to read its duration.
   *
   * @param audioBlob Encoded audio to measure.
   * @returns Resolves to true when the duration is below
   *   MINIMUM_AUDIO_DURATION_IN_SECONDS.
   * @throws Error if the audio element reports an error while loading.
   */
  isTooShort = (audioBlob: Blob): Promise<boolean> =>
    new Promise((resolve, reject) => {
      const audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);

      audio.onloadedmetadata = () => {
        const { duration } = audio;
        URL.revokeObjectURL(audioUrl); // Clean up URL
        resolve(duration < MINIMUM_AUDIO_DURATION_IN_SECONDS);
      };

      audio.onerror = () => {
        URL.revokeObjectURL(audioUrl); // Clean up URL
        reject(new Error('Error loading audio metadata'));
      };
    });

  /**
   * Starts polling the microphone stream every 200ms and reports the average
   * frequency-bin level and the seconds elapsed since monitoring started via
   * `handleAudioMetadata`. Any monitor that is already running is stopped
   * first, so at most one interval and one AudioContext exist at a time.
   */
  private monitorAudioLevel() {
    this.stopMonitoringAudioLevel();
    if (!this.stream) return;

    const audioContext = new window.AudioContext();
    const source = audioContext.createMediaStreamSource(this.stream);
    const analyser = audioContext.createAnalyser();
    source.connect(analyser);
    analyser.fftSize = 256;

    const dataArray = new Uint8Array(analyser.frequencyBinCount);
    const startTime = Date.now();

    const updateAudioData = () => {
      analyser.getByteFrequencyData(dataArray);
      const audioLevel = dataArray.reduce((a, b) => a + b, 0) / dataArray.length;
      const durationInSeconds = (Date.now() - startTime) / 1000; // in seconds

      this.handleAudioMetadata({ audioLevel, durationInSeconds });
    };

    this.monitorContext = audioContext;
    this.intervalId = window.setInterval(updateAudioData, 200); // Update every 200ms
  }

  /** Cancels the polling interval and closes the monitor's AudioContext, if any. */
  private stopMonitoringAudioLevel() {
    if (this.intervalId !== null) {
      window.clearInterval(this.intervalId);
      this.intervalId = null;
    }

    const context = this.monitorContext;
    if (context) {
      this.monitorContext = null;
      context.close().catch(() => undefined);
    }
  }

  /**
   * Uploads the recording to the backend Whisper endpoint as multipart form data.
   *
   * @param audioBlob Recording to transcribe.
   * @returns The `text` field of the JSON response.
   * @throws Error if the response status is not OK or the response carries no text.
   */
  private transcribe = async (audioBlob: Blob): Promise<string> => {
    const formData = new FormData();
    formData.append('file', audioBlob, 'audio.wav');

    const response = await fetch(KYRON_BACKEND_WHISPER_ENDPOINT, {
      method: 'POST',
      body: formData,
    });
    if (!response.ok) {
      // Error bodies are not guaranteed to be JSON; report the status instead
      // of failing later with an unrelated parse error.
      throw new Error(`Transcription request failed with status ${response.status}`);
    }

    const data = await response.json();
    if (data && data.text) {
      return data.text;
    }
    throw new Error(`No transcription returned in response: ${JSON.stringify(data)}`);
  };
}
