import KrispSDK, { IAudioFilterNode } from "@krispai/javascript-sdk";
import model16 from "@krispai/javascript-sdk/dist/models/model_16.kw";
import model32 from "@krispai/javascript-sdk/dist/models/model_32.kw";
import model8 from "@krispai/javascript-sdk/dist/models/model_8.kw";
import { dumpMediaStream } from "../../../shared/helpers/dumpMediaStream.js";
import { logger } from "../../../shared/infra/logger.js";
import { ClientContainerInfo } from "../../injection/IClientContainer.js";
import { IRedux } from "../../injection/redux/IRedux.js";
import { PostProcObjectCache } from "../PostProcObjectCache.js";
import {
  DefaultCompressionRatio,
  DefaultCompressionThreshold,
  DefaultPostGain,
} from "../avStreamShared.js";
import { ensureAudioContextClosed } from "../helpers.js";
import { PostProcAudioSpec } from "../interfaces/IAudioPipelineLauncher.js";
import { IPostProcAudioPipeline } from "../interfaces/IPostProcAudioPipeline.js";
import { AudioLevelVoiceDetector } from "./AudioLevelVoiceDetector.js";

/**
 * The slice of the injected client container that this module asks for.
 *
 * NOTE(review): neither accessor is actually called in the visible source (the container is only
 * stored/passed through) — confirm whether both are still required.
 */
interface Injected {
  info(): ClientContainerInfo;
  redux(): IRedux;
}

/** An `AudioWorkletNode` that additionally exposes the Krisp SDK's `IAudioFilterNode` interface. */
export type KrispWorkletNode = AudioWorkletNode & IAudioFilterNode;

/**
 * The bundle produced by `KrispPostProcAudioPipeline.createObject()` and exchanged with the
 * `PostProcObjectCache` so that it can be reused rather than rebuilt.
 */
export interface KrispPostProcAudioCachedObject {
  /** The AudioContext the noise node was created on. */
  context: AudioContext;
  /** The Krisp noise filter node created on `context`. */
  noiseNode: KrispWorkletNode;
  /** Releases the context, the noise node and the SDK instance that created them. */
  cleanup(): void;
}

/**
 * Audio post-processing pipeline built around the Krisp noise filter.
 *
 * `update()` lazily obtains an `AudioContext` + Krisp noise node pair from the object cache and wires
 * up the following Web Audio graph on it:
 *
 *   source -> compressionNode -> postGainNode -> noiseNode -> destination
 *                                                        \-> analyserNode (feeds voiceDetector)
 *
 * `close()` tears the graph down and hands the context/noise node pair back to the cache.
 */
export class KrispPostProcAudioPipeline implements IPostProcAudioPipeline {
  // These are owned by this instance and should only be (re)assigned inside update() and close()
  private context: AudioContext | undefined;
  private source: MediaStreamAudioSourceNode | undefined;
  private noiseNode: KrispWorkletNode | undefined;
  private cleanup: (() => void) | undefined;
  private analyserNode: AnalyserNode | undefined;
  private compressionNode: DynamicsCompressorNode | undefined;
  private postGainNode: GainNode | undefined;
  private destination: MediaStreamAudioDestinationNode | undefined;
  // The ID of the last "raw" stream we were given
  // NOTE(review): never assigned anywhere in this class; it is only reported by dump().
  private lastRawStreamID: string | undefined;
  // The clone of that stream that we pipe through krisp
  // NOTE(review): never assigned anywhere in this class; it is only reported by dump().
  private VADAudioStream: MediaStream | undefined;

  public readonly voiceDetector = new AudioLevelVoiceDetector();

  // In-flight request to the cache for a context/noise node pair (kept across update() calls so a
  // slow load can be picked up by a later call instead of being restarted)
  private contextNoiseNodePromise: Promise<KrispPostProcAudioCachedObject> | undefined;
  // How long (ms) update() waits for contextNoiseNodePromise before giving up
  private contextNoiseNodeTimeout = 5_000;

  /**
   * Builds a fresh, suspended AudioContext together with a Krisp noise filter node created on it.
   *
   * @param container The injected container (not used by the current implementation).
   * @returns The context, the noise node, and a `cleanup` function that releases both plus the SDK.
   * @throws Re-throws any error raised while initialising the SDK, the context or the filter node,
   *   after disposing whatever had already been created.
   */
  public static async createObject(container: Injected): Promise<KrispPostProcAudioCachedObject> {
    let context: AudioContext | undefined;
    let noiseNode: KrispWorkletNode | undefined;
    let sdk: KrispSDK | undefined;
    try {
      sdk = new KrispSDK({
        params: {
          bufferOverflowMS: 200,
          debugLogs: false,
          logProcessStats: false,
          models: {
            model8: new URL(model8, location.href).href,
            model16: new URL(model16, location.href).href,
            model32: new URL(model32, location.href).href,
          },
          useSharedArrayBuffer: false, // TODO: enable this for better performance (requires changing some security headers)
        },
      });
      await sdk.init();

      context = new AudioContext({
        latencyHint: "interactive",
        sampleRate: 48000,
      });
      // Keep the context suspended until a pipeline actually needs it; update() resumes it
      await context.suspend();
      noiseNode = await sdk.createNoiseFilter(context, () => {
        if (noiseNode) noiseNode.enable();
      });
      const noiseNodeListener = (event: any) => {
        const { bufferSizeMS, isBufferDropped, overflowCount } = event.data;
        // bufferSizeMS - current buffer size in ms.
        // isBufferDropped - indicates if buffer was dropped due to overflow. Max buffer size is 500ms; it will start dropping once it reaches 500ms.
        // overflowCount - count of buffer overflow occurrences after filterNode is enabled.
        logger.warn(
          `Krisp buffer overflow, bufferSizeMS: ${bufferSizeMS}, isBufferDropped: ${isBufferDropped}, overflowCount: ${overflowCount}`
        );
      };
      noiseNode.addEventListener("buffer_overflow", noiseNodeListener);

      return {
        context,
        noiseNode,
        cleanup: () => {
          if (context) {
            ensureAudioContextClosed(context);
          }
          if (noiseNode) {
            noiseNode.removeEventListener("buffer_overflow", noiseNodeListener);
            noiseNode.dispose();
          }
          if (sdk) {
            sdk.dispose();
          }
        },
      };
    } catch (err) {
      // Release whatever was created before the failure, then propagate the error
      if (context) {
        ensureAudioContextClosed(context);
      }
      if (noiseNode) {
        noiseNode.dispose();
      }
      if (sdk) {
        sdk.dispose();
      }
      throw err;
    }
  }

  constructor(
    private container: Injected,
    private postProcObjectCache: PostProcObjectCache<KrispPostProcAudioCachedObject>
  ) {}

  /**
   * Points the pipeline at `rawStream` (building the node graph on first use) or, when `rawStream`
   * is undefined, suspends the context and detaches the current source.
   *
   * @param spec Compression/gain settings. When undefined the graph is still built (it is needed
   *   for voice detection) but no processed stream is returned.
   * @param rawStream The stream to process, or undefined to pause the pipeline.
   * @returns The processed stream when both `spec` and `rawStream` are given, otherwise undefined.
   * @throws If the context/noise node pair fails to load or does not load in time, if resuming the
   *   AudioContext fails or takes longer than one second, or if the context is not running
   *   afterwards.
   */
  async update(
    spec: PostProcAudioSpec | undefined,
    rawStream: MediaStream | undefined
  ): Promise<MediaStream | undefined> {
    if (rawStream) {
      // Note: This should all run even if noise suppression is not requested; we need the chain of nodes set up to do VAD
      if (!this.context || !this.noiseNode) {
        let result: KrispPostProcAudioCachedObject | undefined;
        let loadTimer: ReturnType<typeof setTimeout> | undefined;
        try {
          if (!this.contextNoiseNodePromise) {
            this.contextNoiseNodePromise = this.postProcObjectCache.get();
          }
          // Wait for it to finish, but with a timeout
          result = await Promise.race([
            this.contextNoiseNodePromise,
            new Promise<undefined>((resolve) => {
              loadTimer = setTimeout(() => resolve(undefined), this.contextNoiseNodeTimeout);
            }),
          ]);
        } catch (err: unknown) {
          // Error loading
          // Try to load again on the next call to update()
          this.contextNoiseNodePromise = undefined;
          this.contextNoiseNodeTimeout = 5_000;
          // Re-raise the error
          throw err;
        } finally {
          // Don't leave the timeout timer pending once the race has been decided
          clearTimeout(loadTimer);
        }
        if (result) {
          // Successfully loaded
          this.contextNoiseNodePromise = undefined;
          this.context = result.context;
          this.noiseNode = result.noiseNode;
          this.cleanup = result.cleanup;
        } else {
          // Timeout
          // Don't wait at all on future calls to update() -- if it isn't ready immediately, we'll just
          // throw an error and let the rest of the pipeline keep going
          this.contextNoiseNodeTimeout = 0;
          throw new Error("Timeout loading AudioContext and NoiseNode");
        }
      }

      // Resume the AudioContext if it was suspended
      if (this.context.state === "suspended") {
        const context = this.context;
        let resumeTimer: ReturnType<typeof setTimeout> | undefined;
        try {
          // AudioContext.resume() hangs if it is called before there has been a user gesture on the page
          // As a precaution, use a short timeout
          await Promise.race([
            context.resume(),
            new Promise<never>((_resolve, reject) => {
              resumeTimer = setTimeout(
                () => reject(new Error("AudioContext.resume took too long")),
                1000
              );
            }),
          ]);
        } finally {
          clearTimeout(resumeTimer);
        }
      }
      // Make sure the AudioContext *is* actually now running
      if (this.context.state !== "running") {
        // If it isn't, then trying to proceed will result in no audio flowing. Throw an error; we'll
        // fall back on not using noise suppression (which is preferable).
        throw new Error(`AudioContext has unexpected state '${this.context.state}'`);
      }

      // The analyser taps the noise node's output and feeds the voice detector
      if (!this.analyserNode) {
        this.analyserNode = new AnalyserNode(this.context);
        this.analyserNode.fftSize = 2048;
        this.noiseNode.connect(this.analyserNode);
      }

      this.voiceDetector.start(this.analyserNode);

      // Create the compressor on first use; afterwards only touch the params that actually changed
      const ratio = spec?.compressionRatio ?? DefaultCompressionRatio;
      const threshold = spec?.compressionThreshold ?? DefaultCompressionThreshold;
      if (this.compressionNode) {
        if (this.compressionNode.ratio.value !== ratio) {
          this.compressionNode.ratio.value = ratio;
        }
        if (this.compressionNode.threshold.value !== threshold) {
          this.compressionNode.threshold.value = threshold;
        }
      } else {
        this.compressionNode = new DynamicsCompressorNode(this.context, {
          attack: 0.3,
          knee: 30,
          ratio,
          release: 0.25,
          threshold,
        });
      }

      // Same for the post-gain stage; creating it also wires compressor -> gain -> noise node
      const gain = spec?.postGain ?? DefaultPostGain;
      if (this.postGainNode) {
        if (this.postGainNode.gain.value !== gain) {
          this.postGainNode.gain.value = gain;
        }
      } else {
        this.postGainNode = new GainNode(this.context, { gain });
        this.compressionNode.connect(this.postGainNode);
        this.postGainNode.connect(this.noiseNode);
      }

      if (!this.destination || this.destination.stream.getTracks()[0]?.readyState !== "live") {
        // If the captured stream from the destination node has ended (or if we don't have a
        // destination node), get a new destination node
        this.destination?.disconnect();
        this.destination = this.context.createMediaStreamDestination();
        this.noiseNode.connect(this.destination);
      }

      if (!this.source || this.source.mediaStream !== rawStream) {
        // Disconnect the old source node (if any)
        this.source?.disconnect();
        this.source = undefined;
        // Create a new source node and connect it
        this.source = this.context.createMediaStreamSource(rawStream);
        this.source.connect(this.compressionNode);
      }

      // Only actually return the stream if spec is not undefined
      return spec ? this.destination.stream : undefined;
    } else {
      try {
        // Suspend the context (if we have one) to relinquish access to the hardware (necessary to avoid
        // audio quality issues with some applications that don't like other things accessing audio)
        await this.context?.suspend();
      } finally {
        // Run the rest of the teardown even if suspend() rejected, so the detector and the old
        // source are never left attached
        this.voiceDetector.stop();

        // Disconnect the source node (if there is one)
        this.source?.disconnect();
        this.source = undefined;
      }
      return undefined;
    }
  }

  /**
   * Stops voice detection, dismantles the node graph and returns the context/noise node pair to the
   * cache (after suspending the context). If a load from the cache is still in flight, its eventual
   * result is returned to the cache as well.
   */
  public async close(): Promise<void> {
    this.voiceDetector.stop();

    // Dismantle every node this pipeline created. They all belong to the AudioContext that is about
    // to be handed back, so they must neither stay connected to it nor be reused by a later update().
    this.source?.disconnect();
    this.source = undefined;
    this.compressionNode?.disconnect();
    this.compressionNode = undefined;
    this.postGainNode?.disconnect();
    this.postGainNode = undefined;
    this.analyserNode?.disconnect();
    this.analyserNode = undefined;
    this.destination?.disconnect();
    this.destination = undefined;
    // The noise node is shared via the cache: drop its outgoing connections (to our analyser and
    // destination) so the next user starts from an unconnected node
    this.noiseNode?.disconnect();

    if (this.context && this.noiseNode && this.cleanup) {
      // Extract the AudioContext and NoiseNode
      const context = this.context;
      const noiseNode = this.noiseNode;
      const cleanup = this.cleanup;

      this.context = undefined;
      this.noiseNode = undefined;
      this.cleanup = undefined;

      // Suspend the context (deliberately not awaited; close() returns right away)
      context
        .suspend()
        .then(() => {
          // Once it's been suspended, put it in the cache
          this.postProcObjectCache.put({
            context,
            noiseNode,
            cleanup,
          });
        })
        .catch(() => {
          // Error suspending (?) - just try to clean up the node and the context
          cleanup();
        });
    } else {
      // We either don't have a AudioContext, don't have a NoiseNode, or don't have a cleanup function.
      // If we have a cleanup function, call it.
      this.cleanup?.();
      this.cleanup = undefined;
    }

    if (this.contextNoiseNodePromise) {
      // A load is still pending: give its result straight back to the cache when it arrives. A
      // failed load leaves nothing to return, so the rejection is intentionally ignored.
      void this.contextNoiseNodePromise
        .then((result) => {
          this.postProcObjectCache.put(result);
        })
        .catch(() => {});
      this.contextNoiseNodePromise = undefined;
    }
  }

  /** Returns a snapshot of internal state for diagnostics. */
  public dump(): any {
    return {
      hasContext: !!this.context,
      lastRawStreamID: this.lastRawStreamID,
      VADAudioStream: this.VADAudioStream ? dumpMediaStream(this.VADAudioStream) : undefined,
      voiceDetector: this.voiceDetector.dump(),
      contextNoiseNodeTimeout: this.contextNoiseNodeTimeout,
    };
  }
}
