{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#!pip install ffmpeg-python\n",
    "#!conda update ffmpeg\n",
    "!conda install -c conda-forge ffmpeg-python -y #Success\n",
    "!conda update ffmpeg -y # Needed for libopenh264.so lib missing issue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "!pip install audio2numpy # Success"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ffmpeg\n",
    "import numpy as np\n",
    "from scipy.fft import fft, fftfreq\n",
    "import audio2numpy as a2n\n",
    "from IPython.display import Audio,Image,display\n",
    "import subprocess\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    stream = ffmpeg.input('test_video.ts')\n",
    "    out = ffmpeg.output(stream['2'],'output_no16k.mp3',format='mp3')\n",
    "    out.run()\n",
    "except ffmpeg.Error as err:\n",
    "    print(err.stderr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    out, err = (\n",
    "        ffmpeg\n",
    "        .input('/home/sagemaker-user/test/1.mp4')\n",
    "        .output('tmp.mp3',format='mp3',ar='16000')\n",
    "        .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)\n",
    "    )\n",
    "except ffmpeg.Error as err:\n",
    "    print(err.stderr)\n",
    "    raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_freq(x1, sr):\n",
    "    # Number of sample points\n",
    "    N = len(x1)\n",
    "    # sample spacing\n",
    "    T = 1.0 / sr\n",
    "\n",
    "    #y = np.sin(50.0 * 2.0*np.pi*x) + 0.5*np.sin(80.0 * 2.0*np.pi*x)\n",
    "    y = x1\n",
    "    yf = fft(y)\n",
    "    xf = fftfreq(N, T)[:N//2]\n",
    "    import matplotlib.pyplot as plt\n",
    "    plt.plot(xf, 2.0/N * np.abs(yf[0:N//2]))\n",
    "    plt.ylim(0,0.0012)\n",
    "    plt.grid()\n",
    "    plt.show()\n",
    "    return yf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ffmpegProcessor:    \n",
    "    def extract_audio(self, filename, track=1):\n",
    "        tmp_file = filename.split('.')[-2][-3:]\n",
    "        tmp_file += '_tmp.mp3'\n",
    "        try:\n",
    "            stream = ffmpeg.input(filename)\n",
    "            out, err = (\n",
    "                #.output('-', format='f32le', acodec='pcm_f32le', ac=1, ar='16000')\n",
    "                ffmpeg.output(stream[str(track)],tmp_file,format='mp3',ar='16000')\n",
    "                .run(capture_stdout=True, capture_stderr=True,overwrite_output=True)\n",
    "            )\n",
    "        except ffmpeg.Error as err:\n",
    "            print(err.stderr)\n",
    "            raise\n",
    "        #return np.frombuffer(out, np.float32)\n",
    "        x,sr = a2n.audio_from_file(tmp_file)\n",
    "        x1 = [x2[0] for x2 in x]\n",
    "        x1_norm = x1/np.max(x1)\n",
    "        return np.array(x1_norm), sr\n",
    "    \n",
    "ap = ffmpegProcessor()\n",
    "x_ffmpeg, sr = ap.extract_audio('crowdnoise.mp3',0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from scipy.fft import fft, fftfreq\n",
    "import numpy as np\n",
    "\n",
    "def extract_audio(filename, track=1):\n",
    "\n",
    "    tmp_file = filename.split('/')[-1]\n",
    "    tmp_file = tmp_file.split('.')[-2]\n",
    "    tmp_file += f'_track{track}.mp3'\n",
    "    print('Create tmp file',tmp_file)\n",
    "    try:\n",
    "        stream = ffmpeg.input(filename)\n",
    "        out, err = (\n",
    "            ffmpeg.output(stream[str(track)],tmp_file,format='mp3',ar='16000')\n",
    "            .run(capture_stdout=True, capture_stderr=True,overwrite_output=True)\n",
    "        )\n",
    "    except ffmpeg.Error as err:\n",
    "        print(err.stderr.decode('utf-8'))\n",
    "        raise\n",
    "    #return np.frombuffer(out, np.float32)\n",
    "    x,sr = a2n.audio_from_file(tmp_file)\n",
    "    x1 = [x2[0] for x2 in x]\n",
    "    # Normaliz the audio based on peak value\n",
    "    x1_norm = x1/np.max(x1)\n",
    "    return np.array(x1_norm), sr\n",
    "    \n",
    "def fft_power_output(audio_t, sr, beg, end, low_cut,high_cut,plot_f=False):\n",
    "    # Number of sample points\n",
    "    #x = audio_t[]\n",
    "    x = audio_t[beg:end]\n",
    "    N = len(x)\n",
    "    # sample spacing\n",
    "    T = 1.0 / sr\n",
    "    if plot_f:\n",
    "        plt.figure()\n",
    "        plt.title('Audio Signal in Time Domain')\n",
    "        plt.plot(x)\n",
    "        #plt.ylim(-0.5,0.5)\n",
    "    \n",
    "    yf = fft(x)\n",
    "    xf = fftfreq(N, T)[:N//2]\n",
    "    y_fft = np.abs(yf[0:N//2])\n",
    "    if plot_f:\n",
    "        plt.figure()\n",
    "        plt.title('Audio Signal in Freq Domain')\n",
    "        plt.plot(xf, 2.0/N * y_fft)\n",
    "        #plt.ylim(0,0.0012)\n",
    "        plt.grid()\n",
    "    lc = int(low_cut*N//2)\n",
    "    hc = int(-1*high_cut*N//2)\n",
    "    #print(N,low_cut,lc,hc,len(y_fft))\n",
    "    return np.sum(y_fft[:lc]), np.sum(y_fft[hc:])\n",
    "\n",
    "\n",
    "def feature_extraction(media_path, track=1, wsize=5, low_cut=0.1,high_cut=0.1,plot_f=False):\n",
    "    # read in audio file by ffmpeg and convert to 16bit codec\n",
    "    x_ffmpeg, sr = extract_audio(media_path, track)\n",
    "    if plot_f:\n",
    "        plt.title('Over all Audio Signal in Time Domain')\n",
    "        plt.plot(x_ffmpeg)\n",
    "        #plt.ylim(-0.5,0.5)\n",
    "    \n",
    "    nsamples = len(x_ffmpeg)\n",
    "    print(f'Sample rate of the radio is {sr}, total samples {nsamples}')\n",
    "    nw = nsamples//(sr*wsize)\n",
    "    print(f'Total length is {nsamples/sr}s with window size {wsize}s. Num of windows is {nw+1}')\n",
    "    features=[]\n",
    "    for i in range(nw):\n",
    "        beg = i*sr*wsize\n",
    "        end = (i+1)*sr*wsize\n",
    "        print(f'Get FFT features from sample {beg} to {nsamples}')\n",
    "        low, high = fft_power_output(x_ffmpeg, sr, beg, end, low_cut, high_cut, plot_f)\n",
    "        features.append([low,high])\n",
    "\n",
    "    beg = nw*sr*wsize\n",
    "    if (nsamples-beg)/(sr*wsize) > 0.3:\n",
    "        print(f'Get FFT features from sample {beg} to {nsamples}')\n",
    "        low, high = fft_power_output(x_ffmpeg, sr, beg, nsamples, low_cut, high_cut, plot_f)\n",
    "        features.append([low,high])\n",
    "    else:\n",
    "        print(f'Skip last {nsamples-beg} samples, {(nsamples-beg)/sr} sec, from {beg} to {nsamples}')\n",
    "    \n",
    "    \n",
    "    return features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def split_audio(filename, track=1, len=5):\n",
    "    totlen = getLength(filename)\n",
    "    nwin = totlen//len\n",
    "    if totlen%len/len > 0.3:\n",
    "        print(totlen%len/len)\n",
    "        nwin +=1\n",
    "    else:\n",
    "        print(f'Skip last {totlen%len} seconds')\n",
    "    print(f'Total have {nwin} windows')\n",
    "    for i in range(nwin):\n",
    "    #tmp_file = filename.split('.')[-2]\n",
    "        tmp_file = 'tmp_'\n",
    "        tmp_file += str(i)\n",
    "        tmp_file += '.mp3'\n",
    "        try:\n",
    "            stream = ffmpeg.input(filename,ss=i*len)\n",
    "            out, err = (\n",
    "                ffmpeg.output(stream[str(track)],tmp_file,t=len,format='mp3',ar='16000')\n",
    "                .run(capture_stdout=True, capture_stderr=True,overwrite_output=True)\n",
    "            )\n",
    "        except ffmpeg.Error as err:\n",
    "            print(err.stderr)\n",
    "            raise\n",
    "        display(Audio('tmp_'+str(i)+'.mp3'))\n",
    "#split_audio('crowdnoise.mp3',0,5)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}