move pkg, resources, dbm_lib, to under 1 opendbm directory

2022-09-14 23:53:10 +07:00
parent a1816eb4b5
commit 5a585a7996
46 changed files with 48 additions and 53 deletions
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py
@@ -0,0 +1,133 @@
+"""
+file_name: formant_freq
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import parselmouth
+import numpy as np
+import parselmouth
+import librosa
+import glob
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the formant csv
+formant_dir = 'acoustic/formant_freq'
+csv_ext = '_formant.csv'
+# err_reason text written by empty_fm
+error_txt = 'error: length less than 0.064'
+
+def formant_list(formant,snd):
+    """
+    Sample the first four formants every 0.001 s of the sound
+    Args:
+        formant: Formant object for sound wave
+        snd: Parselmouth sound object
+    Returns:
+        Four lists (first through fourth formant), one value per sampled time
+    """
+    # sampling stops 0.02 s before the end of the sound (rounded to 2 decimals)
+    last_time = round(snd.duration - 0.02, 2)
+    tracks = ([], [], [], [])
+
+    for moment in np.arange(0.001, last_time, 0.001):
+        # formant numbers are 1-based: track 0 holds formant 1, and so on
+        for number, track in enumerate(tracks, start=1):
+            track.append(formant.get_value_at_time(number, moment))
+    return tracks
+
+def formant_score(path):
+    """
+    Load an audio file with parselmouth and extract its formant tracks
+    Args:
+        path: (.wav) audio file location
+    Returns:
+        The four formant lists produced by formant_list
+    """
+    snd = parselmouth.Sound(path)
+    # Burg formant analysis with a 0.001 s time step
+    return formant_list(snd.to_formant_burg(time_step=.001), snd)
+
+def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Build the formant frequency table for one audio file and save it as csv
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file; fl_name: input file name
+        out_loc: (str) Output directory; r_config: raw variable config
+    """
+    first, second, third, fourth = formant_score(audio_file)
+    formant_df = pd.DataFrame(first, columns=[r_config.aco_fm1])
+
+    remaining = (
+        (r_config.aco_fm2, second),
+        (r_config.aco_fm3, third),
+        (r_config.aco_fm4, fourth),
+    )
+    for column, values in remaining:
+        formant_df[column] = values
+
+    # kept ahead of the err_reason assignment so that text column is not touched by the replacement
+    formant_df.replace('', np.nan, regex=True, inplace=True)
+    formant_df[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
+
+    formant_df['Frames'] = formant_df.index
+    formant_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(formant_df, out_loc, fl_name, formant_dir, csv_ext)
+    
+def empty_fm(video_uri, out_loc, fl_name, r_config):
+    """
+    Save a one-row formant csv holding NaN values and the module error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config
+    """
+    formant_cols = [r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4]
+    columns = ['Frames'] + formant_cols + [r_config.err_reason]
+    # NaN for 'Frames' and every formant column, then the error text
+    placeholder_row = [np.nan] * (len(formant_cols) + 1) + [error_txt]
+
+    fm_df = pd.DataFrame([placeholder_row], columns=columns)
+    fm_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(fm_df, out_loc, fl_name, formant_dir, csv_ext)
+
+def run_formant(video_uri, out_dir, r_config):
+    """
+    Compute formant frequencies for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv instead of features. Any exception
+    raised along the way is logged with its traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            empty_fm(video_uri, out_loc, fl_name, r_config)
+            return
+
+        calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py
@@ -0,0 +1,161 @@
+"""
+file_name: gne
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import numpy as np
+import os
+import glob
+import parselmouth
+import librosa
+import more_itertools as mit
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the GNE csv
+gne_dir = 'acoustic/glottal_noise'
+# Sub-directory (under the output location) where calc_gne looks for pitch csv files
+ff_dir =  'acoustic/pitch'
+csv_ext = '_gne.csv'
+
+def gne_ratio(sound):
+    """
+    Using parselmouth library fetching glottal noise excitation ratio
+    Args:
+        sound: parselmouth object
+    Returns:
+        (float) largest value of the GNE harmonicity matrix, ignoring entries
+        equal to -200; NaN when every entry equals -200
+    """
+    gne_all_bands = sound.to_harmonicity_gne().values
+    # -200 entries are treated as missing; np.nan is used because the np.NaN alias
+    # no longer exists in NumPy 2.0
+    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)
+
+    # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
+    return np.nanmax(gne_all_bands)
+
+def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
+    """
+    Save a one-row GNE csv holding NaN values and the given error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config; error_txt: text for the err_reason column
+    """
+    columns = ['Frames', r_config.aco_gne, r_config.err_reason]
+    placeholder_row = [np.nan] * 2 + [error_txt]
+
+    gne_df = pd.DataFrame([placeholder_row], columns=columns)
+    gne_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(gne_df, out_loc, fl_name, gne_dir, csv_ext)
+    
+def segment_pitch(dir_path, r_config):
+    """
+    Split the voice labels of a pitch csv into runs of consecutive frames
+    Args:
+        dir_path: directory scanned for files ending in '_pitch.csv'
+        r_config: raw variable config; r_config.aco_voiceLabel names the label column
+    Returns:
+        (com_speech_sort, voiced_yes, voiced_no): lists of frame-index runs.
+        voiced_yes / voiced_no hold the runs labelled "yes" / "no" and
+        com_speech_sort holds both, ordered by first frame index. All three are
+        empty when no pitch file could be read; when several files match, the
+        result of the last one read successfully is returned.
+    """
+    # three independent lists (tuple multiplication would bind one list to all names)
+    com_speech_sort, voiced_yes, voiced_no = [], [], []
+    for file in os.listdir(dir_path):
+        if not file.endswith('_pitch.csv'):
+            continue
+
+        try:
+            ff_df = pd.read_csv(os.path.join(dir_path, file))
+            voice_label = ff_df[r_config.aco_voiceLabel]
+
+            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
+            yes_runs = [list(group) for group in mit.consecutive_groups(indices_yes)]
+
+            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
+            no_runs = [list(group) for group in mit.consecutive_groups(indices_no)]
+
+            merged = sorted(yes_runs + no_runs, key=lambda run: run[0])
+        except Exception:
+            # best effort: an unreadable pitch file is skipped, but no longer silently
+            logger.warning('Could not segment pitch file {}'.format(file), exc_info=True)
+            continue
+
+        # results are published together so a failure never leaves them half updated
+        com_speech_sort, voiced_yes, voiced_no = merged, yes_runs, no_runs
+
+    return com_speech_sort, voiced_yes, voiced_no
+
+def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
+    """
+    calculating gne for each voice segment
+    Args:
+        com_speech_sort: all frame-index runs, in output order
+        voiced_yes: the runs that are voiced
+        voiced_no: the unvoiced runs (not used here; kept so callers need not change)
+        gne_all_frames: list with one slot per entry of com_speech_sort; filled in place
+        audio_file: (.wav) audio file location
+    Returns:
+        gne_all_frames, where slot i holds the GNE of run i, or NaN when the run
+        is not voiced, has a single frame, or could not be processed
+    """
+    snd = parselmouth.Sound(audio_file)
+    pitch = snd.to_pitch(time_step=.001)
+    # row 0 of the sample array (presumably the first channel); read once, not per segment
+    samples_all = snd.as_array()[0]
+    # hashable copies give constant-time membership instead of scanning a list of lists
+    voiced_runs = {tuple(run) for run in voiced_yes}
+
+    for idx, vs in enumerate(com_speech_sort):
+        max_gne = np.nan
+        try:
+            if len(vs) > 1 and tuple(vs) in voiced_runs:
+                # NOTE(review): vs holds 0-based csv row indices while Praat frame numbers
+                # are presumably 1-based - confirm whether an offset is needed here
+                start_time = pitch.get_time_from_frame_number(vs[0])
+                end_time = pitch.get_time_from_frame_number(vs[-1])
+
+                snd_start = int(snd.get_frame_number_from_time(start_time))
+                snd_end = int(snd.get_frame_number_from_time(end_time))
+
+                # keep the source sampling rate; Sound() would otherwise assume its default
+                samples = parselmouth.Sound(samples_all[snd_start:snd_end],
+                                            sampling_frequency=snd.sampling_frequency)
+                max_gne = gne_ratio(samples)
+        except Exception:
+            # best effort per segment: the slot stays NaN, the cause is kept at debug level
+            logger.debug('GNE failed for segment {}'.format(idx), exc_info=True)
+
+        gne_all_frames[idx] = max_gne
+    return gne_all_frames
+    
+def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Preparing gne matrix
+
+    Needs the pitch output directory (out_loc/acoustic/pitch) to exist; when it
+    does not, a one-row error csv is written instead.
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: (str) Output directory for csv's
+        fl_name: input file name; r_config: raw variable config
+    """
+    dir_path = os.path.join(out_loc, ff_dir)
+    if not os.path.isdir(dir_path):
+        error_txt = 'error: pitch freq not available'
+        empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
+        return
+
+    com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
+
+    # one slot per segment; np.nan because the np.NaN alias no longer exists in NumPy 2.0
+    gne_all_frames = [np.nan] * len(com_speech_sort)
+    gne_segment_frames = segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file)
+
+    df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
+    df_gne[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
+
+    df_gne['Frames'] = df_gne.index
+    df_gne['dbm_master_url'] = video_uri
+
+    logger.info('Processing Output file {} '.format(out_loc))
+    ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
+
+def run_gne(video_uri, out_dir, r_config):
+    """
+    Compute the glottal noise ratio for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv instead of features. Any exception
+    raised along the way is logged with its traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+
+            error_txt = 'error: length less than 0.064'
+            empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
+            return
+
+        calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py
@@ -0,0 +1,96 @@
+"""
+file_name: hnr
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import numpy as np
+import os
+import glob
+import parselmouth
+import librosa
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the HNR csv
+hnr_dir = 'acoustic/harmonic_noise'
+csv_ext = '_hnr.csv'
+# err_reason text written by empty_hnr
+error_txt = 'error: length less than 0.064'
+
+def hnr_ratio(filepath):
+    """
+    Using parselmouth library fetching harmonic noise ratio
+    Args:
+        filepath: (.wav) audio file location
+    Returns:
+        Transposed harmonicity value array (0.001 s time step) in which every
+        entry equal to -200 has been replaced by NaN
+    """
+    sound = parselmouth.Sound(filepath)
+    harmonicity = sound.to_harmonicity_ac(time_step=.001)
+
+    hnr_all_frames = harmonicity.values
+    # -200 entries are treated as missing; np.nan is used because the np.NaN alias
+    # no longer exists in NumPy 2.0
+    hnr_all_frames = np.where(hnr_all_frames == -200, np.nan, hnr_all_frames)
+    return hnr_all_frames.transpose()
+
+def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Build the harmonic noise table for one audio file and save it as csv
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: (str) Output directory for csv's
+        fl_name: input file name; r_config: raw variable config
+    """
+    hnr_df = pd.DataFrame(hnr_ratio(audio_file), columns=[r_config.aco_hnr])
+
+    extra_columns = (
+        ('Frames', hnr_df.index),
+        ('dbm_master_url', video_uri),
+        (r_config.err_reason, 'Pass'),  # will replace with threshold in future release
+    )
+    for column, value in extra_columns:
+        hnr_df[column] = value
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(hnr_df, out_loc, fl_name, hnr_dir, csv_ext)
+    
+def empty_hnr(video_uri, out_loc, fl_name, r_config):
+    """
+    Save a one-row HNR csv holding NaN values and the module error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config
+    """
+    columns = ['Frames', r_config.aco_hnr, r_config.err_reason]
+    placeholder_row = [np.nan] * 2 + [error_txt]
+
+    hnr_df = pd.DataFrame([placeholder_row], columns=columns)
+    hnr_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(hnr_df, out_loc, fl_name, hnr_dir, csv_ext)
+
+def run_hnr(video_uri, out_dir, r_config):
+    """
+    Compute the harmonic noise ratio for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv instead of features. Any exception
+    raised along the way is logged with its traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            empty_hnr(video_uri, out_loc, fl_name, r_config)
+            return
+
+        calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py
@@ -0,0 +1,92 @@
+"""
+file_name: intensity
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import numpy as np
+import glob
+import parselmouth
+import librosa
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the intensity csv
+intensity_dir = 'acoustic/intensity'
+csv_ext = '_intensity.csv'
+# err_reason text written by empty_intensity
+error_txt = 'error: length less than 0.064'
+
+def intensity_score(path):
+    """
+    Load an audio file with parselmouth and extract its intensity track
+    Args:
+        path: (.wav) audio file location
+    Returns:
+        Row 0 of the intensity value array computed with a 0.001 s time step
+    """
+    snd = parselmouth.Sound(path)
+    return snd.to_intensity(time_step=.001).values[0]
+
+def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Build the intensity table for one audio file and save it as csv
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: (str) Output directory for csv's
+        fl_name: input file name; r_config: raw variable config
+    """
+    intensity_df = pd.DataFrame(intensity_score(audio_file), columns=[r_config.aco_int])
+
+    extra_columns = (
+        ('Frames', intensity_df.index),
+        ('dbm_master_url', video_uri),
+        (r_config.err_reason, 'Pass'),  # will replace with threshold in future release
+    )
+    for column, value in extra_columns:
+        intensity_df[column] = value
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(intensity_df, out_loc, fl_name, intensity_dir, csv_ext)
+    
+def empty_intensity(video_uri, out_loc, fl_name, r_config):
+    """
+    Save a one-row intensity csv holding NaN values and the module error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config
+    """
+    columns = ['Frames', r_config.aco_int, r_config.err_reason]
+    placeholder_row = [np.nan] * 2 + [error_txt]
+
+    intensity_df = pd.DataFrame([placeholder_row], columns=columns)
+    intensity_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(intensity_df, out_loc, fl_name, intensity_dir, csv_ext)
+
+def run_intensity(video_uri, out_dir, r_config):
+    """
+    Compute the intensity for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv instead of features. Any exception
+    raised along the way is logged with its traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            empty_intensity(video_uri, out_loc, fl_name, r_config)
+            return
+
+        calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py
@@ -0,0 +1,159 @@
+"""
+file_name: jitter_processing
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import numpy as np
+import os
+import glob
+import parselmouth
+import librosa
+import numpy as np
+import more_itertools as mit
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the jitter csv
+jitter_dir = 'acoustic/jitter'
+# Sub-directory (under the output location) where calc_jitter looks for pitch csv files
+ff_dir =  'acoustic/pitch'
+csv_ext = '_jitter.csv'
+
+def audio_jitter(sound):
+    """
+    Compute local jitter for a sound through two Praat commands
+    Args:
+        sound: parselmouth object
+    Returns:
+        Value returned by Praat's "Get jitter (local)" for the sound
+    """
+    praat_call = parselmouth.praat.call
+    # 80 and 500 are presumably the pitch floor and ceiling in Hz - confirm in the Praat manual
+    point_process = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
+    return praat_call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
+
+def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
+    """
+    Save a one-row jitter csv holding NaN values and the given error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config; error_txt: text for the err_reason column
+    """
+    columns = ['Frames', r_config.aco_jitter, r_config.err_reason]
+    placeholder_row = [np.nan] * 2 + [error_txt]
+
+    jitter_df = pd.DataFrame([placeholder_row], columns=columns)
+    jitter_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(jitter_df, out_loc, fl_name, jitter_dir, csv_ext)
+    
+def segment_pitch(dir_path, r_config):
+    """
+    Split the voice labels of a pitch csv into runs of consecutive frames
+    Args:
+        dir_path: directory scanned for files ending in '_pitch.csv'
+        r_config: raw variable config; r_config.aco_voiceLabel names the label column
+    Returns:
+        (com_speech_sort, voiced_yes, voiced_no): lists of frame-index runs.
+        voiced_yes / voiced_no hold the runs labelled "yes" / "no" and
+        com_speech_sort holds both, ordered by first frame index. All three are
+        empty when no pitch file could be read; when several files match, the
+        result of the last one read successfully is returned.
+    """
+    # three independent lists (tuple multiplication would bind one list to all names)
+    com_speech_sort, voiced_yes, voiced_no = [], [], []
+    for file in os.listdir(dir_path):
+        if not file.endswith('_pitch.csv'):
+            continue
+
+        try:
+            ff_df = pd.read_csv(os.path.join(dir_path, file))
+            voice_label = ff_df[r_config.aco_voiceLabel]
+
+            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
+            yes_runs = [list(group) for group in mit.consecutive_groups(indices_yes)]
+
+            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
+            no_runs = [list(group) for group in mit.consecutive_groups(indices_no)]
+
+            merged = sorted(yes_runs + no_runs, key=lambda run: run[0])
+        except Exception:
+            # best effort: an unreadable pitch file is skipped, but no longer silently
+            logger.warning('Could not segment pitch file {}'.format(file), exc_info=True)
+            continue
+
+        # results are published together so a failure never leaves them half updated
+        com_speech_sort, voiced_yes, voiced_no = merged, yes_runs, no_runs
+
+    return com_speech_sort, voiced_yes, voiced_no
+
+def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
+    """
+    calculating jitter for each voice segment
+    Args:
+        com_speech_sort: all frame-index runs, in output order
+        voiced_yes: the runs that are voiced
+        voiced_no: the unvoiced runs (not used here; kept so callers need not change)
+        jitter_frames: list with one slot per entry of com_speech_sort; filled in place
+        audio_file: (.wav) audio file location
+    Returns:
+        jitter_frames, where slot i holds the jitter of run i, or NaN when the run
+        is not voiced, has a single frame, or could not be processed
+    """
+    snd = parselmouth.Sound(audio_file)
+    pitch = snd.to_pitch(time_step=.001)
+    # row 0 of the sample array (presumably the first channel); read once, not per segment
+    samples_all = snd.as_array()[0]
+    # hashable copies give constant-time membership instead of scanning a list of lists
+    voiced_runs = {tuple(run) for run in voiced_yes}
+
+    for idx, vs in enumerate(com_speech_sort):
+        jitter = np.nan
+        try:
+            if len(vs) > 1 and tuple(vs) in voiced_runs:
+                # NOTE(review): vs holds 0-based csv row indices while Praat frame numbers
+                # are presumably 1-based - confirm whether an offset is needed here
+                start_time = pitch.get_time_from_frame_number(vs[0])
+                end_time = pitch.get_time_from_frame_number(vs[-1])
+
+                snd_start = int(snd.get_frame_number_from_time(start_time))
+                snd_end = int(snd.get_frame_number_from_time(end_time))
+
+                # keep the source sampling rate; Sound() would otherwise assume its default
+                samples = parselmouth.Sound(samples_all[snd_start:snd_end],
+                                            sampling_frequency=snd.sampling_frequency)
+                jitter = audio_jitter(samples)
+        except Exception:
+            # best effort per segment: the slot stays NaN, the cause is kept at debug level
+            logger.debug('Jitter failed for segment {}'.format(idx), exc_info=True)
+
+        jitter_frames[idx] = jitter
+    return jitter_frames
+    
+def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Preparing jitter matrix
+
+    Needs the pitch output directory (out_loc/acoustic/pitch) to exist; when it
+    does not, a one-row error csv is written instead.
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: (str) Output directory for csv
+        fl_name: input file name
+        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
+    """
+    dir_path = os.path.join(out_loc, ff_dir)
+    if not os.path.isdir(dir_path):
+        error_txt = 'error: fundamental freq not available'
+        empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
+        return
+
+    com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
+
+    # one slot per segment; np.nan because the np.NaN alias no longer exists in NumPy 2.0
+    jitter_frames = [np.nan] * len(com_speech_sort)
+    jitter_segment_frames = segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file)
+
+    df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
+    df_jitter[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
+
+    df_jitter['Frames'] = df_jitter.index
+    df_jitter['dbm_master_url'] = video_uri
+
+    logger.info('Processing Output file {} '.format(out_loc))
+    ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
+    
+def run_jitter(video_uri, out_dir, r_config):
+    """
+    Compute jitter for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv instead of features. Any exception
+    raised along the way is logged with its traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+
+            error_txt = 'error: length less than 0.064'
+            empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
+            return
+
+        calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py
@@ -0,0 +1,105 @@
+"""
+file_name: mfcc
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import os
+import glob
+import parselmouth
+import librosa
+import numpy as np
+import librosa
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the mfcc csv
+mfcc_dir = 'acoustic/mfcc'
+csv_ext = '_mfcc.csv'
+# err_reason text written by empty_mfcc
+error_txt = 'error: length less than 0.064'
+
+def empty_mfcc(video_uri, out_loc, fl_name, r_config):
+    """
+    Save a one-row mfcc csv holding NaN values and the module error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config
+    """
+    mfcc_cols = [
+        r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4,
+        r_config.aco_mfcc5, r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8,
+        r_config.aco_mfcc9, r_config.aco_mfcc10, r_config.aco_mfcc11, r_config.aco_mfcc12,
+    ]
+    columns = ['Frames'] + mfcc_cols + [r_config.err_reason]
+    # NaN for 'Frames' and each of the twelve coefficients, then the error text
+    placeholder_row = [np.nan] * (len(mfcc_cols) + 1) + [error_txt]
+
+    mfcc_df = pd.DataFrame([placeholder_row], columns=columns)
+    mfcc_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(mfcc_df, out_loc, fl_name, mfcc_dir, csv_ext)
+
+def audio_mfcc(path):
+    """
+    Load an audio file with parselmouth and extract its mfcc matrix
+    Args:
+        path: (.wav) audio file location
+    Returns:
+        The mfcc array (12 coefficients requested, 0.001 s time step) with its
+        first row removed
+    """
+    mfcc_object = parselmouth.Sound(path).to_mfcc(time_step=.001, number_of_coefficients=12)
+    # row 0 is dropped so that the remaining rows map onto mfcc1..mfcc12 in calc_mfcc
+    return np.delete(mfcc_object.to_array(), 0, axis=0)
+    
+def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Build the mfcc table for one audio file and save it as csv
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: output location to save csv
+        fl_name: (str) name of audio file
+        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
+    """
+    mfccs = audio_mfcc(audio_file)
+    feature_names = r_config.base_raw['raw_feature']
+
+    # row i-1 of the mfcc array becomes the column configured under 'aco_mfcc<i>'
+    columns = {feature_names['aco_mfcc' + str(i)]: mfccs[i - 1, :] for i in range(1, 13)}
+
+    mfcc_df = pd.DataFrame(columns)
+    mfcc_df['Frames'] = mfcc_df.index
+
+    mfcc_df[r_config.err_reason] = 'Pass'  # may replace based on threshold in future release
+    mfcc_df['dbm_master_url'] = video_uri
+
+    ut.save_output(mfcc_df, out_loc, fl_name, mfcc_dir, csv_ext)
+    
+def run_mfcc(video_uri, out_dir, r_config):
+    """
+    Compute mfccs for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv instead of features. Any exception
+    raised along the way is logged with its traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            empty_mfcc(video_uri, out_loc, fl_name, r_config)
+            return
+
+        calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py
@@ -0,0 +1,171 @@
+"""
+file_name: pause_segment
+project_name: DBM
+created: 2020-20-07
+"""
+
+import os
+import glob
+from pydub import AudioSegment
+import librosa
+import pandas as pd
+import numpy as np
+import webrtcvad
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut
+
+# Logging is configured at import time; getLogger() with no name is the root logger.
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+# Sub-directory and file suffix handed to ut.save_output for the pause csv
+pause_seg_dir = 'acoustic/pause_segment'
+csv_ext = '_pausechar.csv'
+
+def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
+    """
+    Get timing cues from segmented speech
+    Args:
+        seg_starts_sec: Audio segment start times in seconds (non-empty sequence)
+        seg_ends_sec: Audio segment end times in seconds, same length as the starts
+        r_config: raw variable config supplying the output key names
+    Returns:
+        Dictionary with pause features: total time (last end minus first start),
+        speaking time (sum of segment lengths), number of pauses, summed pause
+        time and pause fraction (pause time / total time). The fraction is NaN
+        when the total time is zero.
+    """
+    starts = np.asarray(seg_starts_sec)
+    ends = np.asarray(seg_ends_sec)
+
+    total_time = seg_ends_sec[-1] - seg_starts_sec[0]
+    speaking_time = np.sum(ends - starts)
+    num_pauses = len(seg_starts_sec) - 1
+
+    if num_pauses > 0:
+        # a pause is the gap between the end of one segment and the start of the next
+        pause_len = np.asarray(starts[1:] - ends[:-1], dtype=float)
+        pause_time = np.sum(pause_len)
+    else:
+        pause_time = 0
+
+    # a zero-length span has no defined pause fraction; avoid dividing by zero
+    pause_frac = pause_time / total_time if total_time else np.nan
+
+    timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
+                   r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time,
+                   r_config.aco_pausefrac: pause_frac}
+    return timing_dict
+
+def process_silence(audio_file, r_config):
+    """
+    Derive pause features for one audio file using voice activity detection
+    Args:
+        audio_file: Audio file location
+        r_config: raw variable config object
+    Returns:
+        Dataframe with the timing features and the err_reason column; it has no
+        rows when the file holds no audio data. An empty string is returned
+        when no voice segment is found.
+    """
+    # 3 is most aggressive (splits most), 0 least (better for low snr)
+    vad_mode = 3
+    frame_ms = 20
+
+    # padding around voice, in ms, for the long and the short segmentation pass
+    long_pad_ms = 200
+    short_pad_ms = 100
+
+    rows = []
+    y, sr = vu.read_wave(audio_file)
+
+    if len(y) > 0:
+        vad = webrtcvad.Vad(vad_mode)
+        frames = list(vu.frame_generator(frame_ms, y, sr))
+
+        # longer pad time screens out little blips, but misses short silences
+        long_starts, long_ends = vu.vad_get_segment_times(sr, frame_ms, long_pad_ms, vad, frames)
+
+        # Logic to handle blank audio file
+        if not (len(long_starts) > 0 and len(long_ends) > 0):
+            return ''
+
+        window_start = long_starts[0]
+        window_end = long_ends[-1]
+        # shorter pad time captures short silences (but misfires on little blips)
+        short_starts, short_ends = vu.vad_get_segment_times(sr, frame_ms, short_pad_ms, vad, frames)
+
+        # logic to clean up some typical misfires: keep short segments starting inside the long window
+        kept_starts = []
+        kept_ends = []
+        for k, start in enumerate(short_starts):
+            if window_start <= start <= window_end:
+                kept_starts.append(start)
+                kept_ends.append(short_ends[k])
+        if not (len(kept_starts) > 0 and len(kept_ends) > 0):
+            return ''
+
+        rows.append(get_timing_cues(kept_starts, kept_ends, r_config))
+
+    pause_df = pd.DataFrame(rows)
+    pause_df[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
+    return pause_df
+
+def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt):
+    """
+    Save a one-row pause csv holding NaN values and the given error text
+    Args:
+        video_uri: value stored in the 'dbm_master_url' column
+        out_loc: (str) Output directory; fl_name: input file name
+        r_config: raw variable config; error_txt: text for the err_reason column
+    """
+    timing_cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses,
+                   r_config.aco_pausetime, r_config.aco_pausefrac]
+    columns = timing_cols + [r_config.err_reason]
+    placeholder_row = [np.nan] * len(timing_cols) + [error_txt]
+
+    pause_df = pd.DataFrame([placeholder_row], columns=columns)
+    pause_df['dbm_master_url'] = video_uri
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(pause_df, out_loc, fl_name, pause_seg_dir, csv_ext)
+
+def run_pause_segment(video_uri, out_dir, r_config):
+    """
+    Compute pause features for the audio file belonging to one video
+
+    Looks for '<fl_name>.wav' in the input location returned by ut.filter_path.
+    Nothing is written when no such file exists. Audio shorter than 0.064
+    seconds produces the one-row error csv. Otherwise the audio is exported as
+    a temporary single-channel 48000 Hz wav next to the input, analysed with
+    process_silence, and the temporary file is removed again even when the
+    analysis fails. Any exception raised along the way is logged with its
+    traceback and suppressed.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+
+            error_txt = 'error: length less than 0.064'
+            empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
+            return
+
+        logger.info('Converting stereo sound to mono-lD')
+        sound_mono = AudioSegment.from_wav(audio_file)
+        sound_mono = sound_mono.set_channels(1)
+        sound_mono = sound_mono.set_frame_rate(48000)
+
+        mono_wav = os.path.join(input_loc, fl_name + '_mono.wav')
+        try:
+            sound_mono.export(mono_wav, format="wav")
+            df_pause_seg = process_silence(mono_wav, r_config)
+        finally:
+            # the temporary mono wav must not outlive this call, even on failure
+            if os.path.exists(mono_wav):
+                os.remove(mono_wav)
+
+        if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
+            logger.info('Processing Output file {} '.format(out_loc))
+
+            df_pause_seg['dbm_master_url'] = video_uri
+            ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext)
+        else:
+            error_txt = 'error: webrtcvad returns no segment'
+            empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
+
+    except Exception:
+        # same message as before, but logger.exception also records the traceback
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py
@@ -0,0 +1,113 @@
+"""
+file_name: pitch_freq
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import os
+import glob
+import parselmouth
+import librosa
+import numpy as np
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()  # no name given, so this is the root logger
+
+ff_dir = 'acoustic/pitch'  # passed to ut.save_output together with csv_ext
+csv_ext = '_pitch.csv'
+error_txt = 'error: length less than 0.064'  # error reason stored by empty_pitch
+
+def audio_pitch(path):
+    """
+    Extract the pitch (fundamental frequency) track of an audio file.
+    Args:
+        path: (.wav) audio file location
+    Returns:
+        (list) 'frequency' field of the pitch track, one value per
+        analysis frame (frames requested at a 0.001 s time step)
+    """
+    # Load the sound, then ask parselmouth for a pitch track every 1 ms.
+    pitch_track = parselmouth.Sound(path).to_pitch(time_step=.001)
+    frequencies = pitch_track.selected_array['frequency']
+    return list(frequencies)
+
+def label_speech(row,fd_freq):
+    """
+    Tell whether a frame is voiced, judged by its pitch frequency.
+    Args:
+        row: (item) record holding the pitch frequency value
+        fd_freq: key under which row stores the pitch frequency
+    Returns:
+        (str) 'yes' when that value is above zero, otherwise 'no'
+    """
+    # A positive pitch frequency marks a voiced frame.
+    is_voiced = row[fd_freq] > 0
+    return 'yes' if is_voiced else 'no'
+
+def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Build the per-frame pitch frequency matrix and save it as csv.
+    Args:
+        video_uri: source uri, stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: (str) output location handed to ut.save_output
+        fl_name: (str) base file name handed to ut.save_output
+        r_config: raw variable config object providing column names
+    """
+    ff_frames = audio_pitch(audio_file)
+    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
+    df_ffreq['Frames'] = df_ffreq.index
+
+    # Vectorised voiced/unvoiced label: same rule as label_speech (> 0 is
+    # voiced) without a Python call per frame; also safe on empty frames.
+    df_ffreq[r_config.aco_voiceLabel] = np.where(df_ffreq[r_config.aco_ff] > 0, 'yes', 'no')
+    df_ffreq[r_config.err_reason] = 'Pass'# will replace with threshold in future release
+    df_ffreq['dbm_master_url'] = video_uri
+    logger.info('Processing Output file {} '.format(out_loc))
+    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
+    
+def empty_pitch(video_uri, out_loc, fl_name, r_config):
+    """
+    Save a one-row placeholder pitch matrix: NaN frame and pitch,
+    voice label 'no', and the module-level error_txt as error reason.
+    """
+    columns = ['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]
+    placeholder = [np.nan, np.nan, 'no', error_txt]
+    df_ffreq = pd.DataFrame([placeholder], columns=columns)
+    df_ffreq['dbm_master_url'] = video_uri
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
+
+def run_pitch(video_uri, out_dir, r_config):
+    """
+    Processing audio for fetching pitch
+    -------------------
+    Writes the pitch csv for the first '<fl_name>.wav' found in the
+    input location; audio shorter than 0.064 sec gets the placeholder
+    row from empty_pitch. Nothing is written when no .wav is found.
+    -------------------
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            empty_pitch(video_uri, out_loc, fl_name, r_config)
+            return
+        calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # Best-effort step: keep the run alive, but log the traceback
+        # so the cause of the failure is not silently discarded.
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py
@@ -0,0 +1,160 @@
+"""
+file_name: shimmer_processing
+project_name: DBM
+created: 2020-20-07
+"""
+
+import pandas as pd
+import numpy as np
+import os
+import glob
+import parselmouth
+import librosa
+import numpy as np
+import more_itertools as mit
+from os.path import join
+
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()  # no name given, so this is the root logger
+
+shimmer_dir = 'acoustic/shimmer'  # passed to ut.save_output together with csv_ext
+ff_dir = 'acoustic/pitch'  # joined onto out_loc by calc_shimmer to locate pitch csv files
+csv_ext = '_shimmer.csv'
+
+def audio_shimmer(sound):
+    """
+    Run Praat's "Get shimmer (local)" on a sound through parselmouth.
+    Args:
+        sound: parselmouth Sound object
+    Returns:
+        the value Praat returns for "Get shimmer (local)" on the sound
+    """
+    praat_call = parselmouth.praat.call
+    pulses = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
+    return praat_call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
+
+def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt):
+    """
+    Save a one-row placeholder shimmer matrix: NaN frame and shimmer
+    plus error_txt in the error-reason column.
+    """
+    header = ['Frames', r_config.aco_shimmer, r_config.err_reason]
+    df_shimmer = pd.DataFrame([[np.nan, np.nan, error_txt]], columns=header)
+    df_shimmer['dbm_master_url'] = video_uri
+    # Log first, then hand the frame to ut.save_output.
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
+    
+def segment_pitch(dir_path, r_config):
+    """
+    Group the frames of a pitch csv into runs of consecutive voiced
+    ('yes') and unvoiced ('no') frame indices.
+    Args:
+        dir_path: directory scanned for files ending in '_pitch.csv'
+        r_config: config object; aco_voiceLabel names the label column
+    Returns:
+        (all runs sorted by first index, voiced runs, unvoiced runs);
+        if several csv files are read, the last one listed wins
+    """
+    com_speech_sort, voiced_yes, voiced_no = [], [], []  # distinct lists, not ([], ) * 3 aliases
+    for file in os.listdir(dir_path):
+        if not file.endswith('_pitch.csv'):
+            continue
+        try:
+            labels = list(pd.read_csv(os.path.join(dir_path, file))[r_config.aco_voiceLabel])
+            yes_runs = [list(g) for g in mit.consecutive_groups(i for i, x in enumerate(labels) if x == "yes")]
+            no_runs = [list(g) for g in mit.consecutive_groups(i for i, x in enumerate(labels) if x == "no")]
+        except Exception:  # best effort, but no longer silent
+            logger.exception('Failed to segment pitch file {}'.format(file))
+            continue
+        voiced_yes, voiced_no = yes_runs, no_runs  # updated together: one consistent file
+        com_speech_sort = sorted(yes_runs + no_runs, key=lambda run: run[0])
+    return com_speech_sort, voiced_yes, voiced_no
+
+def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
+    """
+    Compute shimmer for every voiced segment longer than one frame.
+    Args:
+        com_speech_sort: all segments (lists of pitch frame indices)
+        voiced_yes: the voiced segments; voiced_no: unused, kept for callers
+        shimmer_frames: pre-sized list filled in place; audio_file: .wav path
+    Returns:
+        shimmer_frames, holding NaN wherever no shimmer was computed
+    """
+    snd = parselmouth.Sound(audio_file)
+    pitch = snd.to_pitch(time_step=.001)
+    voiced = {tuple(seg) for seg in voiced_yes}  # O(1) membership tests
+    for idx, vs in enumerate(com_speech_sort):
+        shimmer = np.nan  # np.NaN was removed in NumPy 2.0; bound before the try
+        try:
+            if len(vs) > 1 and tuple(vs) in voiced:
+                first, last = (int(snd.get_frame_number_from_time(
+                    pitch.get_time_from_frame_number(frame))) for frame in (vs[0], vs[-1]))
+                # Keep the source sampling rate; Sound() otherwise assumes 44100 Hz.
+                samples = parselmouth.Sound(snd.as_array()[0][first:last], snd.sampling_frequency)
+                shimmer = audio_shimmer(samples)
+        except Exception as err:  # best effort: the segment simply stays NaN
+            logger.debug('Shimmer failed for segment {}: {}'.format(idx, err))
+        shimmer_frames[idx] = shimmer
+    return shimmer_frames
+    
+def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+    Build the per-segment shimmer matrix and save it as csv.
+    Args:
+        video_uri: source uri, stored in the 'dbm_master_url' column
+        audio_file: (.wav) parsed audio file
+        out_loc: (str) Output directory for csv
+        fl_name: (str) base file name handed to ut.save_output
+        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
+    """
+    # Shimmer is computed per voiced segment, so the pitch output must exist.
+    dir_path = os.path.join(out_loc, ff_dir)
+    if not os.path.isdir(dir_path):
+        error_txt = 'error: fundamental freq not available'
+        empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
+        return
+
+    com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
+    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
+    shimmer_frames = [np.nan] * len(com_speech_sort)
+    shimmer_frames = segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file)
+    df_shimmer = pd.DataFrame(shimmer_frames, columns=[r_config.aco_shimmer])
+    df_shimmer[r_config.err_reason] = 'Pass'# will replace with threshold in future release
+    df_shimmer['Frames'] = df_shimmer.index
+    df_shimmer['dbm_master_url'] = video_uri
+    logger.info('Processing Output file {} '.format(out_loc))
+    ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
+    
+def run_shimmer(video_uri, out_dir, r_config):
+    """
+    Processing all patients to fetch shimmer
+    ---------------
+    Writes the shimmer csv for the first '<fl_name>.wav' found in the
+    input location; audio shorter than 0.064 sec gets the placeholder
+    row from empty_shimmer. Nothing is written when no .wav is found.
+    ---------------
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            error_txt = 'error: length less than 0.064'
+            empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
+            return
+        calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # Best-effort step: keep the run alive, but log the traceback.
+        logger.exception('Failed to process audio file')
--- a/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py
@@ -0,0 +1,111 @@
+"""
+file_name: voice_frame_score
+project_name: DBM
+created: 2020-20-07
+"""
+
+import parselmouth
+import pandas as pd
+import numpy as np
+import glob
+import librosa
+from os.path import join
+import logging
+
+from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()  # no name given, so this is the root logger
+
+vfs_dir = 'acoustic/voice_frame_score'  # passed to ut.save_output together with csv_ext
+csv_ext = '_voiceprev.csv'
+error_txt = 'error: length less than 0.064'  # error reason stored by empty_vfs
+
+def audio_pitch_frame(pitch):
+    """
+        Read the frame counts of a pitch object
+        Args:
+            pitch: object exposing get_number_of_frames() and
+                   count_voiced_frames() (a parselmouth pitch here)
+        Returns:
+            (tuple) total number of frames, number of voiced frames
+    """
+    frame_count, voiced_count = pitch.get_number_of_frames(), pitch.count_voiced_frames()
+    return frame_count, voiced_count
+
+def voice_segment(path):
+    """
+        Measure how much of an audio file is voiced, from its pitch track
+        Args:
+            path: (.wav) audio file location
+        Returns:
+            (tuple) voiced frames percentage, voiced frame count and
+            total frame count, in that order
+    """
+    # Default pitch analysis settings are used (to_pitch without arguments).
+    pitch_track = parselmouth.Sound(path).to_pitch()
+    frame_count, voiced_count = audio_pitch_frame(pitch_track)
+    pct_voiced = (voiced_count/frame_count)*100
+    return pct_voiced, voiced_count, frame_count
+
+def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
+    """
+        Build the one-row voice frame score matrix and save it as csv
+        Args:
+            video_uri: source uri, stored in the 'dbm_master_url' column
+            audio_file: Audio file path
+            out_loc: output location handed to ut.save_output
+            fl_name: base file name handed to ut.save_output
+            r_config: Config object providing the column names
+    """
+    pct_voiced, n_voiced, n_total = voice_segment(audio_file)
+
+    # Columns are added one at a time, which fixes their order in the frame.
+    df_vfs = pd.DataFrame([n_voiced], columns=[r_config.aco_voiceFrame])
+    df_vfs[r_config.aco_totVoiceFrame] = [n_total]
+    df_vfs[r_config.aco_voicePct] = [pct_voiced]
+    df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release
+    df_vfs['Frames'] = df_vfs.index
+    df_vfs['dbm_master_url'] = video_uri
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
+    
+def empty_vfs(video_uri, out_loc, fl_name, r_config):
+    """
+    Save a one-row placeholder VFS matrix: NaN in every measurement
+    column and the module-level error_txt as error reason.
+    """
+    header = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct, r_config.err_reason]
+    placeholder = [np.nan] * 4 + [error_txt]
+    df_vfs = pd.DataFrame([placeholder], columns=header)
+    df_vfs['dbm_master_url'] = video_uri
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
+
+def run_vfs(video_uri, out_dir, r_config):
+    """
+    Processing all participants for fetching voice frame score
+    ---------------
+    Writes the voice frame score csv for the first '<fl_name>.wav' found
+    in the input location; audio shorter than 0.064 sec gets the
+    placeholder row from empty_vfs. Nothing is written without a .wav.
+    ---------------
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if not aud_filter:
+            return
+        audio_file = aud_filter[0]
+        aud_dur = librosa.get_duration(filename=audio_file)
+        if float(aud_dur) < 0.064:
+            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+            empty_vfs(video_uri, out_loc, fl_name, r_config)
+            return
+        calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config)
+    except Exception:
+        # Best-effort step: keep the run alive, but log the traceback.
+        logger.exception('Failed to process audio file')