diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py index 2b1bbba8..7cd5346f 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py @@ -4,25 +4,25 @@ project_name: DBM created: 2020-20-07 """ +import glob +import logging +from os.path import join + +import numpy as np import pandas as pd import parselmouth -import numpy as np -import parselmouth -import librosa -import glob -from os.path import join -import logging from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() -formant_dir = 'acoustic/formant_freq' -csv_ext = '_formant.csv' -error_txt = 'error: length less than 0.064' +formant_dir = "acoustic/formant_freq" +csv_ext = "_formant.csv" +error_txt = "error: length less than 0.064" -def formant_list(formant,snd): + +def formant_list(formant, snd): """ Getting formant frequency per second Args: @@ -35,23 +35,24 @@ def formant_list(formant,snd): f2_list = [] f3_list = [] f4_list = [] - - dur = snd.duration-0.02 + + dur = snd.duration - 0.02 dur_round = round(dur, 2) - + time_list = np.arange(0.001, dur_round, 0.001) for time in time_list: - - f1 = formant.get_value_at_time(1,time) - f2 = formant.get_value_at_time(2,time) - f3 = formant.get_value_at_time(3,time) - f4 = formant.get_value_at_time(4,time) - + + f1 = formant.get_value_at_time(1, time) + f2 = formant.get_value_at_time(2, time) + f3 = formant.get_value_at_time(3, time) + f4 = formant.get_value_at_time(4, time) + f1_list.append(f1) f2_list.append(f2) f3_list.append(f3) f4_list.append(f4) - return f1_list,f2_list,f3_list,f4_list + return f1_list, f2_list, f3_list, f4_list + def formant_score(path): """ @@ -62,49 +63,65 @@ def formant_score(path): (list) list of Formant freq for each voice frame """ sound_pat = parselmouth.Sound(path) - formant = sound_pat.to_formant_burg(time_step=.001) - f_score = formant_list(formant,sound_pat) + formant = sound_pat.to_formant_burg(time_step=0.001) + f_score = formant_list(formant, sound_pat) return f_score -def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config): + +def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True): """ Preparing Formant freq matrix Args: audio_file: (.wav) parsed audio file; fl_name: input file name out_loc: (str) Output directory; r_config: raw variable config """ - - f1_list,f2_list,f3_list,f4_list = formant_score(audio_file) + + f1_list, f2_list, f3_list, f4_list = formant_score(audio_file) df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1]) - + df_formant[r_config.aco_fm2] = f2_list df_formant[r_config.aco_fm3] = f3_list df_formant[r_config.aco_fm4] = f4_list - - df_formant.replace('', np.nan, regex=True,inplace=True) - df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_formant['Frames'] = df_formant.index - df_formant['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) - -def empty_fm(video_uri, out_loc, fl_name, r_config): - + + df_formant.replace("", np.nan, regex=True, inplace=True) + df_formant[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_formant["Frames"] = df_formant.index + df_formant["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) + return df_formant + + +def empty_fm(video_uri, out_loc, fl_name, r_config, save=True): + """ Preparing empty formant frequency matrix if something fails """ - cols = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason] + cols = [ + "Frames", + r_config.aco_fm1, + r_config.aco_fm2, + r_config.aco_fm3, + r_config.aco_fm4, + r_config.err_reason, + ] out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] - df_fm = pd.DataFrame(out_val, columns = cols) - df_fm['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) + df_fm = pd.DataFrame(out_val, columns=cols) + df_fm["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) + return df_fm + + +def run_formant(video_uri, out_dir, r_config, save=True): -def run_formant(video_uri, out_dir, r_config): - """ Processing all patient's for fetching Formant freq --------------- @@ -114,20 +131,25 @@ def run_formant(video_uri, out_dir, r_config): out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - empty_fm(video_uri, out_loc, fl_name, r_config) - return - - calc_formant(video_uri, audio_file, out_loc, fl_name, r_config) + df = empty_fm(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_formant( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df except Exception as e: - logger.error('Failed to process audio file') + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py index 2e5b5c47..7601da14 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py @@ -4,24 +4,25 @@ project_name: DBM created: 2020-20-07 """ -import pandas as pd -import numpy as np -import os import glob -import parselmouth -import librosa -import more_itertools as mit -from os.path import join import logging +import os +from os.path import join + +import more_itertools as mit +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +gne_dir = "acoustic/glottal_noise" +ff_dir = "acoustic/pitch" +csv_ext = "_gne.csv" -gne_dir = 'acoustic/glottal_noise' -ff_dir = 'acoustic/pitch' -csv_ext = '_gne.csv' def gne_ratio(sound): """ @@ -33,63 +34,43 @@ def gne_ratio(sound): """ harmonicity_gne = sound.to_harmonicity_gne() gne_all_bands = harmonicity_gne.values - gne_all_bands = np.where(gne_all_bands==-200, np.NaN, gne_all_bands) - - gne = np.nanmax(gne_all_bands) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf + gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands) + + gne = np.nanmax( + gne_all_bands + ) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf return gne -def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt): + +def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ Preparing empty GNE matrix if something fails """ - cols = ['Frames', r_config.aco_gne, r_config.err_reason] + cols = ["Frames", r_config.aco_gne, r_config.err_reason] out_val = [[np.nan, np.nan, error_txt]] - - df_gne = pd.DataFrame(out_val, columns = cols) - df_gne['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) - -def segment_pitch(dir_path, r_config): - """ - segmenting pitch freq for each voice segment - """ - com_speech_sort, voiced_yes, voiced_no = ([], ) * 3 - for file in os.listdir(dir_path): - try: - - if file.endswith('_pitch.csv'): - - ff_df = pd.read_csv((dir_path+'/'+file)) - voice_label = ff_df[r_config.aco_voiceLabel] - - indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"] - voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)] - - indices_no = [i for i, x in enumerate(voice_label) if x == "no"] - voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)] - - com_speech = voiced_yes + voiced_no - com_speech_sort = sorted(com_speech, key=lambda x: x[0]) - except: - pass - - return com_speech_sort, voiced_yes, voiced_no + + df_gne = pd.DataFrame(out_val, columns=cols) + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file): """ calculating gne for each voice segment """ snd = parselmouth.Sound(audio_file) - pitch = snd.to_pitch(time_step=.001) - + pitch = snd.to_pitch(time_step=0.001) + for idx, vs in enumerate(com_speech_sort): try: - + max_gne = np.NaN - if vs in voiced_yes and len(vs)>1: - + if vs in voiced_yes and len(vs) > 1: + start_time = pitch.get_time_from_frame_number(vs[0]) end_time = pitch.get_time_from_frame_number(vs[-1]) @@ -103,8 +84,9 @@ def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_fi gne_all_frames[idx] = max_gne return gne_all_frames - -def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config): + + +def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None): """ Preparing gne matrix Args: @@ -112,26 +94,36 @@ def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config): out_loc: (str) Output directory for csv's """ dir_path = os.path.join(out_loc, ff_dir) - if os.path.isdir(dir_path): - voice_seg = segment_pitch(dir_path, r_config) - - gne_all_frames = [np.NaN] * len(voice_seg[0]) - gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file) - - df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) - df_gne[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_gne['Frames'] = df_gne.index - df_gne['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) - - else: - error_txt = 'error: pitch freq not available' - empty_gne(video_uri, out_loc, fl_name, r_config, error_txt) + if os.path.isdir(dir_path) or ff_df is not None: + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) -def run_gne(video_uri, out_dir, r_config): + gne_all_frames = [np.NaN] * len(voice_seg[0]) + gne_segment_frames = segment_gne( + voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file + ) + + df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) + df_gne[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_gne["Frames"] = df_gne.index + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + else: + error_txt = "error: pitch freq not available" + return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + + +def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None): """ Processing all patient's for fetching glottal noise ratio --------------- @@ -141,21 +133,34 @@ def run_gne(video_uri, out_dir, r_config): out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - error_txt = 'error: length less than 0.064' - empty_gne(video_uri, out_loc, fl_name, r_config, error_txt) - return - - calc_gne(video_uri, audio_file, out_loc, fl_name, r_config) + error_txt = "error: length less than 0.064" + df = empty_gne( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_gne( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py index c319bb8e..7601da14 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py @@ -1,96 +1,166 @@ """ -file_name: hnr +file_name: gne project_name: DBM created: 2020-20-07 """ -import pandas as pd -import numpy as np -import os import glob -import parselmouth -import librosa -from os.path import join import logging +import os +from os.path import join + +import more_itertools as mit +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() -hnr_dir = 'acoustic/harmonic_noise' -csv_ext = '_hnr.csv' -error_txt = 'error: length less than 0.064' +gne_dir = "acoustic/glottal_noise" +ff_dir = "acoustic/pitch" +csv_ext = "_gne.csv" -def hnr_ratio(filepath): + +def gne_ratio(sound): """ - Using parselmouth library fetching harmonic noise ratio ratio + Using parselmouth library fetching glottal noise excitation ratio Args: - path: (.wav) audio file location + sound: parselmouth object Returns: - (list) list of hnr ratio for each voice frame, min,max and mean hnr + (list) list of gne ratio for each voice frame """ - sound = parselmouth.Sound(filepath) - harmonicity = sound.to_harmonicity_ac(time_step=.001) - - hnr_all_frames = harmonicity.values#[harmonicity.values != -200] nan it (****) - hnr_all_frames = np.where(hnr_all_frames==-200, np.NaN, hnr_all_frames) - return hnr_all_frames.transpose() + harmonicity_gne = sound.to_harmonicity_gne() + gne_all_bands = harmonicity_gne.values + gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands) -def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config): + gne = np.nanmax( + gne_all_bands + ) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf + return gne + + +def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ - Preparing harmonic noise matrix + Preparing empty GNE matrix if something fails + """ + cols = ["Frames", r_config.aco_gne, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + + df_gne = pd.DataFrame(out_val, columns=cols) + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + +def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file): + """ + calculating gne for each voice segment + """ + snd = parselmouth.Sound(audio_file) + pitch = snd.to_pitch(time_step=0.001) + + for idx, vs in enumerate(com_speech_sort): + try: + + max_gne = np.NaN + if vs in voiced_yes and len(vs) > 1: + + start_time = pitch.get_time_from_frame_number(vs[0]) + end_time = pitch.get_time_from_frame_number(vs[-1]) + + snd_start = int(snd.get_frame_number_from_time(start_time)) + snd_end = int(snd.get_frame_number_from_time(end_time)) + + samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) + max_gne = gne_ratio(samples) + except: + pass + + gne_all_frames[idx] = max_gne + return gne_all_frames + + +def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None): + """ + Preparing gne matrix Args: audio_file: (.wav) parsed audio file out_loc: (str) Output directory for csv's """ - - hnr_all_frames = hnr_ratio(audio_file) - df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr]) - - df_hnr['Frames'] = df_hnr.index - df_hnr['dbm_master_url'] = video_uri - df_hnr[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) - -def empty_hnr(video_uri, out_loc, fl_name, r_config): - """ - Preparing empty HNR matrix if something fails - """ - cols = ['Frames', r_config.aco_hnr, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_hnr = pd.DataFrame(out_val, columns = cols) - df_hnr['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) + dir_path = os.path.join(out_loc, ff_dir) + if os.path.isdir(dir_path) or ff_df is not None: + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) -def run_hnr(video_uri, out_dir, r_config): + gne_all_frames = [np.NaN] * len(voice_seg[0]) + gne_segment_frames = segment_gne( + voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file + ) + + df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) + df_gne[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_gne["Frames"] = df_gne.index + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + else: + error_txt = "error: pitch freq not available" + return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + + +def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None): """ - Processing all patient's for fetching harmonic noise ratio - ------------------- - ------------------- + Processing all patient's for fetching glottal noise ratio + --------------- + --------------- Args: video_uri: video path; r_config: raw variable config object out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - empty_hnr(video_uri, out_loc, fl_name, r_config) - return - - calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config) + error_txt = "error: length less than 0.064" + df = empty_gne( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_gne( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py index 9df1a344..7601da14 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py @@ -1,92 +1,166 @@ """ -file_name: intensity +file_name: gne project_name: DBM created: 2020-20-07 """ -import pandas as pd -import numpy as np import glob -import parselmouth -import librosa -from os.path import join import logging +import os +from os.path import join + +import more_itertools as mit +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() -intensity_dir = 'acoustic/intensity' -csv_ext = '_intensity.csv' -error_txt = 'error: length less than 0.064' +gne_dir = "acoustic/glottal_noise" +ff_dir = "acoustic/pitch" +csv_ext = "_gne.csv" -def intensity_score(path): + +def gne_ratio(sound): """ - Using parselmouth library fetching Intensity + Using parselmouth library fetching glottal noise excitation ratio Args: - path: (.wav) audio file location + sound: parselmouth object Returns: - (list) list of Intensity for each voice frame + (list) list of gne ratio for each voice frame """ - sound_pat = parselmouth.Sound(path) - intensity = sound_pat.to_intensity(time_step=.001) - return intensity.values[0] + harmonicity_gne = sound.to_harmonicity_gne() + gne_all_bands = harmonicity_gne.values + gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands) -def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config): + gne = np.nanmax( + gne_all_bands + ) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf + return gne + + +def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ - Preparing Intensity matrix + Preparing empty GNE matrix if something fails + """ + cols = ["Frames", r_config.aco_gne, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + + df_gne = pd.DataFrame(out_val, columns=cols) + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + +def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file): + """ + calculating gne for each voice segment + """ + snd = parselmouth.Sound(audio_file) + pitch = snd.to_pitch(time_step=0.001) + + for idx, vs in enumerate(com_speech_sort): + try: + + max_gne = np.NaN + if vs in voiced_yes and len(vs) > 1: + + start_time = pitch.get_time_from_frame_number(vs[0]) + end_time = pitch.get_time_from_frame_number(vs[-1]) + + snd_start = int(snd.get_frame_number_from_time(start_time)) + snd_end = int(snd.get_frame_number_from_time(end_time)) + + samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) + max_gne = gne_ratio(samples) + except: + pass + + gne_all_frames[idx] = max_gne + return gne_all_frames + + +def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None): + """ + Preparing gne matrix Args: audio_file: (.wav) parsed audio file out_loc: (str) Output directory for csv's """ - - intensity_frames = intensity_score(audio_file) - df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int]) - - df_intensity['Frames'] = df_intensity.index - df_intensity['dbm_master_url'] = video_uri - df_intensity[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext) - -def empty_intensity(video_uri, out_loc, fl_name, r_config): - """ - Preparing empty Intensity matrix if something fails - """ - cols = ['Frames', r_config.aco_int, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_int = pd.DataFrame(out_val, columns = cols) - df_int['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext) + dir_path = os.path.join(out_loc, ff_dir) + if os.path.isdir(dir_path) or ff_df is not None: + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) -def run_intensity(video_uri, out_dir, r_config): + gne_all_frames = [np.NaN] * len(voice_seg[0]) + gne_segment_frames = segment_gne( + voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file + ) + + df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) + df_gne[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_gne["Frames"] = df_gne.index + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + else: + error_txt = "error: pitch freq not available" + return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + + +def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None): """ - Processing all patient's for fetching Intensity - ------------------- - ------------------- + Processing all patient's for fetching glottal noise ratio + --------------- + --------------- Args: video_uri: video path; r_config: raw variable config object out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - empty_intensity(video_uri, out_loc, fl_name, r_config) - return - - calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config) + error_txt = "error: length less than 0.064" + df = empty_gne( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_gne( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py index b7e98419..c2cd08e6 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py @@ -4,25 +4,24 @@ project_name: DBM created: 2020-20-07 """ -import pandas as pd -import numpy as np -import os import glob -import parselmouth -import librosa -import numpy as np -import more_itertools as mit -from os.path import join import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +jitter_dir = "acoustic/jitter" +ff_dir = "acoustic/pitch" +csv_ext = "_jitter.csv" -jitter_dir = 'acoustic/jitter' -ff_dir = 'acoustic/pitch' -csv_ext = '_jitter.csv' def audio_jitter(sound): """ @@ -32,61 +31,43 @@ def audio_jitter(sound): Returns: (list) list of jitters for each voice frame """ - pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500) - jitter = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3) + pointProcess = parselmouth.praat.call( + sound, "To PointProcess (periodic, cc)...", 80, 500 + ) + jitter = parselmouth.praat.call( + pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3 + ) return jitter -def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt): + +def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ Preparing empty jitter matrix if something fails """ - cols = ['Frames', r_config.aco_jitter, r_config.err_reason] + cols = ["Frames", r_config.aco_jitter, r_config.err_reason] out_val = [[np.nan, np.nan, error_txt]] - df_jitter = pd.DataFrame(out_val, columns = cols) - df_jitter['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) - -def segment_pitch(dir_path, r_config): - """ - segmenting pitch freq for each voice segment - """ - com_speech_sort, voiced_yes, voiced_no = ([], ) * 3 - for file in os.listdir(dir_path): - try: - - if file.endswith('_pitch.csv'): - - ff_df = pd.read_csv((dir_path+'/'+file)) - voice_label = ff_df[r_config.aco_voiceLabel] - - indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"] - voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)] - - indices_no = [i for i, x in enumerate(voice_label) if x == "no"] - voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)] - - com_speech = voiced_yes + voiced_no - com_speech_sort = sorted(com_speech, key=lambda x: x[0]) - except: - pass - - return com_speech_sort, voiced_yes, voiced_no + df_jitter = pd.DataFrame(out_val, columns=cols) + df_jitter["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) + return df_jitter + def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file): """ calculating jitter for each voice segment """ snd = parselmouth.Sound(audio_file) - pitch = snd.to_pitch(time_step=.001) - + pitch = snd.to_pitch(time_step=0.001) + for idx, vs in enumerate(com_speech_sort): try: - + jitter = np.NaN - if vs in voiced_yes and len(vs)>1: - + if vs in voiced_yes and len(vs) > 1: + start_time = pitch.get_time_from_frame_number(vs[0]) end_time = pitch.get_time_from_frame_number(vs[-1]) @@ -100,8 +81,11 @@ def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_ jitter_frames[idx] = jitter return jitter_frames - -def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config): + + +def calc_jitter( + video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None +): """ Preparing jitter matrix Args: @@ -110,50 +94,73 @@ def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config): r_config: config.config_raw_feature.pyConfigFeatureNmReader object """ dir_path = os.path.join(out_loc, ff_dir) - if os.path.isdir(dir_path): - voice_seg = segment_pitch(dir_path, r_config) - + if os.path.isdir(dir_path) or ff_df is not None: + + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) + jitter_frames = [np.NaN] * len(voice_seg[0]) - jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file) - + jitter_segment_frames = segment_jitter( + voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file + ) + df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter]) - df_jitter[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_jitter['Frames'] = df_jitter.index - df_jitter['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) - + df_jitter[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_jitter["Frames"] = df_jitter.index + df_jitter["dbm_master_url"] = video_uri + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) + df = df_jitter else: - error_txt = 'error: fundamental freq not available' - empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt) - -def run_jitter(video_uri, out_dir, r_config): + error_txt = "error: fundamental freq not available" + df = empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + return df + + +def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None): """ Processing all patient's videos for fetching jitter ------------------- ------------------- Args: video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output + out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - error_txt = 'error: length less than 0.064' - empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt) - return - - calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config) + error_txt = "error: length less than 0.064" + df = empty_jitter( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_jitter( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + logger.error("Error in jitter: {}".format(e)) + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py index d26a48e3..16047910 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py @@ -4,40 +4,73 @@ project_name: DBM created: 2020-20-07 """ -import pandas as pd -import os import glob -import parselmouth -import librosa -import numpy as np -import librosa -from os.path import join import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() -mfcc_dir = 'acoustic/mfcc' -csv_ext = '_mfcc.csv' -error_txt = 'error: length less than 0.064' +mfcc_dir = "acoustic/mfcc" +csv_ext = "_mfcc.csv" +error_txt = "error: length less than 0.064" + + +def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True): -def empty_mfcc(video_uri, out_loc, fl_name, r_config): - """ Preparing empty empty_mfcc matrix if something fails """ - cols = ['Frames', r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4, r_config.aco_mfcc5, - r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8, r_config.aco_mfcc9, r_config.aco_mfcc10, - r_config.aco_mfcc11, r_config.aco_mfcc12, r_config.err_reason] - out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, - error_txt]] - df_mfcc = pd.DataFrame(out_val, columns = cols) - df_mfcc['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext) + cols = [ + "Frames", + r_config.aco_mfcc1, + r_config.aco_mfcc2, + r_config.aco_mfcc3, + r_config.aco_mfcc4, + r_config.aco_mfcc5, + r_config.aco_mfcc6, + r_config.aco_mfcc7, + r_config.aco_mfcc8, + r_config.aco_mfcc9, + r_config.aco_mfcc10, + r_config.aco_mfcc11, + r_config.aco_mfcc12, + r_config.err_reason, + ] + out_val = [ + [ + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + error_txt, + ] + ] + df_mfcc = pd.DataFrame(out_val, columns=cols) + df_mfcc["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext) + + return df_mfcc + def audio_mfcc(path): """ @@ -48,12 +81,13 @@ def audio_mfcc(path): (list) list of mfccs for each voice frame """ sound = parselmouth.Sound(path) - mfcc_object = sound.to_mfcc(time_step=.001,number_of_coefficients=12) + mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12) mfccs = mfcc_object.to_array() mfccs = np.delete(mfccs, (0), axis=0) return mfccs - -def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config): + + +def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True): """ Preparing mfcc matrix Args: @@ -64,42 +98,50 @@ def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config): """ dict_ = {} mfccs = audio_mfcc(audio_file) - - for i in range(1,13): - conf_str = r_config.base_raw['raw_feature'] - dict_[conf_str['aco_mfcc' + str(i)]] = mfccs[i-1, :] - + + for i in range(1, 13): + conf_str = r_config.base_raw["raw_feature"] + dict_[conf_str["aco_mfcc" + str(i)]] = mfccs[i - 1, :] + df = pd.DataFrame(dict_) - df['Frames'] = df.index - - df[r_config.err_reason] = 'Pass'# may replace based on threshold in future release - df['dbm_master_url'] = video_uri - - ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext) - -def run_mfcc(video_uri, out_dir, r_config): + df["Frames"] = df.index + + df[r_config.err_reason] = "Pass" # may replace based on threshold in future release + df["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext) + return df + + +def run_mfcc(video_uri, out_dir, r_config, save=True): """ Processing all patients to fetch mfccs - + Args: video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output + out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - empty_mfcc(video_uri, out_loc, fl_name, r_config) - return + return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save) - calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config) + return calc_mfcc( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py index 72928883..1ce1e048 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py @@ -4,23 +4,25 @@ project_name: DBM created: 2020-20-07 """ -import os import glob -from pydub import AudioSegment -import librosa -import pandas as pd -import numpy as np -import webrtcvad -from os.path import join import logging +import os +from os.path import join -from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut +import numpy as np +import pandas as pd +import webrtcvad +from pydub import AudioSegment + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut +from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +pause_seg_dir = "acoustic/pause_segment" +csv_ext = "_pausechar.csv" -pause_seg_dir = 'acoustic/pause_segment' -csv_ext = '_pausechar.csv' def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config): """ @@ -35,25 +37,27 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config): speaking_time = np.sum(np.asarray(seg_ends_sec) - np.asarray(seg_starts_sec)) num_pauses = len(seg_starts_sec) - 1 pause_len = np.zeros(num_pauses) - + for p in range(num_pauses): - pause_len[p] = seg_starts_sec[p+1] - seg_ends_sec[p] - - if len(pause_len)>0: - pause_len_mean = np.mean(pause_len) - pause_len_std = np.std(pause_len) + pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p] + + if len(pause_len) > 0: pause_time = np.sum(pause_len) - + else: - pause_len_mean = 0 - pause_len_std = 0 pause_time = 0 - + pause_frac = pause_time / total_time - timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time, - r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac} + timing_dict = { + r_config.aco_totaltime: total_time, + r_config.aco_speakingtime: speaking_time, + r_config.aco_numpauses: num_pauses, + r_config.aco_pausetime: pause_time, + r_config.aco_pausefrac: pause_frac, + } return timing_dict + def process_silence(audio_file, r_config): """ Returns dataframe for pause between words using voice activity detection @@ -64,64 +68,80 @@ def process_silence(audio_file, r_config): """ feat_dict_list = [] y, sr = vu.read_wave(audio_file) - + # 3 is most aggressive (splits most), 0 least (better for low snr) aggressiveness = 3 frame_dur_ms = 20 - - #pause segment(long & short pad) + + # pause segment(long & short pad) long_pad_around_voice_ms = 200 short_pad_around_voice_ms = 100 - - if len(y)>0: + + if len(y) > 0: vad = webrtcvad.Vad(aggressiveness) - + frames = vu.frame_generator(frame_dur_ms, y, sr) frames = list(frames) - - #longer pad time screens out little blips, but misses short silences - long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames) - - #Logic to handle blank audio file + + # longer pad time screens out little blips, but misses short silences + long_seg_starts, long_seg_ends = vu.vad_get_segment_times( + sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames + ) + + # Logic to handle blank audio file if len(long_seg_starts) == 0 or len(long_seg_ends) == 0: - return '' - + return "" + t_start = long_seg_starts[0] t_end = long_seg_ends[-1] # shorter pad time captures short silences (but misfires on little blips) - short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames) - + short_seg_starts, short_seg_ends = vu.vad_get_segment_times( + sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames + ) + seg_starts = [] seg_ends = [] - for k in range(len(short_seg_starts)): # logic to clean up some typical misfires - if (short_seg_starts[k] >=t_start) and (short_seg_starts[k] <= t_end): - + for k in range( + len(short_seg_starts) + ): # logic to clean up some typical misfires + if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end): + seg_starts.append(short_seg_starts[k]) seg_ends.append(short_seg_ends[k]) if len(seg_starts) == 0 or len(seg_ends) == 0: - return '' - + return "" + timing_dict = get_timing_cues(seg_starts, seg_ends, r_config) feat_dict_list.append(timing_dict) - + df = pd.DataFrame(feat_dict_list) - df[r_config.err_reason] = 'Pass'# will replace with threshold in future release + df[r_config.err_reason] = "Pass" # will replace with threshold in future release return df -def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt): + +def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ Preparing empty Pause Segment matrix if something fails """ - cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses, r_config.aco_pausetime, - r_config.aco_pausefrac, r_config.err_reason] + cols = [ + r_config.aco_totaltime, + r_config.aco_speakingtime, + r_config.aco_numpauses, + r_config.aco_pausetime, + r_config.aco_pausefrac, + r_config.err_reason, + ] out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] - df_pause = pd.DataFrame(out_val, columns = cols) - df_pause['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext) + df_pause = pd.DataFrame(out_val, columns=cols) + df_pause["dbm_master_url"] = video_uri -def run_pause_segment(video_uri, out_dir, r_config): + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext) + return df_pause + + +def run_pause_segment(video_uri, out_dir, r_config, save=True): """ Processing all patient's for getting Pause Segment --------------- @@ -131,41 +151,50 @@ def run_pause_segment(video_uri, out_dir, r_config): out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - error_txt = 'error: length less than 0.064' + error_txt = "error: length less than 0.064" empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt) return - logger.info('Converting stereo sound to mono-lD') + logger.info("Converting stereo sound to mono-lD") sound_mono = AudioSegment.from_wav(audio_file) sound_mono = sound_mono.set_channels(1) sound_mono = sound_mono.set_frame_rate(48000) - mono_wav = os.path.join(input_loc, fl_name + '_mono.wav') + mono_wav = os.path.join(input_loc, fl_name + "_mono.wav") sound_mono.export(mono_wav, format="wav") df_pause_seg = process_silence(mono_wav, r_config) - os.remove(mono_wav)#removing mono wav file + os.remove(mono_wav) # removing mono wav file - if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg)>0: - logger.info('Processing Output file {} '.format(out_loc)) - - df_pause_seg['dbm_master_url'] = video_uri - ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext) + if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0: + df_pause_seg["dbm_master_url"] = video_uri + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output( + df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext + ) + df = df_pause_seg else: - error_txt = 'error: webrtcvad returns no segment' - empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt) - + error_txt = "error: webrtcvad returns no segment" + df = empty_pause_segment( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + return df + except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file", str(e)) diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py index ffd017c6..3b4a23ad 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py @@ -4,23 +4,24 @@ project_name: DBM created: 2020-20-07 """ -import pandas as pd -import os import glob -import parselmouth -import librosa -import numpy as np -from os.path import join import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +ff_dir = "acoustic/pitch" +csv_ext = "_pitch.csv" +error_txt = "error: length less than 0.064" -ff_dir = 'acoustic/pitch' -csv_ext = '_pitch.csv' -error_txt = 'error: length less than 0.064' def audio_pitch(path): """ @@ -31,12 +32,13 @@ def audio_pitch(path): (list) list of pitch/fundamental frequency for each voice frame """ sound_pat = parselmouth.Sound(path) - pitch = sound_pat.to_pitch(time_step=.001) - pitch_values = pitch.selected_array['frequency'] - + pitch = sound_pat.to_pitch(time_step=0.001) + pitch_values = pitch.selected_array["frequency"] + return list(pitch_values) -def label_speech(row,fd_freq): + +def label_speech(row, fd_freq): """ identify whether frame is voiced or not Args: @@ -44,13 +46,14 @@ def label_speech(row,fd_freq): Returns: (str) yes or no indicator for voice """ - if row[fd_freq] > 0 : - return 'yes' + if row[fd_freq] > 0: + return "yes" else: - return 'no' + return "no" + + +def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True): -def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config): - """ Preparing pitch frequency matrix Args: @@ -61,30 +64,47 @@ def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config): ff_frames = audio_pitch(audio_file) df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff]) - - df_ffreq['Frames'] = df_ffreq.index - df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(lambda row: label_speech(row, r_config.aco_ff),axis=1) - - df_ffreq[r_config.err_reason] = 'Pass'# will replace with threshold in future release - df_ffreq['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) - -def empty_pitch(video_uri, out_loc, fl_name, r_config): + + df_ffreq["Frames"] = df_ffreq.index + df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply( + lambda row: label_speech(row, r_config.aco_ff), axis=1 + ) + + df_ffreq[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + df_ffreq["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) + return df_ffreq + + +def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True): """ Preparing empty pitch frequency matrix if something fails """ - - df_ffreq = pd.DataFrame([[np.nan, np.nan, 'no', error_txt]], - columns=['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]) - df_ffreq['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) -def run_pitch(video_uri, out_dir, r_config): - + df_ffreq = pd.DataFrame( + [[np.nan, np.nan, "no", error_txt]], + columns=[ + "Frames", + r_config.aco_ff, + r_config.aco_voiceLabel, + r_config.err_reason, + ], + ) + df_ffreq["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) + return df_ffreq + + +def run_pitch(video_uri, out_dir, r_config, save=True): + """ Processing audio for fetching pitch ------------------- @@ -94,20 +114,26 @@ def run_pitch(video_uri, out_dir, r_config): out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - empty_pitch(video_uri, out_loc, fl_name, r_config) - return + df = empty_pitch(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_pitch( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df - calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config) except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py index ba6053be..d0d74b98 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py @@ -4,26 +4,25 @@ project_name: DBM created: 2020-20-07 """ -import pandas as pd -import numpy as np -import os import glob -import parselmouth -import librosa -import numpy as np -import more_itertools as mit +import logging +import os from os.path import join -import logging +import more_itertools as mit +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +shimmer_dir = "acoustic/shimmer" +ff_dir = "acoustic/pitch" +csv_ext = "_shimmer.csv" -shimmer_dir = 'acoustic/shimmer' -ff_dir = 'acoustic/pitch' -csv_ext = '_shimmer.csv' def audio_shimmer(sound): """ @@ -33,61 +32,43 @@ def audio_shimmer(sound): Returns: (list) list of shimmers for each voice frame """ - pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500) - shimmer = parselmouth.praat.call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6) + pointProcess = parselmouth.praat.call( + sound, "To PointProcess (periodic, cc)...", 80, 500 + ) + shimmer = parselmouth.praat.call( + [sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6 + ) return shimmer -def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt): + +def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ Preparing empty shimmer matrix if something fails """ - cols = ['Frames', r_config.aco_shimmer, r_config.err_reason] + cols = ["Frames", r_config.aco_shimmer, r_config.err_reason] out_val = [[np.nan, np.nan, error_txt]] - df_shimmer = pd.DataFrame(out_val, columns = cols) - df_shimmer['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) - -def segment_pitch(dir_path, r_config): - """ - segmenting pitch freq for each voice segment - """ - com_speech_sort, voiced_yes, voiced_no = ([], ) * 3 - for file in os.listdir(dir_path): - try: - - if file.endswith('_pitch.csv'): - - ff_df = pd.read_csv((dir_path+'/'+file)) - voice_label = ff_df[r_config.aco_voiceLabel] - - indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"] - voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)] - - indices_no = [i for i, x in enumerate(voice_label) if x == "no"] - voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)] - - com_speech = voiced_yes + voiced_no - com_speech_sort = sorted(com_speech, key=lambda x: x[0]) - except: - pass - - return com_speech_sort, voiced_yes, voiced_no + df_shimmer = pd.DataFrame(out_val, columns=cols) + df_shimmer["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) + return df_shimmer + def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file): """ calculating shimmer for each voice segment """ snd = parselmouth.Sound(audio_file) - pitch = snd.to_pitch(time_step=.001) - + pitch = snd.to_pitch(time_step=0.001) + for idx, vs in enumerate(com_speech_sort): try: - + shimmer = np.NaN - if vs in voiced_yes and len(vs)>1: - + if vs in voiced_yes and len(vs) > 1: + start_time = pitch.get_time_from_frame_number(vs[0]) end_time = pitch.get_time_from_frame_number(vs[-1]) @@ -101,8 +82,11 @@ def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audi shimmer_frames[idx] = shimmer return shimmer_frames - -def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config): + + +def calc_shimmer( + video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None +): """ Preparing shimmer matrix Args: @@ -111,50 +95,72 @@ def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config): r_config: config.config_raw_feature.pyConfigFeatureNmReader object """ dir_path = os.path.join(out_loc, ff_dir) - if os.path.isdir(dir_path): - voice_seg = segment_pitch(dir_path, r_config) - + if os.path.isdir(dir_path) or ff_df is not None: + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) + shimmer_frames = [np.NaN] * len(voice_seg[0]) - shimmer_segment_frames = segment_shimmer(voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file) - - df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer]) - df_shimmer[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_shimmer['Frames'] = df_shimmer.index - df_shimmer['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) - + shimmer_segment_frames = segment_shimmer( + voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file + ) + + df_shimmer = pd.DataFrame( + shimmer_segment_frames, columns=[r_config.aco_shimmer] + ) + df_shimmer[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_shimmer["Frames"] = df_shimmer.index + df_shimmer["dbm_master_url"] = video_uri + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) + df = df_shimmer else: - error_txt = 'error: fundamental freq not available' - empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt) - -def run_shimmer(video_uri, out_dir, r_config): + error_txt = "error: fundamental freq not available" + df = empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + return df + + +def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None): """ Processing all patients to fetch shimmer --------------- --------------- Args: video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output + out_dir: (str) Output directory for processed output """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + # try: - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) - error_txt = 'error: length less than 0.064' - empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt) - return + if float(aud_dur) < 0.064: + logger.info("Output file {} size is less than 0.064sec".format(audio_file)) - calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + error_txt = "error: length less than 0.064" + df = empty_shimmer( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_shimmer( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df + # except Exception as e: + # logger.error('Error in shimmer: {}'.format(e)) + # logger.error('Failed to process audio file') diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py index 3cf523e5..5126a156 100644 --- a/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py @@ -4,85 +4,100 @@ project_name: DBM created: 2020-20-07 """ -import parselmouth -import pandas as pd -import numpy as np import glob -import librosa -from os.path import join import logging +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +vfs_dir = "acoustic/voice_frame_score" +csv_ext = "_voiceprev.csv" +error_txt = "error: length less than 0.064" -vfs_dir = 'acoustic/voice_frame_score' -csv_ext = '_voiceprev.csv' -error_txt = 'error: length less than 0.064' def audio_pitch_frame(pitch): """ - Computing total number of speech and participant voiced frames - Args: - pitch: speech pitch - Returns: - (float) total voice frames and participant voiced frames + Computing total number of speech and participant voiced frames + Args: + pitch: speech pitch + Returns: + (float) total voice frames and participant voiced frames """ total_frames = pitch.get_number_of_frames() voiced_frames = pitch.count_voiced_frames() return total_frames, voiced_frames + def voice_segment(path): """ - Using parselmouth library for fundamental frequency - Args: - path: (.wav) audio file location - Returns: - (float) total voice frames, participant voiced frames and voiced frames percentage + Using parselmouth library for fundamental frequency + Args: + path: (.wav) audio file location + Returns: + (float) total voice frames, participant voiced frames and voiced frames percentage """ sound_pat = parselmouth.Sound(path) pitch = sound_pat.to_pitch() - total_frames,voiced_frames = audio_pitch_frame(pitch) - - voiced_percentage = (voiced_frames/total_frames)*100 + total_frames, voiced_frames = audio_pitch_frame(pitch) + + voiced_percentage = (voiced_frames / total_frames) * 100 return voiced_percentage, voiced_frames, total_frames -def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config): + +def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True): """ - creating dataframe matrix for voice frame score - Args: - audio_file: Audio file path - new_out_base_dir: AWS instance output base directory path - f_nm_config: Config file object + creating dataframe matrix for voice frame score + Args: + audio_file: Audio file path + new_out_base_dir: AWS instance output base directory path + f_nm_config: Config file object """ - voice_percentage,voiced_frames, total_frames = voice_segment(audio_file) + voice_percentage, voiced_frames, total_frames = voice_segment(audio_file) df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame]) - + df_vfs[r_config.aco_totVoiceFrame] = [total_frames] df_vfs[r_config.aco_voicePct] = [voice_percentage] - df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_vfs['Frames'] = df_vfs.index - df_vfs['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) - -def empty_vfs(video_uri, out_loc, fl_name, r_config): + df_vfs[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_vfs["Frames"] = df_vfs.index + df_vfs["dbm_master_url"] = video_uri + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) + return df_vfs + + +def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True): """ Preparing empty VFS matrix if something fails """ - cols = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct, r_config.err_reason] + cols = [ + "Frames", + r_config.aco_voiceFrame, + r_config.aco_totVoiceFrame, + r_config.aco_voicePct, + r_config.err_reason, + ] out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]] - df_vfs = pd.DataFrame(out_val, columns = cols) - df_vfs['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) + df_vfs = pd.DataFrame(out_val, columns=cols) + df_vfs["dbm_master_url"] = video_uri + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) + return df_vfs -def run_vfs(video_uri, out_dir, r_config): + +def run_vfs(video_uri, out_dir, r_config, save=True): """ Processing all participants for fetching voice frame score --------------- @@ -92,20 +107,25 @@ def run_vfs(video_uri, out_dir, r_config): out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - empty_vfs(video_uri, out_loc, fl_name, r_config) - return - - calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config) + df = empty_vfs(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_vfs( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file")