code refactoring only

This commit is contained in:
jordi.hasianta
2022-09-15 20:30:49 +07:00
parent c6cd54c376
commit 42455a1a2b
10 changed files with 954 additions and 653 deletions

View File

@@ -4,25 +4,25 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd import pandas as pd
import parselmouth import parselmouth
import numpy as np
import parselmouth
import librosa
import glob
from os.path import join
import logging
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
formant_dir = 'acoustic/formant_freq' formant_dir = "acoustic/formant_freq"
csv_ext = '_formant.csv' csv_ext = "_formant.csv"
error_txt = 'error: length less than 0.064' error_txt = "error: length less than 0.064"
def formant_list(formant,snd):
def formant_list(formant, snd):
""" """
Getting formant frequency per second Getting formant frequency per second
Args: Args:
@@ -36,22 +36,23 @@ def formant_list(formant,snd):
f3_list = [] f3_list = []
f4_list = [] f4_list = []
dur = snd.duration-0.02 dur = snd.duration - 0.02
dur_round = round(dur, 2) dur_round = round(dur, 2)
time_list = np.arange(0.001, dur_round, 0.001) time_list = np.arange(0.001, dur_round, 0.001)
for time in time_list: for time in time_list:
f1 = formant.get_value_at_time(1,time) f1 = formant.get_value_at_time(1, time)
f2 = formant.get_value_at_time(2,time) f2 = formant.get_value_at_time(2, time)
f3 = formant.get_value_at_time(3,time) f3 = formant.get_value_at_time(3, time)
f4 = formant.get_value_at_time(4,time) f4 = formant.get_value_at_time(4, time)
f1_list.append(f1) f1_list.append(f1)
f2_list.append(f2) f2_list.append(f2)
f3_list.append(f3) f3_list.append(f3)
f4_list.append(f4) f4_list.append(f4)
return f1_list,f2_list,f3_list,f4_list return f1_list, f2_list, f3_list, f4_list
def formant_score(path): def formant_score(path):
""" """
@@ -62,11 +63,12 @@ def formant_score(path):
(list) list of Formant freq for each voice frame (list) list of Formant freq for each voice frame
""" """
sound_pat = parselmouth.Sound(path) sound_pat = parselmouth.Sound(path)
formant = sound_pat.to_formant_burg(time_step=.001) formant = sound_pat.to_formant_burg(time_step=0.001)
f_score = formant_list(formant,sound_pat) f_score = formant_list(formant, sound_pat)
return f_score return f_score
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
""" """
Preparing Formant freq matrix Preparing Formant freq matrix
Args: Args:
@@ -74,36 +76,51 @@ def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
out_loc: (str) Output directory; r_config: raw variable config out_loc: (str) Output directory; r_config: raw variable config
""" """
f1_list,f2_list,f3_list,f4_list = formant_score(audio_file) f1_list, f2_list, f3_list, f4_list = formant_score(audio_file)
df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1]) df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1])
df_formant[r_config.aco_fm2] = f2_list df_formant[r_config.aco_fm2] = f2_list
df_formant[r_config.aco_fm3] = f3_list df_formant[r_config.aco_fm3] = f3_list
df_formant[r_config.aco_fm4] = f4_list df_formant[r_config.aco_fm4] = f4_list
df_formant.replace('', np.nan, regex=True,inplace=True) df_formant.replace("", np.nan, regex=True, inplace=True)
df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release df_formant[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_formant['Frames'] = df_formant.index df_formant["Frames"] = df_formant.index
df_formant['dbm_master_url'] = video_uri df_formant["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
return df_formant
def empty_fm(video_uri, out_loc, fl_name, r_config):
def empty_fm(video_uri, out_loc, fl_name, r_config, save=True):
""" """
Preparing empty formant frequency matrix if something fails Preparing empty formant frequency matrix if something fails
""" """
cols = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason] cols = [
"Frames",
r_config.aco_fm1,
r_config.aco_fm2,
r_config.aco_fm3,
r_config.aco_fm4,
r_config.err_reason,
]
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
df_fm = pd.DataFrame(out_val, columns = cols) df_fm = pd.DataFrame(out_val, columns=cols)
df_fm['dbm_master_url'] = video_uri df_fm["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
return df_fm
def run_formant(video_uri, out_dir, r_config):
def run_formant(video_uri, out_dir, r_config, save=True):
""" """
Processing all patient's for fetching Formant freq Processing all patient's for fetching Formant freq
@@ -116,18 +133,23 @@ def run_formant(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_fm(video_uri, out_loc, fl_name, r_config) df = empty_fm(video_uri, out_loc, fl_name, r_config, save=save)
return else:
df = calc_formant(
calc_formant(video_uri, audio_file, out_loc, fl_name, r_config) video_uri, audio_file, out_loc, fl_name, r_config, save=save
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")

View File

@@ -4,24 +4,25 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import numpy as np
import os
import glob import glob
import parselmouth
import librosa
import more_itertools as mit
from os.path import join
import logging import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
gne_dir = 'acoustic/glottal_noise'
ff_dir = 'acoustic/pitch'
csv_ext = '_gne.csv'
def gne_ratio(sound): def gne_ratio(sound):
""" """
@@ -33,62 +34,42 @@ def gne_ratio(sound):
""" """
harmonicity_gne = sound.to_harmonicity_gne() harmonicity_gne = sound.to_harmonicity_gne()
gne_all_bands = harmonicity_gne.values gne_all_bands = harmonicity_gne.values
gne_all_bands = np.where(gne_all_bands==-200, np.NaN, gne_all_bands) gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
gne = np.nanmax(gne_all_bands) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf gne = np.nanmax(
gne_all_bands
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
return gne return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
""" """
Preparing empty GNE matrix if something fails Preparing empty GNE matrix if something fails
""" """
cols = ['Frames', r_config.aco_gne, r_config.err_reason] cols = ["Frames", r_config.aco_gne, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, error_txt]]
df_gne = pd.DataFrame(out_val, columns = cols) df_gne = pd.DataFrame(out_val, columns=cols)
df_gne['dbm_master_url'] = video_uri df_gne["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
def segment_pitch(dir_path, r_config):
"""
segmenting pitch freq for each voice segment
"""
com_speech_sort, voiced_yes, voiced_no = ([], ) * 3
for file in os.listdir(dir_path):
try:
if file.endswith('_pitch.csv'):
ff_df = pd.read_csv((dir_path+'/'+file))
voice_label = ff_df[r_config.aco_voiceLabel]
indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]
com_speech = voiced_yes + voiced_no
com_speech_sort = sorted(com_speech, key=lambda x: x[0])
except:
pass
return com_speech_sort, voiced_yes, voiced_no
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file): def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
""" """
calculating gne for each voice segment calculating gne for each voice segment
""" """
snd = parselmouth.Sound(audio_file) snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001) pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort): for idx, vs in enumerate(com_speech_sort):
try: try:
max_gne = np.NaN max_gne = np.NaN
if vs in voiced_yes and len(vs)>1: if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0]) start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1]) end_time = pitch.get_time_from_frame_number(vs[-1])
@@ -104,7 +85,8 @@ def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_fi
gne_all_frames[idx] = max_gne gne_all_frames[idx] = max_gne
return gne_all_frames return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
""" """
Preparing gne matrix Preparing gne matrix
Args: Args:
@@ -112,26 +94,36 @@ def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
out_loc: (str) Output directory for csv's out_loc: (str) Output directory for csv's
""" """
dir_path = os.path.join(out_loc, ff_dir) dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path): if os.path.isdir(dir_path) or ff_df is not None:
voice_seg = segment_pitch(dir_path, r_config) if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
gne_all_frames = [np.NaN] * len(voice_seg[0]) gne_all_frames = [np.NaN] * len(voice_seg[0])
gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file) gne_segment_frames = segment_gne(
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
)
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
df_gne[r_config.err_reason] = 'Pass'# will replace with threshold in future release df_gne[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_gne['Frames'] = df_gne.index df_gne["Frames"] = df_gne.index
df_gne['dbm_master_url'] = video_uri df_gne["dbm_master_url"] = video_uri
logger.info('Processing Output file {} '.format(out_loc)) if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
else: else:
error_txt = 'error: pitch freq not available' error_txt = "error: pitch freq not available"
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt) return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
def run_gne(video_uri, out_dir, r_config):
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
""" """
Processing all patient's for fetching glottal noise ratio Processing all patient's for fetching glottal noise ratio
--------------- ---------------
@@ -143,19 +135,32 @@ def run_gne(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
error_txt = 'error: length less than 0.064' error_txt = "error: length less than 0.064"
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt) df = empty_gne(
return video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
calc_gne(video_uri, audio_file, out_loc, fl_name, r_config) else:
df = calc_gne(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")

View File

@@ -1,77 +1,133 @@
""" """
file_name: hnr file_name: gne
project_name: DBM project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import numpy as np
import os
import glob import glob
import parselmouth
import librosa
from os.path import join
import logging import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
hnr_dir = 'acoustic/harmonic_noise' gne_dir = "acoustic/glottal_noise"
csv_ext = '_hnr.csv' ff_dir = "acoustic/pitch"
error_txt = 'error: length less than 0.064' csv_ext = "_gne.csv"
def hnr_ratio(filepath):
def gne_ratio(sound):
""" """
Using parselmouth library fetching harmonic noise ratio ratio Using parselmouth library fetching glottal noise excitation ratio
Args: Args:
path: (.wav) audio file location sound: parselmouth object
Returns: Returns:
(list) list of hnr ratio for each voice frame, min,max and mean hnr (list) list of gne ratio for each voice frame
""" """
sound = parselmouth.Sound(filepath) harmonicity_gne = sound.to_harmonicity_gne()
harmonicity = sound.to_harmonicity_ac(time_step=.001) gne_all_bands = harmonicity_gne.values
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
hnr_all_frames = harmonicity.values#[harmonicity.values != -200] nan it (****) gne = np.nanmax(
hnr_all_frames = np.where(hnr_all_frames==-200, np.NaN, hnr_all_frames) gne_all_bands
return hnr_all_frames.transpose() ) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
return gne
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
""" """
Preparing harmonic noise matrix Preparing empty GNE matrix if something fails
"""
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_gne = pd.DataFrame(out_val, columns=cols)
df_gne["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
"""
calculating gne for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
max_gne = np.NaN
if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1])
snd_start = int(snd.get_frame_number_from_time(start_time))
snd_end = int(snd.get_frame_number_from_time(end_time))
samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
max_gne = gne_ratio(samples)
except:
pass
gne_all_frames[idx] = max_gne
return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
"""
Preparing gne matrix
Args: Args:
audio_file: (.wav) parsed audio file audio_file: (.wav) parsed audio file
out_loc: (str) Output directory for csv's out_loc: (str) Output directory for csv's
""" """
dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path) or ff_df is not None:
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
hnr_all_frames = hnr_ratio(audio_file) gne_all_frames = [np.NaN] * len(voice_seg[0])
df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr]) gne_segment_frames = segment_gne(
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
)
df_hnr['Frames'] = df_hnr.index df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
df_hnr['dbm_master_url'] = video_uri df_gne[
df_hnr[r_config.err_reason] = 'Pass'# will replace with threshold in future release r_config.err_reason
] = "Pass" # will replace with threshold in future release
logger.info('Saving Output file {} '.format(out_loc)) df_gne["Frames"] = df_gne.index
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) df_gne["dbm_master_url"] = video_uri
def empty_hnr(video_uri, out_loc, fl_name, r_config): if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
else:
error_txt = "error: pitch freq not available"
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
""" """
Preparing empty HNR matrix if something fails Processing all patient's for fetching glottal noise ratio
""" ---------------
cols = ['Frames', r_config.aco_hnr, r_config.err_reason] ---------------
out_val = [[np.nan, np.nan, error_txt]]
df_hnr = pd.DataFrame(out_val, columns = cols)
df_hnr['dbm_master_url'] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
def run_hnr(video_uri, out_dir, r_config):
"""
Processing all patient's for fetching harmonic noise ratio
-------------------
-------------------
Args: Args:
video_uri: video path; r_config: raw variable config object video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output out_dir: (str) Output directory for processed output
@@ -79,18 +135,32 @@ def run_hnr(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_hnr(video_uri, out_loc, fl_name, r_config) error_txt = "error: length less than 0.064"
return df = empty_gne(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config) )
else:
df = calc_gne(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")

View File

@@ -1,73 +1,133 @@
""" """
file_name: intensity file_name: gne
project_name: DBM project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import numpy as np
import glob import glob
import parselmouth
import librosa
from os.path import join
import logging import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
intensity_dir = 'acoustic/intensity' gne_dir = "acoustic/glottal_noise"
csv_ext = '_intensity.csv' ff_dir = "acoustic/pitch"
error_txt = 'error: length less than 0.064' csv_ext = "_gne.csv"
def intensity_score(path):
def gne_ratio(sound):
""" """
Using parselmouth library fetching Intensity Using parselmouth library fetching glottal noise excitation ratio
Args: Args:
path: (.wav) audio file location sound: parselmouth object
Returns: Returns:
(list) list of Intensity for each voice frame (list) list of gne ratio for each voice frame
""" """
sound_pat = parselmouth.Sound(path) harmonicity_gne = sound.to_harmonicity_gne()
intensity = sound_pat.to_intensity(time_step=.001) gne_all_bands = harmonicity_gne.values
return intensity.values[0] gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config): gne = np.nanmax(
gne_all_bands
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
""" """
Preparing Intensity matrix Preparing empty GNE matrix if something fails
"""
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_gne = pd.DataFrame(out_val, columns=cols)
df_gne["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
"""
calculating gne for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
max_gne = np.NaN
if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1])
snd_start = int(snd.get_frame_number_from_time(start_time))
snd_end = int(snd.get_frame_number_from_time(end_time))
samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
max_gne = gne_ratio(samples)
except:
pass
gne_all_frames[idx] = max_gne
return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
"""
Preparing gne matrix
Args: Args:
audio_file: (.wav) parsed audio file audio_file: (.wav) parsed audio file
out_loc: (str) Output directory for csv's out_loc: (str) Output directory for csv's
""" """
dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path) or ff_df is not None:
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
intensity_frames = intensity_score(audio_file) gne_all_frames = [np.NaN] * len(voice_seg[0])
df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int]) gne_segment_frames = segment_gne(
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
)
df_intensity['Frames'] = df_intensity.index df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
df_intensity['dbm_master_url'] = video_uri df_gne[
df_intensity[r_config.err_reason] = 'Pass'# will replace with threshold in future release r_config.err_reason
] = "Pass" # will replace with threshold in future release
logger.info('Saving Output file {} '.format(out_loc)) df_gne["Frames"] = df_gne.index
ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext) df_gne["dbm_master_url"] = video_uri
def empty_intensity(video_uri, out_loc, fl_name, r_config): if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
else:
error_txt = "error: pitch freq not available"
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
""" """
Preparing empty Intensity matrix if something fails Processing all patient's for fetching glottal noise ratio
""" ---------------
cols = ['Frames', r_config.aco_int, r_config.err_reason] ---------------
out_val = [[np.nan, np.nan, error_txt]]
df_int = pd.DataFrame(out_val, columns = cols)
df_int['dbm_master_url'] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
def run_intensity(video_uri, out_dir, r_config):
"""
Processing all patient's for fetching Intensity
-------------------
-------------------
Args: Args:
video_uri: video path; r_config: raw variable config object video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output out_dir: (str) Output directory for processed output
@@ -75,18 +135,32 @@ def run_intensity(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_intensity(video_uri, out_loc, fl_name, r_config) error_txt = "error: length less than 0.064"
return df = empty_gne(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config) )
else:
df = calc_gne(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")

View File

@@ -4,25 +4,24 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import numpy as np
import os
import glob import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
jitter_dir = "acoustic/jitter"
ff_dir = "acoustic/pitch"
csv_ext = "_jitter.csv"
jitter_dir = 'acoustic/jitter'
ff_dir = 'acoustic/pitch'
csv_ext = '_jitter.csv'
def audio_jitter(sound): def audio_jitter(sound):
""" """
@@ -32,60 +31,42 @@ def audio_jitter(sound):
Returns: Returns:
(list) list of jitters for each voice frame (list) list of jitters for each voice frame
""" """
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500) pointProcess = parselmouth.praat.call(
jitter = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3) sound, "To PointProcess (periodic, cc)...", 80, 500
)
jitter = parselmouth.praat.call(
pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3
)
return jitter return jitter
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
""" """
Preparing empty jitter matrix if something fails Preparing empty jitter matrix if something fails
""" """
cols = ['Frames', r_config.aco_jitter, r_config.err_reason] cols = ["Frames", r_config.aco_jitter, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, error_txt]]
df_jitter = pd.DataFrame(out_val, columns = cols) df_jitter = pd.DataFrame(out_val, columns=cols)
df_jitter['dbm_master_url'] = video_uri df_jitter["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
return df_jitter
def segment_pitch(dir_path, r_config):
"""
segmenting pitch freq for each voice segment
"""
com_speech_sort, voiced_yes, voiced_no = ([], ) * 3
for file in os.listdir(dir_path):
try:
if file.endswith('_pitch.csv'):
ff_df = pd.read_csv((dir_path+'/'+file))
voice_label = ff_df[r_config.aco_voiceLabel]
indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]
com_speech = voiced_yes + voiced_no
com_speech_sort = sorted(com_speech, key=lambda x: x[0])
except:
pass
return com_speech_sort, voiced_yes, voiced_no
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file): def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
""" """
calculating jitter for each voice segment calculating jitter for each voice segment
""" """
snd = parselmouth.Sound(audio_file) snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001) pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort): for idx, vs in enumerate(com_speech_sort):
try: try:
jitter = np.NaN jitter = np.NaN
if vs in voiced_yes and len(vs)>1: if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0]) start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1]) end_time = pitch.get_time_from_frame_number(vs[-1])
@@ -101,7 +82,10 @@ def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_
jitter_frames[idx] = jitter jitter_frames[idx] = jitter
return jitter_frames return jitter_frames
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_jitter(
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
""" """
Preparing jitter matrix Preparing jitter matrix
Args: Args:
@@ -110,26 +94,36 @@ def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
r_config: config.config_raw_feature.pyConfigFeatureNmReader object r_config: config.config_raw_feature.pyConfigFeatureNmReader object
""" """
dir_path = os.path.join(out_loc, ff_dir) dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path): if os.path.isdir(dir_path) or ff_df is not None:
voice_seg = segment_pitch(dir_path, r_config)
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
jitter_frames = [np.NaN] * len(voice_seg[0]) jitter_frames = [np.NaN] * len(voice_seg[0])
jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file) jitter_segment_frames = segment_jitter(
voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file
)
df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter]) df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
df_jitter[r_config.err_reason] = 'Pass'# will replace with threshold in future release df_jitter[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_jitter['Frames'] = df_jitter.index df_jitter["Frames"] = df_jitter.index
df_jitter['dbm_master_url'] = video_uri df_jitter["dbm_master_url"] = video_uri
if save:
logger.info('Processing Output file {} '.format(out_loc)) logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
df = df_jitter
else: else:
error_txt = 'error: fundamental freq not available' error_txt = "error: fundamental freq not available"
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt) df = empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
return df
def run_jitter(video_uri, out_dir, r_config):
def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None):
""" """
Processing all patient's videos for fetching jitter Processing all patient's videos for fetching jitter
------------------- -------------------
@@ -141,19 +135,32 @@ def run_jitter(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
error_txt = 'error: length less than 0.064' error_txt = "error: length less than 0.064"
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt) df = empty_jitter(
return video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config) else:
df = calc_jitter(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') logger.error("Error in jitter: {}".format(e))
logger.error("Failed to process audio file")

View File

@@ -4,41 +4,74 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import os
import glob import glob
import parselmouth
import librosa
import numpy as np
import librosa
from os.path import join
import logging import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
mfcc_dir = 'acoustic/mfcc' mfcc_dir = "acoustic/mfcc"
csv_ext = '_mfcc.csv' csv_ext = "_mfcc.csv"
error_txt = 'error: length less than 0.064' error_txt = "error: length less than 0.064"
def empty_mfcc(video_uri, out_loc, fl_name, r_config):
def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True):
""" """
Preparing empty empty_mfcc matrix if something fails Preparing empty empty_mfcc matrix if something fails
""" """
cols = ['Frames', r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4, r_config.aco_mfcc5, cols = [
r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8, r_config.aco_mfcc9, r_config.aco_mfcc10, "Frames",
r_config.aco_mfcc11, r_config.aco_mfcc12, r_config.err_reason] r_config.aco_mfcc1,
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, r_config.aco_mfcc2,
error_txt]] r_config.aco_mfcc3,
df_mfcc = pd.DataFrame(out_val, columns = cols) r_config.aco_mfcc4,
df_mfcc['dbm_master_url'] = video_uri r_config.aco_mfcc5,
r_config.aco_mfcc6,
r_config.aco_mfcc7,
r_config.aco_mfcc8,
r_config.aco_mfcc9,
r_config.aco_mfcc10,
r_config.aco_mfcc11,
r_config.aco_mfcc12,
r_config.err_reason,
]
out_val = [
[
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
error_txt,
]
]
df_mfcc = pd.DataFrame(out_val, columns=cols)
df_mfcc["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext) ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
return df_mfcc
def audio_mfcc(path): def audio_mfcc(path):
""" """
Using parselmouth library fetching mfccs Using parselmouth library fetching mfccs
@@ -48,12 +81,13 @@ def audio_mfcc(path):
(list) list of mfccs for each voice frame (list) list of mfccs for each voice frame
""" """
sound = parselmouth.Sound(path) sound = parselmouth.Sound(path)
mfcc_object = sound.to_mfcc(time_step=.001,number_of_coefficients=12) mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12)
mfccs = mfcc_object.to_array() mfccs = mfcc_object.to_array()
mfccs = np.delete(mfccs, (0), axis=0) mfccs = np.delete(mfccs, (0), axis=0)
return mfccs return mfccs
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
""" """
Preparing mfcc matrix Preparing mfcc matrix
Args: Args:
@@ -65,19 +99,23 @@ def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
dict_ = {} dict_ = {}
mfccs = audio_mfcc(audio_file) mfccs = audio_mfcc(audio_file)
for i in range(1,13): for i in range(1, 13):
conf_str = r_config.base_raw['raw_feature'] conf_str = r_config.base_raw["raw_feature"]
dict_[conf_str['aco_mfcc' + str(i)]] = mfccs[i-1, :] dict_[conf_str["aco_mfcc" + str(i)]] = mfccs[i - 1, :]
df = pd.DataFrame(dict_) df = pd.DataFrame(dict_)
df['Frames'] = df.index df["Frames"] = df.index
df[r_config.err_reason] = 'Pass'# may replace based on threshold in future release df[r_config.err_reason] = "Pass" # may replace based on threshold in future release
df['dbm_master_url'] = video_uri df["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext) ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
return df
def run_mfcc(video_uri, out_dir, r_config):
def run_mfcc(video_uri, out_dir, r_config, save=True):
""" """
Processing all patients to fetch mfccs Processing all patients to fetch mfccs
@@ -88,18 +126,22 @@ def run_mfcc(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_mfcc(video_uri, out_loc, fl_name, r_config) return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save)
return
calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config) return calc_mfcc(
video_uri, audio_file, out_loc, fl_name, r_config, save=save
)
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")

View File

@@ -4,23 +4,25 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import os
import glob import glob
from pydub import AudioSegment
import librosa
import pandas as pd
import numpy as np
import webrtcvad
from os.path import join
import logging import logging
import os
from os.path import join
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut import numpy as np
import pandas as pd
import webrtcvad
from pydub import AudioSegment
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
pause_seg_dir = "acoustic/pause_segment"
csv_ext = "_pausechar.csv"
pause_seg_dir = 'acoustic/pause_segment'
csv_ext = '_pausechar.csv'
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config): def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
""" """
@@ -37,23 +39,25 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
pause_len = np.zeros(num_pauses) pause_len = np.zeros(num_pauses)
for p in range(num_pauses): for p in range(num_pauses):
pause_len[p] = seg_starts_sec[p+1] - seg_ends_sec[p] pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p]
if len(pause_len)>0: if len(pause_len) > 0:
pause_len_mean = np.mean(pause_len)
pause_len_std = np.std(pause_len)
pause_time = np.sum(pause_len) pause_time = np.sum(pause_len)
else: else:
pause_len_mean = 0
pause_len_std = 0
pause_time = 0 pause_time = 0
pause_frac = pause_time / total_time pause_frac = pause_time / total_time
timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time, timing_dict = {
r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac} r_config.aco_totaltime: total_time,
r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses,
r_config.aco_pausetime: pause_time,
r_config.aco_pausefrac: pause_frac,
}
return timing_dict return timing_dict
def process_silence(audio_file, r_config): def process_silence(audio_file, r_config):
""" """
Returns dataframe for pause between words using voice activity detection Returns dataframe for pause between words using voice activity detection
@@ -69,59 +73,75 @@ def process_silence(audio_file, r_config):
aggressiveness = 3 aggressiveness = 3
frame_dur_ms = 20 frame_dur_ms = 20
#pause segment(long & short pad) # pause segment(long & short pad)
long_pad_around_voice_ms = 200 long_pad_around_voice_ms = 200
short_pad_around_voice_ms = 100 short_pad_around_voice_ms = 100
if len(y)>0: if len(y) > 0:
vad = webrtcvad.Vad(aggressiveness) vad = webrtcvad.Vad(aggressiveness)
frames = vu.frame_generator(frame_dur_ms, y, sr) frames = vu.frame_generator(frame_dur_ms, y, sr)
frames = list(frames) frames = list(frames)
#longer pad time screens out little blips, but misses short silences # longer pad time screens out little blips, but misses short silences
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames) long_seg_starts, long_seg_ends = vu.vad_get_segment_times(
sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames
)
#Logic to handle blank audio file # Logic to handle blank audio file
if len(long_seg_starts) == 0 or len(long_seg_ends) == 0: if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
return '' return ""
t_start = long_seg_starts[0] t_start = long_seg_starts[0]
t_end = long_seg_ends[-1] t_end = long_seg_ends[-1]
# shorter pad time captures short silences (but misfires on little blips) # shorter pad time captures short silences (but misfires on little blips)
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames) short_seg_starts, short_seg_ends = vu.vad_get_segment_times(
sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames
)
seg_starts = [] seg_starts = []
seg_ends = [] seg_ends = []
for k in range(len(short_seg_starts)): # logic to clean up some typical misfires for k in range(
if (short_seg_starts[k] >=t_start) and (short_seg_starts[k] <= t_end): len(short_seg_starts)
): # logic to clean up some typical misfires
if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end):
seg_starts.append(short_seg_starts[k]) seg_starts.append(short_seg_starts[k])
seg_ends.append(short_seg_ends[k]) seg_ends.append(short_seg_ends[k])
if len(seg_starts) == 0 or len(seg_ends) == 0: if len(seg_starts) == 0 or len(seg_ends) == 0:
return '' return ""
timing_dict = get_timing_cues(seg_starts, seg_ends, r_config) timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
feat_dict_list.append(timing_dict) feat_dict_list.append(timing_dict)
df = pd.DataFrame(feat_dict_list) df = pd.DataFrame(feat_dict_list)
df[r_config.err_reason] = 'Pass'# will replace with threshold in future release df[r_config.err_reason] = "Pass" # will replace with threshold in future release
return df return df
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
""" """
Preparing empty Pause Segment matrix if something fails Preparing empty Pause Segment matrix if something fails
""" """
cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses, r_config.aco_pausetime, cols = [
r_config.aco_pausefrac, r_config.err_reason] r_config.aco_totaltime,
r_config.aco_speakingtime,
r_config.aco_numpauses,
r_config.aco_pausetime,
r_config.aco_pausefrac,
r_config.err_reason,
]
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
df_pause = pd.DataFrame(out_val, columns = cols) df_pause = pd.DataFrame(out_val, columns=cols)
df_pause['dbm_master_url'] = video_uri df_pause["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext) ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
return df_pause
def run_pause_segment(video_uri, out_dir, r_config):
def run_pause_segment(video_uri, out_dir, r_config, save=True):
""" """
Processing all patient's for getting Pause Segment Processing all patient's for getting Pause Segment
--------------- ---------------
@@ -133,39 +153,48 @@ def run_pause_segment(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
error_txt = 'error: length less than 0.064' error_txt = "error: length less than 0.064"
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt) empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
return return
logger.info('Converting stereo sound to mono-lD') logger.info("Converting stereo sound to mono-lD")
sound_mono = AudioSegment.from_wav(audio_file) sound_mono = AudioSegment.from_wav(audio_file)
sound_mono = sound_mono.set_channels(1) sound_mono = sound_mono.set_channels(1)
sound_mono = sound_mono.set_frame_rate(48000) sound_mono = sound_mono.set_frame_rate(48000)
mono_wav = os.path.join(input_loc, fl_name + '_mono.wav') mono_wav = os.path.join(input_loc, fl_name + "_mono.wav")
sound_mono.export(mono_wav, format="wav") sound_mono.export(mono_wav, format="wav")
df_pause_seg = process_silence(mono_wav, r_config) df_pause_seg = process_silence(mono_wav, r_config)
os.remove(mono_wav)#removing mono wav file os.remove(mono_wav) # removing mono wav file
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg)>0: if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
logger.info('Processing Output file {} '.format(out_loc)) df_pause_seg["dbm_master_url"] = video_uri
if save:
df_pause_seg['dbm_master_url'] = video_uri logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext) ut.save_output(
df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext
)
df = df_pause_seg
else: else:
error_txt = 'error: webrtcvad returns no segment' error_txt = "error: webrtcvad returns no segment"
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt) df = empty_pause_segment(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file", str(e))

View File

@@ -4,23 +4,24 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import os
import glob import glob
import parselmouth
import librosa
import numpy as np
from os.path import join
import logging import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
ff_dir = "acoustic/pitch"
csv_ext = "_pitch.csv"
error_txt = "error: length less than 0.064"
ff_dir = 'acoustic/pitch'
csv_ext = '_pitch.csv'
error_txt = 'error: length less than 0.064'
def audio_pitch(path): def audio_pitch(path):
""" """
@@ -31,12 +32,13 @@ def audio_pitch(path):
(list) list of pitch/fundamental frequency for each voice frame (list) list of pitch/fundamental frequency for each voice frame
""" """
sound_pat = parselmouth.Sound(path) sound_pat = parselmouth.Sound(path)
pitch = sound_pat.to_pitch(time_step=.001) pitch = sound_pat.to_pitch(time_step=0.001)
pitch_values = pitch.selected_array['frequency'] pitch_values = pitch.selected_array["frequency"]
return list(pitch_values) return list(pitch_values)
def label_speech(row,fd_freq):
def label_speech(row, fd_freq):
""" """
identify whether frame is voiced or not identify whether frame is voiced or not
Args: Args:
@@ -44,12 +46,13 @@ def label_speech(row,fd_freq):
Returns: Returns:
(str) yes or no indicator for voice (str) yes or no indicator for voice
""" """
if row[fd_freq] > 0 : if row[fd_freq] > 0:
return 'yes' return "yes"
else: else:
return 'no' return "no"
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
""" """
Preparing pitch frequency matrix Preparing pitch frequency matrix
@@ -62,28 +65,45 @@ def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
ff_frames = audio_pitch(audio_file) ff_frames = audio_pitch(audio_file)
df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff]) df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
df_ffreq['Frames'] = df_ffreq.index df_ffreq["Frames"] = df_ffreq.index
df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(lambda row: label_speech(row, r_config.aco_ff),axis=1) df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(
lambda row: label_speech(row, r_config.aco_ff), axis=1
)
df_ffreq[r_config.err_reason] = 'Pass'# will replace with threshold in future release df_ffreq[
df_ffreq['dbm_master_url'] = video_uri r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_ffreq["dbm_master_url"] = video_uri
logger.info('Processing Output file {} '.format(out_loc)) if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
return df_ffreq
def empty_pitch(video_uri, out_loc, fl_name, r_config):
def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True):
""" """
Preparing empty pitch frequency matrix if something fails Preparing empty pitch frequency matrix if something fails
""" """
df_ffreq = pd.DataFrame([[np.nan, np.nan, 'no', error_txt]], df_ffreq = pd.DataFrame(
columns=['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]) [[np.nan, np.nan, "no", error_txt]],
df_ffreq['dbm_master_url'] = video_uri columns=[
"Frames",
r_config.aco_ff,
r_config.aco_voiceLabel,
r_config.err_reason,
],
)
df_ffreq["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
return df_ffreq
def run_pitch(video_uri, out_dir, r_config):
def run_pitch(video_uri, out_dir, r_config, save=True):
""" """
Processing audio for fetching pitch Processing audio for fetching pitch
@@ -96,18 +116,24 @@ def run_pitch(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_pitch(video_uri, out_loc, fl_name, r_config) df = empty_pitch(video_uri, out_loc, fl_name, r_config, save=save)
return else:
df = calc_pitch(
video_uri, audio_file, out_loc, fl_name, r_config, save=save
)
return df
calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")

View File

@@ -4,26 +4,25 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import numpy as np
import os
import glob import glob
import parselmouth import logging
import librosa import os
import numpy as np
import more_itertools as mit
from os.path import join from os.path import join
import logging import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
shimmer_dir = "acoustic/shimmer"
ff_dir = "acoustic/pitch"
csv_ext = "_shimmer.csv"
shimmer_dir = 'acoustic/shimmer'
ff_dir = 'acoustic/pitch'
csv_ext = '_shimmer.csv'
def audio_shimmer(sound): def audio_shimmer(sound):
""" """
@@ -33,60 +32,42 @@ def audio_shimmer(sound):
Returns: Returns:
(list) list of shimmers for each voice frame (list) list of shimmers for each voice frame
""" """
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500) pointProcess = parselmouth.praat.call(
shimmer = parselmouth.praat.call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6) sound, "To PointProcess (periodic, cc)...", 80, 500
)
shimmer = parselmouth.praat.call(
[sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
)
return shimmer return shimmer
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
""" """
Preparing empty shimmer matrix if something fails Preparing empty shimmer matrix if something fails
""" """
cols = ['Frames', r_config.aco_shimmer, r_config.err_reason] cols = ["Frames", r_config.aco_shimmer, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, error_txt]]
df_shimmer = pd.DataFrame(out_val, columns = cols) df_shimmer = pd.DataFrame(out_val, columns=cols)
df_shimmer['dbm_master_url'] = video_uri df_shimmer["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
return df_shimmer
def _consecutive_runs(indices):
    """
    Split an increasing sequence of ints into runs of consecutive values
    Args:
        indices: increasing integers, e.g. [0, 1, 2, 5, 6, 9]
    Returns:
        (list) list of lists, e.g. [[0, 1, 2], [5, 6], [9]]
    """
    runs = []
    for idx in indices:
        if runs and idx == runs[-1][-1] + 1:
            runs[-1].append(idx)
        else:
            runs.append([idx])
    return runs


def segment_pitch(dir_path, r_config):
    """
    segmenting pitch freq for each voice segment
    Args:
        dir_path: directory scanned for files whose name ends with '_pitch.csv'
        r_config: config object; its aco_voiceLabel attribute names the
                  column holding the "yes"/"no" voice label of each frame
    Returns:
        (tuple) com_speech_sort: voiced and unvoiced runs of frame indices,
                ordered by the first index of each run;
                voiced_yes: runs of consecutive frames labelled "yes";
                voiced_no: runs of consecutive frames labelled "no".
                All three are empty lists when no pitch file could be read.
    """
    # Three independent lists; `([], ) * 3` would hand the caller the very
    # same list object under three names.
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    # If several pitch files exist the last readable one wins; sorting makes
    # that choice deterministic instead of depending on os.listdir order.
    for file in sorted(os.listdir(dir_path)):
        if not file.endswith("_pitch.csv"):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
        except Exception as e:
            # Still best-effort (an unreadable file must not abort the run),
            # but the failure is reported instead of silently discarded.
            logging.getLogger().warning(
                "Skipping pitch file {}: {}".format(file, e)
            )
            continue

        indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
        indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
        voiced_yes = _consecutive_runs(indices_yes)
        voiced_no = _consecutive_runs(indices_no)
        com_speech_sort = sorted(voiced_yes + voiced_no, key=lambda run: run[0])
    return com_speech_sort, voiced_yes, voiced_no
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file): def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
""" """
calculating shimmer for each voice segment calculating shimmer for each voice segment
""" """
snd = parselmouth.Sound(audio_file) snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001) pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort): for idx, vs in enumerate(com_speech_sort):
try: try:
shimmer = np.NaN shimmer = np.NaN
if vs in voiced_yes and len(vs)>1: if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0]) start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1]) end_time = pitch.get_time_from_frame_number(vs[-1])
@@ -102,7 +83,10 @@ def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audi
shimmer_frames[idx] = shimmer shimmer_frames[idx] = shimmer
return shimmer_frames return shimmer_frames
def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_shimmer(
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
""" """
Preparing shimmer matrix Preparing shimmer matrix
Args: Args:
@@ -111,26 +95,37 @@ def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
r_config: config.config_raw_feature.pyConfigFeatureNmReader object r_config: config.config_raw_feature.pyConfigFeatureNmReader object
""" """
dir_path = os.path.join(out_loc, ff_dir) dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path): if os.path.isdir(dir_path) or ff_df is not None:
voice_seg = segment_pitch(dir_path, r_config) if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
shimmer_frames = [np.NaN] * len(voice_seg[0]) shimmer_frames = [np.NaN] * len(voice_seg[0])
shimmer_segment_frames = segment_shimmer(voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file) shimmer_segment_frames = segment_shimmer(
voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file
)
df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer]) df_shimmer = pd.DataFrame(
df_shimmer[r_config.err_reason] = 'Pass'# will replace with threshold in future release shimmer_segment_frames, columns=[r_config.aco_shimmer]
)
df_shimmer[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_shimmer['Frames'] = df_shimmer.index df_shimmer["Frames"] = df_shimmer.index
df_shimmer['dbm_master_url'] = video_uri df_shimmer["dbm_master_url"] = video_uri
if save:
logger.info('Processing Output file {} '.format(out_loc)) logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
df = df_shimmer
else: else:
error_txt = 'error: fundamental freq not available' error_txt = "error: fundamental freq not available"
empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt) df = empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
return df
def run_shimmer(video_uri, out_dir, r_config):
def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None):
""" """
Processing all patients to fetch shimmer Processing all patients to fetch shimmer
--------------- ---------------
@@ -139,22 +134,33 @@ def run_shimmer(video_uri, out_dir, r_config):
video_uri: video path; r_config: raw variable config object video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output out_dir: (str) Output directory for processed output
""" """
try: # try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info("Output file {} size is less than 0.064sec".format(audio_file))
error_txt = 'error: length less than 0.064' error_txt = "error: length less than 0.064"
empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt) df = empty_shimmer(
return video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config) else:
except Exception as e: df = calc_shimmer(
logger.error('Failed to process audio file') video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
# except Exception as e:
# logger.error('Error in shimmer: {}'.format(e))
# logger.error('Failed to process audio file')

View File

@@ -4,22 +4,23 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import parselmouth
import pandas as pd
import numpy as np
import glob import glob
import librosa
from os.path import join
import logging import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
vfs_dir = "acoustic/voice_frame_score"
csv_ext = "_voiceprev.csv"
error_txt = "error: length less than 0.064"
vfs_dir = 'acoustic/voice_frame_score'
csv_ext = '_voiceprev.csv'
error_txt = 'error: length less than 0.064'
def audio_pitch_frame(pitch): def audio_pitch_frame(pitch):
""" """
@@ -33,6 +34,7 @@ def audio_pitch_frame(pitch):
voiced_frames = pitch.count_voiced_frames() voiced_frames = pitch.count_voiced_frames()
return total_frames, voiced_frames return total_frames, voiced_frames
def voice_segment(path): def voice_segment(path):
""" """
Using parselmouth library for fundamental frequency Using parselmouth library for fundamental frequency
@@ -43,12 +45,13 @@ def voice_segment(path):
""" """
sound_pat = parselmouth.Sound(path) sound_pat = parselmouth.Sound(path)
pitch = sound_pat.to_pitch() pitch = sound_pat.to_pitch()
total_frames,voiced_frames = audio_pitch_frame(pitch) total_frames, voiced_frames = audio_pitch_frame(pitch)
voiced_percentage = (voiced_frames/total_frames)*100 voiced_percentage = (voiced_frames / total_frames) * 100
return voiced_percentage, voiced_frames, total_frames return voiced_percentage, voiced_frames, total_frames
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
""" """
creating dataframe matrix for voice frame score creating dataframe matrix for voice frame score
Args: Args:
@@ -57,32 +60,44 @@ def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
f_nm_config: Config file object f_nm_config: Config file object
""" """
voice_percentage,voiced_frames, total_frames = voice_segment(audio_file) voice_percentage, voiced_frames, total_frames = voice_segment(audio_file)
df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame]) df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame])
df_vfs[r_config.aco_totVoiceFrame] = [total_frames] df_vfs[r_config.aco_totVoiceFrame] = [total_frames]
df_vfs[r_config.aco_voicePct] = [voice_percentage] df_vfs[r_config.aco_voicePct] = [voice_percentage]
df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release df_vfs[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_vfs['Frames'] = df_vfs.index df_vfs["Frames"] = df_vfs.index
df_vfs['dbm_master_url'] = video_uri df_vfs["dbm_master_url"] = video_uri
if save:
logger.info('Saving Output file {} '.format(out_loc)) logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
return df_vfs
def empty_vfs(video_uri, out_loc, fl_name, r_config):
def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True):
""" """
Preparing empty VFS matrix if something fails Preparing empty VFS matrix if something fails
""" """
cols = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct, r_config.err_reason] cols = [
"Frames",
r_config.aco_voiceFrame,
r_config.aco_totVoiceFrame,
r_config.aco_voicePct,
r_config.err_reason,
]
out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]]
df_vfs = pd.DataFrame(out_val, columns = cols) df_vfs = pd.DataFrame(out_val, columns=cols)
df_vfs['dbm_master_url'] = video_uri df_vfs["dbm_master_url"] = video_uri
if save:
logger.info('Saving Output file {} '.format(out_loc)) logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
return df_vfs
def run_vfs(video_uri, out_dir, r_config):
def run_vfs(video_uri, out_dir, r_config, save=True):
""" """
Processing all participants for fetching voice frame score Processing all participants for fetching voice frame score
--------------- ---------------
@@ -94,18 +109,23 @@ def run_vfs(video_uri, out_dir, r_config):
try: try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064: if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_vfs(video_uri, out_loc, fl_name, r_config) df = empty_vfs(video_uri, out_loc, fl_name, r_config, save=save)
return else:
df = calc_vfs(
calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config) video_uri, audio_file, out_loc, fl_name, r_config, save=save
)
return df
except Exception as e: except Exception as e:
logger.error('Failed to process audio file') e
logger.error("Failed to process audio file")