code refactoring only
This commit is contained in:
@@ -4,25 +4,25 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import glob
|
||||
import logging
|
||||
from os.path import join
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
import numpy as np
|
||||
import parselmouth
|
||||
import librosa
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
formant_dir = 'acoustic/formant_freq'
|
||||
csv_ext = '_formant.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
formant_dir = "acoustic/formant_freq"
|
||||
csv_ext = "_formant.csv"
|
||||
error_txt = "error: length less than 0.064"
|
||||
|
||||
def formant_list(formant,snd):
|
||||
|
||||
def formant_list(formant, snd):
|
||||
"""
|
||||
Getting formant frequency per second
|
||||
Args:
|
||||
@@ -36,22 +36,23 @@ def formant_list(formant,snd):
|
||||
f3_list = []
|
||||
f4_list = []
|
||||
|
||||
dur = snd.duration-0.02
|
||||
dur = snd.duration - 0.02
|
||||
dur_round = round(dur, 2)
|
||||
|
||||
time_list = np.arange(0.001, dur_round, 0.001)
|
||||
for time in time_list:
|
||||
|
||||
f1 = formant.get_value_at_time(1,time)
|
||||
f2 = formant.get_value_at_time(2,time)
|
||||
f3 = formant.get_value_at_time(3,time)
|
||||
f4 = formant.get_value_at_time(4,time)
|
||||
f1 = formant.get_value_at_time(1, time)
|
||||
f2 = formant.get_value_at_time(2, time)
|
||||
f3 = formant.get_value_at_time(3, time)
|
||||
f4 = formant.get_value_at_time(4, time)
|
||||
|
||||
f1_list.append(f1)
|
||||
f2_list.append(f2)
|
||||
f3_list.append(f3)
|
||||
f4_list.append(f4)
|
||||
return f1_list,f2_list,f3_list,f4_list
|
||||
return f1_list, f2_list, f3_list, f4_list
|
||||
|
||||
|
||||
def formant_score(path):
|
||||
"""
|
||||
@@ -62,11 +63,12 @@ def formant_score(path):
|
||||
(list) list of Formant freq for each voice frame
|
||||
"""
|
||||
sound_pat = parselmouth.Sound(path)
|
||||
formant = sound_pat.to_formant_burg(time_step=.001)
|
||||
f_score = formant_list(formant,sound_pat)
|
||||
formant = sound_pat.to_formant_burg(time_step=0.001)
|
||||
f_score = formant_list(formant, sound_pat)
|
||||
return f_score
|
||||
|
||||
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||
"""
|
||||
Preparing Formant freq matrix
|
||||
Args:
|
||||
@@ -74,36 +76,51 @@ def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
out_loc: (str) Output directory; r_config: raw variable config
|
||||
"""
|
||||
|
||||
f1_list,f2_list,f3_list,f4_list = formant_score(audio_file)
|
||||
f1_list, f2_list, f3_list, f4_list = formant_score(audio_file)
|
||||
df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1])
|
||||
|
||||
df_formant[r_config.aco_fm2] = f2_list
|
||||
df_formant[r_config.aco_fm3] = f3_list
|
||||
df_formant[r_config.aco_fm4] = f4_list
|
||||
|
||||
df_formant.replace('', np.nan, regex=True,inplace=True)
|
||||
df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_formant.replace("", np.nan, regex=True, inplace=True)
|
||||
df_formant[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
df_formant['Frames'] = df_formant.index
|
||||
df_formant['dbm_master_url'] = video_uri
|
||||
df_formant["Frames"] = df_formant.index
|
||||
df_formant["dbm_master_url"] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
|
||||
return df_formant
|
||||
|
||||
def empty_fm(video_uri, out_loc, fl_name, r_config):
|
||||
|
||||
def empty_fm(video_uri, out_loc, fl_name, r_config, save=True):
|
||||
|
||||
"""
|
||||
Preparing empty formant frequency matrix if something fails
|
||||
"""
|
||||
cols = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason]
|
||||
cols = [
|
||||
"Frames",
|
||||
r_config.aco_fm1,
|
||||
r_config.aco_fm2,
|
||||
r_config.aco_fm3,
|
||||
r_config.aco_fm4,
|
||||
r_config.err_reason,
|
||||
]
|
||||
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
|
||||
df_fm = pd.DataFrame(out_val, columns = cols)
|
||||
df_fm['dbm_master_url'] = video_uri
|
||||
df_fm = pd.DataFrame(out_val, columns=cols)
|
||||
df_fm["dbm_master_url"] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
|
||||
return df_fm
|
||||
|
||||
def run_formant(video_uri, out_dir, r_config):
|
||||
|
||||
def run_formant(video_uri, out_dir, r_config, save=True):
|
||||
|
||||
"""
|
||||
Processing all patient's for fetching Formant freq
|
||||
@@ -116,18 +133,23 @@ def run_formant(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
empty_fm(video_uri, out_loc, fl_name, r_config)
|
||||
return
|
||||
|
||||
calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
df = empty_fm(video_uri, out_loc, fl_name, r_config, save=save)
|
||||
else:
|
||||
df = calc_formant(
|
||||
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -4,24 +4,25 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import more_itertools as mit
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import more_itertools as mit
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
gne_dir = "acoustic/glottal_noise"
|
||||
ff_dir = "acoustic/pitch"
|
||||
csv_ext = "_gne.csv"
|
||||
|
||||
gne_dir = 'acoustic/glottal_noise'
|
||||
ff_dir = 'acoustic/pitch'
|
||||
csv_ext = '_gne.csv'
|
||||
|
||||
def gne_ratio(sound):
|
||||
"""
|
||||
@@ -33,62 +34,42 @@ def gne_ratio(sound):
|
||||
"""
|
||||
harmonicity_gne = sound.to_harmonicity_gne()
|
||||
gne_all_bands = harmonicity_gne.values
|
||||
gne_all_bands = np.where(gne_all_bands==-200, np.NaN, gne_all_bands)
|
||||
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
|
||||
|
||||
gne = np.nanmax(gne_all_bands) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||
gne = np.nanmax(
|
||||
gne_all_bands
|
||||
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||
return gne
|
||||
|
||||
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
|
||||
|
||||
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||
"""
|
||||
Preparing empty GNE matrix if something fails
|
||||
"""
|
||||
cols = ['Frames', r_config.aco_gne, r_config.err_reason]
|
||||
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
|
||||
out_val = [[np.nan, np.nan, error_txt]]
|
||||
|
||||
df_gne = pd.DataFrame(out_val, columns = cols)
|
||||
df_gne['dbm_master_url'] = video_uri
|
||||
df_gne = pd.DataFrame(out_val, columns=cols)
|
||||
df_gne["dbm_master_url"] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
return df_gne
|
||||
|
||||
def segment_pitch(dir_path, r_config):
    """
    Segmenting pitch freq for each voice segment

    Scans ``dir_path`` for files whose name ends in ``_pitch.csv``, reads the
    ``r_config.aco_voiceLabel`` column and groups the row indices into runs of
    consecutive "yes" rows and consecutive "no" rows. If several files match,
    the last one that parses successfully wins (as before).

    Args:
        dir_path: (str) directory containing the pitch csv output
        r_config: raw variable config object; only ``aco_voiceLabel`` is read
    Returns:
        (tuple) ``(com_speech_sort, voiced_yes, voiced_no)``: all segments
        ordered by their first row index, the "yes" segments and the "no"
        segments. Each segment is a list of consecutive row indices. Three
        empty lists are returned when no usable pitch file is found.
    """
    # Three independent lists; the former `([], ) * 3` bound all three names
    # to one shared list object.
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    for file_name in os.listdir(dir_path):
        if not file_name.endswith("_pitch.csv"):
            continue

        pitch_path = os.path.join(dir_path, file_name)
        try:
            ff_df = pd.read_csv(pitch_path)
            voice_label = ff_df[r_config.aco_voiceLabel]

            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            seg_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]

            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            seg_no = [list(group) for group in mit.consecutive_groups(indices_no)]

            seg_sorted = sorted(seg_yes + seg_no, key=lambda x: x[0])
        except Exception:
            # Best effort: an unreadable or malformed pitch file is skipped,
            # but no longer silently, and Ctrl-C / SystemExit still propagate.
            logger.warning("Skipping pitch file %s", pitch_path, exc_info=True)
            continue

        # Publish the three results together so a failure part-way through a
        # file can never leave them inconsistent with each other.
        com_speech_sort, voiced_yes, voiced_no = seg_sorted, seg_yes, seg_no

    return com_speech_sort, voiced_yes, voiced_no
|
||||
|
||||
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
|
||||
"""
|
||||
calculating gne for each voice segment
|
||||
"""
|
||||
snd = parselmouth.Sound(audio_file)
|
||||
pitch = snd.to_pitch(time_step=.001)
|
||||
pitch = snd.to_pitch(time_step=0.001)
|
||||
|
||||
for idx, vs in enumerate(com_speech_sort):
|
||||
try:
|
||||
|
||||
max_gne = np.NaN
|
||||
if vs in voiced_yes and len(vs)>1:
|
||||
if vs in voiced_yes and len(vs) > 1:
|
||||
|
||||
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||
@@ -104,7 +85,8 @@ def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_fi
|
||||
gne_all_frames[idx] = max_gne
|
||||
return gne_all_frames
|
||||
|
||||
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Preparing gne matrix
|
||||
Args:
|
||||
@@ -112,26 +94,36 @@ def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
out_loc: (str) Output directory for csv's
|
||||
"""
|
||||
dir_path = os.path.join(out_loc, ff_dir)
|
||||
if os.path.isdir(dir_path):
|
||||
voice_seg = segment_pitch(dir_path, r_config)
|
||||
if os.path.isdir(dir_path) or ff_df is not None:
|
||||
if ff_df is not None:
|
||||
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||
else:
|
||||
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||
|
||||
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
||||
gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file)
|
||||
gne_segment_frames = segment_gne(
|
||||
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
|
||||
)
|
||||
|
||||
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
||||
df_gne[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_gne[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
df_gne['Frames'] = df_gne.index
|
||||
df_gne['dbm_master_url'] = video_uri
|
||||
df_gne["Frames"] = df_gne.index
|
||||
df_gne["dbm_master_url"] = video_uri
|
||||
|
||||
logger.info('Processing Output file {} '.format(out_loc))
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
return df_gne
|
||||
|
||||
else:
|
||||
error_txt = 'error: pitch freq not available'
|
||||
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
error_txt = "error: pitch freq not available"
|
||||
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||
|
||||
def run_gne(video_uri, out_dir, r_config):
|
||||
|
||||
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Processing all patient's for fetching glottal noise ratio
|
||||
---------------
|
||||
@@ -143,19 +135,32 @@ def run_gne(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
error_txt = 'error: length less than 0.064'
|
||||
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
return
|
||||
|
||||
calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
error_txt = "error: length less than 0.064"
|
||||
df = empty_gne(
|
||||
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||
)
|
||||
else:
|
||||
df = calc_gne(
|
||||
video_uri,
|
||||
audio_file,
|
||||
out_loc,
|
||||
fl_name,
|
||||
r_config,
|
||||
save=save,
|
||||
ff_df=ff_df,
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -1,77 +1,133 @@
|
||||
"""
|
||||
file_name: hnr
|
||||
file_name: gne
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import more_itertools as mit
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
hnr_dir = 'acoustic/harmonic_noise'
|
||||
csv_ext = '_hnr.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
gne_dir = "acoustic/glottal_noise"
|
||||
ff_dir = "acoustic/pitch"
|
||||
csv_ext = "_gne.csv"
|
||||
|
||||
def hnr_ratio(filepath):
|
||||
|
||||
def gne_ratio(sound):
|
||||
"""
|
||||
Using parselmouth library fetching harmonic noise ratio ratio
|
||||
Using parselmouth library fetching glottal noise excitation ratio
|
||||
Args:
|
||||
path: (.wav) audio file location
|
||||
sound: parselmouth object
|
||||
Returns:
|
||||
(list) list of hnr ratio for each voice frame, min,max and mean hnr
|
||||
(list) list of gne ratio for each voice frame
|
||||
"""
|
||||
sound = parselmouth.Sound(filepath)
|
||||
harmonicity = sound.to_harmonicity_ac(time_step=.001)
|
||||
harmonicity_gne = sound.to_harmonicity_gne()
|
||||
gne_all_bands = harmonicity_gne.values
|
||||
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
|
||||
|
||||
hnr_all_frames = harmonicity.values#[harmonicity.values != -200] nan it (****)
|
||||
hnr_all_frames = np.where(hnr_all_frames==-200, np.NaN, hnr_all_frames)
|
||||
return hnr_all_frames.transpose()
|
||||
gne = np.nanmax(
|
||||
gne_all_bands
|
||||
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||
return gne
|
||||
|
||||
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||
"""
|
||||
Preparing harmonic noise matrix
|
||||
Preparing empty GNE matrix if something fails
|
||||
"""
|
||||
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
|
||||
out_val = [[np.nan, np.nan, error_txt]]
|
||||
|
||||
df_gne = pd.DataFrame(out_val, columns=cols)
|
||||
df_gne["dbm_master_url"] = video_uri
|
||||
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
return df_gne
|
||||
|
||||
|
||||
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Calculating gne for each voice segment

    Args:
        com_speech_sort: (list) all segments in order; each segment is a list
            of frame indices whose first and last entries are converted to
            times through the pitch object
        voiced_yes: (list) the segments labelled as voiced
        voiced_no: (list) the unvoiced segments; not used here, kept so
            existing callers keep working
        gne_all_frames: (list) pre-sized output list, overwritten in place
        audio_file: (.wav) audio file handed to parselmouth
    Returns:
        (list) ``gne_all_frames`` holding the ``gne_ratio`` value for every
        voiced segment longer than one frame and NaN for all other segments
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        # Assigned before the try so the store below always has a value.
        # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:

                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # Rebuild the excerpt at the recording's own sampling rate;
                # Sound(array) alone would assume parselmouth's 44100 Hz default.
                samples = parselmouth.Sound(
                    snd.as_array()[0][snd_start:snd_end],
                    sampling_frequency=snd.sampling_frequency,
                )
                max_gne = gne_ratio(samples)
        except Exception:
            # Best effort per segment: a failed segment stays NaN, while
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            logger.debug("GNE failed for segment %d", idx, exc_info=True)
            max_gne = np.nan

        gne_all_frames[idx] = max_gne
    return gne_all_frames
|
||||
|
||||
|
||||
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Preparing gne matrix
|
||||
Args:
|
||||
audio_file: (.wav) parsed audio file
|
||||
out_loc: (str) Output directory for csv's
|
||||
"""
|
||||
dir_path = os.path.join(out_loc, ff_dir)
|
||||
if os.path.isdir(dir_path) or ff_df is not None:
|
||||
if ff_df is not None:
|
||||
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||
else:
|
||||
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||
|
||||
hnr_all_frames = hnr_ratio(audio_file)
|
||||
df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr])
|
||||
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
||||
gne_segment_frames = segment_gne(
|
||||
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
|
||||
)
|
||||
|
||||
df_hnr['Frames'] = df_hnr.index
|
||||
df_hnr['dbm_master_url'] = video_uri
|
||||
df_hnr[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
||||
df_gne[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
|
||||
df_gne["Frames"] = df_gne.index
|
||||
df_gne["dbm_master_url"] = video_uri
|
||||
|
||||
def empty_hnr(video_uri, out_loc, fl_name, r_config):
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
return df_gne
|
||||
|
||||
else:
|
||||
error_txt = "error: pitch freq not available"
|
||||
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||
|
||||
|
||||
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Preparing empty HNR matrix if something fails
|
||||
"""
|
||||
cols = ['Frames', r_config.aco_hnr, r_config.err_reason]
|
||||
out_val = [[np.nan, np.nan, error_txt]]
|
||||
df_hnr = pd.DataFrame(out_val, columns = cols)
|
||||
df_hnr['dbm_master_url'] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
|
||||
|
||||
def run_hnr(video_uri, out_dir, r_config):
|
||||
"""
|
||||
Processing all patient's for fetching harmonic noise ratio
|
||||
-------------------
|
||||
-------------------
|
||||
Processing all patient's for fetching glottal noise ratio
|
||||
---------------
|
||||
---------------
|
||||
Args:
|
||||
video_uri: video path; r_config: raw variable config object
|
||||
out_dir: (str) Output directory for processed output
|
||||
@@ -79,18 +135,32 @@ def run_hnr(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
empty_hnr(video_uri, out_loc, fl_name, r_config)
|
||||
return
|
||||
|
||||
calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
error_txt = "error: length less than 0.064"
|
||||
df = empty_gne(
|
||||
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||
)
|
||||
else:
|
||||
df = calc_gne(
|
||||
video_uri,
|
||||
audio_file,
|
||||
out_loc,
|
||||
fl_name,
|
||||
r_config,
|
||||
save=save,
|
||||
ff_df=ff_df,
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -1,73 +1,133 @@
|
||||
"""
|
||||
file_name: intensity
|
||||
file_name: gne
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import more_itertools as mit
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
intensity_dir = 'acoustic/intensity'
|
||||
csv_ext = '_intensity.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
gne_dir = "acoustic/glottal_noise"
|
||||
ff_dir = "acoustic/pitch"
|
||||
csv_ext = "_gne.csv"
|
||||
|
||||
def intensity_score(path):
|
||||
|
||||
def gne_ratio(sound):
|
||||
"""
|
||||
Using parselmouth library fetching Intensity
|
||||
Using parselmouth library fetching glottal noise excitation ratio
|
||||
Args:
|
||||
path: (.wav) audio file location
|
||||
sound: parselmouth object
|
||||
Returns:
|
||||
(list) list of Intensity for each voice frame
|
||||
(list) list of gne ratio for each voice frame
|
||||
"""
|
||||
sound_pat = parselmouth.Sound(path)
|
||||
intensity = sound_pat.to_intensity(time_step=.001)
|
||||
return intensity.values[0]
|
||||
harmonicity_gne = sound.to_harmonicity_gne()
|
||||
gne_all_bands = harmonicity_gne.values
|
||||
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
|
||||
|
||||
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
gne = np.nanmax(
|
||||
gne_all_bands
|
||||
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||
return gne
|
||||
|
||||
|
||||
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||
"""
|
||||
Preparing Intensity matrix
|
||||
Preparing empty GNE matrix if something fails
|
||||
"""
|
||||
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
|
||||
out_val = [[np.nan, np.nan, error_txt]]
|
||||
|
||||
df_gne = pd.DataFrame(out_val, columns=cols)
|
||||
df_gne["dbm_master_url"] = video_uri
|
||||
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
return df_gne
|
||||
|
||||
|
||||
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Calculating gne for each voice segment

    Args:
        com_speech_sort: (list) all segments in order; each segment is a list
            of frame indices whose first and last entries are converted to
            times through the pitch object
        voiced_yes: (list) the segments labelled as voiced
        voiced_no: (list) the unvoiced segments; not used here, kept so
            existing callers keep working
        gne_all_frames: (list) pre-sized output list, overwritten in place
        audio_file: (.wav) audio file handed to parselmouth
    Returns:
        (list) ``gne_all_frames`` holding the ``gne_ratio`` value for every
        voiced segment longer than one frame and NaN for all other segments
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        # Assigned before the try so the store below always has a value.
        # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:

                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # Rebuild the excerpt at the recording's own sampling rate;
                # Sound(array) alone would assume parselmouth's 44100 Hz default.
                samples = parselmouth.Sound(
                    snd.as_array()[0][snd_start:snd_end],
                    sampling_frequency=snd.sampling_frequency,
                )
                max_gne = gne_ratio(samples)
        except Exception:
            # Best effort per segment: a failed segment stays NaN, while
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            logger.debug("GNE failed for segment %d", idx, exc_info=True)
            max_gne = np.nan

        gne_all_frames[idx] = max_gne
    return gne_all_frames
|
||||
|
||||
|
||||
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Preparing gne matrix
|
||||
Args:
|
||||
audio_file: (.wav) parsed audio file
|
||||
out_loc: (str) Output directory for csv's
|
||||
"""
|
||||
dir_path = os.path.join(out_loc, ff_dir)
|
||||
if os.path.isdir(dir_path) or ff_df is not None:
|
||||
if ff_df is not None:
|
||||
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||
else:
|
||||
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||
|
||||
intensity_frames = intensity_score(audio_file)
|
||||
df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int])
|
||||
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
||||
gne_segment_frames = segment_gne(
|
||||
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
|
||||
)
|
||||
|
||||
df_intensity['Frames'] = df_intensity.index
|
||||
df_intensity['dbm_master_url'] = video_uri
|
||||
df_intensity[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
||||
df_gne[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext)
|
||||
df_gne["Frames"] = df_gne.index
|
||||
df_gne["dbm_master_url"] = video_uri
|
||||
|
||||
def empty_intensity(video_uri, out_loc, fl_name, r_config):
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
return df_gne
|
||||
|
||||
else:
|
||||
error_txt = "error: pitch freq not available"
|
||||
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||
|
||||
|
||||
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Preparing empty Intensity matrix if something fails
|
||||
"""
|
||||
cols = ['Frames', r_config.aco_int, r_config.err_reason]
|
||||
out_val = [[np.nan, np.nan, error_txt]]
|
||||
df_int = pd.DataFrame(out_val, columns = cols)
|
||||
df_int['dbm_master_url'] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
|
||||
|
||||
def run_intensity(video_uri, out_dir, r_config):
|
||||
"""
|
||||
Processing all patient's for fetching Intensity
|
||||
-------------------
|
||||
-------------------
|
||||
Processing all patient's for fetching glottal noise ratio
|
||||
---------------
|
||||
---------------
|
||||
Args:
|
||||
video_uri: video path; r_config: raw variable config object
|
||||
out_dir: (str) Output directory for processed output
|
||||
@@ -75,18 +135,32 @@ def run_intensity(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
empty_intensity(video_uri, out_loc, fl_name, r_config)
|
||||
return
|
||||
|
||||
calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
error_txt = "error: length less than 0.064"
|
||||
df = empty_gne(
|
||||
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||
)
|
||||
else:
|
||||
df = calc_gne(
|
||||
video_uri,
|
||||
audio_file,
|
||||
out_loc,
|
||||
fl_name,
|
||||
r_config,
|
||||
save=save,
|
||||
ff_df=ff_df,
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -4,25 +4,24 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
import more_itertools as mit
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
jitter_dir = "acoustic/jitter"
|
||||
ff_dir = "acoustic/pitch"
|
||||
csv_ext = "_jitter.csv"
|
||||
|
||||
jitter_dir = 'acoustic/jitter'
|
||||
ff_dir = 'acoustic/pitch'
|
||||
csv_ext = '_jitter.csv'
|
||||
|
||||
def audio_jitter(sound):
|
||||
"""
|
||||
@@ -32,60 +31,42 @@ def audio_jitter(sound):
|
||||
Returns:
|
||||
(list) list of jitters for each voice frame
|
||||
"""
|
||||
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
|
||||
jitter = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
|
||||
pointProcess = parselmouth.praat.call(
|
||||
sound, "To PointProcess (periodic, cc)...", 80, 500
|
||||
)
|
||||
jitter = parselmouth.praat.call(
|
||||
pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3
|
||||
)
|
||||
return jitter
|
||||
|
||||
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
|
||||
|
||||
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||
"""
|
||||
Preparing empty jitter matrix if something fails
|
||||
"""
|
||||
cols = ['Frames', r_config.aco_jitter, r_config.err_reason]
|
||||
cols = ["Frames", r_config.aco_jitter, r_config.err_reason]
|
||||
out_val = [[np.nan, np.nan, error_txt]]
|
||||
df_jitter = pd.DataFrame(out_val, columns = cols)
|
||||
df_jitter['dbm_master_url'] = video_uri
|
||||
df_jitter = pd.DataFrame(out_val, columns=cols)
|
||||
df_jitter["dbm_master_url"] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
||||
return df_jitter
|
||||
|
||||
def segment_pitch(dir_path, r_config):
    """
    Group the frames of a saved pitch CSV into consecutive voiced / unvoiced runs.

    Every entry of ``dir_path`` whose name ends with ``_pitch.csv`` is read. If
    several match, the result of the last one that could be read is returned.
    Files that cannot be read or lack the voice-label column are skipped with a
    warning instead of being silently ignored.

    Args:
        dir_path: (str) directory containing the ``*_pitch.csv`` file(s).
        r_config: config object supplying the ``aco_voiceLabel`` column name.
    Returns:
        (tuple) ``(com_speech_sort, voiced_yes, voiced_no)`` where
        ``voiced_yes`` / ``voiced_no`` are lists of runs of consecutive row
        indices labelled "yes" / "no", and ``com_speech_sort`` is both lists
        merged and ordered by each run's first index. Three empty lists when
        no pitch file could be processed.
    """
    # Local import keeps this block self-contained; only the standard library is needed.
    from itertools import groupby

    def _consecutive_runs(indices):
        """Split an ascending list of ints into lists of consecutive values."""
        # Within a run of consecutive values, (value - position) is constant.
        return [
            [value for _, value in run]
            for _, run in groupby(enumerate(indices), key=lambda pair: pair[1] - pair[0])
        ]

    # Three independent lists (the tuple-multiplication form aliased a single list).
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    for file_name in os.listdir(dir_path):
        if not file_name.endswith("_pitch.csv"):
            continue

        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file_name))
            voice_label = ff_df[r_config.aco_voiceLabel]
        except Exception as err:  # best effort: keep scanning the remaining files
            logger.warning("Skipping pitch file %s: %s", file_name, err)
            continue

        indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
        indices_no = [i for i, x in enumerate(voice_label) if x == "no"]

        voiced_yes = _consecutive_runs(indices_yes)
        voiced_no = _consecutive_runs(indices_no)
        com_speech_sort = sorted(voiced_yes + voiced_no, key=lambda run: run[0])

    return com_speech_sort, voiced_yes, voiced_no
|
||||
|
||||
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
|
||||
"""
|
||||
calculating jitter for each voice segment
|
||||
"""
|
||||
snd = parselmouth.Sound(audio_file)
|
||||
pitch = snd.to_pitch(time_step=.001)
|
||||
pitch = snd.to_pitch(time_step=0.001)
|
||||
|
||||
for idx, vs in enumerate(com_speech_sort):
|
||||
try:
|
||||
|
||||
jitter = np.NaN
|
||||
if vs in voiced_yes and len(vs)>1:
|
||||
if vs in voiced_yes and len(vs) > 1:
|
||||
|
||||
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||
@@ -101,7 +82,10 @@ def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_
|
||||
jitter_frames[idx] = jitter
|
||||
return jitter_frames
|
||||
|
||||
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_jitter(
|
||||
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
|
||||
):
|
||||
"""
|
||||
Preparing jitter matrix
|
||||
Args:
|
||||
@@ -110,26 +94,36 @@ def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
|
||||
"""
|
||||
dir_path = os.path.join(out_loc, ff_dir)
|
||||
if os.path.isdir(dir_path):
|
||||
voice_seg = segment_pitch(dir_path, r_config)
|
||||
if os.path.isdir(dir_path) or ff_df is not None:
|
||||
|
||||
if ff_df is not None:
|
||||
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||
else:
|
||||
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||
|
||||
jitter_frames = [np.NaN] * len(voice_seg[0])
|
||||
jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file)
|
||||
jitter_segment_frames = segment_jitter(
|
||||
voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file
|
||||
)
|
||||
|
||||
df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
|
||||
df_jitter[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_jitter[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
df_jitter['Frames'] = df_jitter.index
|
||||
df_jitter['dbm_master_url'] = video_uri
|
||||
|
||||
logger.info('Processing Output file {} '.format(out_loc))
|
||||
df_jitter["Frames"] = df_jitter.index
|
||||
df_jitter["dbm_master_url"] = video_uri
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
||||
|
||||
df = df_jitter
|
||||
else:
|
||||
error_txt = 'error: fundamental freq not available'
|
||||
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
error_txt = "error: fundamental freq not available"
|
||||
df = empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||
return df
|
||||
|
||||
def run_jitter(video_uri, out_dir, r_config):
|
||||
|
||||
def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Processing all patient's videos for fetching jitter
|
||||
-------------------
|
||||
@@ -141,19 +135,32 @@ def run_jitter(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
error_txt = 'error: length less than 0.064'
|
||||
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
return
|
||||
|
||||
calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
error_txt = "error: length less than 0.064"
|
||||
df = empty_jitter(
|
||||
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||
)
|
||||
else:
|
||||
df = calc_jitter(
|
||||
video_uri,
|
||||
audio_file,
|
||||
out_loc,
|
||||
fl_name,
|
||||
r_config,
|
||||
save=save,
|
||||
ff_df=ff_df,
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
logger.error("Error in jitter: {}".format(e))
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -4,41 +4,74 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
mfcc_dir = 'acoustic/mfcc'
|
||||
csv_ext = '_mfcc.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
mfcc_dir = "acoustic/mfcc"
|
||||
csv_ext = "_mfcc.csv"
|
||||
error_txt = "error: length less than 0.064"
|
||||
|
||||
def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build the one-row placeholder MFCC frame used when MFCCs cannot be computed.

    Args:
        video_uri: video path/URI; stored in the ``dbm_master_url`` column.
        out_loc: output location passed through to ``ut.save_output``.
        fl_name: file name passed through to ``ut.save_output``.
        r_config: config object supplying ``aco_mfcc1`` .. ``aco_mfcc12`` and
            ``err_reason`` column names.
        save: (bool) when True the frame is handed to ``ut.save_output``;
            when False it is only returned.
    Returns:
        (pandas.DataFrame) single row with NaN for ``Frames`` and the twelve
        MFCC columns, the module-level ``error_txt`` as error reason, and the
        video URI.
    """
    # Generate the 12 coefficient column names so columns and NaN fillers stay in step.
    mfcc_cols = [getattr(r_config, "aco_mfcc" + str(i)) for i in range(1, 13)]
    cols = ["Frames"] + mfcc_cols + [r_config.err_reason]

    # One NaN for "Frames" plus one per coefficient, followed by the error text.
    out_val = [[np.nan] * (len(mfcc_cols) + 1) + [error_txt]]
    df_mfcc = pd.DataFrame(out_val, columns=cols)
    df_mfcc["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)

    return df_mfcc
|
||||
|
||||
|
||||
def audio_mfcc(path):
|
||||
"""
|
||||
Using parselmouth library fetching mfccs
|
||||
@@ -48,12 +81,13 @@ def audio_mfcc(path):
|
||||
(list) list of mfccs for each voice frame
|
||||
"""
|
||||
sound = parselmouth.Sound(path)
|
||||
mfcc_object = sound.to_mfcc(time_step=.001,number_of_coefficients=12)
|
||||
mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12)
|
||||
mfccs = mfcc_object.to_array()
|
||||
mfccs = np.delete(mfccs, (0), axis=0)
|
||||
return mfccs
|
||||
|
||||
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||
"""
|
||||
Preparing mfcc matrix
|
||||
Args:
|
||||
@@ -65,19 +99,23 @@ def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
dict_ = {}
|
||||
mfccs = audio_mfcc(audio_file)
|
||||
|
||||
for i in range(1,13):
|
||||
conf_str = r_config.base_raw['raw_feature']
|
||||
dict_[conf_str['aco_mfcc' + str(i)]] = mfccs[i-1, :]
|
||||
for i in range(1, 13):
|
||||
conf_str = r_config.base_raw["raw_feature"]
|
||||
dict_[conf_str["aco_mfcc" + str(i)]] = mfccs[i - 1, :]
|
||||
|
||||
df = pd.DataFrame(dict_)
|
||||
df['Frames'] = df.index
|
||||
df["Frames"] = df.index
|
||||
|
||||
df[r_config.err_reason] = 'Pass'# may replace based on threshold in future release
|
||||
df['dbm_master_url'] = video_uri
|
||||
df[r_config.err_reason] = "Pass" # may replace based on threshold in future release
|
||||
df["dbm_master_url"] = video_uri
|
||||
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
|
||||
return df
|
||||
|
||||
def run_mfcc(video_uri, out_dir, r_config):
|
||||
|
||||
def run_mfcc(video_uri, out_dir, r_config, save=True):
|
||||
"""
|
||||
Processing all patients to fetch mfccs
|
||||
|
||||
@@ -88,18 +126,22 @@ def run_mfcc(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
empty_mfcc(video_uri, out_loc, fl_name, r_config)
|
||||
return
|
||||
return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save)
|
||||
|
||||
calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
return calc_mfcc(
|
||||
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -4,23 +4,25 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
from pydub import AudioSegment
|
||||
import librosa
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import webrtcvad
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import webrtcvad
|
||||
from pydub import AudioSegment
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
pause_seg_dir = "acoustic/pause_segment"
|
||||
csv_ext = "_pausechar.csv"
|
||||
|
||||
pause_seg_dir = 'acoustic/pause_segment'
|
||||
csv_ext = '_pausechar.csv'
|
||||
|
||||
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
|
||||
"""
|
||||
@@ -37,23 +39,25 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
|
||||
pause_len = np.zeros(num_pauses)
|
||||
|
||||
for p in range(num_pauses):
|
||||
pause_len[p] = seg_starts_sec[p+1] - seg_ends_sec[p]
|
||||
pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p]
|
||||
|
||||
if len(pause_len)>0:
|
||||
pause_len_mean = np.mean(pause_len)
|
||||
pause_len_std = np.std(pause_len)
|
||||
if len(pause_len) > 0:
|
||||
pause_time = np.sum(pause_len)
|
||||
|
||||
else:
|
||||
pause_len_mean = 0
|
||||
pause_len_std = 0
|
||||
pause_time = 0
|
||||
|
||||
pause_frac = pause_time / total_time
|
||||
timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
|
||||
r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac}
|
||||
timing_dict = {
|
||||
r_config.aco_totaltime: total_time,
|
||||
r_config.aco_speakingtime: speaking_time,
|
||||
r_config.aco_numpauses: num_pauses,
|
||||
r_config.aco_pausetime: pause_time,
|
||||
r_config.aco_pausefrac: pause_frac,
|
||||
}
|
||||
return timing_dict
|
||||
|
||||
|
||||
def process_silence(audio_file, r_config):
|
||||
"""
|
||||
Returns dataframe for pause between words using voice activity detection
|
||||
@@ -69,59 +73,75 @@ def process_silence(audio_file, r_config):
|
||||
aggressiveness = 3
|
||||
frame_dur_ms = 20
|
||||
|
||||
#pause segment(long & short pad)
|
||||
# pause segment(long & short pad)
|
||||
long_pad_around_voice_ms = 200
|
||||
short_pad_around_voice_ms = 100
|
||||
|
||||
if len(y)>0:
|
||||
if len(y) > 0:
|
||||
vad = webrtcvad.Vad(aggressiveness)
|
||||
|
||||
frames = vu.frame_generator(frame_dur_ms, y, sr)
|
||||
frames = list(frames)
|
||||
|
||||
#longer pad time screens out little blips, but misses short silences
|
||||
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames)
|
||||
# longer pad time screens out little blips, but misses short silences
|
||||
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(
|
||||
sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames
|
||||
)
|
||||
|
||||
#Logic to handle blank audio file
|
||||
# Logic to handle blank audio file
|
||||
if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
t_start = long_seg_starts[0]
|
||||
t_end = long_seg_ends[-1]
|
||||
# shorter pad time captures short silences (but misfires on little blips)
|
||||
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames)
|
||||
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(
|
||||
sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames
|
||||
)
|
||||
|
||||
seg_starts = []
|
||||
seg_ends = []
|
||||
for k in range(len(short_seg_starts)): # logic to clean up some typical misfires
|
||||
if (short_seg_starts[k] >=t_start) and (short_seg_starts[k] <= t_end):
|
||||
for k in range(
|
||||
len(short_seg_starts)
|
||||
): # logic to clean up some typical misfires
|
||||
if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end):
|
||||
|
||||
seg_starts.append(short_seg_starts[k])
|
||||
seg_ends.append(short_seg_ends[k])
|
||||
if len(seg_starts) == 0 or len(seg_ends) == 0:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
|
||||
feat_dict_list.append(timing_dict)
|
||||
|
||||
df = pd.DataFrame(feat_dict_list)
|
||||
df[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df[r_config.err_reason] = "Pass" # will replace with threshold in future release
|
||||
return df
|
||||
|
||||
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Build the one-row placeholder pause-segment frame used when processing fails.

    Args:
        video_uri: video path/URI; stored in the ``dbm_master_url`` column.
        out_loc: output location passed through to ``ut.save_output``.
        fl_name: file name passed through to ``ut.save_output``.
        r_config: config object supplying the ``aco_totaltime``,
            ``aco_speakingtime``, ``aco_numpauses``, ``aco_pausetime``,
            ``aco_pausefrac`` and ``err_reason`` column names.
        error_txt: (str) reason stored in the ``err_reason`` column.
        save: (bool) when True the frame is handed to ``ut.save_output``;
            when False it is only returned.
    Returns:
        (pandas.DataFrame) single row with NaN for every timing column, plus
        the error reason and the video URI.
    """
    feature_cols = [
        r_config.aco_totaltime,
        r_config.aco_speakingtime,
        r_config.aco_numpauses,
        r_config.aco_pausetime,
        r_config.aco_pausefrac,
    ]
    cols = feature_cols + [r_config.err_reason]

    # One NaN per timing feature, followed by the error text.
    out_val = [[np.nan] * len(feature_cols) + [error_txt]]
    df_pause = pd.DataFrame(out_val, columns=cols)
    df_pause["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
    return df_pause
|
||||
|
||||
def run_pause_segment(video_uri, out_dir, r_config):
|
||||
|
||||
def run_pause_segment(video_uri, out_dir, r_config, save=True):
|
||||
"""
|
||||
Processing all patient's for getting Pause Segment
|
||||
---------------
|
||||
@@ -133,39 +153,48 @@ def run_pause_segment(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
error_txt = 'error: length less than 0.064'
|
||||
error_txt = "error: length less than 0.064"
|
||||
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
return
|
||||
|
||||
logger.info('Converting stereo sound to mono-lD')
|
||||
logger.info("Converting stereo sound to mono-lD")
|
||||
sound_mono = AudioSegment.from_wav(audio_file)
|
||||
sound_mono = sound_mono.set_channels(1)
|
||||
sound_mono = sound_mono.set_frame_rate(48000)
|
||||
|
||||
mono_wav = os.path.join(input_loc, fl_name + '_mono.wav')
|
||||
mono_wav = os.path.join(input_loc, fl_name + "_mono.wav")
|
||||
sound_mono.export(mono_wav, format="wav")
|
||||
|
||||
df_pause_seg = process_silence(mono_wav, r_config)
|
||||
os.remove(mono_wav)#removing mono wav file
|
||||
os.remove(mono_wav) # removing mono wav file
|
||||
|
||||
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg)>0:
|
||||
logger.info('Processing Output file {} '.format(out_loc))
|
||||
|
||||
df_pause_seg['dbm_master_url'] = video_uri
|
||||
ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext)
|
||||
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
|
||||
df_pause_seg["dbm_master_url"] = video_uri
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(
|
||||
df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext
|
||||
)
|
||||
df = df_pause_seg
|
||||
|
||||
else:
|
||||
error_txt = 'error: webrtcvad returns no segment'
|
||||
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
error_txt = "error: webrtcvad returns no segment"
|
||||
df = empty_pause_segment(
|
||||
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||
)
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file", str(e))
|
||||
|
||||
@@ -4,23 +4,24 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
from os.path import join
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
ff_dir = "acoustic/pitch"
|
||||
csv_ext = "_pitch.csv"
|
||||
error_txt = "error: length less than 0.064"
|
||||
|
||||
ff_dir = 'acoustic/pitch'
|
||||
csv_ext = '_pitch.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def audio_pitch(path):
|
||||
"""
|
||||
@@ -31,12 +32,13 @@ def audio_pitch(path):
|
||||
(list) list of pitch/fundamental frequency for each voice frame
|
||||
"""
|
||||
sound_pat = parselmouth.Sound(path)
|
||||
pitch = sound_pat.to_pitch(time_step=.001)
|
||||
pitch_values = pitch.selected_array['frequency']
|
||||
pitch = sound_pat.to_pitch(time_step=0.001)
|
||||
pitch_values = pitch.selected_array["frequency"]
|
||||
|
||||
return list(pitch_values)
|
||||
|
||||
def label_speech(row,fd_freq):
|
||||
|
||||
def label_speech(row, fd_freq):
|
||||
"""
|
||||
identify whether frame is voiced or not
|
||||
Args:
|
||||
@@ -44,12 +46,13 @@ def label_speech(row,fd_freq):
|
||||
Returns:
|
||||
(str) yes or no indicator for voice
|
||||
"""
|
||||
if row[fd_freq] > 0 :
|
||||
return 'yes'
|
||||
if row[fd_freq] > 0:
|
||||
return "yes"
|
||||
else:
|
||||
return 'no'
|
||||
return "no"
|
||||
|
||||
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||
|
||||
"""
|
||||
Preparing pitch frequency matrix
|
||||
@@ -62,28 +65,45 @@ def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
ff_frames = audio_pitch(audio_file)
|
||||
df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
|
||||
|
||||
df_ffreq['Frames'] = df_ffreq.index
|
||||
df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(lambda row: label_speech(row, r_config.aco_ff),axis=1)
|
||||
df_ffreq["Frames"] = df_ffreq.index
|
||||
df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(
|
||||
lambda row: label_speech(row, r_config.aco_ff), axis=1
|
||||
)
|
||||
|
||||
df_ffreq[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_ffreq['dbm_master_url'] = video_uri
|
||||
df_ffreq[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
df_ffreq["dbm_master_url"] = video_uri
|
||||
|
||||
logger.info('Processing Output file {} '.format(out_loc))
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
||||
return df_ffreq
|
||||
|
||||
def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build the one-row placeholder pitch frame used when pitch cannot be computed.

    Args:
        video_uri: video path/URI; stored in the ``dbm_master_url`` column.
        out_loc: output location passed through to ``ut.save_output``.
        fl_name: file name passed through to ``ut.save_output``.
        r_config: config object supplying the ``aco_ff``, ``aco_voiceLabel``
            and ``err_reason`` column names.
        save: (bool) when True the frame is handed to ``ut.save_output``;
            when False it is only returned.
    Returns:
        (pandas.DataFrame) single row with NaN for ``Frames`` and the pitch
        column, voice label "no", the module-level ``error_txt`` as error
        reason, and the video URI.
    """
    cols = [
        "Frames",
        r_config.aco_ff,
        r_config.aco_voiceLabel,
        r_config.err_reason,
    ]
    df_ffreq = pd.DataFrame([[np.nan, np.nan, "no", error_txt]], columns=cols)
    df_ffreq["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
|
||||
|
||||
def run_pitch(video_uri, out_dir, r_config):
|
||||
|
||||
def run_pitch(video_uri, out_dir, r_config, save=True):
|
||||
|
||||
"""
|
||||
Processing audio for fetching pitch
|
||||
@@ -96,18 +116,24 @@ def run_pitch(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
empty_pitch(video_uri, out_loc, fl_name, r_config)
|
||||
return
|
||||
df = empty_pitch(video_uri, out_loc, fl_name, r_config, save=save)
|
||||
else:
|
||||
df = calc_pitch(
|
||||
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||
)
|
||||
return df
|
||||
|
||||
calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
@@ -4,26 +4,25 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
import more_itertools as mit
|
||||
import logging
|
||||
import os
|
||||
from os.path import join
|
||||
|
||||
import logging
|
||||
import more_itertools as mit
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
shimmer_dir = "acoustic/shimmer"
|
||||
ff_dir = "acoustic/pitch"
|
||||
csv_ext = "_shimmer.csv"
|
||||
|
||||
shimmer_dir = 'acoustic/shimmer'
|
||||
ff_dir = 'acoustic/pitch'
|
||||
csv_ext = '_shimmer.csv'
|
||||
|
||||
def audio_shimmer(sound):
|
||||
"""
|
||||
@@ -33,60 +32,42 @@ def audio_shimmer(sound):
|
||||
Returns:
|
||||
(list) list of shimmers for each voice frame
|
||||
"""
|
||||
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
|
||||
shimmer = parselmouth.praat.call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
|
||||
pointProcess = parselmouth.praat.call(
|
||||
sound, "To PointProcess (periodic, cc)...", 80, 500
|
||||
)
|
||||
shimmer = parselmouth.praat.call(
|
||||
[sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
|
||||
)
|
||||
return shimmer
|
||||
|
||||
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Build the one-row placeholder shimmer frame used when shimmer cannot be computed.

    Args:
        video_uri: video path/URI; stored in the ``dbm_master_url`` column.
        out_loc: output location passed through to ``ut.save_output``.
        fl_name: file name passed through to ``ut.save_output``.
        r_config: config object supplying the ``aco_shimmer`` and ``err_reason`` column names.
        error_txt: (str) reason stored in the ``err_reason`` column.
        save: (bool) when True the frame is handed to ``ut.save_output``;
            when False it is only returned.
    Returns:
        (pandas.DataFrame) single row with NaN for ``Frames`` and the shimmer
        column, plus the error reason and the video URI.
    """
    cols = ["Frames", r_config.aco_shimmer, r_config.err_reason]
    df_shimmer = pd.DataFrame([[np.nan, np.nan, error_txt]], columns=cols)
    df_shimmer["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
|
||||
|
||||
def segment_pitch(dir_path, r_config):
    """
    Group the frames of a saved pitch CSV into consecutive voiced / unvoiced runs.

    Every entry of ``dir_path`` whose name ends with ``_pitch.csv`` is read. If
    several match, the result of the last one that could be read is returned.
    Files that cannot be read or lack the voice-label column are skipped with a
    warning instead of being silently ignored.

    Args:
        dir_path: (str) directory containing the ``*_pitch.csv`` file(s).
        r_config: config object supplying the ``aco_voiceLabel`` column name.
    Returns:
        (tuple) ``(com_speech_sort, voiced_yes, voiced_no)`` where
        ``voiced_yes`` / ``voiced_no`` are lists of runs of consecutive row
        indices labelled "yes" / "no", and ``com_speech_sort`` is both lists
        merged and ordered by each run's first index. Three empty lists when
        no pitch file could be processed.
    """
    # Local import keeps this block self-contained; only the standard library is needed.
    from itertools import groupby

    def _consecutive_runs(indices):
        """Split an ascending list of ints into lists of consecutive values."""
        # Within a run of consecutive values, (value - position) is constant.
        return [
            [value for _, value in run]
            for _, run in groupby(enumerate(indices), key=lambda pair: pair[1] - pair[0])
        ]

    # Three independent lists (the tuple-multiplication form aliased a single list).
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    for file_name in os.listdir(dir_path):
        if not file_name.endswith("_pitch.csv"):
            continue

        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file_name))
            voice_label = ff_df[r_config.aco_voiceLabel]
        except Exception as err:  # best effort: keep scanning the remaining files
            logger.warning("Skipping pitch file %s: %s", file_name, err)
            continue

        indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
        indices_no = [i for i, x in enumerate(voice_label) if x == "no"]

        voiced_yes = _consecutive_runs(indices_yes)
        voiced_no = _consecutive_runs(indices_no)
        com_speech_sort = sorted(voiced_yes + voiced_no, key=lambda run: run[0])

    return com_speech_sort, voiced_yes, voiced_no
|
||||
|
||||
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
|
||||
"""
|
||||
calculating shimmer for each voice segment
|
||||
"""
|
||||
snd = parselmouth.Sound(audio_file)
|
||||
pitch = snd.to_pitch(time_step=.001)
|
||||
pitch = snd.to_pitch(time_step=0.001)
|
||||
|
||||
for idx, vs in enumerate(com_speech_sort):
|
||||
try:
|
||||
|
||||
shimmer = np.NaN
|
||||
if vs in voiced_yes and len(vs)>1:
|
||||
if vs in voiced_yes and len(vs) > 1:
|
||||
|
||||
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||
@@ -102,7 +83,10 @@ def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audi
|
||||
shimmer_frames[idx] = shimmer
|
||||
return shimmer_frames
|
||||
|
||||
def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_shimmer(
|
||||
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
|
||||
):
|
||||
"""
|
||||
Preparing shimmer matrix
|
||||
Args:
|
||||
@@ -111,26 +95,37 @@ def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
|
||||
"""
|
||||
dir_path = os.path.join(out_loc, ff_dir)
|
||||
if os.path.isdir(dir_path):
|
||||
voice_seg = segment_pitch(dir_path, r_config)
|
||||
if os.path.isdir(dir_path) or ff_df is not None:
|
||||
if ff_df is not None:
|
||||
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||
else:
|
||||
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||
|
||||
shimmer_frames = [np.NaN] * len(voice_seg[0])
|
||||
shimmer_segment_frames = segment_shimmer(voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file)
|
||||
shimmer_segment_frames = segment_shimmer(
|
||||
voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file
|
||||
)
|
||||
|
||||
df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
|
||||
df_shimmer[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_shimmer = pd.DataFrame(
|
||||
shimmer_segment_frames, columns=[r_config.aco_shimmer]
|
||||
)
|
||||
df_shimmer[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
df_shimmer['Frames'] = df_shimmer.index
|
||||
df_shimmer['dbm_master_url'] = video_uri
|
||||
|
||||
logger.info('Processing Output file {} '.format(out_loc))
|
||||
df_shimmer["Frames"] = df_shimmer.index
|
||||
df_shimmer["dbm_master_url"] = video_uri
|
||||
if save:
|
||||
logger.info("Processing Output file {} ".format(out_loc))
|
||||
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
|
||||
|
||||
df = df_shimmer
|
||||
else:
|
||||
error_txt = 'error: fundamental freq not available'
|
||||
empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
error_txt = "error: fundamental freq not available"
|
||||
df = empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||
return df
|
||||
|
||||
def run_shimmer(video_uri, out_dir, r_config):
|
||||
|
||||
def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||
"""
|
||||
Processing all patients to fetch shimmer
|
||||
---------------
|
||||
@@ -139,22 +134,33 @@ def run_shimmer(video_uri, out_dir, r_config):
|
||||
video_uri: video path; r_config: raw variable config object
|
||||
out_dir: (str) Output directory for processed output
|
||||
"""
|
||||
try:
|
||||
# try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info("Output file {} size is less than 0.064sec".format(audio_file))
|
||||
|
||||
error_txt = 'error: length less than 0.064'
|
||||
empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
return
|
||||
|
||||
calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
error_txt = "error: length less than 0.064"
|
||||
df = empty_shimmer(
|
||||
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||
)
|
||||
else:
|
||||
df = calc_shimmer(
|
||||
video_uri,
|
||||
audio_file,
|
||||
out_loc,
|
||||
fl_name,
|
||||
r_config,
|
||||
save=save,
|
||||
ff_df=ff_df,
|
||||
)
|
||||
return df
|
||||
# except Exception as e:
|
||||
# logger.error('Error in shimmer: {}'.format(e))
|
||||
# logger.error('Failed to process audio file')
|
||||
|
||||
@@ -4,22 +4,23 @@ project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import parselmouth
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
from os.path import join
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
logger = logging.getLogger()
|
||||
|
||||
vfs_dir = "acoustic/voice_frame_score"
|
||||
csv_ext = "_voiceprev.csv"
|
||||
error_txt = "error: length less than 0.064"
|
||||
|
||||
vfs_dir = 'acoustic/voice_frame_score'
|
||||
csv_ext = '_voiceprev.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def audio_pitch_frame(pitch):
|
||||
"""
|
||||
@@ -33,6 +34,7 @@ def audio_pitch_frame(pitch):
|
||||
voiced_frames = pitch.count_voiced_frames()
|
||||
return total_frames, voiced_frames
|
||||
|
||||
|
||||
def voice_segment(path):
|
||||
"""
|
||||
Using parselmouth library for fundamental frequency
|
||||
@@ -43,12 +45,13 @@ def voice_segment(path):
|
||||
"""
|
||||
sound_pat = parselmouth.Sound(path)
|
||||
pitch = sound_pat.to_pitch()
|
||||
total_frames,voiced_frames = audio_pitch_frame(pitch)
|
||||
total_frames, voiced_frames = audio_pitch_frame(pitch)
|
||||
|
||||
voiced_percentage = (voiced_frames/total_frames)*100
|
||||
voiced_percentage = (voiced_frames / total_frames) * 100
|
||||
return voiced_percentage, voiced_frames, total_frames
|
||||
|
||||
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
|
||||
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||
"""
|
||||
creating dataframe matrix for voice frame score
|
||||
Args:
|
||||
@@ -57,32 +60,44 @@ def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
|
||||
f_nm_config: Config file object
|
||||
"""
|
||||
|
||||
voice_percentage,voiced_frames, total_frames = voice_segment(audio_file)
|
||||
voice_percentage, voiced_frames, total_frames = voice_segment(audio_file)
|
||||
df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame])
|
||||
|
||||
df_vfs[r_config.aco_totVoiceFrame] = [total_frames]
|
||||
df_vfs[r_config.aco_voicePct] = [voice_percentage]
|
||||
df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
||||
df_vfs[
|
||||
r_config.err_reason
|
||||
] = "Pass" # will replace with threshold in future release
|
||||
|
||||
df_vfs['Frames'] = df_vfs.index
|
||||
df_vfs['dbm_master_url'] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
df_vfs["Frames"] = df_vfs.index
|
||||
df_vfs["dbm_master_url"] = video_uri
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
||||
return df_vfs
|
||||
|
||||
def empty_vfs(video_uri, out_loc, fl_name, r_config):
|
||||
|
||||
def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True):
|
||||
"""
|
||||
Preparing empty VFS matrix if something fails
|
||||
"""
|
||||
cols = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct, r_config.err_reason]
|
||||
cols = [
|
||||
"Frames",
|
||||
r_config.aco_voiceFrame,
|
||||
r_config.aco_totVoiceFrame,
|
||||
r_config.aco_voicePct,
|
||||
r_config.err_reason,
|
||||
]
|
||||
out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]]
|
||||
df_vfs = pd.DataFrame(out_val, columns = cols)
|
||||
df_vfs['dbm_master_url'] = video_uri
|
||||
|
||||
logger.info('Saving Output file {} '.format(out_loc))
|
||||
df_vfs = pd.DataFrame(out_val, columns=cols)
|
||||
df_vfs["dbm_master_url"] = video_uri
|
||||
if save:
|
||||
logger.info("Saving Output file {} ".format(out_loc))
|
||||
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
||||
return df_vfs
|
||||
|
||||
def run_vfs(video_uri, out_dir, r_config):
|
||||
|
||||
def run_vfs(video_uri, out_dir, r_config, save=True):
|
||||
"""
|
||||
Processing all participants for fetching voice frame score
|
||||
---------------
|
||||
@@ -94,18 +109,23 @@ def run_vfs(video_uri, out_dir, r_config):
|
||||
try:
|
||||
|
||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
||||
if len(aud_filter)>0:
|
||||
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||
if len(aud_filter) > 0:
|
||||
|
||||
audio_file = aud_filter[0]
|
||||
aud_dur = librosa.get_duration(filename=audio_file)
|
||||
aud_dur = ut.get_length(audio_file)
|
||||
|
||||
if float(aud_dur) < 0.064:
|
||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
||||
logger.info(
|
||||
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||
)
|
||||
|
||||
empty_vfs(video_uri, out_loc, fl_name, r_config)
|
||||
return
|
||||
|
||||
calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config)
|
||||
df = empty_vfs(video_uri, out_loc, fl_name, r_config, save=save)
|
||||
else:
|
||||
df = calc_vfs(
|
||||
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.error('Failed to process audio file')
|
||||
e
|
||||
logger.error("Failed to process audio file")
|
||||
|
||||
Reference in New Issue
Block a user