code refactoring only
This commit is contained in:
@@ -4,25 +4,25 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import logging
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import parselmouth
|
import parselmouth
|
||||||
import numpy as np
|
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
import glob
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
formant_dir = 'acoustic/formant_freq'
|
formant_dir = "acoustic/formant_freq"
|
||||||
csv_ext = '_formant.csv'
|
csv_ext = "_formant.csv"
|
||||||
error_txt = 'error: length less than 0.064'
|
error_txt = "error: length less than 0.064"
|
||||||
|
|
||||||
def formant_list(formant,snd):
|
|
||||||
|
def formant_list(formant, snd):
|
||||||
"""
|
"""
|
||||||
Getting formant frequency per second
|
Getting formant frequency per second
|
||||||
Args:
|
Args:
|
||||||
@@ -36,22 +36,23 @@ def formant_list(formant,snd):
|
|||||||
f3_list = []
|
f3_list = []
|
||||||
f4_list = []
|
f4_list = []
|
||||||
|
|
||||||
dur = snd.duration-0.02
|
dur = snd.duration - 0.02
|
||||||
dur_round = round(dur, 2)
|
dur_round = round(dur, 2)
|
||||||
|
|
||||||
time_list = np.arange(0.001, dur_round, 0.001)
|
time_list = np.arange(0.001, dur_round, 0.001)
|
||||||
for time in time_list:
|
for time in time_list:
|
||||||
|
|
||||||
f1 = formant.get_value_at_time(1,time)
|
f1 = formant.get_value_at_time(1, time)
|
||||||
f2 = formant.get_value_at_time(2,time)
|
f2 = formant.get_value_at_time(2, time)
|
||||||
f3 = formant.get_value_at_time(3,time)
|
f3 = formant.get_value_at_time(3, time)
|
||||||
f4 = formant.get_value_at_time(4,time)
|
f4 = formant.get_value_at_time(4, time)
|
||||||
|
|
||||||
f1_list.append(f1)
|
f1_list.append(f1)
|
||||||
f2_list.append(f2)
|
f2_list.append(f2)
|
||||||
f3_list.append(f3)
|
f3_list.append(f3)
|
||||||
f4_list.append(f4)
|
f4_list.append(f4)
|
||||||
return f1_list,f2_list,f3_list,f4_list
|
return f1_list, f2_list, f3_list, f4_list
|
||||||
|
|
||||||
|
|
||||||
def formant_score(path):
|
def formant_score(path):
|
||||||
"""
|
"""
|
||||||
@@ -62,11 +63,12 @@ def formant_score(path):
|
|||||||
(list) list of Formant freq for each voice frame
|
(list) list of Formant freq for each voice frame
|
||||||
"""
|
"""
|
||||||
sound_pat = parselmouth.Sound(path)
|
sound_pat = parselmouth.Sound(path)
|
||||||
formant = sound_pat.to_formant_burg(time_step=.001)
|
formant = sound_pat.to_formant_burg(time_step=0.001)
|
||||||
f_score = formant_list(formant,sound_pat)
|
f_score = formant_list(formant, sound_pat)
|
||||||
return f_score
|
return f_score
|
||||||
|
|
||||||
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing Formant freq matrix
|
Preparing Formant freq matrix
|
||||||
Args:
|
Args:
|
||||||
@@ -74,36 +76,51 @@ def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
out_loc: (str) Output directory; r_config: raw variable config
|
out_loc: (str) Output directory; r_config: raw variable config
|
||||||
"""
|
"""
|
||||||
|
|
||||||
f1_list,f2_list,f3_list,f4_list = formant_score(audio_file)
|
f1_list, f2_list, f3_list, f4_list = formant_score(audio_file)
|
||||||
df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1])
|
df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1])
|
||||||
|
|
||||||
df_formant[r_config.aco_fm2] = f2_list
|
df_formant[r_config.aco_fm2] = f2_list
|
||||||
df_formant[r_config.aco_fm3] = f3_list
|
df_formant[r_config.aco_fm3] = f3_list
|
||||||
df_formant[r_config.aco_fm4] = f4_list
|
df_formant[r_config.aco_fm4] = f4_list
|
||||||
|
|
||||||
df_formant.replace('', np.nan, regex=True,inplace=True)
|
df_formant.replace("", np.nan, regex=True, inplace=True)
|
||||||
df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
df_formant[
|
||||||
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
df_formant['Frames'] = df_formant.index
|
df_formant["Frames"] = df_formant.index
|
||||||
df_formant['dbm_master_url'] = video_uri
|
df_formant["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
|
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
|
||||||
|
return df_formant
|
||||||
|
|
||||||
def empty_fm(video_uri, out_loc, fl_name, r_config):
|
|
||||||
|
def empty_fm(video_uri, out_loc, fl_name, r_config, save=True):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Preparing empty formant frequency matrix if something fails
|
Preparing empty formant frequency matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason]
|
cols = [
|
||||||
|
"Frames",
|
||||||
|
r_config.aco_fm1,
|
||||||
|
r_config.aco_fm2,
|
||||||
|
r_config.aco_fm3,
|
||||||
|
r_config.aco_fm4,
|
||||||
|
r_config.err_reason,
|
||||||
|
]
|
||||||
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
|
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
|
||||||
df_fm = pd.DataFrame(out_val, columns = cols)
|
df_fm = pd.DataFrame(out_val, columns=cols)
|
||||||
df_fm['dbm_master_url'] = video_uri
|
df_fm["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
|
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
|
||||||
|
return df_fm
|
||||||
|
|
||||||
def run_formant(video_uri, out_dir, r_config):
|
|
||||||
|
def run_formant(video_uri, out_dir, r_config, save=True):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Processing all patient's for fetching Formant freq
|
Processing all patient's for fetching Formant freq
|
||||||
@@ -116,18 +133,23 @@ def run_formant(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
empty_fm(video_uri, out_loc, fl_name, r_config)
|
df = empty_fm(video_uri, out_loc, fl_name, r_config, save=save)
|
||||||
return
|
else:
|
||||||
|
df = calc_formant(
|
||||||
calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)
|
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||||
|
)
|
||||||
|
return df
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -4,24 +4,25 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
import more_itertools as mit
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import more_itertools as mit
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
gne_dir = "acoustic/glottal_noise"
|
||||||
|
ff_dir = "acoustic/pitch"
|
||||||
|
csv_ext = "_gne.csv"
|
||||||
|
|
||||||
gne_dir = 'acoustic/glottal_noise'
|
|
||||||
ff_dir = 'acoustic/pitch'
|
|
||||||
csv_ext = '_gne.csv'
|
|
||||||
|
|
||||||
def gne_ratio(sound):
|
def gne_ratio(sound):
|
||||||
"""
|
"""
|
||||||
@@ -33,62 +34,42 @@ def gne_ratio(sound):
|
|||||||
"""
|
"""
|
||||||
harmonicity_gne = sound.to_harmonicity_gne()
|
harmonicity_gne = sound.to_harmonicity_gne()
|
||||||
gne_all_bands = harmonicity_gne.values
|
gne_all_bands = harmonicity_gne.values
|
||||||
gne_all_bands = np.where(gne_all_bands==-200, np.NaN, gne_all_bands)
|
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
|
||||||
|
|
||||||
gne = np.nanmax(gne_all_bands) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
gne = np.nanmax(
|
||||||
|
gne_all_bands
|
||||||
|
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||||
return gne
|
return gne
|
||||||
|
|
||||||
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
|
|
||||||
|
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing empty GNE matrix if something fails
|
Preparing empty GNE matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = ['Frames', r_config.aco_gne, r_config.err_reason]
|
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
|
||||||
out_val = [[np.nan, np.nan, error_txt]]
|
out_val = [[np.nan, np.nan, error_txt]]
|
||||||
|
|
||||||
df_gne = pd.DataFrame(out_val, columns = cols)
|
df_gne = pd.DataFrame(out_val, columns=cols)
|
||||||
df_gne['dbm_master_url'] = video_uri
|
df_gne["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||||
|
return df_gne
|
||||||
|
|
||||||
def segment_pitch(dir_path, r_config):
|
|
||||||
"""
|
|
||||||
segmenting pitch freq for each voice segment
|
|
||||||
"""
|
|
||||||
com_speech_sort, voiced_yes, voiced_no = ([], ) * 3
|
|
||||||
for file in os.listdir(dir_path):
|
|
||||||
try:
|
|
||||||
|
|
||||||
if file.endswith('_pitch.csv'):
|
|
||||||
|
|
||||||
ff_df = pd.read_csv((dir_path+'/'+file))
|
|
||||||
voice_label = ff_df[r_config.aco_voiceLabel]
|
|
||||||
|
|
||||||
indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
|
|
||||||
voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
|
|
||||||
|
|
||||||
indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
|
|
||||||
voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]
|
|
||||||
|
|
||||||
com_speech = voiced_yes + voiced_no
|
|
||||||
com_speech_sort = sorted(com_speech, key=lambda x: x[0])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return com_speech_sort, voiced_yes, voiced_no
|
|
||||||
|
|
||||||
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
|
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
|
||||||
"""
|
"""
|
||||||
calculating gne for each voice segment
|
calculating gne for each voice segment
|
||||||
"""
|
"""
|
||||||
snd = parselmouth.Sound(audio_file)
|
snd = parselmouth.Sound(audio_file)
|
||||||
pitch = snd.to_pitch(time_step=.001)
|
pitch = snd.to_pitch(time_step=0.001)
|
||||||
|
|
||||||
for idx, vs in enumerate(com_speech_sort):
|
for idx, vs in enumerate(com_speech_sort):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
max_gne = np.NaN
|
max_gne = np.NaN
|
||||||
if vs in voiced_yes and len(vs)>1:
|
if vs in voiced_yes and len(vs) > 1:
|
||||||
|
|
||||||
start_time = pitch.get_time_from_frame_number(vs[0])
|
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||||
end_time = pitch.get_time_from_frame_number(vs[-1])
|
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||||
@@ -104,7 +85,8 @@ def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_fi
|
|||||||
gne_all_frames[idx] = max_gne
|
gne_all_frames[idx] = max_gne
|
||||||
return gne_all_frames
|
return gne_all_frames
|
||||||
|
|
||||||
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
|
||||||
"""
|
"""
|
||||||
Preparing gne matrix
|
Preparing gne matrix
|
||||||
Args:
|
Args:
|
||||||
@@ -112,26 +94,36 @@ def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
out_loc: (str) Output directory for csv's
|
out_loc: (str) Output directory for csv's
|
||||||
"""
|
"""
|
||||||
dir_path = os.path.join(out_loc, ff_dir)
|
dir_path = os.path.join(out_loc, ff_dir)
|
||||||
if os.path.isdir(dir_path):
|
if os.path.isdir(dir_path) or ff_df is not None:
|
||||||
voice_seg = segment_pitch(dir_path, r_config)
|
if ff_df is not None:
|
||||||
|
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||||
|
else:
|
||||||
|
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||||
|
|
||||||
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
||||||
gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file)
|
gne_segment_frames = segment_gne(
|
||||||
|
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
|
||||||
|
)
|
||||||
|
|
||||||
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
||||||
df_gne[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
df_gne[
|
||||||
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
df_gne['Frames'] = df_gne.index
|
df_gne["Frames"] = df_gne.index
|
||||||
df_gne['dbm_master_url'] = video_uri
|
df_gne["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Processing Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||||
|
return df_gne
|
||||||
|
|
||||||
else:
|
else:
|
||||||
error_txt = 'error: pitch freq not available'
|
error_txt = "error: pitch freq not available"
|
||||||
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
|
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||||
|
|
||||||
def run_gne(video_uri, out_dir, r_config):
|
|
||||||
|
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||||
"""
|
"""
|
||||||
Processing all patient's for fetching glottal noise ratio
|
Processing all patient's for fetching glottal noise ratio
|
||||||
---------------
|
---------------
|
||||||
@@ -143,19 +135,32 @@ def run_gne(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
error_txt = 'error: length less than 0.064'
|
error_txt = "error: length less than 0.064"
|
||||||
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
|
df = empty_gne(
|
||||||
return
|
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||||
|
)
|
||||||
calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)
|
else:
|
||||||
|
df = calc_gne(
|
||||||
|
video_uri,
|
||||||
|
audio_file,
|
||||||
|
out_loc,
|
||||||
|
fl_name,
|
||||||
|
r_config,
|
||||||
|
save=save,
|
||||||
|
ff_df=ff_df,
|
||||||
|
)
|
||||||
|
return df
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -1,77 +1,133 @@
|
|||||||
"""
|
"""
|
||||||
file_name: hnr
|
file_name: gne
|
||||||
project_name: DBM
|
project_name: DBM
|
||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import more_itertools as mit
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
hnr_dir = 'acoustic/harmonic_noise'
|
gne_dir = "acoustic/glottal_noise"
|
||||||
csv_ext = '_hnr.csv'
|
ff_dir = "acoustic/pitch"
|
||||||
error_txt = 'error: length less than 0.064'
|
csv_ext = "_gne.csv"
|
||||||
|
|
||||||
def hnr_ratio(filepath):
|
|
||||||
|
def gne_ratio(sound):
|
||||||
"""
|
"""
|
||||||
Using parselmouth library fetching harmonic noise ratio ratio
|
Using parselmouth library fetching glottal noise excitation ratio
|
||||||
Args:
|
Args:
|
||||||
path: (.wav) audio file location
|
sound: parselmouth object
|
||||||
Returns:
|
Returns:
|
||||||
(list) list of hnr ratio for each voice frame, min,max and mean hnr
|
(list) list of gne ratio for each voice frame
|
||||||
"""
|
"""
|
||||||
sound = parselmouth.Sound(filepath)
|
harmonicity_gne = sound.to_harmonicity_gne()
|
||||||
harmonicity = sound.to_harmonicity_ac(time_step=.001)
|
gne_all_bands = harmonicity_gne.values
|
||||||
|
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
|
||||||
|
|
||||||
hnr_all_frames = harmonicity.values#[harmonicity.values != -200] nan it (****)
|
gne = np.nanmax(
|
||||||
hnr_all_frames = np.where(hnr_all_frames==-200, np.NaN, hnr_all_frames)
|
gne_all_bands
|
||||||
return hnr_all_frames.transpose()
|
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||||
|
return gne
|
||||||
|
|
||||||
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing harmonic noise matrix
|
Preparing empty GNE matrix if something fails
|
||||||
|
"""
|
||||||
|
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
|
||||||
|
out_val = [[np.nan, np.nan, error_txt]]
|
||||||
|
|
||||||
|
df_gne = pd.DataFrame(out_val, columns=cols)
|
||||||
|
df_gne["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
|
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||||
|
return df_gne
|
||||||
|
|
||||||
|
|
||||||
|
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
|
||||||
|
"""
|
||||||
|
calculating gne for each voice segment
|
||||||
|
"""
|
||||||
|
snd = parselmouth.Sound(audio_file)
|
||||||
|
pitch = snd.to_pitch(time_step=0.001)
|
||||||
|
|
||||||
|
for idx, vs in enumerate(com_speech_sort):
|
||||||
|
try:
|
||||||
|
|
||||||
|
max_gne = np.NaN
|
||||||
|
if vs in voiced_yes and len(vs) > 1:
|
||||||
|
|
||||||
|
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||||
|
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||||
|
|
||||||
|
snd_start = int(snd.get_frame_number_from_time(start_time))
|
||||||
|
snd_end = int(snd.get_frame_number_from_time(end_time))
|
||||||
|
|
||||||
|
samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
|
||||||
|
max_gne = gne_ratio(samples)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
gne_all_frames[idx] = max_gne
|
||||||
|
return gne_all_frames
|
||||||
|
|
||||||
|
|
||||||
|
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
|
||||||
|
"""
|
||||||
|
Preparing gne matrix
|
||||||
Args:
|
Args:
|
||||||
audio_file: (.wav) parsed audio file
|
audio_file: (.wav) parsed audio file
|
||||||
out_loc: (str) Output directory for csv's
|
out_loc: (str) Output directory for csv's
|
||||||
"""
|
"""
|
||||||
|
dir_path = os.path.join(out_loc, ff_dir)
|
||||||
|
if os.path.isdir(dir_path) or ff_df is not None:
|
||||||
|
if ff_df is not None:
|
||||||
|
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||||
|
else:
|
||||||
|
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||||
|
|
||||||
hnr_all_frames = hnr_ratio(audio_file)
|
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
||||||
df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr])
|
gne_segment_frames = segment_gne(
|
||||||
|
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
|
||||||
|
)
|
||||||
|
|
||||||
df_hnr['Frames'] = df_hnr.index
|
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
||||||
df_hnr['dbm_master_url'] = video_uri
|
df_gne[
|
||||||
df_hnr[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
df_gne["Frames"] = df_gne.index
|
||||||
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
|
df_gne["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
def empty_hnr(video_uri, out_loc, fl_name, r_config):
|
if save:
|
||||||
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
|
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||||
|
return df_gne
|
||||||
|
|
||||||
|
else:
|
||||||
|
error_txt = "error: pitch freq not available"
|
||||||
|
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||||
|
|
||||||
|
|
||||||
|
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||||
"""
|
"""
|
||||||
Preparing empty HNR matrix if something fails
|
Processing all patient's for fetching glottal noise ratio
|
||||||
"""
|
---------------
|
||||||
cols = ['Frames', r_config.aco_hnr, r_config.err_reason]
|
---------------
|
||||||
out_val = [[np.nan, np.nan, error_txt]]
|
|
||||||
df_hnr = pd.DataFrame(out_val, columns = cols)
|
|
||||||
df_hnr['dbm_master_url'] = video_uri
|
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
|
||||||
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
|
|
||||||
|
|
||||||
def run_hnr(video_uri, out_dir, r_config):
|
|
||||||
"""
|
|
||||||
Processing all patient's for fetching harmonic noise ratio
|
|
||||||
-------------------
|
|
||||||
-------------------
|
|
||||||
Args:
|
Args:
|
||||||
video_uri: video path; r_config: raw variable config object
|
video_uri: video path; r_config: raw variable config object
|
||||||
out_dir: (str) Output directory for processed output
|
out_dir: (str) Output directory for processed output
|
||||||
@@ -79,18 +135,32 @@ def run_hnr(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
empty_hnr(video_uri, out_loc, fl_name, r_config)
|
error_txt = "error: length less than 0.064"
|
||||||
return
|
df = empty_gne(
|
||||||
|
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||||
calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)
|
)
|
||||||
|
else:
|
||||||
|
df = calc_gne(
|
||||||
|
video_uri,
|
||||||
|
audio_file,
|
||||||
|
out_loc,
|
||||||
|
fl_name,
|
||||||
|
r_config,
|
||||||
|
save=save,
|
||||||
|
ff_df=ff_df,
|
||||||
|
)
|
||||||
|
return df
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -1,73 +1,133 @@
|
|||||||
"""
|
"""
|
||||||
file_name: intensity
|
file_name: gne
|
||||||
project_name: DBM
|
project_name: DBM
|
||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import more_itertools as mit
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
intensity_dir = 'acoustic/intensity'
|
gne_dir = "acoustic/glottal_noise"
|
||||||
csv_ext = '_intensity.csv'
|
ff_dir = "acoustic/pitch"
|
||||||
error_txt = 'error: length less than 0.064'
|
csv_ext = "_gne.csv"
|
||||||
|
|
||||||
def intensity_score(path):
|
|
||||||
|
def gne_ratio(sound):
|
||||||
"""
|
"""
|
||||||
Using parselmouth library fetching Intensity
|
Using parselmouth library fetching glottal noise excitation ratio
|
||||||
Args:
|
Args:
|
||||||
path: (.wav) audio file location
|
sound: parselmouth object
|
||||||
Returns:
|
Returns:
|
||||||
(list) list of Intensity for each voice frame
|
(list) list of gne ratio for each voice frame
|
||||||
"""
|
"""
|
||||||
sound_pat = parselmouth.Sound(path)
|
harmonicity_gne = sound.to_harmonicity_gne()
|
||||||
intensity = sound_pat.to_intensity(time_step=.001)
|
gne_all_bands = harmonicity_gne.values
|
||||||
return intensity.values[0]
|
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
|
||||||
|
|
||||||
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
|
gne = np.nanmax(
|
||||||
|
gne_all_bands
|
||||||
|
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
|
||||||
|
return gne
|
||||||
|
|
||||||
|
|
||||||
|
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing Intensity matrix
|
Preparing empty GNE matrix if something fails
|
||||||
|
"""
|
||||||
|
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
|
||||||
|
out_val = [[np.nan, np.nan, error_txt]]
|
||||||
|
|
||||||
|
df_gne = pd.DataFrame(out_val, columns=cols)
|
||||||
|
df_gne["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
|
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||||
|
return df_gne
|
||||||
|
|
||||||
|
|
||||||
|
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
|
||||||
|
"""
|
||||||
|
calculating gne for each voice segment
|
||||||
|
"""
|
||||||
|
snd = parselmouth.Sound(audio_file)
|
||||||
|
pitch = snd.to_pitch(time_step=0.001)
|
||||||
|
|
||||||
|
for idx, vs in enumerate(com_speech_sort):
|
||||||
|
try:
|
||||||
|
|
||||||
|
max_gne = np.NaN
|
||||||
|
if vs in voiced_yes and len(vs) > 1:
|
||||||
|
|
||||||
|
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||||
|
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||||
|
|
||||||
|
snd_start = int(snd.get_frame_number_from_time(start_time))
|
||||||
|
snd_end = int(snd.get_frame_number_from_time(end_time))
|
||||||
|
|
||||||
|
samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
|
||||||
|
max_gne = gne_ratio(samples)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
gne_all_frames[idx] = max_gne
|
||||||
|
return gne_all_frames
|
||||||
|
|
||||||
|
|
||||||
|
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
|
||||||
|
"""
|
||||||
|
Preparing gne matrix
|
||||||
Args:
|
Args:
|
||||||
audio_file: (.wav) parsed audio file
|
audio_file: (.wav) parsed audio file
|
||||||
out_loc: (str) Output directory for csv's
|
out_loc: (str) Output directory for csv's
|
||||||
"""
|
"""
|
||||||
|
dir_path = os.path.join(out_loc, ff_dir)
|
||||||
|
if os.path.isdir(dir_path) or ff_df is not None:
|
||||||
|
if ff_df is not None:
|
||||||
|
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||||
|
else:
|
||||||
|
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||||
|
|
||||||
intensity_frames = intensity_score(audio_file)
|
gne_all_frames = [np.NaN] * len(voice_seg[0])
|
||||||
df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int])
|
gne_segment_frames = segment_gne(
|
||||||
|
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
|
||||||
|
)
|
||||||
|
|
||||||
df_intensity['Frames'] = df_intensity.index
|
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
|
||||||
df_intensity['dbm_master_url'] = video_uri
|
df_gne[
|
||||||
df_intensity[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
df_gne["Frames"] = df_gne.index
|
||||||
ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext)
|
df_gne["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
def empty_intensity(video_uri, out_loc, fl_name, r_config):
|
if save:
|
||||||
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
|
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||||
|
return df_gne
|
||||||
|
|
||||||
|
else:
|
||||||
|
error_txt = "error: pitch freq not available"
|
||||||
|
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||||
|
|
||||||
|
|
||||||
|
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||||
"""
|
"""
|
||||||
Preparing empty Intensity matrix if something fails
|
Processing all patient's for fetching glottal noise ratio
|
||||||
"""
|
---------------
|
||||||
cols = ['Frames', r_config.aco_int, r_config.err_reason]
|
---------------
|
||||||
out_val = [[np.nan, np.nan, error_txt]]
|
|
||||||
df_int = pd.DataFrame(out_val, columns = cols)
|
|
||||||
df_int['dbm_master_url'] = video_uri
|
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
|
||||||
ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
|
|
||||||
|
|
||||||
def run_intensity(video_uri, out_dir, r_config):
|
|
||||||
"""
|
|
||||||
Processing all patient's for fetching Intensity
|
|
||||||
-------------------
|
|
||||||
-------------------
|
|
||||||
Args:
|
Args:
|
||||||
video_uri: video path; r_config: raw variable config object
|
video_uri: video path; r_config: raw variable config object
|
||||||
out_dir: (str) Output directory for processed output
|
out_dir: (str) Output directory for processed output
|
||||||
@@ -75,18 +135,32 @@ def run_intensity(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
empty_intensity(video_uri, out_loc, fl_name, r_config)
|
error_txt = "error: length less than 0.064"
|
||||||
return
|
df = empty_gne(
|
||||||
|
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||||
calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)
|
)
|
||||||
|
else:
|
||||||
|
df = calc_gne(
|
||||||
|
video_uri,
|
||||||
|
audio_file,
|
||||||
|
out_loc,
|
||||||
|
fl_name,
|
||||||
|
r_config,
|
||||||
|
save=save,
|
||||||
|
ff_df=ff_df,
|
||||||
|
)
|
||||||
|
return df
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -4,25 +4,24 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
import numpy as np
|
|
||||||
import more_itertools as mit
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
jitter_dir = "acoustic/jitter"
|
||||||
|
ff_dir = "acoustic/pitch"
|
||||||
|
csv_ext = "_jitter.csv"
|
||||||
|
|
||||||
jitter_dir = 'acoustic/jitter'
|
|
||||||
ff_dir = 'acoustic/pitch'
|
|
||||||
csv_ext = '_jitter.csv'
|
|
||||||
|
|
||||||
def audio_jitter(sound):
|
def audio_jitter(sound):
|
||||||
"""
|
"""
|
||||||
@@ -32,60 +31,42 @@ def audio_jitter(sound):
|
|||||||
Returns:
|
Returns:
|
||||||
(list) list of jitters for each voice frame
|
(list) list of jitters for each voice frame
|
||||||
"""
|
"""
|
||||||
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
|
pointProcess = parselmouth.praat.call(
|
||||||
jitter = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
|
sound, "To PointProcess (periodic, cc)...", 80, 500
|
||||||
|
)
|
||||||
|
jitter = parselmouth.praat.call(
|
||||||
|
pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3
|
||||||
|
)
|
||||||
return jitter
|
return jitter
|
||||||
|
|
||||||
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
|
|
||||||
|
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing empty jitter matrix if something fails
|
Preparing empty jitter matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = ['Frames', r_config.aco_jitter, r_config.err_reason]
|
cols = ["Frames", r_config.aco_jitter, r_config.err_reason]
|
||||||
out_val = [[np.nan, np.nan, error_txt]]
|
out_val = [[np.nan, np.nan, error_txt]]
|
||||||
df_jitter = pd.DataFrame(out_val, columns = cols)
|
df_jitter = pd.DataFrame(out_val, columns=cols)
|
||||||
df_jitter['dbm_master_url'] = video_uri
|
df_jitter["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
||||||
|
return df_jitter
|
||||||
|
|
||||||
def segment_pitch(dir_path, r_config):
|
|
||||||
"""
|
|
||||||
segmenting pitch freq for each voice segment
|
|
||||||
"""
|
|
||||||
com_speech_sort, voiced_yes, voiced_no = ([], ) * 3
|
|
||||||
for file in os.listdir(dir_path):
|
|
||||||
try:
|
|
||||||
|
|
||||||
if file.endswith('_pitch.csv'):
|
|
||||||
|
|
||||||
ff_df = pd.read_csv((dir_path+'/'+file))
|
|
||||||
voice_label = ff_df[r_config.aco_voiceLabel]
|
|
||||||
|
|
||||||
indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
|
|
||||||
voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
|
|
||||||
|
|
||||||
indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
|
|
||||||
voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]
|
|
||||||
|
|
||||||
com_speech = voiced_yes + voiced_no
|
|
||||||
com_speech_sort = sorted(com_speech, key=lambda x: x[0])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return com_speech_sort, voiced_yes, voiced_no
|
|
||||||
|
|
||||||
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
|
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
|
||||||
"""
|
"""
|
||||||
calculating jitter for each voice segment
|
calculating jitter for each voice segment
|
||||||
"""
|
"""
|
||||||
snd = parselmouth.Sound(audio_file)
|
snd = parselmouth.Sound(audio_file)
|
||||||
pitch = snd.to_pitch(time_step=.001)
|
pitch = snd.to_pitch(time_step=0.001)
|
||||||
|
|
||||||
for idx, vs in enumerate(com_speech_sort):
|
for idx, vs in enumerate(com_speech_sort):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
jitter = np.NaN
|
jitter = np.NaN
|
||||||
if vs in voiced_yes and len(vs)>1:
|
if vs in voiced_yes and len(vs) > 1:
|
||||||
|
|
||||||
start_time = pitch.get_time_from_frame_number(vs[0])
|
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||||
end_time = pitch.get_time_from_frame_number(vs[-1])
|
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||||
@@ -101,7 +82,10 @@ def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_
|
|||||||
jitter_frames[idx] = jitter
|
jitter_frames[idx] = jitter
|
||||||
return jitter_frames
|
return jitter_frames
|
||||||
|
|
||||||
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_jitter(
|
||||||
|
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Preparing jitter matrix
|
Preparing jitter matrix
|
||||||
Args:
|
Args:
|
||||||
@@ -110,26 +94,36 @@ def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
|
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
|
||||||
"""
|
"""
|
||||||
dir_path = os.path.join(out_loc, ff_dir)
|
dir_path = os.path.join(out_loc, ff_dir)
|
||||||
if os.path.isdir(dir_path):
|
if os.path.isdir(dir_path) or ff_df is not None:
|
||||||
voice_seg = segment_pitch(dir_path, r_config)
|
|
||||||
|
if ff_df is not None:
|
||||||
|
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||||
|
else:
|
||||||
|
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||||
|
|
||||||
jitter_frames = [np.NaN] * len(voice_seg[0])
|
jitter_frames = [np.NaN] * len(voice_seg[0])
|
||||||
jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file)
|
jitter_segment_frames = segment_jitter(
|
||||||
|
voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file
|
||||||
|
)
|
||||||
|
|
||||||
df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
|
df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
|
||||||
df_jitter[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
df_jitter[
|
||||||
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
df_jitter['Frames'] = df_jitter.index
|
df_jitter["Frames"] = df_jitter.index
|
||||||
df_jitter['dbm_master_url'] = video_uri
|
df_jitter["dbm_master_url"] = video_uri
|
||||||
|
if save:
|
||||||
logger.info('Processing Output file {} '.format(out_loc))
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
||||||
|
df = df_jitter
|
||||||
else:
|
else:
|
||||||
error_txt = 'error: fundamental freq not available'
|
error_txt = "error: fundamental freq not available"
|
||||||
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
|
df = empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||||
|
return df
|
||||||
|
|
||||||
def run_jitter(video_uri, out_dir, r_config):
|
|
||||||
|
def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||||
"""
|
"""
|
||||||
Processing all patient's videos for fetching jitter
|
Processing all patient's videos for fetching jitter
|
||||||
-------------------
|
-------------------
|
||||||
@@ -141,19 +135,32 @@ def run_jitter(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
error_txt = 'error: length less than 0.064'
|
error_txt = "error: length less than 0.064"
|
||||||
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
|
df = empty_jitter(
|
||||||
return
|
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||||
|
)
|
||||||
calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)
|
else:
|
||||||
|
df = calc_jitter(
|
||||||
|
video_uri,
|
||||||
|
audio_file,
|
||||||
|
out_loc,
|
||||||
|
fl_name,
|
||||||
|
r_config,
|
||||||
|
save=save,
|
||||||
|
ff_df=ff_df,
|
||||||
|
)
|
||||||
|
return df
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
logger.error("Error in jitter: {}".format(e))
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -4,41 +4,74 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
import numpy as np
|
|
||||||
import librosa
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
mfcc_dir = 'acoustic/mfcc'
|
mfcc_dir = "acoustic/mfcc"
|
||||||
csv_ext = '_mfcc.csv'
|
csv_ext = "_mfcc.csv"
|
||||||
error_txt = 'error: length less than 0.064'
|
error_txt = "error: length less than 0.064"
|
||||||
|
|
||||||
def empty_mfcc(video_uri, out_loc, fl_name, r_config):
|
|
||||||
|
def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Preparing empty empty_mfcc matrix if something fails
|
Preparing empty empty_mfcc matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = ['Frames', r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4, r_config.aco_mfcc5,
|
cols = [
|
||||||
r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8, r_config.aco_mfcc9, r_config.aco_mfcc10,
|
"Frames",
|
||||||
r_config.aco_mfcc11, r_config.aco_mfcc12, r_config.err_reason]
|
r_config.aco_mfcc1,
|
||||||
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
|
r_config.aco_mfcc2,
|
||||||
error_txt]]
|
r_config.aco_mfcc3,
|
||||||
df_mfcc = pd.DataFrame(out_val, columns = cols)
|
r_config.aco_mfcc4,
|
||||||
df_mfcc['dbm_master_url'] = video_uri
|
r_config.aco_mfcc5,
|
||||||
|
r_config.aco_mfcc6,
|
||||||
|
r_config.aco_mfcc7,
|
||||||
|
r_config.aco_mfcc8,
|
||||||
|
r_config.aco_mfcc9,
|
||||||
|
r_config.aco_mfcc10,
|
||||||
|
r_config.aco_mfcc11,
|
||||||
|
r_config.aco_mfcc12,
|
||||||
|
r_config.err_reason,
|
||||||
|
]
|
||||||
|
out_val = [
|
||||||
|
[
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
np.nan,
|
||||||
|
error_txt,
|
||||||
|
]
|
||||||
|
]
|
||||||
|
df_mfcc = pd.DataFrame(out_val, columns=cols)
|
||||||
|
df_mfcc["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
|
ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
|
||||||
|
|
||||||
|
return df_mfcc
|
||||||
|
|
||||||
|
|
||||||
def audio_mfcc(path):
|
def audio_mfcc(path):
|
||||||
"""
|
"""
|
||||||
Using parselmouth library fetching mfccs
|
Using parselmouth library fetching mfccs
|
||||||
@@ -48,12 +81,13 @@ def audio_mfcc(path):
|
|||||||
(list) list of mfccs for each voice frame
|
(list) list of mfccs for each voice frame
|
||||||
"""
|
"""
|
||||||
sound = parselmouth.Sound(path)
|
sound = parselmouth.Sound(path)
|
||||||
mfcc_object = sound.to_mfcc(time_step=.001,number_of_coefficients=12)
|
mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12)
|
||||||
mfccs = mfcc_object.to_array()
|
mfccs = mfcc_object.to_array()
|
||||||
mfccs = np.delete(mfccs, (0), axis=0)
|
mfccs = np.delete(mfccs, (0), axis=0)
|
||||||
return mfccs
|
return mfccs
|
||||||
|
|
||||||
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing mfcc matrix
|
Preparing mfcc matrix
|
||||||
Args:
|
Args:
|
||||||
@@ -65,19 +99,23 @@ def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
dict_ = {}
|
dict_ = {}
|
||||||
mfccs = audio_mfcc(audio_file)
|
mfccs = audio_mfcc(audio_file)
|
||||||
|
|
||||||
for i in range(1,13):
|
for i in range(1, 13):
|
||||||
conf_str = r_config.base_raw['raw_feature']
|
conf_str = r_config.base_raw["raw_feature"]
|
||||||
dict_[conf_str['aco_mfcc' + str(i)]] = mfccs[i-1, :]
|
dict_[conf_str["aco_mfcc" + str(i)]] = mfccs[i - 1, :]
|
||||||
|
|
||||||
df = pd.DataFrame(dict_)
|
df = pd.DataFrame(dict_)
|
||||||
df['Frames'] = df.index
|
df["Frames"] = df.index
|
||||||
|
|
||||||
df[r_config.err_reason] = 'Pass'# may replace based on threshold in future release
|
df[r_config.err_reason] = "Pass" # may replace based on threshold in future release
|
||||||
df['dbm_master_url'] = video_uri
|
df["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
|
ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
|
||||||
|
return df
|
||||||
|
|
||||||
def run_mfcc(video_uri, out_dir, r_config):
|
|
||||||
|
def run_mfcc(video_uri, out_dir, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Processing all patients to fetch mfccs
|
Processing all patients to fetch mfccs
|
||||||
|
|
||||||
@@ -88,18 +126,22 @@ def run_mfcc(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
empty_mfcc(video_uri, out_loc, fl_name, r_config)
|
return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save)
|
||||||
return
|
|
||||||
|
|
||||||
calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config)
|
return calc_mfcc(
|
||||||
|
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -4,23 +4,25 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
from pydub import AudioSegment
|
|
||||||
import librosa
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import webrtcvad
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import webrtcvad
|
||||||
|
from pydub import AudioSegment
|
||||||
|
|
||||||
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
pause_seg_dir = "acoustic/pause_segment"
|
||||||
|
csv_ext = "_pausechar.csv"
|
||||||
|
|
||||||
pause_seg_dir = 'acoustic/pause_segment'
|
|
||||||
csv_ext = '_pausechar.csv'
|
|
||||||
|
|
||||||
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
|
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
|
||||||
"""
|
"""
|
||||||
@@ -37,23 +39,25 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
|
|||||||
pause_len = np.zeros(num_pauses)
|
pause_len = np.zeros(num_pauses)
|
||||||
|
|
||||||
for p in range(num_pauses):
|
for p in range(num_pauses):
|
||||||
pause_len[p] = seg_starts_sec[p+1] - seg_ends_sec[p]
|
pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p]
|
||||||
|
|
||||||
if len(pause_len)>0:
|
if len(pause_len) > 0:
|
||||||
pause_len_mean = np.mean(pause_len)
|
|
||||||
pause_len_std = np.std(pause_len)
|
|
||||||
pause_time = np.sum(pause_len)
|
pause_time = np.sum(pause_len)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pause_len_mean = 0
|
|
||||||
pause_len_std = 0
|
|
||||||
pause_time = 0
|
pause_time = 0
|
||||||
|
|
||||||
pause_frac = pause_time / total_time
|
pause_frac = pause_time / total_time
|
||||||
timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
|
timing_dict = {
|
||||||
r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac}
|
r_config.aco_totaltime: total_time,
|
||||||
|
r_config.aco_speakingtime: speaking_time,
|
||||||
|
r_config.aco_numpauses: num_pauses,
|
||||||
|
r_config.aco_pausetime: pause_time,
|
||||||
|
r_config.aco_pausefrac: pause_frac,
|
||||||
|
}
|
||||||
return timing_dict
|
return timing_dict
|
||||||
|
|
||||||
|
|
||||||
def process_silence(audio_file, r_config):
|
def process_silence(audio_file, r_config):
|
||||||
"""
|
"""
|
||||||
Returns dataframe for pause between words using voice activity detection
|
Returns dataframe for pause between words using voice activity detection
|
||||||
@@ -69,59 +73,75 @@ def process_silence(audio_file, r_config):
|
|||||||
aggressiveness = 3
|
aggressiveness = 3
|
||||||
frame_dur_ms = 20
|
frame_dur_ms = 20
|
||||||
|
|
||||||
#pause segment(long & short pad)
|
# pause segment(long & short pad)
|
||||||
long_pad_around_voice_ms = 200
|
long_pad_around_voice_ms = 200
|
||||||
short_pad_around_voice_ms = 100
|
short_pad_around_voice_ms = 100
|
||||||
|
|
||||||
if len(y)>0:
|
if len(y) > 0:
|
||||||
vad = webrtcvad.Vad(aggressiveness)
|
vad = webrtcvad.Vad(aggressiveness)
|
||||||
|
|
||||||
frames = vu.frame_generator(frame_dur_ms, y, sr)
|
frames = vu.frame_generator(frame_dur_ms, y, sr)
|
||||||
frames = list(frames)
|
frames = list(frames)
|
||||||
|
|
||||||
#longer pad time screens out little blips, but misses short silences
|
# longer pad time screens out little blips, but misses short silences
|
||||||
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames)
|
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(
|
||||||
|
sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames
|
||||||
|
)
|
||||||
|
|
||||||
#Logic to handle blank audio file
|
# Logic to handle blank audio file
|
||||||
if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
|
if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
|
||||||
return ''
|
return ""
|
||||||
|
|
||||||
t_start = long_seg_starts[0]
|
t_start = long_seg_starts[0]
|
||||||
t_end = long_seg_ends[-1]
|
t_end = long_seg_ends[-1]
|
||||||
# shorter pad time captures short silences (but misfires on little blips)
|
# shorter pad time captures short silences (but misfires on little blips)
|
||||||
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames)
|
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(
|
||||||
|
sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames
|
||||||
|
)
|
||||||
|
|
||||||
seg_starts = []
|
seg_starts = []
|
||||||
seg_ends = []
|
seg_ends = []
|
||||||
for k in range(len(short_seg_starts)): # logic to clean up some typical misfires
|
for k in range(
|
||||||
if (short_seg_starts[k] >=t_start) and (short_seg_starts[k] <= t_end):
|
len(short_seg_starts)
|
||||||
|
): # logic to clean up some typical misfires
|
||||||
|
if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end):
|
||||||
|
|
||||||
seg_starts.append(short_seg_starts[k])
|
seg_starts.append(short_seg_starts[k])
|
||||||
seg_ends.append(short_seg_ends[k])
|
seg_ends.append(short_seg_ends[k])
|
||||||
if len(seg_starts) == 0 or len(seg_ends) == 0:
|
if len(seg_starts) == 0 or len(seg_ends) == 0:
|
||||||
return ''
|
return ""
|
||||||
|
|
||||||
timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
|
timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
|
||||||
feat_dict_list.append(timing_dict)
|
feat_dict_list.append(timing_dict)
|
||||||
|
|
||||||
df = pd.DataFrame(feat_dict_list)
|
df = pd.DataFrame(feat_dict_list)
|
||||||
df[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
df[r_config.err_reason] = "Pass" # will replace with threshold in future release
|
||||||
return df
|
return df
|
||||||
|
|
||||||
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt):
|
|
||||||
|
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing empty Pause Segment matrix if something fails
|
Preparing empty Pause Segment matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses, r_config.aco_pausetime,
|
cols = [
|
||||||
r_config.aco_pausefrac, r_config.err_reason]
|
r_config.aco_totaltime,
|
||||||
|
r_config.aco_speakingtime,
|
||||||
|
r_config.aco_numpauses,
|
||||||
|
r_config.aco_pausetime,
|
||||||
|
r_config.aco_pausefrac,
|
||||||
|
r_config.err_reason,
|
||||||
|
]
|
||||||
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
|
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
|
||||||
df_pause = pd.DataFrame(out_val, columns = cols)
|
df_pause = pd.DataFrame(out_val, columns=cols)
|
||||||
df_pause['dbm_master_url'] = video_uri
|
df_pause["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
|
ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
|
||||||
|
return df_pause
|
||||||
|
|
||||||
def run_pause_segment(video_uri, out_dir, r_config):
|
|
||||||
|
def run_pause_segment(video_uri, out_dir, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Processing all patient's for getting Pause Segment
|
Processing all patient's for getting Pause Segment
|
||||||
---------------
|
---------------
|
||||||
@@ -133,39 +153,48 @@ def run_pause_segment(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
error_txt = 'error: length less than 0.064'
|
error_txt = "error: length less than 0.064"
|
||||||
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
|
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.info('Converting stereo sound to mono-lD')
|
logger.info("Converting stereo sound to mono-lD")
|
||||||
sound_mono = AudioSegment.from_wav(audio_file)
|
sound_mono = AudioSegment.from_wav(audio_file)
|
||||||
sound_mono = sound_mono.set_channels(1)
|
sound_mono = sound_mono.set_channels(1)
|
||||||
sound_mono = sound_mono.set_frame_rate(48000)
|
sound_mono = sound_mono.set_frame_rate(48000)
|
||||||
|
|
||||||
mono_wav = os.path.join(input_loc, fl_name + '_mono.wav')
|
mono_wav = os.path.join(input_loc, fl_name + "_mono.wav")
|
||||||
sound_mono.export(mono_wav, format="wav")
|
sound_mono.export(mono_wav, format="wav")
|
||||||
|
|
||||||
df_pause_seg = process_silence(mono_wav, r_config)
|
df_pause_seg = process_silence(mono_wav, r_config)
|
||||||
os.remove(mono_wav)#removing mono wav file
|
os.remove(mono_wav) # removing mono wav file
|
||||||
|
|
||||||
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg)>0:
|
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
|
||||||
logger.info('Processing Output file {} '.format(out_loc))
|
df_pause_seg["dbm_master_url"] = video_uri
|
||||||
|
if save:
|
||||||
df_pause_seg['dbm_master_url'] = video_uri
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext)
|
ut.save_output(
|
||||||
|
df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext
|
||||||
|
)
|
||||||
|
df = df_pause_seg
|
||||||
|
|
||||||
else:
|
else:
|
||||||
error_txt = 'error: webrtcvad returns no segment'
|
error_txt = "error: webrtcvad returns no segment"
|
||||||
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
|
df = empty_pause_segment(
|
||||||
|
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||||
|
)
|
||||||
|
return df
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file", str(e))
|
||||||
|
|||||||
@@ -4,23 +4,24 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
|
||||||
import librosa
|
|
||||||
import numpy as np
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
ff_dir = "acoustic/pitch"
|
||||||
|
csv_ext = "_pitch.csv"
|
||||||
|
error_txt = "error: length less than 0.064"
|
||||||
|
|
||||||
ff_dir = 'acoustic/pitch'
|
|
||||||
csv_ext = '_pitch.csv'
|
|
||||||
error_txt = 'error: length less than 0.064'
|
|
||||||
|
|
||||||
def audio_pitch(path):
|
def audio_pitch(path):
|
||||||
"""
|
"""
|
||||||
@@ -31,12 +32,13 @@ def audio_pitch(path):
|
|||||||
(list) list of pitch/fundamental frequency for each voice frame
|
(list) list of pitch/fundamental frequency for each voice frame
|
||||||
"""
|
"""
|
||||||
sound_pat = parselmouth.Sound(path)
|
sound_pat = parselmouth.Sound(path)
|
||||||
pitch = sound_pat.to_pitch(time_step=.001)
|
pitch = sound_pat.to_pitch(time_step=0.001)
|
||||||
pitch_values = pitch.selected_array['frequency']
|
pitch_values = pitch.selected_array["frequency"]
|
||||||
|
|
||||||
return list(pitch_values)
|
return list(pitch_values)
|
||||||
|
|
||||||
def label_speech(row,fd_freq):
|
|
||||||
|
def label_speech(row, fd_freq):
|
||||||
"""
|
"""
|
||||||
identify whether frame is voiced or not
|
identify whether frame is voiced or not
|
||||||
Args:
|
Args:
|
||||||
@@ -44,12 +46,13 @@ def label_speech(row,fd_freq):
|
|||||||
Returns:
|
Returns:
|
||||||
(str) yes or no indicator for voice
|
(str) yes or no indicator for voice
|
||||||
"""
|
"""
|
||||||
if row[fd_freq] > 0 :
|
if row[fd_freq] > 0:
|
||||||
return 'yes'
|
return "yes"
|
||||||
else:
|
else:
|
||||||
return 'no'
|
return "no"
|
||||||
|
|
||||||
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Preparing pitch frequency matrix
|
Preparing pitch frequency matrix
|
||||||
@@ -62,28 +65,45 @@ def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
ff_frames = audio_pitch(audio_file)
|
ff_frames = audio_pitch(audio_file)
|
||||||
df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
|
df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
|
||||||
|
|
||||||
df_ffreq['Frames'] = df_ffreq.index
|
df_ffreq["Frames"] = df_ffreq.index
|
||||||
df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(lambda row: label_speech(row, r_config.aco_ff),axis=1)
|
df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(
|
||||||
|
lambda row: label_speech(row, r_config.aco_ff), axis=1
|
||||||
|
)
|
||||||
|
|
||||||
df_ffreq[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
df_ffreq[
|
||||||
df_ffreq['dbm_master_url'] = video_uri
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
df_ffreq["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Processing Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
||||||
|
return df_ffreq
|
||||||
|
|
||||||
def empty_pitch(video_uri, out_loc, fl_name, r_config):
|
|
||||||
|
def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing empty pitch frequency matrix if something fails
|
Preparing empty pitch frequency matrix if something fails
|
||||||
"""
|
"""
|
||||||
|
|
||||||
df_ffreq = pd.DataFrame([[np.nan, np.nan, 'no', error_txt]],
|
df_ffreq = pd.DataFrame(
|
||||||
columns=['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason])
|
[[np.nan, np.nan, "no", error_txt]],
|
||||||
df_ffreq['dbm_master_url'] = video_uri
|
columns=[
|
||||||
|
"Frames",
|
||||||
|
r_config.aco_ff,
|
||||||
|
r_config.aco_voiceLabel,
|
||||||
|
r_config.err_reason,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
df_ffreq["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
||||||
|
return df_ffreq
|
||||||
|
|
||||||
def run_pitch(video_uri, out_dir, r_config):
|
|
||||||
|
def run_pitch(video_uri, out_dir, r_config, save=True):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Processing audio for fetching pitch
|
Processing audio for fetching pitch
|
||||||
@@ -96,18 +116,24 @@ def run_pitch(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
empty_pitch(video_uri, out_loc, fl_name, r_config)
|
df = empty_pitch(video_uri, out_loc, fl_name, r_config, save=save)
|
||||||
return
|
else:
|
||||||
|
df = calc_pitch(
|
||||||
|
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||||
|
)
|
||||||
|
return df
|
||||||
|
|
||||||
calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
@@ -4,26 +4,25 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import parselmouth
|
import logging
|
||||||
import librosa
|
import os
|
||||||
import numpy as np
|
|
||||||
import more_itertools as mit
|
|
||||||
from os.path import join
|
from os.path import join
|
||||||
|
|
||||||
import logging
|
import more_itertools as mit
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
shimmer_dir = "acoustic/shimmer"
|
||||||
|
ff_dir = "acoustic/pitch"
|
||||||
|
csv_ext = "_shimmer.csv"
|
||||||
|
|
||||||
shimmer_dir = 'acoustic/shimmer'
|
|
||||||
ff_dir = 'acoustic/pitch'
|
|
||||||
csv_ext = '_shimmer.csv'
|
|
||||||
|
|
||||||
def audio_shimmer(sound):
|
def audio_shimmer(sound):
|
||||||
"""
|
"""
|
||||||
@@ -33,60 +32,42 @@ def audio_shimmer(sound):
|
|||||||
Returns:
|
Returns:
|
||||||
(list) list of shimmers for each voice frame
|
(list) list of shimmers for each voice frame
|
||||||
"""
|
"""
|
||||||
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
|
pointProcess = parselmouth.praat.call(
|
||||||
shimmer = parselmouth.praat.call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
|
sound, "To PointProcess (periodic, cc)...", 80, 500
|
||||||
|
)
|
||||||
|
shimmer = parselmouth.praat.call(
|
||||||
|
[sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
|
||||||
|
)
|
||||||
return shimmer
|
return shimmer
|
||||||
|
|
||||||
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt):
|
|
||||||
|
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing empty shimmer matrix if something fails
|
Preparing empty shimmer matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = ['Frames', r_config.aco_shimmer, r_config.err_reason]
|
cols = ["Frames", r_config.aco_shimmer, r_config.err_reason]
|
||||||
out_val = [[np.nan, np.nan, error_txt]]
|
out_val = [[np.nan, np.nan, error_txt]]
|
||||||
df_shimmer = pd.DataFrame(out_val, columns = cols)
|
df_shimmer = pd.DataFrame(out_val, columns=cols)
|
||||||
df_shimmer['dbm_master_url'] = video_uri
|
df_shimmer["dbm_master_url"] = video_uri
|
||||||
|
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
if save:
|
||||||
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
|
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
|
||||||
|
return df_shimmer
|
||||||
|
|
||||||
def segment_pitch(dir_path, r_config):
|
|
||||||
"""
|
|
||||||
segmenting pitch freq for each voice segment
|
|
||||||
"""
|
|
||||||
com_speech_sort, voiced_yes, voiced_no = ([], ) * 3
|
|
||||||
for file in os.listdir(dir_path):
|
|
||||||
try:
|
|
||||||
|
|
||||||
if file.endswith('_pitch.csv'):
|
|
||||||
|
|
||||||
ff_df = pd.read_csv((dir_path+'/'+file))
|
|
||||||
voice_label = ff_df[r_config.aco_voiceLabel]
|
|
||||||
|
|
||||||
indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
|
|
||||||
voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
|
|
||||||
|
|
||||||
indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
|
|
||||||
voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]
|
|
||||||
|
|
||||||
com_speech = voiced_yes + voiced_no
|
|
||||||
com_speech_sort = sorted(com_speech, key=lambda x: x[0])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return com_speech_sort, voiced_yes, voiced_no
|
|
||||||
|
|
||||||
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
|
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
|
||||||
"""
|
"""
|
||||||
calculating shimmer for each voice segment
|
calculating shimmer for each voice segment
|
||||||
"""
|
"""
|
||||||
snd = parselmouth.Sound(audio_file)
|
snd = parselmouth.Sound(audio_file)
|
||||||
pitch = snd.to_pitch(time_step=.001)
|
pitch = snd.to_pitch(time_step=0.001)
|
||||||
|
|
||||||
for idx, vs in enumerate(com_speech_sort):
|
for idx, vs in enumerate(com_speech_sort):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
shimmer = np.NaN
|
shimmer = np.NaN
|
||||||
if vs in voiced_yes and len(vs)>1:
|
if vs in voiced_yes and len(vs) > 1:
|
||||||
|
|
||||||
start_time = pitch.get_time_from_frame_number(vs[0])
|
start_time = pitch.get_time_from_frame_number(vs[0])
|
||||||
end_time = pitch.get_time_from_frame_number(vs[-1])
|
end_time = pitch.get_time_from_frame_number(vs[-1])
|
||||||
@@ -102,7 +83,10 @@ def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audi
|
|||||||
shimmer_frames[idx] = shimmer
|
shimmer_frames[idx] = shimmer
|
||||||
return shimmer_frames
|
return shimmer_frames
|
||||||
|
|
||||||
def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_shimmer(
|
||||||
|
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Preparing shimmer matrix
|
Preparing shimmer matrix
|
||||||
Args:
|
Args:
|
||||||
@@ -111,26 +95,37 @@ def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
|
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
|
||||||
"""
|
"""
|
||||||
dir_path = os.path.join(out_loc, ff_dir)
|
dir_path = os.path.join(out_loc, ff_dir)
|
||||||
if os.path.isdir(dir_path):
|
if os.path.isdir(dir_path) or ff_df is not None:
|
||||||
voice_seg = segment_pitch(dir_path, r_config)
|
if ff_df is not None:
|
||||||
|
voice_seg = ut.process_segment_pitch(ff_df, r_config)
|
||||||
|
else:
|
||||||
|
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
|
||||||
|
|
||||||
shimmer_frames = [np.NaN] * len(voice_seg[0])
|
shimmer_frames = [np.NaN] * len(voice_seg[0])
|
||||||
shimmer_segment_frames = segment_shimmer(voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file)
|
shimmer_segment_frames = segment_shimmer(
|
||||||
|
voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file
|
||||||
|
)
|
||||||
|
|
||||||
df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
|
df_shimmer = pd.DataFrame(
|
||||||
df_shimmer[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
shimmer_segment_frames, columns=[r_config.aco_shimmer]
|
||||||
|
)
|
||||||
|
df_shimmer[
|
||||||
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
df_shimmer['Frames'] = df_shimmer.index
|
df_shimmer["Frames"] = df_shimmer.index
|
||||||
df_shimmer['dbm_master_url'] = video_uri
|
df_shimmer["dbm_master_url"] = video_uri
|
||||||
|
if save:
|
||||||
logger.info('Processing Output file {} '.format(out_loc))
|
logger.info("Processing Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
|
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
|
||||||
|
df = df_shimmer
|
||||||
else:
|
else:
|
||||||
error_txt = 'error: fundamental freq not available'
|
error_txt = "error: fundamental freq not available"
|
||||||
empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
|
df = empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
|
||||||
|
return df
|
||||||
|
|
||||||
def run_shimmer(video_uri, out_dir, r_config):
|
|
||||||
|
def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None):
|
||||||
"""
|
"""
|
||||||
Processing all patients to fetch shimmer
|
Processing all patients to fetch shimmer
|
||||||
---------------
|
---------------
|
||||||
@@ -139,22 +134,33 @@ def run_shimmer(video_uri, out_dir, r_config):
|
|||||||
video_uri: video path; r_config: raw variable config object
|
video_uri: video path; r_config: raw variable config object
|
||||||
out_dir: (str) Output directory for processed output
|
out_dir: (str) Output directory for processed output
|
||||||
"""
|
"""
|
||||||
try:
|
# try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info("Output file {} size is less than 0.064sec".format(audio_file))
|
||||||
|
|
||||||
error_txt = 'error: length less than 0.064'
|
error_txt = "error: length less than 0.064"
|
||||||
empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
|
df = empty_shimmer(
|
||||||
return
|
video_uri, out_loc, fl_name, r_config, error_txt, save=save
|
||||||
|
)
|
||||||
calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config)
|
else:
|
||||||
except Exception as e:
|
df = calc_shimmer(
|
||||||
logger.error('Failed to process audio file')
|
video_uri,
|
||||||
|
audio_file,
|
||||||
|
out_loc,
|
||||||
|
fl_name,
|
||||||
|
r_config,
|
||||||
|
save=save,
|
||||||
|
ff_df=ff_df,
|
||||||
|
)
|
||||||
|
return df
|
||||||
|
# except Exception as e:
|
||||||
|
# logger.error('Error in shimmer: {}'.format(e))
|
||||||
|
# logger.error('Failed to process audio file')
|
||||||
|
|||||||
@@ -4,22 +4,23 @@ project_name: DBM
|
|||||||
created: 2020-20-07
|
created: 2020-20-07
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import parselmouth
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import glob
|
import glob
|
||||||
import librosa
|
|
||||||
from os.path import join
|
|
||||||
import logging
|
import logging
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger=logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
vfs_dir = "acoustic/voice_frame_score"
|
||||||
|
csv_ext = "_voiceprev.csv"
|
||||||
|
error_txt = "error: length less than 0.064"
|
||||||
|
|
||||||
vfs_dir = 'acoustic/voice_frame_score'
|
|
||||||
csv_ext = '_voiceprev.csv'
|
|
||||||
error_txt = 'error: length less than 0.064'
|
|
||||||
|
|
||||||
def audio_pitch_frame(pitch):
|
def audio_pitch_frame(pitch):
|
||||||
"""
|
"""
|
||||||
@@ -33,6 +34,7 @@ def audio_pitch_frame(pitch):
|
|||||||
voiced_frames = pitch.count_voiced_frames()
|
voiced_frames = pitch.count_voiced_frames()
|
||||||
return total_frames, voiced_frames
|
return total_frames, voiced_frames
|
||||||
|
|
||||||
|
|
||||||
def voice_segment(path):
|
def voice_segment(path):
|
||||||
"""
|
"""
|
||||||
Using parselmouth library for fundamental frequency
|
Using parselmouth library for fundamental frequency
|
||||||
@@ -43,12 +45,13 @@ def voice_segment(path):
|
|||||||
"""
|
"""
|
||||||
sound_pat = parselmouth.Sound(path)
|
sound_pat = parselmouth.Sound(path)
|
||||||
pitch = sound_pat.to_pitch()
|
pitch = sound_pat.to_pitch()
|
||||||
total_frames,voiced_frames = audio_pitch_frame(pitch)
|
total_frames, voiced_frames = audio_pitch_frame(pitch)
|
||||||
|
|
||||||
voiced_percentage = (voiced_frames/total_frames)*100
|
voiced_percentage = (voiced_frames / total_frames) * 100
|
||||||
return voiced_percentage, voiced_frames, total_frames
|
return voiced_percentage, voiced_frames, total_frames
|
||||||
|
|
||||||
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
|
|
||||||
|
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
creating dataframe matrix for voice frame score
|
creating dataframe matrix for voice frame score
|
||||||
Args:
|
Args:
|
||||||
@@ -57,32 +60,44 @@ def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
|
|||||||
f_nm_config: Config file object
|
f_nm_config: Config file object
|
||||||
"""
|
"""
|
||||||
|
|
||||||
voice_percentage,voiced_frames, total_frames = voice_segment(audio_file)
|
voice_percentage, voiced_frames, total_frames = voice_segment(audio_file)
|
||||||
df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame])
|
df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame])
|
||||||
|
|
||||||
df_vfs[r_config.aco_totVoiceFrame] = [total_frames]
|
df_vfs[r_config.aco_totVoiceFrame] = [total_frames]
|
||||||
df_vfs[r_config.aco_voicePct] = [voice_percentage]
|
df_vfs[r_config.aco_voicePct] = [voice_percentage]
|
||||||
df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release
|
df_vfs[
|
||||||
|
r_config.err_reason
|
||||||
|
] = "Pass" # will replace with threshold in future release
|
||||||
|
|
||||||
df_vfs['Frames'] = df_vfs.index
|
df_vfs["Frames"] = df_vfs.index
|
||||||
df_vfs['dbm_master_url'] = video_uri
|
df_vfs["dbm_master_url"] = video_uri
|
||||||
|
if save:
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
||||||
|
return df_vfs
|
||||||
|
|
||||||
def empty_vfs(video_uri, out_loc, fl_name, r_config):
|
|
||||||
|
def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Preparing empty VFS matrix if something fails
|
Preparing empty VFS matrix if something fails
|
||||||
"""
|
"""
|
||||||
cols = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct, r_config.err_reason]
|
cols = [
|
||||||
|
"Frames",
|
||||||
|
r_config.aco_voiceFrame,
|
||||||
|
r_config.aco_totVoiceFrame,
|
||||||
|
r_config.aco_voicePct,
|
||||||
|
r_config.err_reason,
|
||||||
|
]
|
||||||
out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]]
|
out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]]
|
||||||
df_vfs = pd.DataFrame(out_val, columns = cols)
|
df_vfs = pd.DataFrame(out_val, columns=cols)
|
||||||
df_vfs['dbm_master_url'] = video_uri
|
df_vfs["dbm_master_url"] = video_uri
|
||||||
|
if save:
|
||||||
logger.info('Saving Output file {} '.format(out_loc))
|
logger.info("Saving Output file {} ".format(out_loc))
|
||||||
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
||||||
|
return df_vfs
|
||||||
|
|
||||||
def run_vfs(video_uri, out_dir, r_config):
|
|
||||||
|
def run_vfs(video_uri, out_dir, r_config, save=True):
|
||||||
"""
|
"""
|
||||||
Processing all participants for fetching voice frame score
|
Processing all participants for fetching voice frame score
|
||||||
---------------
|
---------------
|
||||||
@@ -94,18 +109,23 @@ def run_vfs(video_uri, out_dir, r_config):
|
|||||||
try:
|
try:
|
||||||
|
|
||||||
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
|
||||||
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
|
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
|
||||||
if len(aud_filter)>0:
|
if len(aud_filter) > 0:
|
||||||
|
|
||||||
audio_file = aud_filter[0]
|
audio_file = aud_filter[0]
|
||||||
aud_dur = librosa.get_duration(filename=audio_file)
|
aud_dur = ut.get_length(audio_file)
|
||||||
|
|
||||||
if float(aud_dur) < 0.064:
|
if float(aud_dur) < 0.064:
|
||||||
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
|
logger.info(
|
||||||
|
"Output file {} size is less than 0.064sec".format(audio_file)
|
||||||
|
)
|
||||||
|
|
||||||
empty_vfs(video_uri, out_loc, fl_name, r_config)
|
df = empty_vfs(video_uri, out_loc, fl_name, r_config, save=save)
|
||||||
return
|
else:
|
||||||
|
df = calc_vfs(
|
||||||
calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config)
|
video_uri, audio_file, out_loc, fl_name, r_config, save=save
|
||||||
|
)
|
||||||
|
return df
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error('Failed to process audio file')
|
e
|
||||||
|
logger.error("Failed to process audio file")
|
||||||
|
|||||||
Reference in New Issue
Block a user