code refactoring only

This commit is contained in:
jordi.hasianta
2022-09-15 20:30:49 +07:00
parent c6cd54c376
commit 42455a1a2b
10 changed files with 954 additions and 653 deletions

View File

@@ -4,25 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
import numpy as np
import parselmouth
import librosa
import glob
from os.path import join
import logging
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
formant_dir = 'acoustic/formant_freq'
csv_ext = '_formant.csv'
error_txt = 'error: length less than 0.064'
formant_dir = "acoustic/formant_freq"
csv_ext = "_formant.csv"
error_txt = "error: length less than 0.064"
def formant_list(formant,snd):
def formant_list(formant, snd):
"""
Getting formant frequency per second
Args:
@@ -35,23 +35,24 @@ def formant_list(formant,snd):
f2_list = []
f3_list = []
f4_list = []
dur = snd.duration-0.02
dur = snd.duration - 0.02
dur_round = round(dur, 2)
time_list = np.arange(0.001, dur_round, 0.001)
for time in time_list:
f1 = formant.get_value_at_time(1,time)
f2 = formant.get_value_at_time(2,time)
f3 = formant.get_value_at_time(3,time)
f4 = formant.get_value_at_time(4,time)
f1 = formant.get_value_at_time(1, time)
f2 = formant.get_value_at_time(2, time)
f3 = formant.get_value_at_time(3, time)
f4 = formant.get_value_at_time(4, time)
f1_list.append(f1)
f2_list.append(f2)
f3_list.append(f3)
f4_list.append(f4)
return f1_list,f2_list,f3_list,f4_list
return f1_list, f2_list, f3_list, f4_list
def formant_score(path):
    """
    Extract formant-frequency tracks from an audio file.

    Args:
        path: (.wav) audio file location
    Returns:
        (tuple) the four lists produced by ``formant_list`` (F1, F2, F3, F4)
    """
    sound_pat = parselmouth.Sound(path)
    # Burg formant analysis with time_step=0.001.
    formant = sound_pat.to_formant_burg(time_step=0.001)
    return formant_list(formant, sound_pat)
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Preparing Formant freq matrix

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) formant table, one row per sampled time point
    """
    f1_list, f2_list, f3_list, f4_list = formant_score(audio_file)

    df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1])
    df_formant[r_config.aco_fm2] = f2_list
    df_formant[r_config.aco_fm3] = f3_list
    df_formant[r_config.aco_fm4] = f4_list

    # NOTE(review): with regex=True an empty pattern may match every string
    # cell, not only blank ones - confirm this is the intended behaviour.
    df_formant.replace("", np.nan, regex=True, inplace=True)

    # will replace with threshold in future release
    df_formant[r_config.err_reason] = "Pass"
    df_formant["Frames"] = df_formant.index
    df_formant["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
    return df_formant
def empty_fm(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Preparing empty formant frequency matrix if something fails

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) single row of NaN values plus the module-level
        ``error_txt`` in the error-reason column
    """
    cols = [
        "Frames",
        r_config.aco_fm1,
        r_config.aco_fm2,
        r_config.aco_fm3,
        r_config.aco_fm4,
        r_config.err_reason,
    ]
    # Every column except the last (error reason) is NaN.
    out_val = [[np.nan] * (len(cols) - 1) + [error_txt]]
    df_fm = pd.DataFrame(out_val, columns=cols)
    df_fm["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
    return df_fm
def run_formant(video_uri, out_dir, r_config, save=True):
    """
    Processing all patient's for fetching Formant freq

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: forwarded to ``calc_formant`` / ``empty_fm``
    Returns:
        (pandas.DataFrame) formant table, or None when no matching .wav file
        is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        # Audio shorter than 0.064 s gets the placeholder table instead.
        if float(aud_dur) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            return empty_fm(video_uri, out_loc, fl_name, r_config, save=save)

        return calc_formant(
            video_uri, audio_file, out_loc, fl_name, r_config, save=save
        )
    except Exception as e:
        # Report the cause as well; the bare message alone hides what failed.
        logger.error("Error in formant: {}".format(e))
        logger.error("Failed to process audio file")
        return None

View File

@@ -4,24 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import more_itertools as mit
from os.path import join
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
gne_dir = 'acoustic/glottal_noise'
ff_dir = 'acoustic/pitch'
csv_ext = '_gne.csv'
def gne_ratio(sound):
    """
    Using parselmouth library fetching glottal noise excitation ratio

    Args:
        sound: parselmouth object exposing ``to_harmonicity_gne()``
    Returns:
        (float) maximum GNE value over all bands, ignoring masked cells
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # Cells equal to -200 are masked so they cannot win the maximum.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)

    # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    return np.nanmax(gne_all_bands)
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty GNE matrix if something fails

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        error_txt: text stored in the error-reason column
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) single placeholder row
    """
    cols = ["Frames", r_config.aco_gne, r_config.err_reason]
    out_val = [[np.nan, np.nan, error_txt]]
    df_gne = pd.DataFrame(out_val, columns=cols)
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
"""
calculating gne for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
max_gne = np.NaN
if vs in voiced_yes and len(vs)>1:
if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1])
@@ -103,8 +84,9 @@ def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_fi
gne_all_frames[idx] = max_gne
return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
    """
    Preparing gne matrix

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        save: when True the table is also written via ``ut.save_output``
        ff_df: optional pitch table; when given it is used instead of the
            pitch output directory
    Returns:
        (pandas.DataFrame) one row per voice segment, or the placeholder
        table when neither ``ff_df`` nor the pitch directory is available
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: pitch freq not available"
        return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
    gne_all_frames = [np.nan] * len(voice_seg[0])
    gne_segment_frames = segment_gne(
        voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
    )

    df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
    # will replace with threshold in future release
    df_gne[r_config.err_reason] = "Pass"
    df_gne["Frames"] = df_gne.index
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Processing all patient's for fetching glottal noise ratio

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: forwarded to ``calc_gne`` / ``empty_gne``
        ff_df: optional pitch table forwarded to ``calc_gne``
    Returns:
        (pandas.DataFrame) GNE table, or None when no matching .wav file is
        found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        # Audio shorter than 0.064 s gets the placeholder table instead.
        if float(aud_dur) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            error_txt = "error: length less than 0.064"
            return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

        return calc_gne(
            video_uri,
            audio_file,
            out_loc,
            fl_name,
            r_config,
            save=save,
            ff_df=ff_df,
        )
    except Exception as e:
        # Report the cause as well; the bare message alone hides what failed.
        logger.error("Error in gne: {}".format(e))
        logger.error("Failed to process audio file")
        return None

View File

@@ -1,96 +1,166 @@
"""
file_name: hnr
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
from os.path import join
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
hnr_dir = 'acoustic/harmonic_noise'
csv_ext = '_hnr.csv'
error_txt = 'error: length less than 0.064'
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
def gne_ratio(sound):
    """
    Using parselmouth library fetching glottal noise excitation ratio

    Args:
        sound: parselmouth object exposing ``to_harmonicity_gne()``
    Returns:
        (float) maximum GNE value over all bands, ignoring masked cells
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # Cells equal to -200 are masked so they cannot win the maximum.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)

    # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    return np.nanmax(gne_all_bands)
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty GNE matrix if something fails

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        error_txt: text stored in the error-reason column
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) single placeholder row
    """
    cols = ["Frames", r_config.aco_gne, r_config.err_reason]
    out_val = [[np.nan, np.nan, error_txt]]
    df_gne = pd.DataFrame(out_val, columns=cols)
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    calculating gne for each voice segment

    Args:
        com_speech_sort: list of segments; each segment is a list whose first
            and last items are used as pitch frame numbers
        voiced_yes: segments to analyse; any segment not in this list stays NaN
        voiced_no: not used by this function (kept for the call signature)
        gne_all_frames: list updated in place, one slot per segment
        audio_file: (.wav) audio file location
    Returns:
        ``gne_all_frames`` with one GNE value (or NaN) per segment
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        # Default set outside the try so it is always bound; np.nan because
        # the np.NaN alias was removed in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): the slice is wrapped without passing the
                # source sampling frequency - confirm the default is intended.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                max_gne = gne_ratio(samples)
        except Exception:
            # Best effort: a failing segment keeps NaN, but leave a trace.
            logger.debug("GNE failed for segment %s", idx, exc_info=True)

        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
    """
    Preparing gne matrix

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        save: when True the table is also written via ``ut.save_output``
        ff_df: optional pitch table; when given it is used instead of the
            pitch output directory
    Returns:
        (pandas.DataFrame) one row per voice segment, or the placeholder
        table when neither ``ff_df`` nor the pitch directory is available
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: pitch freq not available"
        return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
    gne_all_frames = [np.nan] * len(voice_seg[0])
    gne_segment_frames = segment_gne(
        voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
    )

    df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
    # will replace with threshold in future release
    df_gne[r_config.err_reason] = "Pass"
    df_gne["Frames"] = df_gne.index
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Processing all patient's for fetching glottal noise ratio

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: forwarded to ``calc_gne`` / ``empty_gne``
        ff_df: optional pitch table forwarded to ``calc_gne``
    Returns:
        (pandas.DataFrame) GNE table, or None when no matching .wav file is
        found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        # Audio shorter than 0.064 s gets the placeholder table instead.
        if float(aud_dur) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            error_txt = "error: length less than 0.064"
            return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

        return calc_gne(
            video_uri,
            audio_file,
            out_loc,
            fl_name,
            r_config,
            save=save,
            ff_df=ff_df,
        )
    except Exception as e:
        # Report the cause as well; the bare message alone hides what failed.
        logger.error("Error in gne: {}".format(e))
        logger.error("Failed to process audio file")
        return None

View File

@@ -1,92 +1,166 @@
"""
file_name: intensity
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import glob
import parselmouth
import librosa
from os.path import join
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
intensity_dir = 'acoustic/intensity'
csv_ext = '_intensity.csv'
error_txt = 'error: length less than 0.064'
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
def gne_ratio(sound):
    """
    Using parselmouth library fetching glottal noise excitation ratio

    Args:
        sound: parselmouth object exposing ``to_harmonicity_gne()``
    Returns:
        (float) maximum GNE value over all bands, ignoring masked cells
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # Cells equal to -200 are masked so they cannot win the maximum.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)

    # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    return np.nanmax(gne_all_bands)
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty GNE matrix if something fails

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        error_txt: text stored in the error-reason column
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) single placeholder row
    """
    cols = ["Frames", r_config.aco_gne, r_config.err_reason]
    out_val = [[np.nan, np.nan, error_txt]]
    df_gne = pd.DataFrame(out_val, columns=cols)
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    calculating gne for each voice segment

    Args:
        com_speech_sort: list of segments; each segment is a list whose first
            and last items are used as pitch frame numbers
        voiced_yes: segments to analyse; any segment not in this list stays NaN
        voiced_no: not used by this function (kept for the call signature)
        gne_all_frames: list updated in place, one slot per segment
        audio_file: (.wav) audio file location
    Returns:
        ``gne_all_frames`` with one GNE value (or NaN) per segment
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        # Default set outside the try so it is always bound; np.nan because
        # the np.NaN alias was removed in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): the slice is wrapped without passing the
                # source sampling frequency - confirm the default is intended.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                max_gne = gne_ratio(samples)
        except Exception:
            # Best effort: a failing segment keeps NaN, but leave a trace.
            logger.debug("GNE failed for segment %s", idx, exc_info=True)

        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
    """
    Preparing gne matrix

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        save: when True the table is also written via ``ut.save_output``
        ff_df: optional pitch table; when given it is used instead of the
            pitch output directory
    Returns:
        (pandas.DataFrame) one row per voice segment, or the placeholder
        table when neither ``ff_df`` nor the pitch directory is available
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: pitch freq not available"
        return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
    gne_all_frames = [np.nan] * len(voice_seg[0])
    gne_segment_frames = segment_gne(
        voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
    )

    df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
    # will replace with threshold in future release
    df_gne[r_config.err_reason] = "Pass"
    df_gne["Frames"] = df_gne.index
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Processing all patient's for fetching glottal noise ratio

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: forwarded to ``calc_gne`` / ``empty_gne``
        ff_df: optional pitch table forwarded to ``calc_gne``
    Returns:
        (pandas.DataFrame) GNE table, or None when no matching .wav file is
        found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        # Audio shorter than 0.064 s gets the placeholder table instead.
        if float(aud_dur) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            error_txt = "error: length less than 0.064"
            return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

        return calc_gne(
            video_uri,
            audio_file,
            out_loc,
            fl_name,
            r_config,
            save=save,
            ff_df=ff_df,
        )
    except Exception as e:
        # Report the cause as well; the bare message alone hides what failed.
        logger.error("Error in gne: {}".format(e))
        logger.error("Failed to process audio file")
        return None

View File

@@ -4,25 +4,24 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
jitter_dir = "acoustic/jitter"
ff_dir = "acoustic/pitch"
csv_ext = "_jitter.csv"
jitter_dir = 'acoustic/jitter'
ff_dir = 'acoustic/pitch'
csv_ext = '_jitter.csv'
def audio_jitter(sound):
    """
    Compute local jitter of a sound through Praat commands.

    Args:
        sound: parselmouth object
    Returns:
        the value returned by Praat's "Get jitter (local)" command
    """
    # NOTE(review): 80 and 500 are presumably the pitch floor and ceiling in
    # Hz - confirm against the Praat manual.
    point_process = parselmouth.praat.call(
        sound, "To PointProcess (periodic, cc)...", 80, 500
    )
    return parselmouth.praat.call(
        point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3
    )
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty jitter matrix if something fails

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies the column names
        error_txt: text stored in the error-reason column
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) single placeholder row
    """
    cols = ["Frames", r_config.aco_jitter, r_config.err_reason]
    out_val = [[np.nan, np.nan, error_txt]]
    df_jitter = pd.DataFrame(out_val, columns=cols)
    df_jitter["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
    return df_jitter
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
"""
calculating jitter for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
jitter = np.NaN
if vs in voiced_yes and len(vs)>1:
if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1])
@@ -100,8 +81,11 @@ def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_
jitter_frames[idx] = jitter
return jitter_frames
def calc_jitter(
    video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
    """
    Preparing jitter matrix

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
        save: when True the table is also written via ``ut.save_output``
        ff_df: optional pitch table; when given it is used instead of the
            pitch output directory
    Returns:
        (pandas.DataFrame) one row per voice segment, or the placeholder
        table when neither ``ff_df`` nor the pitch directory is available
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: fundamental freq not available"
        return empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
    jitter_frames = [np.nan] * len(voice_seg[0])
    jitter_segment_frames = segment_jitter(
        voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file
    )

    df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
    # will replace with threshold in future release
    df_jitter[r_config.err_reason] = "Pass"
    df_jitter["Frames"] = df_jitter.index
    df_jitter["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
    return df_jitter
def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Processing all patient's videos for fetching jitter

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: forwarded to ``calc_jitter`` / ``empty_jitter``
        ff_df: optional pitch table forwarded to ``calc_jitter``
    Returns:
        (pandas.DataFrame) jitter table, or None when no matching .wav file
        is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        # Audio shorter than 0.064 s gets the placeholder table instead.
        if float(aud_dur) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            error_txt = "error: length less than 0.064"
            return empty_jitter(
                video_uri, out_loc, fl_name, r_config, error_txt, save=save
            )

        return calc_jitter(
            video_uri,
            audio_file,
            out_loc,
            fl_name,
            r_config,
            save=save,
            ff_df=ff_df,
        )
    except Exception as e:
        logger.error("Error in jitter: {}".format(e))
        logger.error("Failed to process audio file")
        return None

View File

@@ -4,40 +4,73 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
import librosa
from os.path import join
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
mfcc_dir = 'acoustic/mfcc'
csv_ext = '_mfcc.csv'
error_txt = 'error: length less than 0.064'
mfcc_dir = "acoustic/mfcc"
csv_ext = "_mfcc.csv"
error_txt = "error: length less than 0.064"
def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Preparing empty mfcc matrix if something fails

    Args:
        video_uri: value written to the ``dbm_master_url`` column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config; supplies ``aco_mfcc1`` .. ``aco_mfcc12``
            and ``err_reason`` column names
        save: when True the table is also written via ``ut.save_output``
    Returns:
        (pandas.DataFrame) single row of NaN values plus the module-level
        ``error_txt`` in the error-reason column
    """
    mfcc_cols = [getattr(r_config, "aco_mfcc{}".format(i)) for i in range(1, 13)]
    cols = ["Frames"] + mfcc_cols + [r_config.err_reason]

    # Every column except the last (error reason) is NaN.
    out_val = [[np.nan] * (len(cols) - 1) + [error_txt]]
    df_mfcc = pd.DataFrame(out_val, columns=cols)
    df_mfcc["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
    return df_mfcc
def audio_mfcc(path):
    """
    Compute MFCCs for an audio file with parselmouth
    Args:
        path: audio file location handed to ``parselmouth.Sound``
    Returns:
        (numpy.ndarray) the MFCC array with its first row removed
    """
    sound = parselmouth.Sound(path)
    mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12)
    mfccs = mfcc_object.to_array()
    # Row 0 is dropped so callers can index rows 0..11 for coefficients 1..12
    # (presumably row 0 is the c0 term -- confirm against the parselmouth docs).
    return np.delete(mfccs, 0, axis=0)
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Preparing mfcc matrix
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        audio_file: audio file path handed to ``audio_mfcc``
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object; ``base_raw["raw_feature"]`` supplies the
            ``aco_mfcc1`` .. ``aco_mfcc12`` column names and ``err_reason``
            names the status column
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) one column per coefficient plus ``Frames``,
        the status column and ``dbm_master_url``
    """
    mfccs = audio_mfcc(audio_file)
    # The name mapping is the same for every coefficient, so read it once.
    raw_names = r_config.base_raw["raw_feature"]
    dict_ = {raw_names["aco_mfcc" + str(i)]: mfccs[i - 1, :] for i in range(1, 13)}

    df = pd.DataFrame(dict_)
    df["Frames"] = df.index
    df[r_config.err_reason] = "Pass"  # may replace based on threshold in future release
    df["dbm_master_url"] = video_uri
    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
    return df
def run_mfcc(video_uri, out_dir, r_config, save=True):
    """
    Processing all patients to fetch mfccs
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: when True the resulting frame is also written to disk
    Returns:
        (pandas.DataFrame) the MFCC frame, or the placeholder frame when the
        audio is shorter than 0.064 sec; None when no matching .wav file is
        found or when processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)

            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save)

            return calc_mfcc(
                video_uri, audio_file, out_loc, fl_name, r_config, save=save
            )
    except Exception as e:
        # Failures are reported and swallowed; include the cause so they can
        # be diagnosed.
        logger.error("Error in mfcc: {}".format(e))
        logger.error("Failed to process audio file")
    return None

View File

@@ -4,23 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import os
import glob
from pydub import AudioSegment
import librosa
import pandas as pd
import numpy as np
import webrtcvad
from os.path import join
import logging
import os
from os.path import join
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut
import numpy as np
import pandas as pd
import webrtcvad
from pydub import AudioSegment
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
# Module-wide logging setup and the constants shared by the pause-segment helpers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

pause_seg_dir = "acoustic/pause_segment"  # directory argument passed to ut.save_output
csv_ext = "_pausechar.csv"  # extension argument passed to ut.save_output
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
"""
@@ -35,25 +37,27 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
speaking_time = np.sum(np.asarray(seg_ends_sec) - np.asarray(seg_starts_sec))
num_pauses = len(seg_starts_sec) - 1
pause_len = np.zeros(num_pauses)
for p in range(num_pauses):
pause_len[p] = seg_starts_sec[p+1] - seg_ends_sec[p]
if len(pause_len)>0:
pause_len_mean = np.mean(pause_len)
pause_len_std = np.std(pause_len)
pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p]
if len(pause_len) > 0:
pause_time = np.sum(pause_len)
else:
pause_len_mean = 0
pause_len_std = 0
pause_time = 0
pause_frac = pause_time / total_time
timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac}
timing_dict = {
r_config.aco_totaltime: total_time,
r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses,
r_config.aco_pausetime: pause_time,
r_config.aco_pausefrac: pause_frac,
}
return timing_dict
def process_silence(audio_file, r_config):
    """
    Returns dataframe for pause between words using voice activity detection
    Args:
        audio_file: wav file path read through ``vu.read_wave``
        r_config: raw variable config object (column names)
    Returns:
        (pandas.DataFrame) timing cues with the status column set to "Pass";
        an empty string when voice activity detection yields no usable segment
    """
    feat_dict_list = []
    y, sr = vu.read_wave(audio_file)

    # 3 is most aggressive (splits most), 0 least (better for low snr)
    aggressiveness = 3
    frame_dur_ms = 20
    # pause segment(long & short pad)
    long_pad_around_voice_ms = 200
    short_pad_around_voice_ms = 100

    if len(y) > 0:
        vad = webrtcvad.Vad(aggressiveness)
        frames = list(vu.frame_generator(frame_dur_ms, y, sr))

        # longer pad time screens out little blips, but misses short silences
        long_seg_starts, long_seg_ends = vu.vad_get_segment_times(
            sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames
        )
        # Logic to handle blank audio file
        if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
            return ""

        t_start = long_seg_starts[0]
        t_end = long_seg_ends[-1]

        # shorter pad time captures short silences (but misfires on little blips)
        short_seg_starts, short_seg_ends = vu.vad_get_segment_times(
            sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames
        )

        # logic to clean up some typical misfires: keep only the short
        # segments that start inside the span found with the long pad
        seg_starts = []
        seg_ends = []
        for seg_start, seg_end in zip(short_seg_starts, short_seg_ends):
            if t_start <= seg_start <= t_end:
                seg_starts.append(seg_start)
                seg_ends.append(seg_end)

        if len(seg_starts) == 0 or len(seg_ends) == 0:
            return ""

        timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
        feat_dict_list.append(timing_dict)

    df = pd.DataFrame(feat_dict_list)
    df[r_config.err_reason] = "Pass"  # will replace with threshold in future release
    return df
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty Pause Segment matrix if something fails
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object supplying the column names
        error_txt: text stored in the error-reason column
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) single-row frame of NaN values plus the error text
    """
    cols = [
        r_config.aco_totaltime,
        r_config.aco_speakingtime,
        r_config.aco_numpauses,
        r_config.aco_pausetime,
        r_config.aco_pausefrac,
        r_config.err_reason,
    ]
    # Every column except the trailing error-reason column is NaN.
    out_val = [[np.nan] * (len(cols) - 1) + [error_txt]]
    df_pause = pd.DataFrame(out_val, columns=cols)
    df_pause["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
    return df_pause
def run_pause_segment(video_uri, out_dir, r_config, save=True):
    """
    Processing all patient's for getting Pause Segment
    ---------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: when True the resulting frame is also written to disk
    Returns:
        (pandas.DataFrame) pause-segment frame, or a placeholder frame when
        the audio is shorter than 0.064 sec or no segment is detected; None
        when no matching .wav file is found or when processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)

            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                error_txt = "error: length less than 0.064"
                # Honour `save` and hand the placeholder back, as the other
                # branches of this function do.
                return empty_pause_segment(
                    video_uri, out_loc, fl_name, r_config, error_txt, save=save
                )

            logger.info("Converting stereo sound to mono-lD")
            sound_mono = AudioSegment.from_wav(audio_file)
            sound_mono = sound_mono.set_channels(1)
            sound_mono = sound_mono.set_frame_rate(48000)

            mono_wav = os.path.join(input_loc, fl_name + "_mono.wav")
            try:
                sound_mono.export(mono_wav, format="wav")
                df_pause_seg = process_silence(mono_wav, r_config)
            finally:
                # removing mono wav file, even when export/VAD raised
                if os.path.exists(mono_wav):
                    os.remove(mono_wav)

            if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
                df_pause_seg["dbm_master_url"] = video_uri
                if save:
                    logger.info("Processing Output file {} ".format(out_loc))
                    ut.save_output(
                        df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext
                    )
                return df_pause_seg

            error_txt = "error: webrtcvad returns no segment"
            return empty_pause_segment(
                video_uri, out_loc, fl_name, r_config, error_txt, save=save
            )
    except Exception as e:
        # A placeholder is required for the extra argument; without one the
        # logging module reports a formatting error instead of the message.
        logger.error("Failed to process audio file: %s", e)
    return None

View File

@@ -4,23 +4,24 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
from os.path import join
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Module-wide logging setup and the constants shared by the pitch helpers below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

ff_dir = "acoustic/pitch"  # directory argument passed to ut.save_output
csv_ext = "_pitch.csv"  # extension argument passed to ut.save_output
error_txt = "error: length less than 0.064"  # reason stored by empty_pitch
def audio_pitch(path):
    """
    Using parselmouth library for fundamental frequency
    Args:
        path: audio file location handed to ``parselmouth.Sound``
    Returns:
        (list) list of pitch/fundamental frequency for each voice frame
    """
    sound_pat = parselmouth.Sound(path)
    pitch = sound_pat.to_pitch(time_step=0.001)
    return list(pitch.selected_array["frequency"])
def label_speech(row, fd_freq):
    """
    identify whether frame is voiced or not
    Args:
        row: mapping-like record (e.g. a dataframe row) holding the frequency
        fd_freq: key/column name of the fundamental frequency in ``row``
    Returns:
        (str) yes or no indicator for voice; "yes" only when the frequency is
        greater than 0 (so NaN yields "no")
    """
    return "yes" if row[fd_freq] > 0 else "no"
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Preparing pitch frequency matrix
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        audio_file: audio file path handed to ``audio_pitch``
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object supplying the ``aco_ff``, ``aco_voiceLabel``
            and ``err_reason`` column names
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) one row per pitch frame
    """
    ff_frames = audio_pitch(audio_file)
    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])

    df_ffreq["Frames"] = df_ffreq.index
    # Same rule as label_speech ("yes" when frequency > 0, otherwise "no"),
    # applied to the whole column at once; this also stays well-defined when
    # the frame has no rows.
    df_ffreq[r_config.aco_voiceLabel] = np.where(
        df_ffreq[r_config.aco_ff] > 0, "yes", "no"
    )
    df_ffreq[r_config.err_reason] = "Pass"  # will replace with threshold in future release
    df_ffreq["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Preparing empty pitch frequency matrix if something fails
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object supplying the column names
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) single-row frame: NaN frame index and frequency,
        voice label "no" and the module-level ``error_txt`` as reason
    """
    cols = [
        "Frames",
        r_config.aco_ff,
        r_config.aco_voiceLabel,
        r_config.err_reason,
    ]
    df_ffreq = pd.DataFrame([[np.nan, np.nan, "no", error_txt]], columns=cols)
    df_ffreq["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
def run_pitch(video_uri, out_dir, r_config, save=True):
    """
    Processing audio for fetching pitch
    -------------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: when True the resulting frame is also written to disk
    Returns:
        (pandas.DataFrame) the pitch frame, or the placeholder frame when the
        audio is shorter than 0.064 sec; None when no matching .wav file is
        found or when processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)

            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                return empty_pitch(video_uri, out_loc, fl_name, r_config, save=save)

            return calc_pitch(
                video_uri, audio_file, out_loc, fl_name, r_config, save=save
            )
    except Exception as e:
        # Failures are reported and swallowed; include the cause so they can
        # be diagnosed.
        logger.error("Error in pitch: {}".format(e))
        logger.error("Failed to process audio file")
    return None

View File

@@ -4,26 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
import logging
import os
from os.path import join
import logging
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Module-wide logging setup and the constants shared by the shimmer helpers below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

shimmer_dir = "acoustic/shimmer"  # directory argument passed to ut.save_output
ff_dir = "acoustic/pitch"  # joined onto the output location to find pitch output
csv_ext = "_shimmer.csv"  # extension argument passed to ut.save_output
def audio_shimmer(sound):
    """
    Compute local shimmer for a sound through Praat
    Args:
        sound: sound object passed to ``parselmouth.praat.call``
    Returns:
        the value returned by Praat's "Get shimmer (local)" command
    """
    # Arguments are forwarded to Praat unchanged; see the Praat manual for the
    # meaning of the numeric parameters.
    pointProcess = parselmouth.praat.call(
        sound, "To PointProcess (periodic, cc)...", 80, 500
    )
    shimmer = parselmouth.praat.call(
        [sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
    )
    return shimmer
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty shimmer matrix if something fails
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object supplying the ``aco_shimmer`` and
            ``err_reason`` column names
        error_txt: text stored in the error-reason column
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) single-row frame of NaN values plus the error text
    """
    cols = ["Frames", r_config.aco_shimmer, r_config.err_reason]
    out_val = [[np.nan, np.nan, error_txt]]
    df_shimmer = pd.DataFrame(out_val, columns=cols)
    df_shimmer["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
"""
calculating shimmer for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
shimmer = np.NaN
if vs in voiced_yes and len(vs)>1:
if vs in voiced_yes and len(vs) > 1:
start_time = pitch.get_time_from_frame_number(vs[0])
end_time = pitch.get_time_from_frame_number(vs[-1])
@@ -101,8 +82,11 @@ def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audi
shimmer_frames[idx] = shimmer
return shimmer_frames
def calc_shimmer(
    video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
    """
    Preparing shimmer matrix
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        audio_file: audio file path handed to ``segment_shimmer``
        out_loc: output location; pitch output is looked up beneath it
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
        save: when True the frame is also written through ``ut.save_output``
        ff_df: optional pitch frame; when given it is segmented directly
            instead of reading pitch output from disk
    Returns:
        (pandas.DataFrame) shimmer per voice segment, or the placeholder frame
        when neither ``ff_df`` nor the pitch output directory is available
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: fundamental freq not available"
        return empty_shimmer(
            video_uri, out_loc, fl_name, r_config, error_txt, save=save
        )

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
    shimmer_frames = [np.nan] * len(voice_seg[0])
    shimmer_segment_frames = segment_shimmer(
        voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file
    )

    df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
    df_shimmer[r_config.err_reason] = "Pass"  # will replace with threshold in future release
    df_shimmer["Frames"] = df_shimmer.index
    df_shimmer["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Processing all patients to fetch shimmer
    ---------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: when True the resulting frame is also written to disk
        ff_df: optional pitch frame forwarded to ``calc_shimmer``
    Returns:
        (pandas.DataFrame) the shimmer frame, or the placeholder frame when
        the audio is shorter than 0.064 sec; None when no matching .wav file
        is found or when processing fails
    """
    # NOTE(review): the error handling had been left commented out here; it is
    # restored so this entry point logs and returns None on failure like the
    # other run_* functions -- confirm that disabling it was not intentional.
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)

            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                error_txt = "error: length less than 0.064"
                df = empty_shimmer(
                    video_uri, out_loc, fl_name, r_config, error_txt, save=save
                )
            else:
                df = calc_shimmer(
                    video_uri,
                    audio_file,
                    out_loc,
                    fl_name,
                    r_config,
                    save=save,
                    ff_df=ff_df,
                )
            return df
    except Exception as e:
        logger.error("Error in shimmer: {}".format(e))
        logger.error("Failed to process audio file")
    return None

View File

@@ -4,85 +4,100 @@ project_name: DBM
created: 2020-20-07
"""
import parselmouth
import pandas as pd
import numpy as np
import glob
import librosa
from os.path import join
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Module-wide logging setup and the constants shared by the voice-frame-score helpers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

vfs_dir = "acoustic/voice_frame_score"  # directory argument passed to ut.save_output
csv_ext = "_voiceprev.csv"  # extension argument passed to ut.save_output
error_txt = "error: length less than 0.064"  # reason stored by empty_vfs
def audio_pitch_frame(pitch):
    """
    Computing total number of speech and participant voiced frames
    Args:
        pitch: speech pitch object exposing ``get_number_of_frames()`` and
            ``count_voiced_frames()``
    Returns:
        (tuple) total number of frames, number of voiced frames
    """
    total_frames = pitch.get_number_of_frames()
    voiced_frames = pitch.count_voiced_frames()
    return total_frames, voiced_frames
def voice_segment(path):
    """
    Using parselmouth library for fundamental frequency
    Args:
        path: (.wav) audio file location
    Returns:
        (tuple) voiced frames percentage, participant voiced frames and
        total voice frames -- in that order
    """
    sound_pat = parselmouth.Sound(path)
    pitch = sound_pat.to_pitch()

    total_frames, voiced_frames = audio_pitch_frame(pitch)
    voiced_percentage = (voiced_frames / total_frames) * 100
    return voiced_percentage, voiced_frames, total_frames
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    creating dataframe matrix for voice frame score
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        audio_file: Audio file path
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object supplying the ``aco_voiceFrame``,
            ``aco_totVoiceFrame``, ``aco_voicePct`` and ``err_reason`` names
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) single-row frame with the voice frame score
    """
    voice_percentage, voiced_frames, total_frames = voice_segment(audio_file)

    df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame])
    df_vfs[r_config.aco_totVoiceFrame] = [total_frames]
    df_vfs[r_config.aco_voicePct] = [voice_percentage]
    df_vfs[r_config.err_reason] = "Pass"  # will replace with threshold in future release
    df_vfs["Frames"] = df_vfs.index
    df_vfs["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
    return df_vfs
def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Preparing empty VFS matrix if something fails
    Args:
        video_uri: value stored in the ``dbm_master_url`` column
        out_loc: output location forwarded to ``ut.save_output``
        fl_name: file name forwarded to ``ut.save_output``
        r_config: config object supplying the column names
        save: when True the frame is also written through ``ut.save_output``
    Returns:
        (pandas.DataFrame) single-row frame of NaN values plus the
        module-level ``error_txt`` as reason
    """
    cols = [
        "Frames",
        r_config.aco_voiceFrame,
        r_config.aco_totVoiceFrame,
        r_config.aco_voicePct,
        r_config.err_reason,
    ]
    # Every column except the trailing error-reason column is NaN.
    out_val = [[np.nan] * (len(cols) - 1) + [error_txt]]
    df_vfs = pd.DataFrame(out_val, columns=cols)
    df_vfs["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
    return df_vfs
def run_vfs(video_uri, out_dir, r_config, save=True):
    """
    Processing all participants for fetching voice frame score
    ---------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: when True the resulting frame is also written to disk
    Returns:
        (pandas.DataFrame) the voice-frame-score frame, or the placeholder
        frame when the audio is shorter than 0.064 sec; None when no matching
        .wav file is found or when processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)

            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                return empty_vfs(video_uri, out_loc, fl_name, r_config, save=save)

            return calc_vfs(
                video_uri, audio_file, out_loc, fl_name, r_config, save=save
            )
    except Exception as e:
        # Failures are reported and swallowed; include the cause so they can
        # be diagnosed.
        logger.error("Error in voice frame score: {}".format(e))
        logger.error("Failed to process audio file")
    return None