code refactoring only

This commit is contained in:
jordi.hasianta
2022-09-15 20:30:49 +07:00
parent c6cd54c376
commit 42455a1a2b
10 changed files with 954 additions and 653 deletions

View File

@@ -4,23 +4,23 @@ project_name: DBM
created: 2020-20-07
"""
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
import numpy as np
import parselmouth
import librosa
import glob
from os.path import join
import logging
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
formant_dir = 'acoustic/formant_freq'
csv_ext = '_formant.csv'
error_txt = 'error: length less than 0.064'
formant_dir = "acoustic/formant_freq"
csv_ext = "_formant.csv"
error_txt = "error: length less than 0.064"
def formant_list(formant, snd):
"""
@@ -53,6 +53,7 @@ def formant_list(formant,snd):
f4_list.append(f4)
return f1_list, f2_list, f3_list, f4_list
def formant_score(path):
"""
Using parselmouth library fetching Formant Frequency
@@ -62,11 +63,12 @@ def formant_score(path):
(list) list of Formant freq for each voice frame
"""
sound_pat = parselmouth.Sound(path)
formant = sound_pat.to_formant_burg(time_step=.001)
formant = sound_pat.to_formant_burg(time_step=0.001)
f_score = formant_list(formant, sound_pat)
return f_score
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
"""
Preparing Formant freq matrix
Args:
@@ -81,29 +83,44 @@ def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
df_formant[r_config.aco_fm3] = f3_list
df_formant[r_config.aco_fm4] = f4_list
df_formant.replace('', np.nan, regex=True,inplace=True)
df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df_formant.replace("", np.nan, regex=True, inplace=True)
df_formant[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_formant['Frames'] = df_formant.index
df_formant['dbm_master_url'] = video_uri
df_formant["Frames"] = df_formant.index
df_formant["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
return df_formant
def empty_fm(video_uri, out_loc, fl_name, r_config):
def empty_fm(video_uri, out_loc, fl_name, r_config, save=True):
"""
Preparing empty formant frequency matrix if something fails
"""
cols = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason]
cols = [
"Frames",
r_config.aco_fm1,
r_config.aco_fm2,
r_config.aco_fm3,
r_config.aco_fm4,
r_config.err_reason,
]
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
df_fm = pd.DataFrame(out_val, columns=cols)
df_fm['dbm_master_url'] = video_uri
df_fm["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
return df_fm
def run_formant(video_uri, out_dir, r_config):
def run_formant(video_uri, out_dir, r_config, save=True):
"""
Processing all patient's for fetching Formant freq
@@ -116,18 +133,23 @@ def run_formant(video_uri, out_dir, r_config):
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter) > 0:
audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file)
aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_fm(video_uri, out_loc, fl_name, r_config)
return
calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)
df = empty_fm(video_uri, out_loc, fl_name, r_config, save=save)
else:
df = calc_formant(
video_uri, audio_file, out_loc, fl_name, r_config, save=save
)
return df
except Exception as e:
logger.error('Failed to process audio file')
e
logger.error("Failed to process audio file")

View File

@@ -4,24 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import more_itertools as mit
from os.path import join
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
gne_dir = 'acoustic/glottal_noise'
ff_dir = 'acoustic/pitch'
csv_ext = '_gne.csv'
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
def gne_ratio(sound):
"""
@@ -35,54 +36,34 @@ def gne_ratio(sound):
gne_all_bands = harmonicity_gne.values
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
gne = np.nanmax(gne_all_bands) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
gne = np.nanmax(
gne_all_bands
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
"""
Preparing empty GNE matrix if something fails
"""
cols = ['Frames', r_config.aco_gne, r_config.err_reason]
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_gne = pd.DataFrame(out_val, columns=cols)
df_gne['dbm_master_url'] = video_uri
df_gne["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
def segment_pitch(dir_path, r_config):
    """
    Segmenting pitch freq for each voice segment.

    Scans ``dir_path`` for files ending in ``_pitch.csv`` and groups the row
    positions of the ``r_config.aco_voiceLabel`` column into runs of
    consecutive "yes" labels and runs of consecutive "no" labels.
    Args:
        dir_path: (str) directory that holds the ``*_pitch.csv`` output
        r_config: raw variable config object; only ``aco_voiceLabel`` is read
    Returns:
        (tuple) ``(com_speech_sort, voiced_yes, voiced_no)``: every run ordered
        by its first index, the "yes" runs, and the "no" runs. Three empty
        lists when no pitch file could be read. When several files match, the
        one listed last by ``os.listdir`` wins.
    """
    # Three independent lists; `([], ) * 3` bound all names to one shared list.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith("_pitch.csv"):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            runs_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
            runs_no = [list(group) for group in mit.consecutive_groups(indices_no)]
            runs_all = sorted(runs_yes + runs_no, key=lambda x: x[0])
        except Exception as e:
            # Still best effort (an unreadable file is skipped), but the reason
            # is logged instead of being silently discarded.
            logger.warning("Failed to segment pitch file {}: {}".format(file, e))
            continue
        # Publish the three results together so a failure part-way through a
        # file can never leave them describing different files.
        com_speech_sort, voiced_yes, voiced_no = runs_all, runs_yes, runs_no
    return com_speech_sort, voiced_yes, voiced_no
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
"""
calculating gne for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
@@ -104,7 +85,8 @@ def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_fi
gne_all_frames[idx] = max_gne
return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
"""
Preparing gne matrix
Args:
@@ -112,26 +94,36 @@ def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
out_loc: (str) Output directory for csv's
"""
dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path):
voice_seg = segment_pitch(dir_path, r_config)
if os.path.isdir(dir_path) or ff_df is not None:
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
gne_all_frames = [np.NaN] * len(voice_seg[0])
gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file)
gne_segment_frames = segment_gne(
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
)
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
df_gne[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df_gne[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_gne['Frames'] = df_gne.index
df_gne['dbm_master_url'] = video_uri
df_gne["Frames"] = df_gne.index
df_gne["dbm_master_url"] = video_uri
logger.info('Processing Output file {} '.format(out_loc))
if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
else:
error_txt = 'error: pitch freq not available'
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
error_txt = "error: pitch freq not available"
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
def run_gne(video_uri, out_dir, r_config):
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
"""
Processing all patient's for fetching glottal noise ratio
---------------
@@ -143,19 +135,32 @@ def run_gne(video_uri, out_dir, r_config):
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter) > 0:
audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file)
aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
error_txt = 'error: length less than 0.064'
empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
return
calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)
error_txt = "error: length less than 0.064"
df = empty_gne(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
else:
df = calc_gne(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e:
logger.error('Failed to process audio file')
e
logger.error("Failed to process audio file")

View File

@@ -1,77 +1,133 @@
"""
file_name: hnr
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
from os.path import join
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
hnr_dir = 'acoustic/harmonic_noise'
csv_ext = '_hnr.csv'
error_txt = 'error: length less than 0.064'
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
def hnr_ratio(filepath):
def gne_ratio(sound):
"""
Using parselmouth library fetching harmonic noise ratio ratio
Using parselmouth library fetching glottal noise excitation ratio
Args:
path: (.wav) audio file location
sound: parselmouth object
Returns:
(list) list of hnr ratio for each voice frame, min,max and mean hnr
(list) list of gne ratio for each voice frame
"""
sound = parselmouth.Sound(filepath)
harmonicity = sound.to_harmonicity_ac(time_step=.001)
harmonicity_gne = sound.to_harmonicity_gne()
gne_all_bands = harmonicity_gne.values
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
hnr_all_frames = harmonicity.values#[harmonicity.values != -200] nan it (****)
hnr_all_frames = np.where(hnr_all_frames==-200, np.NaN, hnr_all_frames)
return hnr_all_frames.transpose()
gne = np.nanmax(
gne_all_bands
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
return gne
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
"""
Preparing harmonic noise matrix
Preparing empty GNE matrix if something fails
"""
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_gne = pd.DataFrame(out_val, columns=cols)
df_gne["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Calculating gne for each voice segment.

    Args:
        com_speech_sort: (list) runs of pitch-frame indices, one per segment
        voiced_yes: (list) the runs that are scored; every other run stays NaN
        voiced_no: (list) not used here; kept so the call signature is unchanged
        gne_all_frames: (list) output buffer with one slot per entry of
            com_speech_sort; it is filled in place
        audio_file: (.wav) parsed audio file
    Returns:
        (list) gne_all_frames, holding the gne of each scored run that is
        longer than one frame and NaN for every other entry
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): a Sound built from a bare array presumably takes
                # parselmouth's default sampling frequency rather than snd's --
                # confirm this is intended before changing it.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                max_gne = gne_ratio(samples)
        except Exception as e:
            # Best effort: a segment that cannot be scored stays NaN. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate, and the cause is kept at debug level.
            logger.debug("GNE failed for segment {}: {}".format(idx, e))

        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
"""
Preparing gne matrix
Args:
audio_file: (.wav) parsed audio file
out_loc: (str) Output directory for csv's
"""
dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path) or ff_df is not None:
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
hnr_all_frames = hnr_ratio(audio_file)
df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr])
gne_all_frames = [np.NaN] * len(voice_seg[0])
gne_segment_frames = segment_gne(
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
)
df_hnr['Frames'] = df_hnr.index
df_hnr['dbm_master_url'] = video_uri
df_hnr[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
df_gne[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
df_gne["Frames"] = df_gne.index
df_gne["dbm_master_url"] = video_uri
def empty_hnr(video_uri, out_loc, fl_name, r_config):
if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
else:
error_txt = "error: pitch freq not available"
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
"""
Preparing empty HNR matrix if something fails
"""
cols = ['Frames', r_config.aco_hnr, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_hnr = pd.DataFrame(out_val, columns = cols)
df_hnr['dbm_master_url'] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
def run_hnr(video_uri, out_dir, r_config):
"""
Processing all patient's for fetching harmonic noise ratio
-------------------
-------------------
Processing all patient's for fetching glottal noise ratio
---------------
---------------
Args:
video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output
@@ -79,18 +135,32 @@ def run_hnr(video_uri, out_dir, r_config):
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter) > 0:
audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file)
aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_hnr(video_uri, out_loc, fl_name, r_config)
return
calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)
error_txt = "error: length less than 0.064"
df = empty_gne(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
else:
df = calc_gne(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e:
logger.error('Failed to process audio file')
e
logger.error("Failed to process audio file")

View File

@@ -1,73 +1,133 @@
"""
file_name: intensity
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import glob
import parselmouth
import librosa
from os.path import join
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
intensity_dir = 'acoustic/intensity'
csv_ext = '_intensity.csv'
error_txt = 'error: length less than 0.064'
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
def intensity_score(path):
def gne_ratio(sound):
"""
Using parselmouth library fetching Intensity
Using parselmouth library fetching glottal noise excitation ratio
Args:
path: (.wav) audio file location
sound: parselmouth object
Returns:
(list) list of Intensity for each voice frame
(list) list of gne ratio for each voice frame
"""
sound_pat = parselmouth.Sound(path)
intensity = sound_pat.to_intensity(time_step=.001)
return intensity.values[0]
harmonicity_gne = sound.to_harmonicity_gne()
gne_all_bands = harmonicity_gne.values
gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands)
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
gne = np.nanmax(
gne_all_bands
) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
"""
Preparing Intensity matrix
Preparing empty GNE matrix if something fails
"""
cols = ["Frames", r_config.aco_gne, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_gne = pd.DataFrame(out_val, columns=cols)
df_gne["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Calculating gne for each voice segment.

    Args:
        com_speech_sort: (list) runs of pitch-frame indices, one per segment
        voiced_yes: (list) the runs that are scored; every other run stays NaN
        voiced_no: (list) not used here; kept so the call signature is unchanged
        gne_all_frames: (list) output buffer with one slot per entry of
            com_speech_sort; it is filled in place
        audio_file: (.wav) parsed audio file
    Returns:
        (list) gne_all_frames, holding the gne of each scored run that is
        longer than one frame and NaN for every other entry
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): a Sound built from a bare array presumably takes
                # parselmouth's default sampling frequency rather than snd's --
                # confirm this is intended before changing it.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                max_gne = gne_ratio(samples)
        except Exception as e:
            # Best effort: a segment that cannot be scored stays NaN. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate, and the cause is kept at debug level.
            logger.debug("GNE failed for segment {}: {}".format(idx, e))

        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
"""
Preparing gne matrix
Args:
audio_file: (.wav) parsed audio file
out_loc: (str) Output directory for csv's
"""
dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path) or ff_df is not None:
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
intensity_frames = intensity_score(audio_file)
df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int])
gne_all_frames = [np.NaN] * len(voice_seg[0])
gne_segment_frames = segment_gne(
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
)
df_intensity['Frames'] = df_intensity.index
df_intensity['dbm_master_url'] = video_uri
df_intensity[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
df_gne[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext)
df_gne["Frames"] = df_gne.index
df_gne["dbm_master_url"] = video_uri
def empty_intensity(video_uri, out_loc, fl_name, r_config):
if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
return df_gne
else:
error_txt = "error: pitch freq not available"
return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
"""
Preparing empty Intensity matrix if something fails
"""
cols = ['Frames', r_config.aco_int, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_int = pd.DataFrame(out_val, columns = cols)
df_int['dbm_master_url'] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
def run_intensity(video_uri, out_dir, r_config):
"""
Processing all patient's for fetching Intensity
-------------------
-------------------
Processing all patient's for fetching glottal noise ratio
---------------
---------------
Args:
video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output
@@ -75,18 +135,32 @@ def run_intensity(video_uri, out_dir, r_config):
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter) > 0:
audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file)
aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
empty_intensity(video_uri, out_loc, fl_name, r_config)
return
calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)
error_txt = "error: length less than 0.064"
df = empty_gne(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
else:
df = calc_gne(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e:
logger.error('Failed to process audio file')
e
logger.error("Failed to process audio file")

View File

@@ -4,25 +4,24 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
jitter_dir = 'acoustic/jitter'
ff_dir = 'acoustic/pitch'
csv_ext = '_jitter.csv'
jitter_dir = "acoustic/jitter"
ff_dir = "acoustic/pitch"
csv_ext = "_jitter.csv"
def audio_jitter(sound):
"""
@@ -32,54 +31,36 @@ def audio_jitter(sound):
Returns:
(list) list of jitters for each voice frame
"""
pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
jitter = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
pointProcess = parselmouth.praat.call(
sound, "To PointProcess (periodic, cc)...", 80, 500
)
jitter = parselmouth.praat.call(
pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3
)
return jitter
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
"""
Preparing empty jitter matrix if something fails
"""
cols = ['Frames', r_config.aco_jitter, r_config.err_reason]
cols = ["Frames", r_config.aco_jitter, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]]
df_jitter = pd.DataFrame(out_val, columns=cols)
df_jitter['dbm_master_url'] = video_uri
df_jitter["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
return df_jitter
def segment_pitch(dir_path, r_config):
    """
    Segmenting pitch freq for each voice segment.

    Scans ``dir_path`` for files ending in ``_pitch.csv`` and groups the row
    positions of the ``r_config.aco_voiceLabel`` column into runs of
    consecutive "yes" labels and runs of consecutive "no" labels.
    Args:
        dir_path: (str) directory that holds the ``*_pitch.csv`` output
        r_config: raw variable config object; only ``aco_voiceLabel`` is read
    Returns:
        (tuple) ``(com_speech_sort, voiced_yes, voiced_no)``: every run ordered
        by its first index, the "yes" runs, and the "no" runs. Three empty
        lists when no pitch file could be read. When several files match, the
        one listed last by ``os.listdir`` wins.
    """
    # Three independent lists; `([], ) * 3` bound all names to one shared list.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith("_pitch.csv"):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            runs_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
            runs_no = [list(group) for group in mit.consecutive_groups(indices_no)]
            runs_all = sorted(runs_yes + runs_no, key=lambda x: x[0])
        except Exception as e:
            # Still best effort (an unreadable file is skipped), but the reason
            # is logged instead of being silently discarded.
            logger.warning("Failed to segment pitch file {}: {}".format(file, e))
            continue
        # Publish the three results together so a failure part-way through a
        # file can never leave them describing different files.
        com_speech_sort, voiced_yes, voiced_no = runs_all, runs_yes, runs_no
    return com_speech_sort, voiced_yes, voiced_no
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
"""
calculating jitter for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
@@ -101,7 +82,10 @@ def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_
jitter_frames[idx] = jitter
return jitter_frames
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_jitter(
video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
"""
Preparing jitter matrix
Args:
@@ -110,26 +94,36 @@ def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
r_config: config.config_raw_feature.pyConfigFeatureNmReader object
"""
dir_path = os.path.join(out_loc, ff_dir)
if os.path.isdir(dir_path):
voice_seg = segment_pitch(dir_path, r_config)
if os.path.isdir(dir_path) or ff_df is not None:
if ff_df is not None:
voice_seg = ut.process_segment_pitch(ff_df, r_config)
else:
voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)
jitter_frames = [np.NaN] * len(voice_seg[0])
jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file)
jitter_segment_frames = segment_jitter(
voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file
)
df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
df_jitter[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df_jitter[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_jitter['Frames'] = df_jitter.index
df_jitter['dbm_master_url'] = video_uri
logger.info('Processing Output file {} '.format(out_loc))
df_jitter["Frames"] = df_jitter.index
df_jitter["dbm_master_url"] = video_uri
if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
df = df_jitter
else:
error_txt = 'error: fundamental freq not available'
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
error_txt = "error: fundamental freq not available"
df = empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
return df
def run_jitter(video_uri, out_dir, r_config):
def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None):
"""
Processing all patient's videos for fetching jitter
-------------------
@@ -141,19 +135,32 @@ def run_jitter(video_uri, out_dir, r_config):
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter) > 0:
audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file)
aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
error_txt = 'error: length less than 0.064'
empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
return
calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)
error_txt = "error: length less than 0.064"
df = empty_jitter(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
else:
df = calc_jitter(
video_uri,
audio_file,
out_loc,
fl_name,
r_config,
save=save,
ff_df=ff_df,
)
return df
except Exception as e:
logger.error('Failed to process audio file')
logger.error("Error in jitter: {}".format(e))
logger.error("Failed to process audio file")

View File

@@ -4,41 +4,74 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
import librosa
from os.path import join
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
mfcc_dir = 'acoustic/mfcc'
csv_ext = '_mfcc.csv'
error_txt = 'error: length less than 0.064'
mfcc_dir = "acoustic/mfcc"
csv_ext = "_mfcc.csv"
error_txt = "error: length less than 0.064"
def empty_mfcc(video_uri, out_loc, fl_name, r_config):
def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True):
"""
Preparing empty empty_mfcc matrix if something fails
"""
cols = ['Frames', r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4, r_config.aco_mfcc5,
r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8, r_config.aco_mfcc9, r_config.aco_mfcc10,
r_config.aco_mfcc11, r_config.aco_mfcc12, r_config.err_reason]
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
error_txt]]
cols = [
"Frames",
r_config.aco_mfcc1,
r_config.aco_mfcc2,
r_config.aco_mfcc3,
r_config.aco_mfcc4,
r_config.aco_mfcc5,
r_config.aco_mfcc6,
r_config.aco_mfcc7,
r_config.aco_mfcc8,
r_config.aco_mfcc9,
r_config.aco_mfcc10,
r_config.aco_mfcc11,
r_config.aco_mfcc12,
r_config.err_reason,
]
out_val = [
[
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
error_txt,
]
]
df_mfcc = pd.DataFrame(out_val, columns=cols)
df_mfcc['dbm_master_url'] = video_uri
df_mfcc["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
return df_mfcc
def audio_mfcc(path):
"""
Using parselmouth library fetching mfccs
@@ -48,12 +81,13 @@ def audio_mfcc(path):
(list) list of mfccs for each voice frame
"""
sound = parselmouth.Sound(path)
mfcc_object = sound.to_mfcc(time_step=.001,number_of_coefficients=12)
mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12)
mfccs = mfcc_object.to_array()
mfccs = np.delete(mfccs, (0), axis=0)
return mfccs
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
"""
Preparing mfcc matrix
Args:
@@ -66,18 +100,22 @@ def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
mfccs = audio_mfcc(audio_file)
for i in range(1, 13):
conf_str = r_config.base_raw['raw_feature']
dict_[conf_str['aco_mfcc' + str(i)]] = mfccs[i-1, :]
conf_str = r_config.base_raw["raw_feature"]
dict_[conf_str["aco_mfcc" + str(i)]] = mfccs[i - 1, :]
df = pd.DataFrame(dict_)
df['Frames'] = df.index
df["Frames"] = df.index
df[r_config.err_reason] = 'Pass'# may replace based on threshold in future release
df['dbm_master_url'] = video_uri
df[r_config.err_reason] = "Pass" # may replace based on threshold in future release
df["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
return df
def run_mfcc(video_uri, out_dir, r_config, save=True):
    """
    Processing one recording to fetch mfccs
    Args:
        video_uri: video path; the matching '<fl_name>.wav' is looked up in the
            input location resolved by ut.filter_path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) when True the result is also written through ut.save_output
    Returns:
        Dataframe from empty_mfcc (audio shorter than 0.064 sec) or calc_mfcc;
        None when no wav file is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)
        if float(aud_dur) < 0.064:
            logger.info(
                "Output file {} size is less than 0.064sec".format(audio_file)
            )
            return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save)

        return calc_mfcc(
            video_uri, audio_file, out_loc, fl_name, r_config, save=save
        )
    except Exception:
        # Best-effort behaviour is kept (no re-raise), but the traceback is now
        # logged instead of being silently discarded.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -4,23 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import os
import glob
from pydub import AudioSegment
import librosa
import pandas as pd
import numpy as np
import webrtcvad
from os.path import join
import logging
import os
from os.path import join
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut
import numpy as np
import pandas as pd
import webrtcvad
from pydub import AudioSegment
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
pause_seg_dir = 'acoustic/pause_segment'
csv_ext = '_pausechar.csv'
pause_seg_dir = "acoustic/pause_segment"
csv_ext = "_pausechar.csv"
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
"""
@@ -40,20 +42,22 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p]
if len(pause_len) > 0:
pause_len_mean = np.mean(pause_len)
pause_len_std = np.std(pause_len)
pause_time = np.sum(pause_len)
else:
pause_len_mean = 0
pause_len_std = 0
pause_time = 0
pause_frac = pause_time / total_time
timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac}
timing_dict = {
r_config.aco_totaltime: total_time,
r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses,
r_config.aco_pausetime: pause_time,
r_config.aco_pausefrac: pause_frac,
}
return timing_dict
def process_silence(audio_file, r_config):
"""
Returns dataframe for pause between words using voice activity detection
@@ -80,48 +84,64 @@ def process_silence(audio_file, r_config):
frames = list(frames)
# longer pad time screens out little blips, but misses short silences
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames)
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(
sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames
)
# Logic to handle blank audio file
if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
return ''
return ""
t_start = long_seg_starts[0]
t_end = long_seg_ends[-1]
# shorter pad time captures short silences (but misfires on little blips)
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames)
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(
sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames
)
seg_starts = []
seg_ends = []
for k in range(len(short_seg_starts)): # logic to clean up some typical misfires
for k in range(
len(short_seg_starts)
): # logic to clean up some typical misfires
if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end):
seg_starts.append(short_seg_starts[k])
seg_ends.append(short_seg_ends[k])
if len(seg_starts) == 0 or len(seg_ends) == 0:
return ''
return ""
timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
feat_dict_list.append(timing_dict)
df = pd.DataFrame(feat_dict_list)
df[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df[r_config.err_reason] = "Pass" # will replace with threshold in future release
return df
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty Pause Segment matrix if something fails
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name handed to ut.save_output
        r_config: raw variable config object providing the column names
        error_txt: (str) text stored in the r_config.err_reason column
        save: (bool) when True the dataframe is written through ut.save_output
    Returns:
        Single-row dataframe: NaN for every pause feature plus the error text
    """
    feature_cols = [
        r_config.aco_totaltime,
        r_config.aco_speakingtime,
        r_config.aco_numpauses,
        r_config.aco_pausetime,
        r_config.aco_pausefrac,
    ]
    cols = feature_cols + [r_config.err_reason]
    out_val = [[np.nan] * len(feature_cols) + [error_txt]]

    df_pause = pd.DataFrame(out_val, columns=cols)
    df_pause["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
    return df_pause
def run_pause_segment(video_uri, out_dir, r_config, save=True):
    """
    Processing one recording for getting Pause Segment
    Args:
        video_uri: video path; the matching '<fl_name>.wav' is looked up in the
            input location resolved by ut.filter_path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) when True the result is also written through ut.save_output
    Returns:
        Pause-segment dataframe, or the single-row error dataframe from
        empty_pause_segment; None when no wav file is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)
        if float(aud_dur) < 0.064:
            logger.info(
                "Output file {} size is less than 0.064sec".format(audio_file)
            )
            error_txt = "error: length less than 0.064"
            # Honour `save` and hand the error frame back, like the other
            # branches of this function do.
            return empty_pause_segment(
                video_uri, out_loc, fl_name, r_config, error_txt, save=save
            )

        logger.info("Converting stereo sound to mono-lD")
        sound_mono = AudioSegment.from_wav(audio_file)
        sound_mono = sound_mono.set_channels(1)
        sound_mono = sound_mono.set_frame_rate(48000)

        mono_wav = os.path.join(input_loc, fl_name + "_mono.wav")
        sound_mono.export(mono_wav, format="wav")
        try:
            df_pause_seg = process_silence(mono_wav, r_config)
        finally:
            # The mono file is only a scratch copy; remove it even when the
            # voice activity detection raises.
            os.remove(mono_wav)

        # process_silence returns "" instead of a dataframe when no segment is found.
        if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
            df_pause_seg["dbm_master_url"] = video_uri
            if save:
                logger.info("Processing Output file {} ".format(out_loc))
                ut.save_output(
                    df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext
                )
            return df_pause_seg

        error_txt = "error: webrtcvad returns no segment"
        return empty_pause_segment(
            video_uri, out_loc, fl_name, r_config, error_txt, save=save
        )
    except Exception:
        # logger.exception records the traceback; the previous call passed the
        # exception text as a %-argument without a placeholder in the message.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -4,23 +4,24 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
from os.path import join
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
ff_dir = 'acoustic/pitch'
csv_ext = '_pitch.csv'
error_txt = 'error: length less than 0.064'
ff_dir = "acoustic/pitch"
csv_ext = "_pitch.csv"
error_txt = "error: length less than 0.064"
def audio_pitch(path):
    """
    Using parselmouth library fetching fundamental frequency
    Args:
        path: audio file location passed to parselmouth.Sound
    Returns:
        (list) list of pitch/fundamental frequency for each voice frame
    """
    sound_pat = parselmouth.Sound(path)
    pitch = sound_pat.to_pitch(time_step=0.001)
    # 'frequency' field of the selected pitch candidates, one value per frame.
    pitch_values = pitch.selected_array["frequency"]
    return list(pitch_values)
def label_speech(row, fd_freq):
    """
    identify whether frame is voiced or not
    Args:
        row: mapping/row object indexable by fd_freq
        fd_freq: key of the fundamental frequency value inside row
    Returns:
        (str) yes or no indicator for voice
    """
    # Any value that is not strictly positive (0, negative, NaN) is labelled "no".
    return "yes" if row[fd_freq] > 0 else "no"
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Preparing pitch frequency matrix
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: audio file handed to audio_pitch
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name handed to ut.save_output
        r_config: raw variable config object providing the column names
        save: (bool) when True the dataframe is written through ut.save_output
    Returns:
        Dataframe with one row per pitch frame
    """
    ff_frames = audio_pitch(audio_file)

    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
    df_ffreq["Frames"] = df_ffreq.index
    # Same rule as label_speech (strictly positive frequency -> "yes"), applied
    # to the whole column at once; this also works when there are no frames.
    df_ffreq[r_config.aco_voiceLabel] = np.where(
        df_ffreq[r_config.aco_ff] > 0, "yes", "no"
    )

    df_ffreq[r_config.err_reason] = "Pass"  # will replace with threshold in future release
    df_ffreq["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Preparing empty pitch frequency matrix if something fails
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name handed to ut.save_output
        r_config: raw variable config object providing the column names
        save: (bool) when True the dataframe is written through ut.save_output
    Returns:
        Single-row dataframe: NaN frame and frequency, voice label "no" and the
        module level error_txt as error reason
    """
    cols = [
        "Frames",
        r_config.aco_ff,
        r_config.aco_voiceLabel,
        r_config.err_reason,
    ]
    df_ffreq = pd.DataFrame([[np.nan, np.nan, "no", error_txt]], columns=cols)
    df_ffreq["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
def run_pitch(video_uri, out_dir, r_config, save=True):
    """
    Processing audio for fetching pitch
    Args:
        video_uri: video path; the matching '<fl_name>.wav' is looked up in the
            input location resolved by ut.filter_path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) when True the result is also written through ut.save_output
    Returns:
        Dataframe from empty_pitch (audio shorter than 0.064 sec) or calc_pitch;
        None when no wav file is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)
        if float(aud_dur) < 0.064:
            logger.info(
                "Output file {} size is less than 0.064sec".format(audio_file)
            )
            return empty_pitch(video_uri, out_loc, fl_name, r_config, save=save)

        return calc_pitch(
            video_uri, audio_file, out_loc, fl_name, r_config, save=save
        )
    except Exception:
        # Best-effort behaviour is kept (no re-raise), but the traceback is now
        # logged instead of being silently discarded.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -4,26 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
import logging
import os
from os.path import join
import logging
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
shimmer_dir = 'acoustic/shimmer'
ff_dir = 'acoustic/pitch'
csv_ext = '_shimmer.csv'
shimmer_dir = "acoustic/shimmer"
ff_dir = "acoustic/pitch"
csv_ext = "_shimmer.csv"
def audio_shimmer(sound):
    """
    Using parselmouth library fetching shimmer
    Args:
        sound: parselmouth sound object
    Returns:
        Result of Praat's "Get shimmer (local)" command for the given sound
    """
    # NOTE(review): 80 and 500 are presumably the pitch floor/ceiling in Hz of the
    # Praat command - confirm against the Praat manual.
    point_process = parselmouth.praat.call(
        sound, "To PointProcess (periodic, cc)...", 80, 500
    )
    return parselmouth.praat.call(
        [sound, point_process], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
    )
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Preparing empty shimmer matrix if something fails
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name handed to ut.save_output
        r_config: raw variable config object providing the column names
        error_txt: (str) text stored in the r_config.err_reason column
        save: (bool) when True the dataframe is written through ut.save_output
    Returns:
        Single-row dataframe: NaN frame and shimmer plus the error text
    """
    cols = ["Frames", r_config.aco_shimmer, r_config.err_reason]
    df_shimmer = pd.DataFrame([[np.nan, np.nan, error_txt]], columns=cols)
    df_shimmer["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
def segment_pitch(dir_path, r_config):
    """
    segmenting pitch freq for each voice segment
    Args:
        dir_path: (str) directory scanned for files ending in '_pitch.csv'
        r_config: raw variable config object; aco_voiceLabel names the label column
    Returns:
        (tuple) all segments sorted by first frame index, the "yes" segments and
        the "no" segments; each segment is a list of consecutive row indices.
        When several pitch files exist the last readable one wins; three empty
        lists are returned when none could be read.
    """
    from itertools import groupby

    def _consecutive_runs(indices):
        # Within a run of consecutive integers, value minus position is constant.
        return [
            [value for _, value in run]
            for _, run in groupby(
                enumerate(indices), key=lambda pair: pair[1] - pair[0]
            )
        ]

    # Three independent lists (not one shared object bound to three names).
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    for file in os.listdir(dir_path):
        if not file.endswith("_pitch.csv"):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
        except Exception:
            # Still best-effort: an unreadable file is skipped, but no longer silently.
            logger.exception("Failed to read pitch file {}".format(file))
            continue

        indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
        indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
        voiced_yes = _consecutive_runs(indices_yes)
        voiced_no = _consecutive_runs(indices_no)
        com_speech_sort = sorted(voiced_yes + voiced_no, key=lambda seg: seg[0])

    return com_speech_sort, voiced_yes, voiced_no
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
"""
calculating shimmer for each voice segment
"""
snd = parselmouth.Sound(audio_file)
pitch = snd.to_pitch(time_step=.001)
pitch = snd.to_pitch(time_step=0.001)
for idx, vs in enumerate(com_speech_sort):
try:
@@ -102,7 +83,10 @@ def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audi
shimmer_frames[idx] = shimmer
return shimmer_frames
def calc_shimmer(
    video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
    """
    Preparing shimmer matrix
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: audio file handed to segment_shimmer
        out_loc: (str) output location; the pitch output is looked up beneath it
        fl_name: (str) file name handed to ut.save_output
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
        save: (bool) when True the dataframe is written through ut.save_output
        ff_df: optional in-memory pitch dataframe; when given it is used instead
            of the pitch output directory
    Returns:
        Shimmer dataframe, or the single-row error dataframe from empty_shimmer
        when neither ff_df nor the pitch output directory is available
    """
    dir_path = os.path.join(out_loc, ff_dir)

    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: fundamental freq not available"
        return empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # One NaN placeholder per segment (np.nan: the np.NaN alias no longer exists
    # in NumPy 2.0); segment_shimmer is called with this list and returns the
    # per-segment values.
    shimmer_frames = [np.nan] * len(voice_seg[0])
    shimmer_segment_frames = segment_shimmer(
        voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file
    )

    df_shimmer = pd.DataFrame(
        shimmer_segment_frames, columns=[r_config.aco_shimmer]
    )
    df_shimmer[r_config.err_reason] = "Pass"  # will replace with threshold in future release
    df_shimmer["Frames"] = df_shimmer.index
    df_shimmer["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Processing one recording to fetch shimmer
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: (bool) when True the result is also written through ut.save_output
        ff_df: optional in-memory pitch dataframe forwarded to calc_shimmer
    Returns:
        Dataframe from empty_shimmer (audio shorter than 0.064 sec) or
        calc_shimmer; None when no wav file is found
    """
    # NOTE: this function does not catch exceptions itself, so errors raised
    # while processing propagate to the caller.
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
    if not aud_filter:
        return None

    audio_file = aud_filter[0]
    aud_dur = ut.get_length(audio_file)
    if float(aud_dur) < 0.064:
        logger.info("Output file {} size is less than 0.064sec".format(audio_file))
        error_txt = "error: length less than 0.064"
        return empty_shimmer(
            video_uri, out_loc, fl_name, r_config, error_txt, save=save
        )

    return calc_shimmer(
        video_uri,
        audio_file,
        out_loc,
        fl_name,
        r_config,
        save=save,
        ff_df=ff_df,
    )

View File

@@ -4,22 +4,23 @@ project_name: DBM
created: 2020-20-07
"""
import parselmouth
import pandas as pd
import numpy as np
import glob
import librosa
from os.path import join
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
vfs_dir = 'acoustic/voice_frame_score'
csv_ext = '_voiceprev.csv'
error_txt = 'error: length less than 0.064'
vfs_dir = "acoustic/voice_frame_score"
csv_ext = "_voiceprev.csv"
error_txt = "error: length less than 0.064"
def audio_pitch_frame(pitch):
"""
@@ -33,6 +34,7 @@ def audio_pitch_frame(pitch):
voiced_frames = pitch.count_voiced_frames()
return total_frames, voiced_frames
def voice_segment(path):
"""
Using parselmouth library for fundamental frequency
@@ -48,7 +50,8 @@ def voice_segment(path):
voiced_percentage = (voiced_frames / total_frames) * 100
return voiced_percentage, voiced_frames, total_frames
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
"""
creating dataframe matrix for voice frame score
Args:
@@ -62,27 +65,39 @@ def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
df_vfs[r_config.aco_totVoiceFrame] = [total_frames]
df_vfs[r_config.aco_voicePct] = [voice_percentage]
df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df_vfs[
r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_vfs['Frames'] = df_vfs.index
df_vfs['dbm_master_url'] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
df_vfs["Frames"] = df_vfs.index
df_vfs["dbm_master_url"] = video_uri
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
return df_vfs
def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Preparing empty VFS matrix if something fails
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name handed to ut.save_output
        r_config: raw variable config object providing the column names
        save: (bool) when True the dataframe is written through ut.save_output
    Returns:
        Single-row dataframe: NaN for every voice frame feature and the module
        level error_txt as error reason
    """
    value_cols = [
        "Frames",
        r_config.aco_voiceFrame,
        r_config.aco_totVoiceFrame,
        r_config.aco_voicePct,
    ]
    cols = value_cols + [r_config.err_reason]
    out_val = [[np.nan] * len(value_cols) + [error_txt]]

    df_vfs = pd.DataFrame(out_val, columns=cols)
    df_vfs["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
    return df_vfs
def run_vfs(video_uri, out_dir, r_config, save=True):
    """
    Processing one recording for fetching voice frame score
    Args:
        video_uri: video path; the matching '<fl_name>.wav' is looked up in the
            input location resolved by ut.filter_path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) when True the result is also written through ut.save_output
    Returns:
        Dataframe from empty_vfs (audio shorter than 0.064 sec) or calc_vfs;
        None when no wav file is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)
        if float(aud_dur) < 0.064:
            logger.info(
                "Output file {} size is less than 0.064sec".format(audio_file)
            )
            return empty_vfs(video_uri, out_loc, fl_name, r_config, save=save)

        return calc_vfs(
            video_uri, audio_file, out_loc, fl_name, r_config, save=save
        )
    except Exception:
        # Best-effort behaviour is kept (no re-raise), but the traceback is now
        # logged instead of being silently discarded.
        logger.exception("Failed to process audio file")
        return None