open source pkg v1

This commit is contained in:
Vijay Yadev
2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions

View File

@@ -0,0 +1,125 @@
"""
file_name: formant_freq
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import parselmouth
import numpy as np
import parselmouth
import librosa
import glob
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
formant_dir = 'audio/formant_freq'
csv_ext = '_formant.csv'
error_txt = 'error: length less than 0.064'
def formant_list(formant, snd):
    """
    Sample the first four formant frequencies on a 1 ms time grid
    Args:
        formant: Formant object for the sound wave; must provide
            get_value_at_time(formant_number, time)
        snd: Parselmouth sound object; only snd.duration is read
    Returns:
        Four separate lists (first through fourth formant), one value per
        sampled time point
    """
    # One independent list per formant. `([], ) * 4` bound all four names to
    # the SAME list object, so every value was interleaved into one shared
    # list and all four "columns" came out identical.
    f1_list, f2_list, f3_list, f4_list = [], [], [], []
    # Sampling stops 20 ms before the end of the sound.
    dur_round = round(snd.duration - 0.02, 2)
    for time in np.arange(0.001, dur_round, 0.001):
        f1_list.append(formant.get_value_at_time(1, time))
        f2_list.append(formant.get_value_at_time(2, time))
        f3_list.append(formant.get_value_at_time(3, time))
        f4_list.append(formant.get_value_at_time(4, time))
    return f1_list, f2_list, f3_list, f4_list
def formant_score(path):
    """
    Load an audio file with parselmouth and extract its formant tracks
    Args:
        path: (.wav) audio file location
    Returns:
        Whatever formant_list returns for the Burg formant analysis of the
        sound (one list per formant, first through fourth)
    """
    snd = parselmouth.Sound(path)
    # Burg formant analysis with a 1 ms analysis step
    burg_formant = snd.to_formant_burg(time_step=.001)
    return formant_list(burg_formant, snd)
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the formant frequency table for one audio file and save it
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file; fl_name: input file name
        out_loc: (str) Output directory; r_config: raw variable config
    """
    fm1_vals, fm2_vals, fm3_vals, fm4_vals = formant_score(audio_file)

    df_formant = pd.DataFrame(fm1_vals, columns=[r_config.aco_fm1])
    remaining_tracks = (
        (r_config.aco_fm2, fm2_vals),
        (r_config.aco_fm3, fm3_vals),
        (r_config.aco_fm4, fm4_vals),
    )
    for col_name, col_vals in remaining_tracks:
        df_formant[col_name] = col_vals

    df_formant.replace('', np.nan, regex=True, inplace=True)
    # will replace with threshold in future release
    df_formant[r_config.err_reason] = 'Pass'
    df_formant['Frames'] = df_formant.index
    df_formant['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
def empty_fm(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder formant table carrying the module's error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
    """
    formant_cols = [r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4]
    cols = ['Frames'] + formant_cols + [r_config.err_reason]
    # NaN for 'Frames' and every formant column, then the error reason
    placeholder_row = [np.nan] * (len(formant_cols) + 1) + [error_txt]

    df_fm = pd.DataFrame([placeholder_row], columns=cols)
    df_fm['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
def run_formant(video_uri, out_dir, r_config):
    """
    Entry point: compute formant frequencies for the audio of one video
    ---------------
    ---------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_fm(video_uri, out_loc, fl_name, r_config)
    else:
        calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,157 @@
"""
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import more_itertools as mit
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
gne_dir = 'audio/glottal_noise'
ff_dir = 'audio/pitch'
csv_ext = '_gne_frame.csv'
def gne_ratio(sound):
    """
    Using parselmouth library fetching glottal noise excitation ratio
    Args:
        sound: parselmouth sound object (must provide to_harmonicity_gne())
    Returns:
        (float) largest GNE value over all entries of the harmonicity matrix,
        ignoring entries equal to -200
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # -200 entries are masked out (presumably the marker for undefined
    # values, confirm against Praat docs). `np.nan` is used because the
    # `np.NaN` alias was removed in NumPy 2.0.
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)
    # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    gne = np.nanmax(gne_all_bands)
    return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder GNE table carrying the given error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
        error_txt: (str) reason written to the error column
    """
    placeholder_row = [np.nan, np.nan, error_txt]
    df_gne = pd.DataFrame(
        [placeholder_row],
        columns=['Frames', r_config.aco_gne, r_config.err_reason],
    )
    df_gne['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
def segment_pitch(dir_path, r_config):
    """
    Split the saved pitch track into runs of consecutive voiced/unvoiced rows

    Every file in dir_path whose name ends with '_pitch.csv' is read; when
    several match, the last one processed successfully wins.
    Args:
        dir_path: directory holding the pitch csv output
        r_config: raw variable config object; aco_voiceLabel names the column
            holding the "yes"/"no" voice label
    Returns:
        (com_speech_sort, voiced_yes, voiced_no): all runs ordered by their
        first row index, the voiced runs and the unvoiced runs. Each run is a
        list of consecutive row indices. Three empty lists when no pitch file
        could be processed.
    """
    # Three distinct lists: `([], ) * 3` handed callers one shared object.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            runs_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            runs_no = [list(group) for group in mit.consecutive_groups(indices_no)]
            runs_sorted = sorted(runs_yes + runs_no, key=lambda x: x[0])
        except Exception:
            # Still best effort (a bad pitch file must not abort the run), but
            # no longer silent, and Ctrl-C / SystemExit are not swallowed.
            logger.warning('Skipping unreadable pitch file %s', file, exc_info=True)
            continue
        # Publish the three results together so a failure never leaves them
        # half-updated.
        com_speech_sort, voiced_yes, voiced_no = runs_sorted, runs_yes, runs_no
    return com_speech_sort, voiced_yes, voiced_no
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    calculating gne for each voice segment
    Args:
        com_speech_sort: all segments (lists of pitch row indices) in order
        voiced_yes: the voiced segments; only these get a GNE value
        voiced_no: unvoiced segments (not used here, kept for the callers)
        gne_all_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        gne_all_frames, holding the GNE of every voiced segment longer than
        one row and NaN everywhere else (including segments whose analysis
        failed)
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)
    samples_all = snd.as_array()[0]
    # Hashable copy of the voiced segments: O(1) membership per segment
    # instead of scanning a list of lists every iteration.
    voiced_lookup = {tuple(seg) for seg in voiced_yes}
    for idx, vs in enumerate(com_speech_sort):
        # `np.nan`, not `np.NaN`: the alias was removed in NumPy 2.0 and,
        # inside the old bare `except`, left the result name unbound.
        max_gne = np.nan
        if len(vs) > 1 and tuple(vs) in voiced_lookup:
            try:
                # NOTE(review): row indices are passed as frame numbers as in
                # the original code; confirm the 0/1-based convention.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # Keep the source sampling rate; without it the slice is
                # interpreted at parselmouth's default of 44100 Hz.
                samples = parselmouth.Sound(samples_all[snd_start:snd_end],
                                            sampling_frequency=snd.sampling_frequency)
                max_gne = gne_ratio(samples)
            except Exception:
                # Best effort per segment: a failed analysis keeps NaN.
                logger.debug('GNE failed for segment %s', idx, exc_info=True)
        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing gne matrix
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's; the pitch output is
            expected in its 'audio/pitch' sub-directory
        fl_name: input file name; r_config: raw variable config object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
        # One slot per voice segment. `np.nan` because the `np.NaN` alias
        # was removed in NumPy 2.0.
        gne_all_frames = [np.nan] * len(com_speech_sort)
        gne_segment_frames = segment_gne(com_speech_sort, voiced_yes, voiced_no,
                                         gne_all_frames, audio_file)

        df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
        df_gne[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
        # Rows are voice segments, so 'Frames' holds the segment index.
        df_gne['Frames'] = df_gne.index
        df_gne['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    else:
        # Without the pitch output there are no segments to analyse.
        error_txt = 'error: pitch freq not available'
        empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
def run_gne(video_uri, out_dir, r_config):
    """
    Entry point: compute the glottal noise ratio for the audio of one video
    ---------------
    ---------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        error_txt = 'error: length less than 0.064'
        empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
    else:
        calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,92 @@
"""
file_name: hnr
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
hnr_dir = 'audio/harmonic_noise'
csv_ext = '_hnr_frame.csv'
error_txt = 'error: length less than 0.064'
def hnr_ratio(filepath):
    """
    Using parselmouth library fetching harmonic noise ratio
    Args:
        filepath: (.wav) audio file location
    Returns:
        Transposed array of harmonicity values computed with a 1 ms time
        step; entries equal to -200 are replaced with NaN
    """
    sound = parselmouth.Sound(filepath)
    harmonicity = sound.to_harmonicity_ac(time_step=.001)
    hnr_all_frames = harmonicity.values
    # -200 entries are masked out (presumably the marker for undefined
    # frames, confirm against Praat docs). `np.nan` is used because the
    # `np.NaN` alias was removed in NumPy 2.0.
    hnr_all_frames = np.where(hnr_all_frames == -200, np.nan, hnr_all_frames)
    return hnr_all_frames.transpose()
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the harmonic-noise table for one audio file and save it
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name; r_config: raw variable config object
    """
    df_hnr = pd.DataFrame(hnr_ratio(audio_file), columns=[r_config.aco_hnr])
    df_hnr['Frames'] = df_hnr.index
    df_hnr['dbm_master_url'] = video_uri
    # will replace with threshold in future release
    df_hnr[r_config.err_reason] = 'Pass'

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
def empty_hnr(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder HNR table carrying the module's error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
    """
    placeholder_row = [np.nan, np.nan, error_txt]
    df_hnr = pd.DataFrame(
        [placeholder_row],
        columns=['Frames', r_config.aco_hnr, r_config.err_reason],
    )
    df_hnr['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
def run_hnr(video_uri, out_dir, r_config):
    """
    Entry point: compute the harmonic noise ratio for the audio of one video
    -------------------
    -------------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_hnr(video_uri, out_loc, fl_name, r_config)
    else:
        calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,88 @@
"""
file_name: intensity
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import glob
import parselmouth
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
intensity_dir = 'audio/intensity'
csv_ext = '_intensity.csv'
error_txt = 'error: length less than 0.064'
def intensity_score(path):
    """
    Load an audio file with parselmouth and extract its intensity contour
    Args:
        path: (.wav) audio file location
    Returns:
        First row of the Intensity object's value matrix, computed with a
        1 ms time step
    """
    snd = parselmouth.Sound(path)
    intensity_obj = snd.to_intensity(time_step=.001)
    first_row = intensity_obj.values[0]
    return first_row
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the intensity table for one audio file and save it
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name; r_config: raw variable config object
    """
    df_intensity = pd.DataFrame(intensity_score(audio_file), columns=[r_config.aco_int])
    df_intensity['Frames'] = df_intensity.index
    df_intensity['dbm_master_url'] = video_uri
    # will replace with threshold in future release
    df_intensity[r_config.err_reason] = 'Pass'

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext)
def empty_intensity(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder intensity table carrying the module's error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
    """
    placeholder_row = [np.nan, np.nan, error_txt]
    df_int = pd.DataFrame(
        [placeholder_row],
        columns=['Frames', r_config.aco_int, r_config.err_reason],
    )
    df_int['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
def run_intensity(video_uri, out_dir, r_config):
    """
    Entry point: compute intensity for the audio of one video
    -------------------
    -------------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_intensity(video_uri, out_loc, fl_name, r_config)
    else:
        calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,155 @@
"""
file_name: jitter_processing
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
jitter_dir = 'audio/jitter'
ff_dir = 'audio/pitch'
csv_ext = '_jitter.csv'
def audio_jitter(sound):
    """
    Compute local jitter for a sound through Praat commands
    Args:
        sound: parselmouth sound object
    Returns:
        Result of Praat's "Get jitter (local)" on the periodic (cc) point
        process built from the sound
    """
    praat_call = parselmouth.praat.call
    # point process built with the arguments 80 and 500
    point_process = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return praat_call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder jitter table carrying the given error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
        error_txt: (str) reason written to the error column
    """
    placeholder_row = [np.nan, np.nan, error_txt]
    df_jitter = pd.DataFrame(
        [placeholder_row],
        columns=['Frames', r_config.aco_jitter, r_config.err_reason],
    )
    df_jitter['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
def segment_pitch(dir_path, r_config):
    """
    Split the saved pitch track into runs of consecutive voiced/unvoiced rows

    Every file in dir_path whose name ends with '_pitch.csv' is read; when
    several match, the last one processed successfully wins.
    Args:
        dir_path: directory holding the pitch csv output
        r_config: raw variable config object; aco_voiceLabel names the column
            holding the "yes"/"no" voice label
    Returns:
        (com_speech_sort, voiced_yes, voiced_no): all runs ordered by their
        first row index, the voiced runs and the unvoiced runs. Each run is a
        list of consecutive row indices. Three empty lists when no pitch file
        could be processed.
    """
    # Three distinct lists: `([], ) * 3` handed callers one shared object.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            runs_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            runs_no = [list(group) for group in mit.consecutive_groups(indices_no)]
            runs_sorted = sorted(runs_yes + runs_no, key=lambda x: x[0])
        except Exception:
            # Still best effort (a bad pitch file must not abort the run), but
            # no longer silent, and Ctrl-C / SystemExit are not swallowed.
            logger.warning('Skipping unreadable pitch file %s', file, exc_info=True)
            continue
        # Publish the three results together so a failure never leaves them
        # half-updated.
        com_speech_sort, voiced_yes, voiced_no = runs_sorted, runs_yes, runs_no
    return com_speech_sort, voiced_yes, voiced_no
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
    """
    calculating jitter for each voice segment
    Args:
        com_speech_sort: all segments (lists of pitch row indices) in order
        voiced_yes: the voiced segments; only these get a jitter value
        voiced_no: unvoiced segments (not used here, kept for the callers)
        jitter_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        jitter_frames, holding the jitter of every voiced segment longer than
        one row and NaN everywhere else (including segments whose analysis
        failed)
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)
    samples_all = snd.as_array()[0]
    # Hashable copy of the voiced segments: O(1) membership per segment
    # instead of scanning a list of lists every iteration.
    voiced_lookup = {tuple(seg) for seg in voiced_yes}
    for idx, vs in enumerate(com_speech_sort):
        # `np.nan`, not `np.NaN`: the alias was removed in NumPy 2.0 and,
        # inside the old bare `except`, left the result name unbound.
        jitter = np.nan
        if len(vs) > 1 and tuple(vs) in voiced_lookup:
            try:
                # NOTE(review): row indices are passed as frame numbers as in
                # the original code; confirm the 0/1-based convention.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # Keep the source sampling rate; without it the slice is
                # interpreted at parselmouth's default of 44100 Hz.
                samples = parselmouth.Sound(samples_all[snd_start:snd_end],
                                            sampling_frequency=snd.sampling_frequency)
                jitter = audio_jitter(samples)
            except Exception:
                # Best effort per segment: a failed analysis keeps NaN.
                logger.debug('Jitter failed for segment %s', idx, exc_info=True)
        jitter_frames[idx] = jitter
    return jitter_frames
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing jitter matrix
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv; the pitch output is expected
            in its 'audio/pitch' sub-directory
        fl_name: input file name
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
        # One slot per voice segment. `np.nan` because the `np.NaN` alias
        # was removed in NumPy 2.0.
        jitter_frames = [np.nan] * len(com_speech_sort)
        jitter_segment_frames = segment_jitter(com_speech_sort, voiced_yes, voiced_no,
                                               jitter_frames, audio_file)

        df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
        df_jitter[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
        # Rows are voice segments, so 'Frames' holds the segment index.
        df_jitter['Frames'] = df_jitter.index
        df_jitter['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
    else:
        # Without the pitch output there are no segments to analyse.
        error_txt = 'error: fundamental freq not available'
        empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
def run_jitter(video_uri, out_dir, r_config):
    """
    Entry point: compute jitter for the audio of one video
    -------------------
    -------------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        error_txt = 'error: length less than 0.064'
        empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
    else:
        calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,102 @@
"""
file_name: mfcc
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
mfcc_dir = 'audio/mfcc'
csv_ext = '_mfcc.csv'
error_txt = 'error: length less than 0.064'
def empty_mfcc(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder MFCC table carrying the module's error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
    """
    # aco_mfcc1 ... aco_mfcc12, read from the config in order
    mfcc_cols = [getattr(r_config, 'aco_mfcc' + str(k)) for k in range(1, 13)]
    cols = ['Frames'] + mfcc_cols + [r_config.err_reason]
    # NaN for 'Frames' and each of the twelve coefficients, then the reason
    placeholder_row = [np.nan] * (len(mfcc_cols) + 1) + [error_txt]

    df_mfcc = pd.DataFrame([placeholder_row], columns=cols)
    df_mfcc['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
def audio_mfcc(path):
    """
    Load an audio file with parselmouth and extract its MFCCs
    Args:
        path: (.wav) audio file location
    Returns:
        MFCC array (12 coefficients requested, 1 ms time step) with its
        first row removed
    """
    snd = parselmouth.Sound(path)
    coeff_matrix = snd.to_mfcc(time_step=.001, number_of_coefficients=12).to_array()
    # drop row 0 of the matrix (presumably the 0th coefficient, confirm
    # against the parselmouth MFCC layout)
    return np.delete(coeff_matrix, 0, axis=0)
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the MFCC table for one audio file and save it
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: output location to save csv
        fl_name: (str) name of audio file
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    mfccs = audio_mfcc(audio_file)
    raw_names = r_config.base_raw['raw_feature']
    # configured column name for coefficient k -> row k-1 of the matrix
    coeff_columns = {
        raw_names['aco_mfcc' + str(k)]: mfccs[k - 1, :]
        for k in range(1, 13)
    }

    df = pd.DataFrame(coeff_columns)
    df['Frames'] = df.index
    # may replace based on threshold in future release
    df[r_config.err_reason] = 'Pass'
    df['dbm_master_url'] = video_uri
    ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
def run_mfcc(video_uri, out_dir, r_config):
    """
    Entry point: compute MFCCs for the audio of one video
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_mfcc(video_uri, out_loc, fl_name, r_config)
    else:
        calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,167 @@
"""
file_name: pause_segment
project_name: DBM
created: 2020-20-07
"""
import os
import glob
from pydub import AudioSegment
import librosa
import pandas as pd
import numpy as np
import webrtcvad
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
pause_seg_dir = 'audio/pause_segment'
csv_ext = '_pause_segment.csv'
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
    """
    Get timing cues from segmented speech
    Args:
        seg_starts_sec: Audio segment start times in seconds (non-empty)
        seg_ends_sec: Audio segment end times in seconds (non-empty)
        r_config: raw variable config object providing the output key names
    Returns:
        Dictionary with pause features: total time (first start to last end),
        speaking time (sum of segment lengths), number of pauses, total pause
        time and pause fraction (pause time / total time; NaN when the total
        time is zero)
    """
    starts = np.asarray(seg_starts_sec)
    ends = np.asarray(seg_ends_sec)

    total_time = seg_ends_sec[-1] - seg_starts_sec[0]
    speaking_time = np.sum(ends - starts)

    # A pause is the gap between the end of one segment and the start of the
    # next one.
    num_pauses = len(seg_starts_sec) - 1
    if num_pauses > 0:
        pause_len = np.asarray(starts[1:] - ends[:num_pauses], dtype=float)
        pause_time = np.sum(pause_len)
    else:
        pause_time = 0

    # A zero-length span (single segment with start == end) used to raise
    # ZeroDivisionError; the fraction is undefined there, so report NaN.
    pause_frac = pause_time / total_time if total_time != 0 else np.nan

    timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
                   r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time,
                   r_config.aco_pausefrac: pause_frac}
    return timing_dict
def process_silence(audio_file, r_config):
    """
    Derive pause features for one audio file using voice activity detection
    Args:
        audio_file: Audio file location
        r_config: raw variable config object
    Returns:
        Dataframe with the timing features and the error column, or an empty
        string when voice activity detection yields no usable segment
    """
    # 3 is most aggressive (splits most), 0 least (better for low snr)
    vad_mode = 3
    frame_ms = 20
    # pause segment (long & short pad)
    long_pad_ms = 200
    short_pad_ms = 100

    feature_rows = []
    y, sr = vu.read_wave(audio_file)
    if len(y) > 0:
        vad = webrtcvad.Vad(vad_mode)
        frames = list(vu.frame_generator(frame_ms, y, sr))

        # longer pad time screens out little blips, but misses short silences
        long_starts, long_ends = vu.vad_get_segment_times(sr, frame_ms, long_pad_ms, vad, frames)
        # Logic to handle blank audio file
        if not len(long_starts) or not len(long_ends):
            return ''
        t_start, t_end = long_starts[0], long_ends[-1]

        # shorter pad time captures short silences (but misfires on little blips)
        short_starts, short_ends = vu.vad_get_segment_times(sr, frame_ms, short_pad_ms, vad, frames)

        # logic to clean up some typical misfires: keep only the short
        # segments that start inside the span found with the long pad
        seg_starts, seg_ends = [], []
        for seg_idx, seg_begin in enumerate(short_starts):
            if t_start <= seg_begin <= t_end:
                seg_starts.append(seg_begin)
                seg_ends.append(short_ends[seg_idx])
        if not len(seg_starts) or not len(seg_ends):
            return ''

        feature_rows.append(get_timing_cues(seg_starts, seg_ends, r_config))

    df = pd.DataFrame(feature_rows)
    # will replace with threshold in future release
    df[r_config.err_reason] = 'Pass'
    return df
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder pause-segment table carrying the given error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
        error_txt: (str) reason written to the error column
    """
    timing_cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses,
                   r_config.aco_pausetime, r_config.aco_pausefrac]
    cols = timing_cols + [r_config.err_reason]
    # NaN for every timing feature, then the error reason
    placeholder_row = [np.nan] * len(timing_cols) + [error_txt]

    df_pause = pd.DataFrame([placeholder_row], columns=cols)
    df_pause['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
def run_pause_segment(video_uri, out_dir, r_config):
    """
    Processing all patient's for getting Pause Segment
    ---------------
    ---------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
    if len(aud_filter) > 0:
        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)

        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            error_txt = 'error: length less than 0.064'
            empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
            return

        # The VAD step works on a temporary mono, 48 kHz copy of the audio.
        logger.info('Converting stereo sound to mono-lD')
        sound_mono = AudioSegment.from_wav(audio_file)
        sound_mono = sound_mono.set_channels(1)
        sound_mono = sound_mono.set_frame_rate(48000)

        mono_wav = os.path.join(input_loc, fl_name + '_mono.wav')
        try:
            sound_mono.export(mono_wav, format="wav")
            df_pause_seg = process_silence(mono_wav, r_config)
        finally:
            # Remove the temporary mono file even when export or processing
            # fails; previously a failure left it behind in the input folder.
            if os.path.exists(mono_wav):
                os.remove(mono_wav)

        if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
            logger.info('Processing Output file {} '.format(out_loc))
            df_pause_seg['dbm_master_url'] = video_uri
            ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext)
        else:
            # process_silence returned '' or an empty frame: no speech found
            error_txt = 'error: webrtcvad returns no segment'
            empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)

View File

@@ -0,0 +1,109 @@
"""
file_name: pitch_freq
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
ff_dir = 'audio/pitch'
csv_ext = '_pitch.csv'
error_txt = 'error: length less than 0.064'
def audio_pitch(path):
    """
    Load an audio file with parselmouth and extract its pitch track
    Args:
        path: (.wav) audio file location
    Returns:
        (list) 'frequency' field of the pitch object's selected array,
        computed with a 1 ms time step
    """
    snd = parselmouth.Sound(path)
    pitch_obj = snd.to_pitch(time_step=.001)
    return list(pitch_obj.selected_array['frequency'])
def label_speech(row, fd_freq):
    """
    Tell whether one frame is voiced, based on its pitch value
    Args:
        row: mapping/row holding the frame's values
        fd_freq: key (column name) of the pitch frequency inside row
    Returns:
        (str) 'yes' when the pitch value is greater than 0, otherwise 'no'
    """
    is_voiced = row[fd_freq] > 0
    return 'yes' if is_voiced else 'no'
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing pitch frequency matrix
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: input file name; r_config: raw variable config object
    """
    ff_frames = audio_pitch(audio_file)

    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
    df_ffreq['Frames'] = df_ffreq.index
    # Voiced ('yes') when the pitch value is greater than 0, else 'no'. One
    # vectorised comparison replaces a Python-level call per row, which
    # matters with one row per millisecond of audio.
    df_ffreq[r_config.aco_voiceLabel] = np.where(df_ffreq[r_config.aco_ff] > 0, 'yes', 'no')
    df_ffreq[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_ffreq['dbm_master_url'] = video_uri

    logger.info('Processing Output file {} '.format(out_loc))
    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
def empty_pitch(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder pitch table carrying the module's error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
    """
    cols = ['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]
    # no frame, no pitch, labelled unvoiced, plus the error reason
    placeholder_row = [np.nan, np.nan, 'no', error_txt]

    df_ffreq = pd.DataFrame([placeholder_row], columns=cols)
    df_ffreq['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
def run_pitch(video_uri, out_dir, r_config):
    """
    Entry point: compute pitch for the audio of one video
    -------------------
    -------------------
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # no matching wav file: nothing is written
        return

    audio_file = wav_matches[0]
    if float(librosa.get_duration(filename=audio_file)) < 0.064:
        # too short to analyse: write the placeholder table instead
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_pitch(video_uri, out_loc, fl_name, r_config)
    else:
        calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,157 @@
"""
file_name: shimmer_processing
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
shimmer_dir = 'audio/shimmer'
ff_dir = 'audio/pitch'
csv_ext = '_shimmer.csv'
def audio_shimmer(sound):
    """
    Compute local shimmer for a sound through Praat commands
    Args:
        sound: parselmouth sound object
    Returns:
        Result of Praat's "Get shimmer (local)" on the sound together with
        its periodic (cc) point process
    """
    praat_call = parselmouth.praat.call
    # point process built with the arguments 80 and 500
    point_process = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return praat_call([sound, point_process], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder shimmer table carrying the given error text
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory; fl_name: input file name
        r_config: raw variable config object
        error_txt: (str) reason written to the error column
    """
    placeholder_row = [np.nan, np.nan, error_txt]
    df_shimmer = pd.DataFrame(
        [placeholder_row],
        columns=['Frames', r_config.aco_shimmer, r_config.err_reason],
    )
    df_shimmer['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
def segment_pitch(dir_path, r_config):
    """
    Split the saved pitch track into runs of consecutive voiced/unvoiced rows

    Every file in dir_path whose name ends with '_pitch.csv' is read; when
    several match, the last one processed successfully wins.
    Args:
        dir_path: directory holding the pitch csv output
        r_config: raw variable config object; aco_voiceLabel names the column
            holding the "yes"/"no" voice label
    Returns:
        (com_speech_sort, voiced_yes, voiced_no): all runs ordered by their
        first row index, the voiced runs and the unvoiced runs. Each run is a
        list of consecutive row indices. Three empty lists when no pitch file
        could be processed.
    """
    # Three distinct lists: `([], ) * 3` handed callers one shared object.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            runs_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            runs_no = [list(group) for group in mit.consecutive_groups(indices_no)]
            runs_sorted = sorted(runs_yes + runs_no, key=lambda x: x[0])
        except Exception:
            # Still best effort (a bad pitch file must not abort the run), but
            # no longer silent, and Ctrl-C / SystemExit are not swallowed.
            logger.warning('Skipping unreadable pitch file %s', file, exc_info=True)
            continue
        # Publish the three results together so a failure never leaves them
        # half-updated.
        com_speech_sort, voiced_yes, voiced_no = runs_sorted, runs_yes, runs_no
    return com_speech_sort, voiced_yes, voiced_no
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
    """
    calculating shimmer for each voice segment
    Args:
        com_speech_sort: all segments (lists of pitch row indices) in order
        voiced_yes: the voiced segments; only these get a shimmer value
        voiced_no: unvoiced segments (not used here, kept for the callers)
        shimmer_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        shimmer_frames, holding the shimmer of every voiced segment longer
        than one row and NaN everywhere else (including segments whose
        analysis failed)
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)
    samples_all = snd.as_array()[0]
    # Hashable copy of the voiced segments: O(1) membership per segment
    # instead of scanning a list of lists every iteration.
    voiced_lookup = {tuple(seg) for seg in voiced_yes}
    for idx, vs in enumerate(com_speech_sort):
        # `np.nan`, not `np.NaN`: the alias was removed in NumPy 2.0 and,
        # inside the old bare `except`, left the result name unbound.
        shimmer = np.nan
        if len(vs) > 1 and tuple(vs) in voiced_lookup:
            try:
                # NOTE(review): row indices are passed as frame numbers as in
                # the original code; confirm the 0/1-based convention.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # Keep the source sampling rate; without it the slice is
                # interpreted at parselmouth's default of 44100 Hz.
                samples = parselmouth.Sound(samples_all[snd_start:snd_end],
                                            sampling_frequency=snd.sampling_frequency)
                shimmer = audio_shimmer(samples)
            except Exception:
                # Best effort per segment: a failed analysis keeps NaN.
                logger.debug('Shimmer failed for segment %s', idx, exc_info=True)
        shimmer_frames[idx] = shimmer
    return shimmer_frames
def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing shimmer matrix
    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv; the pitch output is expected
            in its 'audio/pitch' sub-directory
        fl_name: input file name
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
        # One slot per voice segment. `np.nan` because the `np.NaN` alias
        # was removed in NumPy 2.0.
        shimmer_frames = [np.nan] * len(com_speech_sort)
        shimmer_segment_frames = segment_shimmer(com_speech_sort, voiced_yes, voiced_no,
                                                 shimmer_frames, audio_file)

        df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
        df_shimmer[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
        # Rows are voice segments, so 'Frames' holds the segment index.
        df_shimmer['Frames'] = df_shimmer.index
        df_shimmer['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    else:
        # Without the pitch output there are no segments to analyse.
        error_txt = 'error: fundamental freq not available'
        empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
def run_shimmer(video_uri, out_dir, r_config):
    """
    Computing shimmer for the audio that belongs to one video
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # No matching .wav file: nothing is written.
        return

    audio_file = wav_matches[0]
    duration = float(librosa.get_duration(filename=audio_file))
    if duration < 0.064:
        # Too short to analyse: write the error row instead of shimmer values.
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_shimmer(video_uri, out_loc, fl_name, r_config, 'error: length less than 0.064')
    else:
        calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config)

View File

@@ -0,0 +1,107 @@
"""
file_name: voice_frame_score
project_name: DBM
created: 2020-20-07
"""
import parselmouth
import pandas as pd
import numpy as np
import glob
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
# Logging is configured at INFO level as a side effect of importing this module.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger.
logger=logging.getLogger()
# vfs_dir and csv_ext are handed to ut.save_output when the result is written.
vfs_dir = 'audio/voice_frame_score'
csv_ext = '_vfs.csv'
# Value empty_vfs stores in the err_reason column of its placeholder row.
error_txt = 'error: length less than 0.064'
def audio_pitch_frame(pitch):
    """
    Counting all frames and the voiced frames of a pitch object
    Args:
        pitch: object providing get_number_of_frames() and count_voiced_frames()
    Returns:
        (tuple) total number of frames, number of voiced frames
    """
    return pitch.get_number_of_frames(), pitch.count_voiced_frames()
def voice_segment(path):
    """
    Measuring how much of an audio file is voiced, using parselmouth pitch analysis
    Args:
        path: (.wav) audio file location
    Returns:
        (tuple) voiced frames percentage, voiced frame count, total frame count
    """
    pitch_track = parselmouth.Sound(path).to_pitch()
    n_total, n_voiced = audio_pitch_frame(pitch_track)
    voiced_ratio = n_voiced / n_total
    return voiced_ratio * 100, n_voiced, n_total
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Building and saving the single-row voice frame score matrix
    Args:
        video_uri: value written to the 'dbm_master_url' column
        audio_file: Audio file path
        out_loc: output location handed to the writer
        fl_name: input file name, handed to the writer
        r_config: config object supplying the output column names
    """
    pct, n_voiced, n_total = voice_segment(audio_file)

    # Measurement columns first, in the order they appear in the output.
    df_vfs = pd.DataFrame({
        r_config.aco_voiceFrame: [n_voiced],
        r_config.aco_totVoiceFrame: [n_total],
        r_config.aco_voicePct: [pct],
    })
    df_vfs[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_vfs['Frames'] = df_vfs.index
    df_vfs['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
def empty_vfs(video_uri, out_loc, fl_name, r_config):
    """
    Saving a placeholder VFS matrix: one row of NaN values plus the error reason
    Args:
        video_uri: value written to the 'dbm_master_url' column
        out_loc: output location handed to the writer
        fl_name: input file name, handed to the writer
        r_config: config object supplying the output column names
    """
    nan_cols = [
        'Frames',
        r_config.aco_voiceFrame,
        r_config.aco_totVoiceFrame,
        r_config.aco_voicePct,
    ]
    # Every measurement is NaN; only the error reason carries information.
    placeholder_row = [np.nan] * len(nan_cols) + [error_txt]

    df_vfs = pd.DataFrame([placeholder_row], columns=nan_cols + [r_config.err_reason])
    df_vfs['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
def run_vfs(video_uri, out_dir, r_config):
    """
    Computing the voice frame score for the audio that belongs to one video
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + '.wav'))
    if not wav_matches:
        # No matching .wav file: nothing is written.
        return

    audio_file = wav_matches[0]
    duration = float(librosa.get_duration(filename=audio_file))
    if duration < 0.064:
        # Too short to analyse: write the placeholder row instead of a score.
        logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
        empty_vfs(video_uri, out_loc, fl_name, r_config)
    else:
        calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config)