added vocal tremor

This commit is contained in:
Vidya Koesmahargyo
2020-11-18 23:33:30 -05:00
parent 82f2830a47
commit f9f5b4ec5f
9 changed files with 663 additions and 105 deletions

View File

@@ -17,7 +17,7 @@ class ConfigRawReader(object):
Args:
feature_config_yml (None, optional): yml file defined service configuration
"""
if feature_config_yml is None:
feature_config = DBMLIB_FEATURE_CONFIG
else:
@@ -25,15 +25,15 @@ class ConfigRawReader(object):
with open(feature_config, 'r') as ymlfile:
config = yaml.load(ymlfile)
#Verbal features
self.base_raw = config
self.err_reason = config['raw_feature']['error_reason']
#Output range
self.mov_headvel_start = config['raw_feature']['mov_headvel_start']
self.mov_headvel_end = config['raw_feature']['mov_headvel_end']
#Acoustic variable
self.aco_int = config['raw_feature']['aco_int']
self.aco_ff = config['raw_feature']['aco_ff']
@@ -210,7 +210,7 @@ class ConfigRawReader(object):
self.fac_AsymMaskEye = config['raw_feature']['fac_AsymMaskEye']
self.fac_AsymMaskEyebrow = config['raw_feature']['fac_AsymMaskEyebrow']
self.fac_AsymMaskCom = config['raw_feature']['fac_AsymMaskCom']
#Movement features
self.head_vel = config['raw_feature']['head_vel']
self.mov_blink_ear = config['raw_feature']['mov_blink_ear']
@@ -222,4 +222,18 @@ class ConfigRawReader(object):
self.mov_Hpose_Yaw = config['raw_feature']['mov_Hpose_Yaw']
self.mov_Hpose_Roll = config['raw_feature']['mov_Hpose_Roll']
self.mov_Hpose_Dist = config['raw_feature']['mov_Hpose_Dist']
self.mov_freq_trem_freq = config['raw_feature']['mov_freq_trem_freq']
self.mov_freq_trem_index = config['raw_feature']['mov_freq_trem_index']
self.mov_freq_trem_pindex = config['raw_feature']['mov_freq_trem_pindex']
self.mov_amp_trem_freq = config['raw_feature']['mov_amp_trem_freq']
self.mov_amp_trem_index = config['raw_feature']['mov_amp_trem_index']
self.mov_amp_trem_pindex = config['raw_feature']['mov_amp_trem_pindex']
self.fac_tremor_median_5 = config['raw_feature']['fac_tremor_median_5']
self.fac_tremor_median_12 = config['raw_feature']['fac_tremor_median_12']
self.fac_tremor_median_8 = config['raw_feature']['fac_tremor_median_8']
self.fac_tremor_median_48 = config['raw_feature']['fac_tremor_median_48']
self.fac_tremor_median_54 = config['raw_feature']['fac_tremor_median_54']
self.fac_tremor_median_28 = config['raw_feature']['fac_tremor_median_28']
self.fac_tremor_median_51 = config['raw_feature']['fac_tremor_median_51']
self.fac_tremor_median_66 = config['raw_feature']['fac_tremor_median_66']
self.fac_tremor_median_57 = config['raw_feature']['fac_tremor_median_57']

View File

@@ -7,7 +7,7 @@ created: 2020-20-07
from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr, gne, voice_frame_score, formant_freq
from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc
from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark
from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink
from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink, voice_tremor
import subprocess
import logging
@@ -19,13 +19,13 @@ logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
def audio_to_wav(input_filepath):
""" Extracts a video's audio file and saves it to wav
""" Extracts a video's audio file and saves it to wav
Args:
input_filepath: (str)
input_filepath: (str)
Returns:
"""
try:
fname, _ = splitext(input_filepath)
output_filepath = fname + '.wav'
@@ -37,7 +37,7 @@ def audio_to_wav(input_filepath):
logger.info('wav output saved in {}'.format(output_filepath))
else:
logger.info('Output file {} already exists'.format(output_filepath))
except Exception as e:
logger.error('Failed to extract audio from Video')
@@ -50,38 +50,41 @@ def process_acoustic(video_uri, out_dir, dbm_group, r_config):
"""
if dbm_group != None and len(dbm_group)>0 and 'acoustic' not in dbm_group:
return
logger.info('Processing acoustic variables from data in {}'.format(video_uri))
logger.info('processing audio intensity....')
intensity.run_intensity(video_uri, out_dir, r_config)
logger.info('processing audio pitch freq....')
pitch_freq.run_pitch(video_uri, out_dir, r_config)
logger.info('processing HNR....')
hnr.run_hnr(video_uri, out_dir, r_config)
logger.info('processing GNE....')
gne.run_gne(video_uri, out_dir, r_config)
logger.info('processing voice frame score....')
voice_frame_score.run_vfs(video_uri, out_dir, r_config)
logger.info('processing formant frequency....')
formant_freq.run_formant(video_uri, out_dir, r_config)
logger.info('processing pause segment....')
pause_segment.run_pause_segment(video_uri, out_dir, r_config)
logger.info('processing jitter....')
jitter.run_jitter(video_uri, out_dir, r_config)
logger.info('processing shimmer....')
shimmer.run_shimmer(video_uri, out_dir, r_config)
logger.info('processing mfcc....')
mfcc.run_mfcc(video_uri, out_dir, r_config)
logger.info('processing voice tremor....')
voice_tremor.run_vtremor(video_uri, out_dir, r_config)
def process_facial(video_uri, out_dir, dbm_group, r_config):
"""
processing facial features
@@ -91,20 +94,20 @@ def process_facial(video_uri, out_dir, dbm_group, r_config):
"""
if dbm_group != None and len(dbm_group)>0 and 'facial' not in dbm_group:
return
logger.info('Processing facial variables from data in {}'.format(video_uri))
logger.info('processing facial asymmetry....')
face_asymmetry.run_face_asymmetry(video_uri, out_dir, r_config)
logger.info('processing facial Action Unit....')
face_au.run_face_au(video_uri, out_dir, r_config)
logger.info('processing facial expressivity....')
face_emotion_expressivity.run_face_expressivity(video_uri, out_dir, r_config)
logger.info('processing facial landmark....')
face_landmark.run_face_landmark(video_uri, out_dir, r_config)
def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
"""
processing facial features
@@ -115,23 +118,24 @@ def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
"""
if dbm_group != None and len(dbm_group)>0 and 'movement' not in dbm_group:
return
logger.info('Processing movement variables from data in {}'.format(video_uri))
logger.info('processing head movement....')
head_motion.run_head_movement(video_uri, out_dir, r_config)
logger.info('processing eye blink....')
eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model)
logger.info('processing voice tremor....')
voice_tremor.run_vtremor(video_uri, out_dir, r_config)
def remove_file(file_path):
    """
    Removing the wav file that sits next to the given file

    The wav path is the input path with its extension replaced by '.wav'.
    Nothing happens when that wav file does not exist.

    Args:
        file_path: (str) path whose directory and base name locate the wav file
    """
    file_dir = dirname(file_path)
    file_name, _ = splitext(basename(file_path))
    wav_file = join(file_dir, file_name + '.wav')

    # Test the literal path instead of globbing it: glob would interpret
    # characters such as '[', ']', '*' or '?' in the name as a pattern and
    # could miss this file or remove a different one.
    if os.path.lexists(wav_file):
        os.remove(wav_file)

View File

@@ -11,3 +11,5 @@ from __future__ import print_function
import os
# Directory that contains this module file.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of the voice_tremor.praat script, resolved four directory
# levels above DBMLIB_PATH under resources/libraries.
DBMLIB_VTREMOR_LIB = os.path.abspath(os.path.join(DBMLIB_PATH,
'../../../../resources/libraries/voice_tremor.praat'))

View File

@@ -0,0 +1,93 @@
import pandas as pd
import os
import glob
from os.path import join
import parselmouth
from parselmouth.praat import call, run_file
import numpy as np
import librosa
import json
import re
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
from dbm_lib.dbm_features.raw_features.movement import DBMLIB_VTREMOR_LIB
# Configure logging at INFO level; this module logs through the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()

# Passed to ut.save_output as the output sub-directory and the csv file suffix
# for voice tremor results.
vt_dir = 'movement/voice_tremor'
csv_ext = '_vtremor.csv'
#Executing praat script using parselmouth function
def tremor_praat(snd_file, r_cfg):
    """
    Running the voice tremor praat script on one audio file
    Args:
        snd_file: (.wav) parsed audio file
        r_cfg: raw variable config object supplying the output column names
    Returns:
        Single-row tremor dataframe with the six tremor columns
    """
    column_names = [
        r_cfg.mov_freq_trem_freq,
        r_cfg.mov_amp_trem_freq,
        r_cfg.mov_freq_trem_index,
        r_cfg.mov_amp_trem_index,
        r_cfg.mov_freq_trem_pindex,
        r_cfg.mov_amp_trem_pindex,
    ]

    sound = parselmouth.Sound(snd_file)
    praat_result = run_file(sound, DBMLIB_VTREMOR_LIB, capture_output=True)

    # Element 1 of the result is parsed as JSON below; '--undefined--'
    # markers are turned into 0 first so the text can be parsed.
    captured_text = praat_result[1].replace('--undefined--', '0')
    parsed = json.loads(captured_text)

    tremor_df = pd.DataFrame(parsed, index=['0'])
    # Columns are renamed by position, so the script's key order must match column_names.
    tremor_df.columns = column_names
    return tremor_df
def prepare_vtrem_output(audio_file, out_loc, r_config, fl_name):
    """
    Building the voice tremor dataframe for an audio file and saving it as csv
    Args:
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        r_config: raw config object
        fl_name: file name
    """
    tremor_frame = tremor_praat(audio_file, r_config)

    # Every computed row is marked 'Pass' for now; a threshold check is
    # planned for a future release.
    tremor_frame[r_config.err_reason] = 'Pass'

    logger.info('Processing Output file {} '.format(out_loc))
    ut.save_output(tremor_frame, out_loc, fl_name, vt_dir, csv_ext)
def prepare_empty_vt(out_loc, fl_name, r_config, error_txt):
    """
    Saving a voice tremor csv whose tremor values are all NaN
    Args:
        out_loc: (str) Output directory for csv
        fl_name: file name
        r_config: raw config object supplying the column names
        error_txt: value stored in the error reason column
    """
    tremor_cols = [
        r_config.mov_freq_trem_freq,
        r_config.mov_amp_trem_freq,
        r_config.mov_freq_trem_index,
        r_config.mov_amp_trem_index,
        r_config.mov_freq_trem_pindex,
        r_config.mov_amp_trem_pindex,
    ]
    # One row: NaN for each tremor column, then the error text.
    empty_row = [np.nan] * len(tremor_cols) + [error_txt]
    empty_frame = pd.DataFrame([empty_row], columns=tremor_cols + [r_config.err_reason])

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(empty_frame, out_loc, fl_name, vt_dir, csv_ext)
def run_vtremor(video_uri, out_dir, r_config):
    """
    Computing voice tremor variables for one video and saving them as csv
    ---------------
    ---------------
    The wav file is looked up next to the video (same base name). When it is
    shorter than 0.064 sec, or when processing fails, a csv with NaN values
    and an error reason is written instead. When no wav file is found,
    nothing is written.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    # Resolve the paths first: the error handler below needs out_loc and
    # fl_name to write the empty csv, so a failure here can only be logged.
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    except Exception as e:
        logger.error('Failed to compute Voice Tremor {} for {}'.format(e, video_uri))
        return

    try:
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
                error_txt = 'error: length less than 0.064'
                prepare_empty_vt(out_loc, fl_name, r_config, error_txt)
                return

            prepare_vtrem_output(audio_file, out_loc, r_config, fl_name)
    except Exception as e:
        logger.error('Failed to compute Voice Tremor {} for {}'.format(e, video_uri))
        # Store the message text, not the exception object, as the error reason.
        prepare_empty_vt(out_loc, fl_name, r_config, str(e))