move pkg, resources, dbm_lib, to under 1 opendbm directory
This commit is contained in:
16
opendbm/dbm_lib/__init__.py
Normal file
16
opendbm/dbm_lib/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
file_name: init
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
# Directory that contains this package's __init__ module.
DBMLIB_PATH = os.path.dirname(__file__)

# Absolute locations of the packaged YAML configuration files. They live in
# the "resources" directory that is a sibling of this package directory.
DBMLIB_SERVICE_CONFIG = os.path.abspath(
    os.path.join(DBMLIB_PATH, '..', 'resources', 'services', 'services.yml'))
DBMLIB_FEATURE_CONFIG = os.path.abspath(
    os.path.join(DBMLIB_PATH, '..', 'resources', 'features', 'raw_feature.yml'))
DBMLIB_DERIVE_FEATURE_CONFIG = os.path.abspath(
    os.path.join(DBMLIB_PATH, '..', 'resources', 'features', 'derived_feature.yml'))
|
||||
29
opendbm/dbm_lib/config/config_derive_feature.py
Normal file
29
opendbm/dbm_lib/config/config_derive_feature.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
file_name: config_derive_feature
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import yaml
|
||||
from opendbm.dbm_lib import DBMLIB_DERIVE_FEATURE_CONFIG
|
||||
|
||||
class ConfigDeriveReader(object):
    """Reader for the derived-feature YAML configuration.

    The parsed document is exposed, unmodified, as ``base_derive``.
    """

    def __init__(self,
                 feature_config_yml=None):
        """Load the derived-feature configuration file.

        Args:
            feature_config_yml (str, optional): path of the YAML file that
                defines the derived features. When None, the packaged default
                ``DBMLIB_DERIVE_FEATURE_CONFIG`` is read instead.
        """
        if feature_config_yml is None:
            feature_config = DBMLIB_DERIVE_FEATURE_CONFIG
        else:
            feature_config = feature_config_yml

        with open(feature_config, 'r') as ymlfile:
            # safe_load instead of load: calling yaml.load() without an
            # explicit Loader warns on PyYAML 5.1+ and raises TypeError on
            # PyYAML 6.0+, and a configuration file needs no arbitrary
            # object construction.
            config = yaml.safe_load(ymlfile)

        # Whole parsed YAML document.
        self.base_derive = config
|
||||
|
||||
277
opendbm/dbm_lib/config/config_raw_feature.py
Normal file
277
opendbm/dbm_lib/config/config_raw_feature.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""
|
||||
file_name: config_raw_feature
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import yaml
|
||||
from opendbm.dbm_lib import DBMLIB_FEATURE_CONFIG
|
||||
|
||||
class ConfigRawReader(object):
    """Reader for the raw-feature YAML configuration.

    After construction:
      * ``base_raw`` holds the whole parsed document;
      * ``err_reason`` holds ``raw_feature.error_reason``;
      * every key returned by ``_mirrored_keys`` is exposed as an instance
        attribute of the same name, read from the ``raw_feature`` section.
    """

    @staticmethod
    def _mirrored_keys():
        """Return, in assignment order, the ``raw_feature`` keys that are
        copied onto the instance under their own name."""
        # Action units that have an intensity entry; presence entries cover
        # the same units plus AU28.
        au_intensity = ('01', '02', '04', '05', '06', '07', '09', '10', '12',
                        '14', '15', '17', '20', '23', '25', '26', '45')
        au_presence = au_intensity[:-1] + ('28', '45')

        emotions = ('hap', 'sad', 'sur', 'fea', 'ang', 'dis', 'con')
        composites = ('pos', 'neg', 'neu', 'cai', 'com', 'com_lower',
                      'com_upper', 'pai')

        keys = []

        # Output range
        keys += ['mov_headvel_start', 'mov_headvel_end']

        # Acoustic variables
        keys += ['aco_int', 'aco_ff', 'aco_voiceLabel', 'aco_hnr', 'aco_gne']
        keys += ['aco_fm{}'.format(i) for i in range(1, 5)]
        keys += ['aco_jitter', 'aco_shimmer']
        keys += ['aco_mfcc{}'.format(i) for i in range(1, 13)]
        keys += ['aco_voiceFrame', 'aco_totVoiceFrame', 'aco_voicePct',
                 'aco_pausetime', 'aco_totaltime', 'aco_speakingtime',
                 'aco_numpauses', 'aco_pausefrac']

        # Facial action units
        keys += ['fac_AU{}int'.format(au) for au in au_intensity]
        keys += ['fac_AU{}pres'.format(au) for au in au_presence]

        # Facial landmarks 00..67
        keys += ['fac_LMK{:02d}disp'.format(i) for i in range(68)]

        # Facial expressivity. Note the happiness occurrence key is spelled
        # 'happ_occ' in the configuration, unlike the other '<emotion>_occ'.
        keys += [name + '_exp' for name in emotions]
        keys += ['happ_occ'] + [name + '_occ' for name in emotions[1:]]
        keys += [name + '_exp' for name in composites]
        keys += [name + '_exp_full' for name in emotions + composites]
        keys += ['fac_AsymMaskMouth', 'fac_AsymMaskEye',
                 'fac_AsymMaskEyebrow', 'fac_AsymMaskCom']

        # Movement features
        keys += ['head_vel', 'mov_blink_ear', 'vid_dur', 'fps',
                 'mov_blinkframes', 'mov_blinkdur',
                 'mov_Hpose_Pitch', 'mov_Hpose_Yaw', 'mov_Hpose_Roll',
                 'mov_Hpose_Dist']
        keys += ['mov_freq_trem_freq', 'mov_freq_trem_index',
                 'mov_freq_trem_pindex', 'mov_amp_trem_freq',
                 'mov_amp_trem_index', 'mov_amp_trem_pindex']
        keys += ['fac_tremor_median_{}'.format(n)
                 for n in (5, 12, 8, 48, 54, 28, 51, 66, 57)]
        keys += ['mov_leye_x', 'mov_leye_y', 'mov_leye_z',
                 'mov_reye_x', 'mov_reye_y', 'mov_reye_z',
                 'mov_eleft_disp', 'mov_eright_disp']

        # NLP features
        keys += ['nlp_transcribe', 'nlp_numSentences',
                 'nlp_singPronPerAns', 'nlp_singPronPerSen',
                 'nlp_pastTensePerAns', 'nlp_pastTensePerSen',
                 'nlp_pronounsPerAns', 'nlp_pronounsPerSen',
                 'nlp_verbsPerAns', 'nlp_verbsPerSen',
                 'nlp_adjectivesPerAns', 'nlp_adjectivesPerSen',
                 'nlp_nounsPerAns', 'nlp_nounsPerSen',
                 'nlp_sentiment_mean', 'nlp_mattr', 'nlp_wordsPerMin',
                 'nlp_totalTime']
        return keys

    def __init__(self,
                 feature_config_yml=None):
        """Load the raw-feature configuration file.

        Args:
            feature_config_yml (str, optional): path of the YAML file that
                defines the raw features. When None, the packaged default
                ``DBMLIB_FEATURE_CONFIG`` is read instead.

        Raises:
            KeyError: if the ``raw_feature`` section, ``error_reason`` or any
                key listed by ``_mirrored_keys`` is missing from the file.
        """
        if feature_config_yml is None:
            feature_config = DBMLIB_FEATURE_CONFIG
        else:
            feature_config = feature_config_yml

        with open(feature_config, 'r') as ymlfile:
            # safe_load instead of load: calling yaml.load() without an
            # explicit Loader warns on PyYAML 5.1+ and raises TypeError on
            # PyYAML 6.0+.
            config = yaml.safe_load(ymlfile)

        # Whole parsed YAML document.
        self.base_raw = config

        raw = config['raw_feature']
        # The only attribute whose name differs from its configuration key.
        self.err_reason = raw['error_reason']
        for key in self._mirrored_keys():
            setattr(self, key, raw[key])
|
||||
|
||||
67
opendbm/dbm_lib/config/config_reader.py
Normal file
67
opendbm/dbm_lib/config/config_reader.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""
|
||||
file_name: config_reader
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import yaml
|
||||
from opendbm.dbm_lib import DBMLIB_SERVICE_CONFIG
|
||||
|
||||
class ConfigReader(object):
    """Reader for the service YAML configuration.

    All values are taken from the ``cdx_configuration`` section of the file
    and stored as instance attributes; the ``get_*`` methods return them.
    """

    def __init__(self,
                 service_config_yml=None):
        """Load the service configuration file.

        Args:
            service_config_yml (str, optional): path of the YAML file that
                defines the service configuration. When None, the packaged
                default ``DBMLIB_SERVICE_CONFIG`` is read instead.

        Raises:
            KeyError: if ``cdx_configuration`` or one of the keys read below
                is missing from the file.
        """
        if service_config_yml is None:
            service_config = DBMLIB_SERVICE_CONFIG
        else:
            service_config = service_config_yml

        with open(service_config, 'r') as ymlfile:
            # safe_load instead of load: calling yaml.load() without an
            # explicit Loader warns on PyYAML 5.1+ and raises TypeError on
            # PyYAML 6.0+.
            config = yaml.safe_load(ymlfile)

        section = config['cdx_configuration']
        self.input_dir = section['input_dir']
        self.output_dir = section['output_dir']
        self.out_derived_dir = section['out_derived_dir']
        self.of_path = section['open_face_path']
        self.facial_landmarks = section['facial_landmarks']
        self.feature_group = section['feature_group']

    def get_open_face_path(self):
        """Return the configured ``open_face_path`` value."""
        return self.of_path

    def get_input_dir(self):
        """Return the configured ``input_dir`` value."""
        return self.input_dir

    def get_output_dir(self):
        """Return the configured ``output_dir`` value."""
        return self.output_dir

    def get_out_derived_dir(self):
        """Return the configured ``out_derived_dir`` value."""
        return self.out_derived_dir

    def get_fac_landmark_path(self):
        """Return the configured ``facial_landmarks`` value."""
        return self.facial_landmarks
|
||||
165
opendbm/dbm_lib/controller/process_feature.py
Normal file
165
opendbm/dbm_lib/controller/process_feature.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""
|
||||
file_name: process_features
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.audio import shimmer, pause_segment, gne, formant_freq, pitch_freq, mfcc, \
|
||||
jitter, intensity, voice_frame_score, hnr
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_landmark
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video import face_au, face_emotion_expressivity
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink, eye_gaze
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.movement import facial_tremor, voice_tremor
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.nlp import transcribe, speech_features
|
||||
|
||||
import subprocess
|
||||
import logging
|
||||
from os.path import isfile, splitext, basename, dirname, join
|
||||
import glob
|
||||
import os
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
def audio_to_wav(input_filepath):
    """Extract a media file's audio track into a sibling ``.wav`` file.

    The output path is ``input_filepath`` with its extension replaced by
    ``.wav``. If that file already exists nothing is converted. Otherwise
    ``ffmpeg`` is invoked (no video, ``pcm_s16le`` codec, 44100 Hz).
    This is best-effort: any failure is logged and not re-raised.

    Args:
        input_filepath (str): path of the media file to convert.

    Returns:
        None
    """
    try:
        fname, _ = splitext(input_filepath)
        output_filepath = fname + '.wav'

        if not isfile(output_filepath):
            call = ['ffmpeg', '-i', input_filepath, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', output_filepath]

            logger.info('Converting audio from {} to wav'.format(input_filepath))
            subprocess.check_output(call)
            logger.info('wav output saved in {}'.format(output_filepath))
        else:
            logger.info('Output file {} already exists'.format(output_filepath))

    except Exception:
        # Keep the best-effort contract, but record the traceback so the
        # cause (missing ffmpeg, bad input file, ...) is not lost.
        logger.exception('Failed to extract audio from Video')
|
||||
|
||||
def process_acoustic(video_uri, out_dir, dbm_group, r_config):
    """Run every acoustic raw-feature extractor on one input.

    The extractors run in a fixed order and each receives
    ``(video_uri, out_dir, r_config)``.

    Args:
        video_uri: video path.
        out_dir: raw variable output dir.
        dbm_group: list of feature groups to process. When it is None or
            empty every group is processed; otherwise this function does
            nothing unless it contains 'acoustic'.
        r_config: raw feature config object.
    """
    if dbm_group is not None and len(dbm_group) > 0 and 'acoustic' not in dbm_group:
        return

    logger.info('Processing acoustic variables from data in {}'.format(video_uri))

    # (label used in the progress log, extractor entry point), in run order.
    steps = (
        ('audio intensity', intensity.run_intensity),
        ('audio pitch freq', pitch_freq.run_pitch),
        ('HNR', hnr.run_hnr),
        ('GNE', gne.run_gne),
        ('voice frame score', voice_frame_score.run_vfs),
        ('formant frequency', formant_freq.run_formant),
        ('pause segment', pause_segment.run_pause_segment),
        ('jitter', jitter.run_jitter),
        ('shimmer', shimmer.run_shimmer),
        ('mfcc', mfcc.run_mfcc),
    )
    for label, run_step in steps:
        logger.info('processing {}....'.format(label))
        run_step(video_uri, out_dir, r_config)
|
||||
|
||||
def process_facial(video_uri, out_dir, dbm_group, r_config):
    """Run every facial raw-feature extractor on one input.

    The extractors run in a fixed order and each receives
    ``(video_uri, out_dir, r_config)``.

    Args:
        video_uri: video path.
        out_dir: raw variable output dir.
        dbm_group: list of feature groups to process. When it is None or
            empty every group is processed; otherwise this function does
            nothing unless it contains 'facial'.
        r_config: raw feature config object.
    """
    if dbm_group is not None and len(dbm_group) > 0 and 'facial' not in dbm_group:
        return

    logger.info('Processing facial variables from data in {}'.format(video_uri))

    # (label used in the progress log, extractor entry point), in run order.
    steps = (
        ('facial asymmetry', face_asymmetry.run_face_asymmetry),
        ('facial Action Unit', face_au.run_face_au),
        ('facial expressivity', face_emotion_expressivity.run_face_expressivity),
        ('facial landmark', face_landmark.run_face_landmark),
    )
    for label, run_step in steps:
        logger.info('processing {}....'.format(label))
        run_step(video_uri, out_dir, r_config)
|
||||
|
||||
def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
    """Run every movement raw-feature extractor on one input.

    Runs, in order: head movement, eye blink, eye gaze, voice tremor and
    facial tremor.

    Args:
        video_uri: video path.
        out_dir: raw variable output dir.
        dbm_group: list of feature groups to process. When it is None or
            empty every group is processed; otherwise this function does
            nothing unless it contains 'movement'.
        r_config: raw feature config object.
        dlib_model: shape predictor model path; only passed to the eye
            blink extractor.
    """
    if dbm_group is not None and len(dbm_group) > 0 and 'movement' not in dbm_group:
        return

    logger.info('Processing movement variables from data in {}'.format(video_uri))

    logger.info('processing head movement....')
    head_motion.run_head_movement(video_uri, out_dir, r_config)

    logger.info('processing eye blink....')
    eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model)

    logger.info('processing eye gaze....')
    eye_gaze.run_eye_gaze(video_uri, out_dir, r_config)

    logger.info('processing voice tremor....')
    voice_tremor.run_vtremor(video_uri, out_dir, r_config)

    logger.info('processing facial tremor....')
    facial_tremor.fac_tremor_process(video_uri, out_dir, r_config, model_output=True)
|
||||
|
||||
|
||||
def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path):
    """Run transcription and then the speech-feature extractor on one input.

    Args:
        video_uri: video path.
        out_dir: raw variable output dir.
        dbm_group: list of feature groups to process. When it is None or
            empty every group is processed; otherwise this function does
            nothing unless it contains 'speech'.
        tran_tog: forwarded unchanged to ``speech_features.run_speech_feature``
            (presumably a transcription toggle -- confirm against that module).
        r_config: raw feature config object.
        deep_path: deep speech build path; only passed to the transcriber.
    """
    if dbm_group is not None and len(dbm_group) > 0 and 'speech' not in dbm_group:
        return

    logger.info('Processing nlp variables from data in {}'.format(video_uri))
    transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path)
    speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog)
|
||||
|
||||
|
||||
def remove_file(file_path, file_ext = '.wav'):
    """Delete the sibling of ``file_path`` that carries extension ``file_ext``.

    The target is looked up in the directory of ``file_path`` under the same
    base name with ``file_ext`` appended in place of the original extension.
    Nothing happens when no such file exists. If ``file_ext`` contains glob
    wildcards, only the first match is removed.

    Args:
        file_path (str): path whose directory and base name locate the target.
        file_ext (str): extension (including the dot) of the file to delete.
    """
    file_dir = dirname(file_path)
    file_name, _ = splitext(basename(file_path))

    # Escape the directory and base name so that '[', '*' or '?' occurring in
    # real file names are matched literally instead of being interpreted as
    # glob wildcards (which made the lookup miss, or hit another file).
    pattern = join(glob.escape(file_dir), glob.escape(file_name) + file_ext)
    matches = glob.glob(pattern)

    if len(matches) > 0:
        os.remove(matches[0])
|
||||
10
opendbm/dbm_lib/dbm_features/derived_features/__init__.py
Normal file
10
opendbm/dbm_lib/dbm_features/derived_features/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
file_name: init
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
164
opendbm/dbm_lib/dbm_features/derived_features/derive.py
Normal file
164
opendbm/dbm_lib/dbm_features/derived_features/derive.py
Normal file
@@ -0,0 +1,164 @@
|
||||
"""
|
||||
file_name: derive
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob
|
||||
import os
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
def dict_to_df(feature_dict, file):
    """
    Merge an iterable of dictionaries into a single-row dataframe.

    Keys from later dictionaries overwrite equal keys from earlier ones.
    A 'Filename' column holding ``file`` is added to the row.
    """
    merged = {}
    for partial in feature_dict:
        for key, value in partial.items():
            merged[key] = value

    row = pd.DataFrame([merged])
    row['Filename'] = file
    return row
|
||||
|
||||
def save_derive_output(df_list, out_loc):
    """
    Save the derived variable output as ``derived_output.csv``.

    Only the first element of ``df_list`` is written; an empty list writes
    nothing. ``out_loc`` is created when it does not exist yet. This is
    best-effort: any failure is logged and not re-raised.

    Args:
        df_list: list whose first element is the dataframe to save.
        out_loc: directory that receives ``derived_output.csv``.
    """
    try:
        if len(df_list) > 0:
            df = df_list[0]

            file_name = os.path.join(out_loc, 'derived_output.csv')
            if not os.path.exists(out_loc):
                os.makedirs(out_loc)
            df.to_csv(file_name, index=False)

    except Exception:
        # Keep the best-effort contract, but record the traceback so the
        # cause (permissions, bad path, ...) is not lost.
        logger.exception('Failed to save derived variable csv')
|
||||
|
||||
def feature_output(df_fea, exp_var, cal_type):
    """
    Compute one summary statistic of a dataframe column.

    The column ``exp_var`` is cast to float and its NaN rows are dropped
    before ``cal_type`` ('mean', 'std', 'count', 'pct' or 'range') is applied.
    The value stays NaN when no rows remain, when ``cal_type`` is not one of
    those, or when the computation fails (the failure is logged).

    Returns:
        tuple ``(name, value)``: ``name`` is ``<variable>_<cal_type>`` and
        ``value`` is rounded to four decimals. For a successful 'count' the
        variable part of the name is 'mov_blink'.
    """
    label = exp_var
    result = np.nan
    try:
        series = df_fea[exp_var].astype(float).copy()
        series = series.dropna().reset_index(drop=True)
        n_valid = len(series)

        if n_valid > 0:
            if cal_type == 'mean':
                result = series.mean(axis=0, skipna=True)
            elif cal_type == 'std':
                result = series.std(axis=0, skipna=True)
            elif cal_type == 'count':
                # use case for eye blink: row count over the first value, times 60
                label = 'mov_blink'
                result = (n_valid / series[0]) * 60
            elif cal_type == 'pct':
                # share of rows that are strictly positive
                result = len(series[series > 0]) / n_valid
            elif cal_type == 'range':
                result = max(series) - min(series)

    except Exception as e:
        logger.error('Failed to compute calculation: {}'.format(e))

    return (label + '_' + cal_type, float("{0:.4f}".format(result)))
|
||||
|
||||
def cal_type_dict(var_df, raw_df, d_cfg_Obj, r_cfg_Obj):
    """
    Compute every configured statistic for one raw variable.

    ``var_df['var_id']`` names the raw column. The raw-config key whose value
    equals that name selects, in the derive config, the list of calculation
    types to apply. Returns a dict mapping each output name to its value.
    """
    column = str(var_df['var_id'])

    # reverse lookup: raw-config key whose value is this column name
    config_keys = list(r_cfg_Obj.keys())
    config_values = list(r_cfg_Obj.values())
    config_key = config_keys[config_values.index(column)]

    computed = []
    for calc in d_cfg_Obj[config_key]:  # calculation types from config
        computed.append(feature_output(raw_df, column, calc))

    results = {}
    for name, value in computed:
        results[name] = value
    return results
|
||||
|
||||
def compute_feature(raw_df, var_cols, d_cfg_Obj, r_cfg_Obj):
    """
    Compute the derived values of every variable of one feature group.

    Returns an empty dict when ``raw_df`` has no rows; otherwise the result
    of applying ``cal_type_dict`` row-wise over the variable names.
    """
    # one row per variable name of the feature group
    variables = pd.DataFrame(var_cols, columns=['var_id'])

    if len(raw_df) == 0:
        return {}

    return variables.apply(cal_type_dict, args=(raw_df, d_cfg_Obj, r_cfg_Obj, ), axis=1)
|
||||
|
||||
def calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature):
    """
    Build the derived-variable dataframe of one feature group.

    For every input file the first raw csv matching
    ``<input_dir>/<file stem>/*/*/*<LOC>.csv`` is read, where ``<LOC>`` is the
    derive-config value of ``feature + '_LOC'``. Files without a matching csv
    or without computed features are skipped. Returns the concatenated
    per-file rows, or an empty dataframe when there are none.
    """
    per_file_frames = []

    for file in input_file:
        stem = os.path.splitext(os.path.basename(file))[0]
        raw_root = os.path.join(input_dir, stem)

        var_cols = [r_cfg_Obj[name] for name in d_cfg_Obj[feature]]

        suffix = d_cfg_Obj[feature + '_LOC']
        matches = glob.glob(os.path.join(raw_root, '*/*/*' + suffix + '.csv'))
        if len(matches) == 0:
            continue

        raw_df = pd.read_csv(matches[0])
        feature_dict = compute_feature(raw_df, var_cols, d_cfg_Obj, r_cfg_Obj)
        if len(feature_dict) > 0:
            per_file_frames.append(dict_to_df(feature_dict, file))

    if len(per_file_frames) > 0:
        return pd.concat(per_file_frames, ignore_index=True)
    return pd.DataFrame()
|
||||
|
||||
def run_derive(input_file, input_dir, output_dir, r_config, d_config):
    """
    Compute the derived variables of every feature group and save them.

    The groups come from ``FEATURE_GROUP`` in the derive config. Each group's
    dataframe is outer-merged on 'Filename' into a single result, which is
    then handed to ``save_derive_output``. A group that fails is logged and
    skipped so the remaining groups are still processed.

    Args:
        input_file: iterable of input file paths.
        input_dir: directory holding the raw variable output.
        output_dir: directory that receives the derived output.
        r_config: raw feature config object (its ``base_raw`` is read).
        d_config: derive feature config object (its ``base_derive`` is read).
    """
    d_cfg_Obj = d_config.base_derive['derive_feature']
    r_cfg_Obj = r_config.base_raw['raw_feature']
    feature_group = d_cfg_Obj['FEATURE_GROUP']

    # Holds at most one dataframe: the merge of all groups processed so far.
    df_list = []
    for feature in feature_group:
        try:
            df_fea = calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature)
            if len(df_fea) > 0:
                if len(df_list) == 0:
                    df_list.append(df_fea)
                else:
                    result = pd.merge(df_list[0], df_fea, how='outer', on=['Filename'])
                    df_list = [result]

        except Exception:
            # Keep going with the other groups, but record the traceback so
            # the cause of the failure is not lost.
            logger.exception('Failed to process derived variables {}'.format(feature))

    logger.info("Saving derived variable output...")
    save_derive_output(df_list, output_dir)
|
||||
133
opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py
Normal file
133
opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
file_name: formant_freq
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import parselmouth
|
||||
import numpy as np
|
||||
import parselmouth
|
||||
import librosa
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for formant csv files
formant_dir = 'acoustic/formant_freq'
csv_ext = '_formant.csv'
# err_reason text written by empty_fm
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def formant_list(formant, snd):
    """
    Sample the first four formants every millisecond.

    Args:
        formant: Formant object for the sound wave (queried via get_value_at_time)
        snd: Parselmouth sound object; only its duration is read
    Returns:
        Tuple of four lists (F1, F2, F3, F4), one value per sampled time
    """
    # Sampling stops 0.02 s before the end of the sound
    stop = round(snd.duration - 0.02, 2)
    tracks = ([], [], [], [])

    for instant in np.arange(0.001, stop, 0.001):
        for number, track in enumerate(tracks, start=1):
            track.append(formant.get_value_at_time(number, instant))
    return tracks
|
||||
|
||||
def formant_score(path):
    """
    Fetch formant frequencies of an audio file with the parselmouth library.

    Args:
        path: (.wav) audio file location
    Returns:
        The four formant lists produced by formant_list
    """
    sound = parselmouth.Sound(path)
    # Burg formant analysis with a 1 ms time step
    return formant_list(sound.to_formant_burg(time_step=.001), sound)
|
||||
|
||||
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the formant frequency table for one audio file and save it as csv.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    first, second, third, fourth = formant_score(audio_file)

    df_formant = pd.DataFrame(first, columns=[r_config.aco_fm1])
    remaining = ((r_config.aco_fm2, second), (r_config.aco_fm3, third), (r_config.aco_fm4, fourth))
    for column, track in remaining:
        df_formant[column] = track

    df_formant.replace('', np.nan, regex=True, inplace=True)
    df_formant[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

    # Frame number is the row position
    df_formant['Frames'] = df_formant.index
    df_formant['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
|
||||
|
||||
def empty_fm(video_uri, out_loc, fl_name, r_config):
    """
    Save a single-row placeholder formant table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    header = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason]
    # Every measurement is NaN; only the error reason carries information
    row = [np.nan] * 5 + [error_txt]

    df_fm = pd.DataFrame([row], columns=header)
    df_fm['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
|
||||
|
||||
def run_formant(video_uri, out_dir, r_config):
    """
    Compute and save formant frequencies for one input video.

    Nothing is written when no matching .wav file is found. Any failure is
    logged with its traceback and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                empty_fm(video_uri, out_loc, fl_name, r_config)
                return

            calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file, but keep the traceback for diagnosis
        logger.exception('Failed to process audio file')
|
||||
161
opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py
Normal file
161
opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
file_name: gne
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import more_itertools as mit
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for GNE csv files
gne_dir = 'acoustic/glottal_noise'
csv_ext = '_gne.csv'
# Sub-directory (under the output location) scanned for '_pitch.csv' files
ff_dir = 'acoustic/pitch'
|
||||
|
||||
def gne_ratio(sound):
    """
    Fetch the glottal-to-noise excitation ratio with the parselmouth library.

    Args:
        sound: parselmouth object
    Returns:
        The maximum GNE value over all bands (a scalar), ignoring entries
        equal to -200, which are treated as missing.
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # np.nan (not np.NaN, which NumPy 2.0 removed) marks the -200 entries as missing
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)

    gne = np.nanmax(gne_all_bands)  # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    return gne
|
||||
|
||||
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a single-row placeholder GNE table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
        error_txt: text written to the err_reason column
    """
    header = ['Frames', r_config.aco_gne, r_config.err_reason]
    row = [np.nan, np.nan, error_txt]

    df_gne = pd.DataFrame([row], columns=header)
    df_gne['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
|
||||
|
||||
def segment_pitch(dir_path, r_config):
    """
    Split the frames of a pitch csv into runs of consecutive voiced/unvoiced frames.

    Every file in dir_path ending in '_pitch.csv' is read; when several
    exist, the last one successfully processed determines the result.

    Args:
        dir_path: directory scanned for '_pitch.csv' files
        r_config: raw variable config; aco_voiceLabel names the voice label column
    Returns:
        Tuple (all segments sorted by first frame index, "yes" segments,
        "no" segments); each segment is a list of consecutive row indices.
        Three empty lists when no pitch file could be processed.
    """
    # Three independent lists (not one list bound to three names)
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file_name in os.listdir(dir_path):
        if not file_name.endswith('_pitch.csv'):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file_name))
            voice_label = ff_df[r_config.aco_voiceLabel]

            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            yes_groups = [list(group) for group in mit.consecutive_groups(indices_yes)]

            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            no_groups = [list(group) for group in mit.consecutive_groups(indices_no)]

            sorted_groups = sorted(yes_groups + no_groups, key=lambda x: x[0])
        except Exception:
            # Best effort: skip an unreadable file, but leave a trace of why
            logger.warning('Failed to segment pitch file {}'.format(file_name), exc_info=True)
            continue

        # Commit only complete results so the three lists always agree
        com_speech_sort, voiced_yes, voiced_no = sorted_groups, yes_groups, no_groups

    return com_speech_sort, voiced_yes, voiced_no
|
||||
|
||||
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Calculate the GNE of each voice segment.

    Args:
        com_speech_sort: all segments (lists of frame indices) in order
        voiced_yes: the segments labelled voiced
        voiced_no: the segments labelled unvoiced (not used here)
        gne_all_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        gne_all_frames, holding the GNE for voiced segments longer than one
        frame and NaN for every other segment or on failure.
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)

    for idx, vs in enumerate(com_speech_sort):
        # Bound before the try so the assignment below can never hit an unbound name
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:

                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): the sub-sound is built from a bare array without
                # sampling_frequency, so the constructor default applies rather
                # than snd's own rate — confirm this is intended.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                max_gne = gne_ratio(samples)
        except Exception:
            # Best effort per segment: keep NaN, record the cause at debug level
            logger.debug('GNE failed for voice segment {}'.format(idx), exc_info=True)

        gne_all_frames[idx] = max_gne
    return gne_all_frames
|
||||
|
||||
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the per-segment GNE table for one audio file and save it as csv.

    Relies on the pitch csv under <out_loc>/acoustic/pitch; when that
    directory is missing a placeholder row with an error reason is saved.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        voice_seg = segment_pitch(dir_path, r_config)

        # One slot per voice segment; np.nan because np.NaN was removed in NumPy 2.0
        gne_all_frames = [np.nan] * len(voice_seg[0])
        gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file)

        df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
        df_gne[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

        df_gne['Frames'] = df_gne.index
        df_gne['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)

    else:
        error_txt = 'error: pitch freq not available'
        empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
|
||||
def run_gne(video_uri, out_dir, r_config):
    """
    Compute and save the glottal noise ratio for one input video.

    Nothing is written when no matching .wav file is found. Any failure is
    logged with its traceback and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                error_txt = 'error: length less than 0.064'
                empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
                return

            calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file, but keep the traceback for diagnosis
        logger.exception('Failed to process audio file')
|
||||
96
opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py
Normal file
96
opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
file_name: hnr
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for HNR csv files
hnr_dir = 'acoustic/harmonic_noise'
csv_ext = '_hnr.csv'
# err_reason text written by empty_hnr
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def hnr_ratio(filepath):
    """
    Fetch the harmonic-to-noise ratio with the parselmouth library.

    Args:
        filepath: (.wav) audio file location
    Returns:
        Transposed array of harmonicity values computed with a 1 ms time
        step; entries equal to -200 are replaced by NaN.
    """
    sound = parselmouth.Sound(filepath)
    harmonicity = sound.to_harmonicity_ac(time_step=.001)

    hnr_all_frames = harmonicity.values
    # np.nan (not np.NaN, which NumPy 2.0 removed) marks the -200 entries as missing
    hnr_all_frames = np.where(hnr_all_frames == -200, np.nan, hnr_all_frames)
    return hnr_all_frames.transpose()
|
||||
|
||||
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the harmonic noise table for one audio file and save it as csv.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    df_hnr = pd.DataFrame(hnr_ratio(audio_file), columns=[r_config.aco_hnr])

    # Frame number is the row position
    df_hnr['Frames'] = df_hnr.index
    df_hnr['dbm_master_url'] = video_uri
    df_hnr[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
|
||||
|
||||
def empty_hnr(video_uri, out_loc, fl_name, r_config):
    """
    Save a single-row placeholder HNR table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    header = ['Frames', r_config.aco_hnr, r_config.err_reason]
    row = [np.nan, np.nan, error_txt]

    df_hnr = pd.DataFrame([row], columns=header)
    df_hnr['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
|
||||
|
||||
def run_hnr(video_uri, out_dir, r_config):
    """
    Compute and save the harmonic noise ratio for one input video.

    Nothing is written when no matching .wav file is found. Any failure is
    logged with its traceback and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                empty_hnr(video_uri, out_loc, fl_name, r_config)
                return

            calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file, but keep the traceback for diagnosis
        logger.exception('Failed to process audio file')
|
||||
92
opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py
Normal file
92
opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
file_name: intensity
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for intensity csv files
intensity_dir = 'acoustic/intensity'
csv_ext = '_intensity.csv'
# err_reason text written by empty_intensity
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def intensity_score(path):
    """
    Fetch the intensity contour of an audio file with the parselmouth library.

    Args:
        path: (.wav) audio file location
    Returns:
        First row of the intensity values computed with a 1 ms time step
    """
    intensity = parselmouth.Sound(path).to_intensity(time_step=.001)
    return intensity.values[0]
|
||||
|
||||
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the intensity table for one audio file and save it as csv.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    df_intensity = pd.DataFrame(intensity_score(audio_file), columns=[r_config.aco_int])

    # Frame number is the row position
    df_intensity['Frames'] = df_intensity.index
    df_intensity['dbm_master_url'] = video_uri
    df_intensity[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext)
|
||||
|
||||
def empty_intensity(video_uri, out_loc, fl_name, r_config):
    """
    Save a single-row placeholder intensity table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    header = ['Frames', r_config.aco_int, r_config.err_reason]
    row = [np.nan, np.nan, error_txt]

    df_int = pd.DataFrame([row], columns=header)
    df_int['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
|
||||
|
||||
def run_intensity(video_uri, out_dir, r_config):
    """
    Compute and save the intensity for one input video.

    Nothing is written when no matching .wav file is found. Any failure is
    logged with its traceback and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                empty_intensity(video_uri, out_loc, fl_name, r_config)
                return

            calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file, but keep the traceback for diagnosis
        logger.exception('Failed to process audio file')
|
||||
159
opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py
Normal file
159
opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py
Normal file
@@ -0,0 +1,159 @@
|
||||
"""
|
||||
file_name: jitter_processing
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
import more_itertools as mit
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for jitter csv files
jitter_dir = 'acoustic/jitter'
csv_ext = '_jitter.csv'
# Sub-directory (under the output location) scanned for '_pitch.csv' files
ff_dir = 'acoustic/pitch'
|
||||
|
||||
def audio_jitter(sound):
    """
    Fetch the local jitter of a sound through parselmouth's Praat bridge.

    Args:
        sound: parselmouth object
    Returns:
        Value returned by Praat's "Get jitter (local)" command
    """
    praat_call = parselmouth.praat.call
    point_process = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return praat_call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
|
||||
|
||||
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a single-row placeholder jitter table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
        error_txt: text written to the err_reason column
    """
    header = ['Frames', r_config.aco_jitter, r_config.err_reason]
    row = [np.nan, np.nan, error_txt]

    df_jitter = pd.DataFrame([row], columns=header)
    df_jitter['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
|
||||
|
||||
def segment_pitch(dir_path, r_config):
    """
    Split the frames of a pitch csv into runs of consecutive voiced/unvoiced frames.

    Every file in dir_path ending in '_pitch.csv' is read; when several
    exist, the last one successfully processed determines the result.

    Args:
        dir_path: directory scanned for '_pitch.csv' files
        r_config: raw variable config; aco_voiceLabel names the voice label column
    Returns:
        Tuple (all segments sorted by first frame index, "yes" segments,
        "no" segments); each segment is a list of consecutive row indices.
        Three empty lists when no pitch file could be processed.
    """
    # Three independent lists (not one list bound to three names)
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file_name in os.listdir(dir_path):
        if not file_name.endswith('_pitch.csv'):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file_name))
            voice_label = ff_df[r_config.aco_voiceLabel]

            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            yes_groups = [list(group) for group in mit.consecutive_groups(indices_yes)]

            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            no_groups = [list(group) for group in mit.consecutive_groups(indices_no)]

            sorted_groups = sorted(yes_groups + no_groups, key=lambda x: x[0])
        except Exception:
            # Best effort: skip an unreadable file, but leave a trace of why
            logger.warning('Failed to segment pitch file {}'.format(file_name), exc_info=True)
            continue

        # Commit only complete results so the three lists always agree
        com_speech_sort, voiced_yes, voiced_no = sorted_groups, yes_groups, no_groups

    return com_speech_sort, voiced_yes, voiced_no
|
||||
|
||||
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
    """
    Calculate the jitter of each voice segment.

    Args:
        com_speech_sort: all segments (lists of frame indices) in order
        voiced_yes: the segments labelled voiced
        voiced_no: the segments labelled unvoiced (not used here)
        jitter_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        jitter_frames, holding the jitter for voiced segments longer than
        one frame and NaN for every other segment or on failure.
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)

    for idx, vs in enumerate(com_speech_sort):
        # Bound before the try so the assignment below can never hit an unbound name
        jitter = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:

                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): the sub-sound is built from a bare array without
                # sampling_frequency, so the constructor default applies rather
                # than snd's own rate — confirm this is intended.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                jitter = audio_jitter(samples)
        except Exception:
            # Best effort per segment: keep NaN, record the cause at debug level
            logger.debug('Jitter failed for voice segment {}'.format(idx), exc_info=True)

        jitter_frames[idx] = jitter
    return jitter_frames
|
||||
|
||||
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the per-segment jitter table for one audio file and save it as csv.

    Relies on the pitch csv under <out_loc>/acoustic/pitch; when that
    directory is missing a placeholder row with an error reason is saved.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: input file name handed to ut.save_output
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        voice_seg = segment_pitch(dir_path, r_config)

        # One slot per voice segment; np.nan because np.NaN was removed in NumPy 2.0
        jitter_frames = [np.nan] * len(voice_seg[0])
        jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file)

        df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
        df_jitter[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

        df_jitter['Frames'] = df_jitter.index
        df_jitter['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)

    else:
        error_txt = 'error: fundamental freq not available'
        empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
|
||||
def run_jitter(video_uri, out_dir, r_config):
    """
    Compute and save the jitter for one input video.

    Nothing is written when no matching .wav file is found. Any failure is
    logged with its traceback and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                error_txt = 'error: length less than 0.064'
                empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
                return

            calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file, but keep the traceback for diagnosis
        logger.exception('Failed to process audio file')
|
||||
105
opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py
Normal file
105
opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
file_name: mfcc
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for mfcc csv files
mfcc_dir = 'acoustic/mfcc'
csv_ext = '_mfcc.csv'
# err_reason text written by empty_mfcc
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def empty_mfcc(video_uri, out_loc, fl_name, r_config):
    """
    Save a single-row placeholder mfcc table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
    """
    mfcc_cols = [
        r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4,
        r_config.aco_mfcc5, r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8,
        r_config.aco_mfcc9, r_config.aco_mfcc10, r_config.aco_mfcc11, r_config.aco_mfcc12,
    ]
    header = ['Frames'] + mfcc_cols + [r_config.err_reason]
    # 'Frames' plus the twelve coefficients are NaN; only the reason is set
    row = [np.nan] * 13 + [error_txt]

    df_mfcc = pd.DataFrame([row], columns=header)
    df_mfcc['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
|
||||
|
||||
def audio_mfcc(path):
    """
    Fetch the mfccs of an audio file with the parselmouth library.

    Args:
        path: (.wav) audio file location
    Returns:
        Array of the 12-coefficient MFCC analysis (1 ms time step) with its
        first row removed
    """
    mfcc_object = parselmouth.Sound(path).to_mfcc(time_step=.001, number_of_coefficients=12)
    # Row 0 is dropped — presumably the 0th coefficient; TODO confirm
    return np.delete(mfcc_object.to_array(), (0), axis=0)
|
||||
|
||||
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the mfcc table for one audio file and save it as csv.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: output location to save csv
        fl_name: (str) name of audio file
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    mfccs = audio_mfcc(audio_file)
    names = r_config.base_raw['raw_feature']

    # Column for coefficient i comes from row i-1 of the mfcc array
    df = pd.DataFrame({names['aco_mfcc' + str(i)]: mfccs[i - 1, :] for i in range(1, 13)})
    df['Frames'] = df.index

    df[r_config.err_reason] = 'Pass'  # may replace based on threshold in future release
    df['dbm_master_url'] = video_uri

    ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
|
||||
|
||||
def run_mfcc(video_uri, out_dir, r_config):
    """
    Compute and save the mfccs for one input video.

    Nothing is written when no matching .wav file is found. Any failure is
    logged with its traceback and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                empty_mfcc(video_uri, out_loc, fl_name, r_config)
                return

            calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file, but keep the traceback for diagnosis
        logger.exception('Failed to process audio file')
|
||||
171
opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py
Normal file
171
opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
file_name: pause_segment
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
from pydub import AudioSegment
|
||||
import librosa
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import webrtcvad
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu, util as ut
|
||||
|
||||
# Configure the root logger at INFO level and use it for this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Sub-directory and file suffix passed to ut.save_output for pause csv files
pause_seg_dir = 'acoustic/pause_segment'
csv_ext = '_pausechar.csv'
|
||||
|
||||
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
    """
    Get timing cues from segmented speech.

    Args:
        seg_starts_sec: Audio segment start times in seconds (non-empty)
        seg_ends_sec: Audio segment end times in seconds (same length)
        r_config: raw variable config providing the output key names
    Returns:
        Dictionary with total time, speaking time, number of pauses, total
        pause time and pause fraction. The pause fraction is NaN when the
        total time is zero.
    Raises:
        IndexError: if the segment lists are empty
    """
    starts = np.asarray(seg_starts_sec)
    ends = np.asarray(seg_ends_sec)

    total_time = seg_ends_sec[-1] - seg_starts_sec[0]
    speaking_time = np.sum(ends - starts)
    num_pauses = len(seg_starts_sec) - 1

    # A pause is the gap between the end of one segment and the start of the next
    pause_time = np.sum(starts[1:] - ends[:-1]) if num_pauses > 0 else 0

    # The fraction is undefined for a zero-length span; report NaN instead of failing
    pause_frac = pause_time / total_time if total_time != 0 else np.nan

    return {
        r_config.aco_totaltime: total_time,
        r_config.aco_speakingtime: speaking_time,
        r_config.aco_numpauses: num_pauses,
        r_config.aco_pausetime: pause_time,
        r_config.aco_pausefrac: pause_frac,
    }
|
||||
|
||||
def process_silence(audio_file, r_config):
    """
    Compute pause features between words using voice activity detection.

    Args:
        audio_file: Audio file location
        r_config: raw variable config providing the column names
    Returns:
        DataFrame with one row of timing cues and an err_reason column, or
        an empty string when voice activity detection yields no segment.
    """
    # 3 is most aggressive (splits most), 0 least (better for low snr)
    vad_mode = 3
    frame_ms = 20
    # padding around voice used for the long and the short segmentation pass
    long_pad_ms = 200
    short_pad_ms = 100

    rows = []
    y, sr = vu.read_wave(audio_file)

    if len(y) > 0:
        vad = webrtcvad.Vad(vad_mode)
        frames = list(vu.frame_generator(frame_ms, y, sr))

        # longer pad time screens out little blips, but misses short silences
        long_starts, long_ends = vu.vad_get_segment_times(sr, frame_ms, long_pad_ms, vad, frames)

        # Logic to handle blank audio file
        if len(long_starts) == 0 or len(long_ends) == 0:
            return ''

        first_voice = long_starts[0]
        last_voice = long_ends[-1]
        # shorter pad time captures short silences (but misfires on little blips)
        short_starts, short_ends = vu.vad_get_segment_times(sr, frame_ms, short_pad_ms, vad, frames)

        # logic to clean up some typical misfires: keep only short segments
        # that start inside the span found by the long pass
        kept_starts = []
        kept_ends = []
        for k, start in enumerate(short_starts):
            if first_voice <= start <= last_voice:
                kept_starts.append(start)
                kept_ends.append(short_ends[k])

        if len(kept_starts) == 0 or len(kept_ends) == 0:
            return ''

        rows.append(get_timing_cues(kept_starts, kept_ends, r_config))

    df = pd.DataFrame(rows)
    df[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    return df
|
||||
|
||||
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a single-row placeholder pause-segment table carrying the error reason.

    Args:
        video_uri: source video path, stored in the 'dbm_master_url' column
        out_loc: (str) output directory
        fl_name: input file name handed to ut.save_output
        r_config: raw variable config providing the column names
        error_txt: text written to the err_reason column
    """
    header = [
        r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses,
        r_config.aco_pausetime, r_config.aco_pausefrac, r_config.err_reason,
    ]
    # Every measurement is NaN; only the error reason carries information
    row = [np.nan] * 5 + [error_txt]

    df_pause = pd.DataFrame([row], columns=header)
    df_pause['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
|
||||
|
||||
def run_pause_segment(video_uri, out_dir, r_config):
    """
    Compute pause-segment features for one recording and save them as csv.

    The matching '<fl_name>.wav' is converted to a temporary mono 48 kHz file,
    passed to process_silence, and the temporary file is removed afterwards.
    A placeholder row is saved when the audio is shorter than 0.064 s or when
    process_silence does not return a non-empty dataframe.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:

        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                error_txt = 'error: length less than 0.064'
                empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
                return

            logger.info('Converting stereo sound to mono-lD')
            sound_mono = AudioSegment.from_wav(audio_file)
            sound_mono = sound_mono.set_channels(1)
            sound_mono = sound_mono.set_frame_rate(48000)

            mono_wav = os.path.join(input_loc, fl_name + '_mono.wav')
            try:
                sound_mono.export(mono_wav, format="wav")
                df_pause_seg = process_silence(mono_wav, r_config)
            finally:
                # remove the temporary mono file even when export/processing raised
                if os.path.exists(mono_wav):
                    os.remove(mono_wav)

            if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
                logger.info('Processing Output file {} '.format(out_loc))

                df_pause_seg['dbm_master_url'] = video_uri
                ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext)

            else:
                error_txt = 'error: webrtcvad returns no segment'
                empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)

    except Exception as e:
        # keep the best-effort contract, but record what actually went wrong
        logger.error('Failed to process audio file {}: {}'.format(video_uri, e))
|
||||
113
opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py
Normal file
113
opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
file_name: pitch_freq
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
ff_dir = 'acoustic/pitch'
|
||||
csv_ext = '_pitch.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def audio_pitch(path):
    """
    Extract the pitch (fundamental frequency) track of an audio file with parselmouth.

    Args:
        path: (.wav) audio file location
    Returns:
        (list) 'frequency' value of every pitch frame, computed with a 0.001 time step
    """
    pitch_track = parselmouth.Sound(path).to_pitch(time_step=.001)
    return list(pitch_track.selected_array['frequency'])
|
||||
|
||||
def label_speech(row, fd_freq):
    """
    Label a frame as voiced when its pitch frequency is strictly positive.

    Args:
        row: (item) mapping/row holding the pitch frequency value
        fd_freq: key of the pitch frequency value inside row
    Returns:
        (str) 'yes' for a voiced frame, otherwise 'no'
    """
    return 'yes' if row[fd_freq] > 0 else 'no'
|
||||
|
||||
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the per-frame pitch frequency matrix for one audio file and save it as csv.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name stem for the output csv
        r_config: raw variable config object (supplies the column names)
    """
    ff_frames = audio_pitch(audio_file)
    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])

    df_ffreq['Frames'] = df_ffreq.index
    # vectorised form of label_speech: 'yes' where the frequency is > 0, else 'no'
    # (also well-defined when the audio yields no frames at all)
    df_ffreq[r_config.aco_voiceLabel] = np.where(df_ffreq[r_config.aco_ff] > 0, 'yes', 'no')

    df_ffreq[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_ffreq['dbm_master_url'] = video_uri

    logger.info('Processing Output file {} '.format(out_loc))
    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
||||
|
||||
def empty_pitch(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder pitch csv (NaN values, voice label 'no', module
    error text) when the pitch cannot be computed.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name stem for the output csv
        r_config: raw variable config object (supplies the column names)
    """
    column_names = ['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, 'no', error_txt]

    df_ffreq = pd.DataFrame([placeholder_row], columns=column_names)
    df_ffreq['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
|
||||
|
||||
def run_pitch(video_uri, out_dir, r_config):
    """
    Compute pitch features for one recording and save them as csv.

    Looks for '<fl_name>.wav' in the input location; audio shorter than
    0.064 s gets a placeholder row instead of a pitch track. Failures are
    logged (with the exception text) and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:

        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                empty_pitch(video_uri, out_loc, fl_name, r_config)
                return

            calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception as e:
        # keep the best-effort contract, but record what actually went wrong
        logger.error('Failed to process audio file {}: {}'.format(video_uri, e))
|
||||
160
opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py
Normal file
160
opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
file_name: shimmer_processing
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import parselmouth
|
||||
import librosa
|
||||
import numpy as np
|
||||
import more_itertools as mit
|
||||
from os.path import join
|
||||
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
shimmer_dir = 'acoustic/shimmer'
|
||||
ff_dir = 'acoustic/pitch'
|
||||
csv_ext = '_shimmer.csv'
|
||||
|
||||
def audio_shimmer(sound):
    """
    Compute local shimmer of a sound through the Praat interface of parselmouth.

    Args:
        sound: parselmouth object
    Returns:
        value returned by Praat's "Get shimmer (local)" for the whole sound
    """
    # periodic point process with the 80 / 500 arguments used by this module
    point_process = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return parselmouth.praat.call([sound, point_process], "Get shimmer (local)",
                                  0, 0, 0.0001, 0.02, 1.3, 1.6)
|
||||
|
||||
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder shimmer csv when shimmer cannot be computed.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name stem for the output csv
        r_config: raw variable config object (supplies the column names)
        error_txt: (str) reason written to the error column
    """
    column_names = ['Frames', r_config.aco_shimmer, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    df_shimmer = pd.DataFrame([placeholder_row], columns=column_names)
    df_shimmer['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
|
||||
|
||||
def segment_pitch(dir_path, r_config):
    """
    Split the frames of a saved pitch csv into runs of consecutive voiced /
    unvoiced frames.

    Every '*_pitch.csv' in dir_path is read; if several exist, the result of
    the last one that parses successfully is returned. A file that cannot be
    processed is logged and skipped without altering earlier results.

    Args:
        dir_path: (str) directory containing the pitch csv
        r_config: raw variable config object (supplies the voice label column)
    Returns:
        (tuple) all runs sorted by their first frame index, voiced runs,
        unvoiced runs; each run is a list of frame indices
    """
    # three independent lists (the result when no pitch csv is found)
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue

        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]

            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            runs_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]

            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            runs_no = [list(group) for group in mit.consecutive_groups(indices_no)]

            runs_sorted = sorted(runs_yes + runs_no, key=lambda x: x[0])
        except Exception as e:
            logger.error('Failed to segment pitch file {}: {}'.format(file, e))
            continue

        # publish only complete results so the three lists always agree
        com_speech_sort, voiced_yes, voiced_no = runs_sorted, runs_yes, runs_no

    return com_speech_sort, voiced_yes, voiced_no
|
||||
|
||||
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
    """
    Compute shimmer for each voice segment.

    Args:
        com_speech_sort: (list) all frame runs, in output order
        voiced_yes: (list) runs that are voiced; only these (with more than
            one frame) get a shimmer value
        voiced_no: (list) unvoiced runs (not used here)
        shimmer_frames: (list) pre-sized output list, filled in place by run position
        audio_file: (.wav) audio file location
    Returns:
        (list) shimmer_frames with one value per run; NaN where shimmer was
        not computed or its computation failed
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)

    for idx, vs in enumerate(com_speech_sort):
        # np.nan (np.NaN no longer exists in NumPy 2.0); bound before the try
        # so the assignment below can never hit an unbound name
        shimmer = np.nan

        if vs in voiced_yes and len(vs) > 1:
            try:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # keep the source sampling rate; parselmouth.Sound otherwise
                # falls back to its default rate for raw sample arrays
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end],
                                            sampling_frequency=snd.sampling_frequency)
                shimmer = audio_shimmer(samples)
            except Exception:
                # best effort: a segment Praat cannot analyse stays NaN
                shimmer = np.nan

        shimmer_frames[idx] = shimmer
    return shimmer_frames
|
||||
|
||||
def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the per-segment shimmer matrix and save it as csv.

    Shimmer is computed on the voice segments derived from the pitch csv
    under '<out_loc>/acoustic/pitch'; when that directory does not exist a
    placeholder row is saved instead.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: (str) file name stem for the output csv
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)

        # np.nan instead of np.NaN, which was removed in NumPy 2.0
        shimmer_frames = [np.nan] * len(com_speech_sort)
        shimmer_segment_frames = segment_shimmer(com_speech_sort, voiced_yes, voiced_no,
                                                 shimmer_frames, audio_file)

        df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
        df_shimmer[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

        df_shimmer['Frames'] = df_shimmer.index
        df_shimmer['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)

    else:
        error_txt = 'error: fundamental freq not available'
        empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
|
||||
|
||||
def run_shimmer(video_uri, out_dir, r_config):
    """
    Compute shimmer features for one recording and save them as csv.

    Looks for '<fl_name>.wav' in the input location; audio shorter than
    0.064 s gets a placeholder row. Failures are logged (with the exception
    text) and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:

        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                error_txt = 'error: length less than 0.064'
                empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
                return

            calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception as e:
        # keep the best-effort contract, but record what actually went wrong
        logger.error('Failed to process audio file {}: {}'.format(video_uri, e))
|
||||
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
file_name: voice_frame_score
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import parselmouth
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob
|
||||
import librosa
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
vfs_dir = 'acoustic/voice_frame_score'
|
||||
csv_ext = '_voiceprev.csv'
|
||||
error_txt = 'error: length less than 0.064'
|
||||
|
||||
def audio_pitch_frame(pitch):
    """
    Read the frame counts from a pitch object.

    Args:
        pitch: speech pitch
    Returns:
        (tuple) total number of frames, number of voiced frames
    """
    return pitch.get_number_of_frames(), pitch.count_voiced_frames()
|
||||
|
||||
def voice_segment(path):
    """
    Count voiced frames in an audio file using the parselmouth pitch track.

    Args:
        path: (.wav) audio file location
    Returns:
        (tuple) voiced frames percentage, participant voiced frames, total frames
    """
    pitch = parselmouth.Sound(path).to_pitch()

    total_frames = pitch.get_number_of_frames()
    voiced_frames = pitch.count_voiced_frames()

    voiced_percentage = (voiced_frames/total_frames)*100
    return voiced_percentage, voiced_frames, total_frames
|
||||
|
||||
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the single-row voice frame score matrix and save it as csv.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: Audio file path
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name stem for the output csv
        r_config: raw variable config object (supplies the column names)
    """
    voice_percentage, voiced_frames, total_frames = voice_segment(audio_file)

    df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame])
    for column, value in ((r_config.aco_totVoiceFrame, total_frames),
                          (r_config.aco_voicePct, voice_percentage)):
        df_vfs[column] = [value]
    df_vfs[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

    df_vfs['Frames'] = df_vfs.index
    df_vfs['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
||||
|
||||
def empty_vfs(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder voice frame score csv (NaN values plus the
    module error text) when the score cannot be computed.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) output location handed to ut.save_output
        fl_name: (str) file name stem for the output csv
        r_config: raw variable config object (supplies the column names)
    """
    value_cols = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct]
    placeholder_row = [np.nan] * len(value_cols) + [error_txt]

    df_vfs = pd.DataFrame([placeholder_row], columns=value_cols + [r_config.err_reason])
    df_vfs['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
|
||||
|
||||
def run_vfs(video_uri, out_dir, r_config):
    """
    Compute the voice frame score for one recording and save it as csv.

    Looks for '<fl_name>.wav' in the input location; audio shorter than
    0.064 s gets a placeholder row. Failures are logged (with the exception
    text) and not re-raised.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:

        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:

            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                empty_vfs(video_uri, out_loc, fl_name, r_config)
                return

            calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception as e:
        # keep the best-effort contract, but record what actually went wrong
        logger.error('Failed to process audio file {}: {}'.format(video_uri, e))
|
||||
@@ -0,0 +1,17 @@
|
||||
"""
|
||||
file_name: init
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
# Directory of this package; the resource paths below are resolved relative to it.
DBMLIB_PATH = os.path.dirname(__file__)

# 'resources' directory, four levels above this package
_RESOURCES_REL = os.path.join('..', '..', '..', '..', 'resources')

# Praat script used for voice tremor
DBMLIB_VTREMOR_LIB = os.path.abspath(
    os.path.join(DBMLIB_PATH, _RESOURCES_REL, 'libraries', 'voice_tremor.praat'))
# JSON configuration used for facial tremor
DBMLIB_FTREMOR_CONFIG = os.path.abspath(
    os.path.join(DBMLIB_PATH, _RESOURCES_REL, 'features', 'facial', 'config.json'))
|
||||
|
||||
160
opendbm/dbm_lib/dbm_features/raw_features/movement/eye_blink.py
Normal file
160
opendbm/dbm_lib/dbm_features/raw_features/movement/eye_blink.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
file_name: eye_blink
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
from scipy.spatial import distance as dist
|
||||
from scipy.signal import find_peaks
|
||||
from imutils.video import FileVideoStream
|
||||
from imutils.video import VideoStream
|
||||
from imutils import face_utils
|
||||
from moviepy.editor import VideoFileClip
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import imutils
|
||||
import time
|
||||
import dlib
|
||||
import cv2
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
movement_expr_dir = 'movement/eye_blink'
|
||||
csv_ext = '_eyeblinks.csv'
|
||||
|
||||
def eye_aspect_ratio(eye):
    """
    Compute the eye aspect ratio for an individual frame.

    Args:
        eye: Eye landmarks (indexable; points 0..5 are used)
    Return:
        Eye aspect ratio for a frame: sum of the two vertical landmark
        distances divided by twice the horizontal landmark distance
    """
    # vertical eye landmark pairs (1, 5) and (2, 4)
    vertical_span = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    # horizontal eye landmark pair (0, 3)
    horizontal_span = dist.euclidean(eye[0], eye[3])

    return vertical_span / (2.0 * horizontal_span)
|
||||
|
||||
def blink_detection(video_path, facial_landmarks, raw_config):
    """
    Detect blinks in a video from the per-frame eye aspect ratio (EAR).

    Every readable frame is resized, converted to grayscale and scanned with
    dlib's face detector; for each detected face the mean EAR of both eyes is
    recorded together with the running frame counter. Blinks are the peaks of
    the negated EAR series (prominence 0.1).

    Args:
        video_path: MP4 file location
        facial_landmarks: Facial landmark pre-trained model path
        raw_config: Raw configuration file object
    Return:
        Dataframe with blink information like blink frame, duration etc.
    """
    TOT_FRAME = 1
    blink_frame = []
    ear_frame = []

    # the clip is only needed for its duration; release its reader right away
    clip = VideoFileClip(video_path, has_mask=True)
    try:
        vid_length = clip.duration
    finally:
        clip.close()

    identifier = dlib.get_frontal_face_detector()  # dlib's face detector (HOG-based)
    forecaster = dlib.shape_predictor(facial_landmarks)  # the facial landmark predictor

    # left and right eye landmarks
    (left_beg, left_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (right_beg, right_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    vid_stream = FileVideoStream(video_path).start()
    try:
        while True:
            try:
                # check if stream/frame available in video
                if not vid_stream.more():
                    break

                # reading & converting frame into grayscale
                vid_frame = vid_stream.read()
                vid_frame = imutils.resize(vid_frame, width=450)
                gray = cv2.cvtColor(vid_frame, cv2.COLOR_BGR2GRAY)

                # detecting face
                rects = identifier(gray, 0)
                for rect in rects:

                    lmk = forecaster(gray, rect)
                    lmk = face_utils.shape_to_np(lmk)

                    l_eye = lmk[left_beg:left_end]  # left eye landmarks
                    r_eye = lmk[right_beg:right_end]  # right eye landmarks
                    l_ear = eye_aspect_ratio(l_eye)  # eye aspect ratio for left eye
                    r_ear = eye_aspect_ratio(r_eye)  # eye aspect ratio for right eye

                    ear = (l_ear + r_ear) / 2.0  # average the eye aspect ratio
                    blink_frame.append(TOT_FRAME)
                    ear_frame.append(ear)

                TOT_FRAME += 1
            except Exception:
                # a frame that cannot be processed is skipped (counter not advanced)
                continue
    finally:
        # stop the background reader even if the loop exits abnormally
        vid_stream.stop()

    blink_df = pd.DataFrame(ear_frame, columns=[raw_config.mov_blink_ear])
    blink_df[raw_config.vid_dur] = vid_length
    blink_df[raw_config.fps] = int(TOT_FRAME/vid_length)
    blink_df[raw_config.mov_blinkframes] = blink_frame

    peaks, _ = find_peaks(blink_df[raw_config.mov_blink_ear]*-1, prominence=0.1)  # prominence = 0.1 based on tuning
    final_blink_df = blink_df.iloc[peaks, :].reset_index(drop=True)

    u_blink_df = blink_dur(final_blink_df, raw_config)
    u_blink_df['dbm_master_url'] = video_path
    return u_blink_df
|
||||
|
||||
def blink_dur(blink_df, raw_config):
    """
    Add the duration between consecutive blinks to the blink dataframe.

    The duration is the frame difference to the previous blink (the first
    blink keeps its own frame number), divided by the fps column.

    Args:
        blink_df : Dataframe with blink information like blink frame
        raw_config: Raw configuration file object
    Returns:
        Updated dataframe with blink duration (same object, modified in place)
    """
    dur_col = raw_config.mov_blinkdur

    if len(blink_df) == 0:
        blink_df[dur_col] = np.nan
    else:
        frames = blink_df[raw_config.mov_blinkframes]
        blink_df[dur_col] = frames.diff().fillna(frames)

    # frames -> time units via the fps column
    blink_df[dur_col] = blink_df[dur_col]/blink_df[raw_config.fps]
    return blink_df
|
||||
|
||||
def run_eye_blink(video_uri, out_dir, r_config, facial_landmarks):
    """
    Compute eye blink features for one video and save them as csv.

    Nothing is written when video_uri does not exist. Failures are logged
    (with the exception text) and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output; r_config: raw variable config object;
        facial_landmarks: landmark model path
    """
    try:

        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        if os.path.exists(video_uri):

            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
            df_blink = blink_detection(video_uri, facial_landmarks, r_config)
            ut.save_output(df_blink, out_loc, fl_name, movement_expr_dir, csv_ext)

    except Exception as e:
        # keep the best-effort contract, but record what actually went wrong
        logger.error('Failed to process video file {}: {}'.format(video_uri, e))
|
||||
|
||||
148
opendbm/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py
Normal file
148
opendbm/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
file_name: eye_gaze
|
||||
project_name: DBM
|
||||
created: 2020-30-11
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.spatial import distance
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
eye_pose_dir = 'movement/gaze'
|
||||
eye_pose_ext = '_eyegaze.csv'
|
||||
|
||||
def eye_motion_df(l_disp, r_disp, error_list, r_config):
    """
    Assemble the eye movement dataframe.

    Args:
        l_disp: displacement list(left eye); r_disp: displacement list(right eye)
        error_list: per-frame error reasons, stored in the error column
        r_config: raw variable config file object

    Returns:
        Final eye displacement dataframe
    """
    per_eye = [
        pd.DataFrame(l_disp, columns=[r_config.mov_eleft_disp]),
        pd.DataFrame(r_disp, columns=[r_config.mov_eright_disp]),
    ]

    df_eye_motion = pd.concat(per_eye, axis=1, sort=False)
    df_eye_motion[r_config.err_reason] = error_list
    return df_eye_motion
|
||||
|
||||
def filter_motion(df_of, df_disp, col_l, col_r, r_config):
    """
    Build the final eye movement dataframe: gaze coordinates (blanked where
    the OpenFace confidence is below 0.8) next to the displacement columns.

    Args:
        df_of: Openface raw out dataframe (not modified); col_r: right eye columns
        col_l: left eye columns; r_config: raw variable config file object
        df_disp: displacement dataframe, appended column-wise
    Returns:
        Dataframe with the renamed gaze columns followed by df_disp's columns
    """
    gaze_cols = col_l + col_r

    # explicit copy: the masking below must neither touch the caller's frame
    # nor write through a slice (SettingWithCopy)
    df_gaze = df_of[gaze_cols + [' confidence']].copy()
    low_confidence = df_gaze[' confidence'].astype(float) < 0.8
    df_gaze.loc[low_confidence, gaze_cols] = np.nan

    df_filter = df_gaze[gaze_cols].copy()
    df_filter.columns = [r_config.mov_leye_x, r_config.mov_leye_y, r_config.mov_leye_z,
                         r_config.mov_reye_x, r_config.mov_reye_y, r_config.mov_reye_z]

    df_motion = pd.concat([df_filter, df_disp], axis=1, sort=False)
    return df_motion
|
||||
|
||||
def eye_disp(of_results, col, r_config):
    """
    Compute the frame-by-frame gaze displacement of one eye.

    For every row the euclidean distance between its gaze vector and the
    previous row's gaze vector is computed. The first row, and any row whose
    confidence is below 0.8, gets NaN. The previous row is taken positionally,
    so the dataframe index does not need to start at 0.

    Args:
        of_results: Openface raw out dataframe
        col: list with the three gaze column names (x, y, z) of one eye
        r_config: raw variable config file object (not used here)

    Returns:
        (tuple) displacement list, error reason list ('Pass' or
        'confidence less than 80%'), one entry per row
    """
    distance_list = []
    error_list = []

    of_results = of_results[col + [' confidence']]
    prev_point = None
    for _, row in of_results.iterrows():
        point = (row[col[0]], row[col[1]], row[col[2]])
        low_confidence = float(row[' confidence']) < 0.8  # Threshold < 0.8

        if prev_point is None or low_confidence:
            distance_list.append(np.nan)
            error_list.append('confidence less than 80%' if low_confidence else 'Pass')
        else:
            try:
                dst = abs(distance.euclidean(prev_point, point))
            except Exception:
                # values scipy cannot handle leave the displacement undefined
                dst = np.nan
            distance_list.append(dst)
            error_list.append('Pass')

        # the raw previous row is the reference, whatever its confidence
        prev_point = point

    return distance_list, error_list
|
||||
|
||||
def calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config):
    """
    Compute the eye motion variables for one video and save them as csv.

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        df_of: Openface dataframe
        out_loc: Output path for saving output csv's
        fl_name: file name for output csv
        r_config: raw variable config file object
    """
    # OpenFace gaze columns (the leading space is part of the column name)
    left_cols = [' gaze_0_x', ' gaze_0_y', ' gaze_0_z']
    right_cols = [' gaze_1_x', ' gaze_1_y', ' gaze_1_z']

    left_disp, left_err = eye_disp(df_of, left_cols, r_config)
    right_disp, _ = eye_disp(df_of, right_cols, r_config)

    # only the left-eye error list is stored in the output
    df_disp = eye_motion_df(left_disp, right_disp, left_err, r_config)
    df_disp['dbm_master_url'] = video_uri

    ut.save_output(filter_motion(df_of, df_disp, left_cols, right_cols, r_config),
                   out_loc, fl_name, eye_pose_dir, eye_pose_ext)
|
||||
|
||||
def run_eye_gaze(video_uri, out_dir, r_config):
    """
    Compute eye gaze features for one video from its OpenFace csv and save
    them as csv.

    The first csv found under '<out_loc>/<fl_name>_openface/' is used; nothing
    is written when none exists. Failures are logged (with the exception
    text) and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output; r_config: raw variable config object
    """
    try:

        # filtering path to generate input & output path
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))

        if len(of_csv_path) > 0:

            of_csv = of_csv_path[0]
            try:
                # pandas >= 1.3 (error_bad_lines was removed in pandas 2.0)
                df_of = pd.read_csv(of_csv, on_bad_lines='skip')
            except TypeError:
                # older pandas without on_bad_lines
                df_of = pd.read_csv(of_csv, error_bad_lines=False)

            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
            calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config)
    except Exception as e:
        # keep the best-effort contract, but record what actually went wrong
        logger.error('Failed to process video file {}: {}'.format(video_uri, e))
|
||||
@@ -0,0 +1,164 @@
|
||||
import sys, os, glob, cv2, re
|
||||
import pickle, json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import numpy.ma as ma
|
||||
import logging
|
||||
from os.path import join
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util.math_util import *
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.movement import DBMLIB_FTREMOR_CONFIG
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
ft_dir = 'movement/facial_tremor'
|
||||
csv_ext = '_fac_tremor.csv'
|
||||
model_ext = '_fac_model.csv'
|
||||
fac_features_ext = '_fac_features.csv'
|
||||
|
||||
def compute_features(out_dir, df_of, r_config):
    """
    Compute per-landmark facial tremor features from OpenFace landmark output.

    Args:
        out_dir: not used by this function (kept for caller compatibility)
        df_of: (dataframe) OpenFace landmark output; columns matching
            l<N>_x / l<N>_y are treated as landmark coordinates
        r_config: raw variable config object

    Returns: single-row dataframe with, per configured landmark, the tremor
        feature (median and mean based), the median displacement and the
        correlation with the reference landmark. On any failure the NaN
        placeholder from empty_frame is returned, carrying the error reason.
    """
    with open(DBMLIB_FTREMOR_CONFIG, 'r') as config_file:
        config = json.load(config_file)

    pattern_x = re.compile(r"l\d+_x")
    pattern_y = re.compile(r"l\d+_y")

    # assumption: distance of face to camera remains at roughly static

    landmark_columns = [col for col in df_of.columns
                        if pattern_x.match(col) or pattern_y.match(col)]

    # drop frames in which every landmark coordinate is 0
    df_of = df_of[(df_of[landmark_columns] != 0).any(axis=1)].reset_index()

    num_frames = len(df_of)
    logger.info("Number of frames to be processed: {}".format(str(num_frames)))
    landmarks = config['landmarks']

    try:
        if num_frames == 0:
            error_reason = "No frames with visible face."
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        first_row = df_of.iloc[0]

        # face size is taken from the first remaining frame only
        facew = abs(first_row[config['face_width_left']] - first_row[config['face_width_right']])
        faceh = abs(first_row[config['face_height_left']] - first_row[config['face_height_right']])

        if facew == 0 or faceh == 0:
            error_reason = 'face width or height = 0. Check landmark values'
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        fac_disp = calc_displacement_vec(df_of, landmarks, num_frames)

        # validate the shape before any axis=1 reduction so the intended
        # error reason is reported rather than a numpy axis error
        if len(fac_disp.shape) != 2:
            error_reason = 'fac_disp is not 2D. smth went wrong with disp calc'
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        if len(fac_disp[0]) <= 1:
            error_reason = 'Video too short. smth went wrong with disp calc'
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        fac_disp_median = np.median(fac_disp, axis=1)
        fac_disp_mean = np.mean(fac_disp, axis=1)

        fac_corr_mat = np.corrcoef(fac_disp, rowvar=True)
        # extract the row of the reference landmark from the correlation matrix
        ref_lmk_index = [i for i, lmk in enumerate(landmarks) if config['ref_lmk'] == lmk]
        fac_corr = fac_corr_mat[ref_lmk_index][0]

        fac_area = config['ref_area'] / (facew * faceh)

        fac_features1 = np.multiply(fac_area * fac_disp_median, (1. - fac_corr))
        fac_features2 = np.multiply(fac_area * fac_disp_mean, (1. - fac_corr))

        fac_features_dict = {}
        for i, landmark in enumerate(landmarks):
            fac_features_dict['fac_features_mean_{}'.format(landmark)] = [fac_features2[i]]
            raw_variable_map = 'fac_tremor_median_{}'.format(landmark)
            fac_features_dict[r_config.base_raw['raw_feature'][raw_variable_map]] = [fac_features1[i]]

            fac_features_dict['fac_disp_median_{}'.format(landmark)] = [fac_disp_median[i]]
            fac_features_dict['fac_corr_{}'.format(landmark)] = [fac_corr[i]]

        fac_features_dict[r_config.err_reason] = ['']
        data = pd.DataFrame.from_dict(fac_features_dict)
        logger.info('Concluded computing tremor features')

        return data

    except Exception as e:
        logger.error('Error computing tremor features: {}'.format(str(e)))
        return empty_frame(landmarks, r_config, str(e))
|
||||
|
||||
def empty_frame(landmarks, r_config, error_reason):
    """
    Build the one-row placeholder frame used when tremor features cannot
    be computed.

    Args:
        landmarks: iterable of landmark identifiers; each one contributes
                   four NaN-valued feature columns
        r_config: raw variable config object; maps every
                  'fac_tremor_median_<landmark>' key to its output column
                  name and names the error-reason column
        error_reason: text stored in the error-reason column

    Returns:
        Single-row dataframe of NaN features plus the error reason
    """
    other_templates = ('fac_features_mean_{}', 'fac_disp_median_{}', 'fac_corr_{}')

    placeholder = {}
    for lmk in landmarks:
        # The tremor column name comes from the config; the rest are literal.
        config_key = 'fac_tremor_median_{}'.format(lmk)
        placeholder[r_config.base_raw['raw_feature'][config_key]] = [np.nan]
        for template in other_templates:
            placeholder[template.format(lmk)] = [np.nan]

    placeholder[r_config.err_reason] = [error_reason]
    return pd.DataFrame.from_dict(placeholder)
|
||||
|
||||
def fac_tremor_process(video_uri, out_dir, r_config, model_output=False):
    """
    Compute facial tremor features for one video and save them as csv.

    The OpenFace landmark csv is searched under
    '<out_dir>/<file name>/<file name>_openface_lmk/'. When none is found
    nothing is computed or written.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        model_output: currently unused (the scoring step it enabled is disabled)

    Raises:
        Any exception raised while reading, computing or saving is logged
        and re-raised unchanged.
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface_lmk/*.csv'))

        if len(of_csv_path) > 0:
            of_csv = of_csv_path[0]
            # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and
            # removed in 2.0 (replacement: on_bad_lines='skip') - confirm the
            # pinned pandas version.
            df_of = pd.read_csv(of_csv, error_bad_lines=False)

            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))

            # NOTE(review): the whole glob result (a list) is passed here, not
            # `of_csv` - confirm against compute_features' signature.
            feats = compute_features(of_csv_path , df_of, r_config)

            ut.save_output(feats, out_loc, fl_name, ft_dir, csv_ext)

    except Exception:
        # Previously this message was emitted unconditionally, even after a
        # successful run; it is now tied to an actual failure. The exception
        # still propagates, as it did while the handler was commented out.
        logger.error('Failed to process video file')
        raise
|
||||
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
file_name: head_mov
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.spatial import distance
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
h_mov_dir = 'movement/head_movement'
|
||||
h_pose_dir = 'movement/head_pose'
|
||||
h_mov_ext = '_headmov.csv'
|
||||
h_pose_ext = '_headpose.csv'
|
||||
|
||||
def head_pose_dist(of_results):
    """
    Computing head pose distance frame by frame

    The distance of a frame is the euclidean distance between its
    (' pose_Rx', ' pose_Ry', ' pose_Rz') values and those of the previous
    row. The first row and every row whose ' confidence' is below 0.2 get
    NaN.

    Args:
        of_results: Openface raw out dataframe

    Returns:
        (distance_list, error_list): one distance and one status text per
        row; the status is 'confidence less than 20%' for low-confidence
        rows and 'Pass' otherwise
    """
    pose_cols = [' pose_Rx', ' pose_Ry', ' pose_Rz']
    low_conf = (of_results[' confidence'].astype(float) < 0.2).to_numpy()

    # Work positionally (row i vs row i-1) so the result does not depend on
    # the frame carrying a default 0..n-1 index. Unreadable pose values
    # become NaN and therefore yield a NaN distance instead of being hidden
    # behind a bare `except`.
    pose = of_results[pose_cols].apply(pd.to_numeric, errors='coerce')
    step = pose.diff().to_numpy(dtype=float)  # first row is all-NaN
    dist = np.sqrt((step ** 2).sum(axis=1))
    dist[low_conf] = np.nan

    error_list = np.where(low_conf, 'confidence less than 20%', 'Pass').tolist()
    return dist.tolist(), error_list
|
||||
|
||||
def head_pose(of_results,r_config):
    """
    Generating head pose estimation dataframe

    Args:
        of_results: Openface raw out dataframe; it is left unmodified
        r_config: raw variable config file object

    Returns:
        Final head pose estimation dataframe: pitch, yaw and roll (NaN where
        ' confidence' is below 0.2), the frame-to-frame pose distance and
        the error reason
    """
    pose_dist_list, error_list = head_pose_dist(of_results)

    low_conf = of_results[' confidence'].astype(float) < 0.2

    # Blank the low-confidence rows on a private copy: the caller's frame is
    # no longer altered and no assignment is made into a slice.
    pose_of = of_results[[' pose_Rx',' pose_Ry',' pose_Rz']].copy()
    pose_of.loc[low_conf, :] = np.nan

    pose_of.columns = [r_config.mov_Hpose_Pitch, r_config.mov_Hpose_Yaw, r_config.mov_Hpose_Roll]
    pose_of[r_config.mov_Hpose_Dist] = pose_dist_list
    pose_of[r_config.err_reason] = error_list

    return pose_of
|
||||
|
||||
def head_motion_df(distance_val, error_list, r_config):
    """
    Generating head movement dataframe

    Args:
        distance_val: distance list, one value per frame
        error_list: status text list, one entry per frame
        r_config: raw variable config file object

    Returns:
        Final head velocity dataframe with the columns 'Frames', the
        configured head velocity name and the configured error-reason name
    """
    velocity_col = r_config.head_vel

    motion = pd.DataFrame(distance_val, columns=[velocity_col])
    # Frame numbers are simply the row positions, placed first.
    motion.insert(0, 'Frames', motion.index)
    motion[r_config.err_reason] = error_list

    return motion
|
||||
|
||||
def head_vel(of_results, r_config):
    """
    Computing head velocity frame by frame

    The value of a frame is the euclidean distance between its
    (' pose_Tx', ' pose_Ty', ' pose_Tz') values and those of the previous
    row. The first row, rows whose ' confidence' is below 0.2 and
    distances above 200 are reported as NaN.

    Args:
        of_results: Openface raw out dataframe
        r_config: Face config file object

    Returns:
        Final head velocity frame by frame output
    """
    position_cols = [' pose_Tx', ' pose_Ty', ' pose_Tz']
    low_conf = (of_results[' confidence'].astype(float) < 0.2).to_numpy()

    # Positional row-to-row difference: independent of the frame's index
    # labels. Unreadable values become NaN and give a NaN distance.
    position = of_results[position_cols].apply(pd.to_numeric, errors='coerce')
    step = position.diff().to_numpy(dtype=float)  # first row is all-NaN
    dist = np.sqrt((step ** 2).sum(axis=1))
    dist[low_conf] = np.nan

    # NaN never compares greater, so only real distances can be out of range.
    with np.errstate(invalid='ignore'):
        out_of_range = dist > 200
    dist[out_of_range] = np.nan

    error_list = np.where(
        low_conf,
        'confidence less than 20%',
        np.where(out_of_range, 'Out of range', 'Pass'),
    ).tolist()

    df_velocity = head_motion_df(dist.tolist(), error_list, r_config)

    return df_velocity
|
||||
|
||||
def calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config):
    """
    Computing head motion and head pose variables and saving both as csv

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        df_of: Openface dataframe
        out_loc: Output path for saving output csv's
        fl_name: file name for output csv
        r_config: raw variable config file object
    """
    needed = [' confidence',' pose_Rx',' pose_Ry',' pose_Rz',' pose_Tx', ' pose_Ty', ' pose_Tz']
    pose_input = df_of[needed]

    velocity_frame = head_vel(pose_input, r_config)
    velocity_frame['dbm_master_url'] = video_uri

    pose_frame = head_pose(pose_input, r_config)
    pose_frame['dbm_master_url'] = video_uri

    # Both frames are computed before anything is written.
    targets = (
        (velocity_frame, h_mov_dir, h_mov_ext),
        (pose_frame, h_pose_dir, h_pose_ext),
    )
    for frame, sub_dir, suffix in targets:
        ut.save_output(frame, out_loc, fl_name, sub_dir, suffix)
|
||||
|
||||
def run_head_movement(video_uri, out_dir, r_config):
    """
    Computing head movement and head pose output for one video

    The OpenFace csv is searched under
    '<out_dir>/<file name>/<file name>_openface/'. When none is found
    nothing is computed. Failures are logged and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        #filtering path to generate input & output path
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))

        if len(of_csv_path) > 0:
            of_csv = of_csv_path[0]
            # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and
            # removed in 2.0 (replacement: on_bad_lines='skip') - confirm the
            # pinned pandas version.
            df_of = pd.read_csv(of_csv, error_bad_lines=False)

            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
            calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config)

    except Exception:
        # Same message as before, now with the traceback attached so the
        # cause of the failure is not lost.
        logger.exception('Failed to process video file')
|
||||
@@ -0,0 +1,94 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
from os.path import join
|
||||
import parselmouth
|
||||
from parselmouth.praat import call, run_file
|
||||
import numpy as np
|
||||
import librosa
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.movement import DBMLIB_VTREMOR_LIB
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
vt_dir = 'movement/voice_tremor'
|
||||
csv_ext = '_vtremor.csv'
|
||||
|
||||
#Executing praat script using parselmouth function
|
||||
def tremor_praat(snd_file,r_cfg):
    """
    Generating Voice tremor endpoint dataframe

    Runs the praat script DBMLIB_VTREMOR_LIB on the sound and parses the
    text it produces as JSON.

    Args:
        snd_file: (.wav) parsed audio file
        r_cfg: Raw variable configuration file

    Returns:
        tremor endpoint dataframe (one row, six configured columns)
    """
    sound = parselmouth.Sound(snd_file)
    praat_result = run_file(sound, DBMLIB_VTREMOR_LIB, capture_output=True)

    # Element 1 holds the script's text output (presumably the captured
    # praat info-window text - confirm against parselmouth docs). Undefined
    # measures are reported as '--undefined--' and are mapped to 0 so the
    # text parses as JSON.
    json_text = praat_result[1].replace('--undefined--', '0')
    measures = json.loads(json_text)

    tremor_frame = pd.DataFrame(measures, index=['0'])
    tremor_frame.columns = [
        r_cfg.mov_freq_trem_freq,
        r_cfg.mov_amp_trem_freq,
        r_cfg.mov_freq_trem_index,
        r_cfg.mov_amp_trem_index,
        r_cfg.mov_freq_trem_pindex,
        r_cfg.mov_amp_trem_pindex,
    ]
    return tremor_frame
|
||||
|
||||
def prepare_vtrem_output(audio_file, out_loc, r_config, fl_name):
    """
    Preparing voice tremor matrix and saving it as csv

    Args:
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        r_config: raw config object
        fl_name: file name
    """
    tremor_frame = tremor_praat(audio_file, r_config)
    tremor_frame[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

    target = os.path.join(out_loc, fl_name)
    logger.info('Processing Output file {} '.format(target))
    ut.save_output(tremor_frame, out_loc, fl_name, vt_dir, csv_ext)
|
||||
|
||||
def prepare_empty_vt(out_loc, fl_name, r_config, error_txt):
    """
    Preparing empty voice tremor matrix and saving it as csv

    Args:
        out_loc: (str) Output directory for csv
        fl_name: file name
        r_config: raw config object
        error_txt: value stored in the error-reason column
    """
    feature_cols = [
        r_config.mov_freq_trem_freq,
        r_config.mov_amp_trem_freq,
        r_config.mov_freq_trem_index,
        r_config.mov_amp_trem_index,
        r_config.mov_freq_trem_pindex,
        r_config.mov_amp_trem_pindex,
    ]

    # One row: NaN for every tremor measure, then the error text.
    row = [np.nan] * len(feature_cols) + [error_txt]
    empty_tremor = pd.DataFrame([row], columns=feature_cols + [r_config.err_reason])

    logger.info('Saving Output file {} '.format(os.path.join(out_loc, fl_name)))
    ut.save_output(empty_tremor, out_loc, fl_name, vt_dir, csv_ext)
|
||||
|
||||
def run_vtremor(video_uri, out_dir, r_config):
    """
    Computing voice tremor output for one video's audio track

    The audio is expected as '<file name>.wav' next to the video. When it
    is missing nothing is written. Audio shorter than 0.5 sec, or any
    failure, produces an empty (NaN) tremor csv carrying the error text.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    # Known before the try block so the error handler can tell whether the
    # output location was resolved at all.
    out_loc = fl_name = None

    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))

        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.5:
                logger.info('Output file {} size is less than 0.5sec'.format(audio_file))

                error_txt = 'error: length less than 0.5 sec'
                # Arguments follow prepare_empty_vt's signature
                # (out_loc, fl_name, r_config, error_txt); the previous call
                # passed video_uri first and omitted r_config.
                prepare_empty_vt(out_loc, fl_name, r_config, error_txt)
                return

            prepare_vtrem_output(audio_file, out_loc, r_config, fl_name)

    except Exception as e:
        logger.error('Failed to compute Voice Tremor {} for {}'.format(e,video_uri))
        # Without a resolved output location there is nowhere to write the
        # placeholder csv.
        if out_loc is not None:
            prepare_empty_vt(out_loc, fl_name, r_config, str(e))
|
||||
@@ -0,0 +1,50 @@
|
||||
"""
|
||||
file_name: speech_features
|
||||
project_name: DBM
|
||||
created: 2020-13-11
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
import shutil
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util, util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
speech_dir = 'speech/speech_feature'
|
||||
speech_ext = '_nlp.csv'
|
||||
transcribe_ext = 'speech/deepspeech/*_transcribe.csv'
|
||||
|
||||
def run_speech_feature(video_uri, out_dir, r_config, tran_tog):
    """
    Computing nlp (speech) features for one video from its transcription

    The transcription csv is searched under
    '<out_dir>/<file name>/speech/deepspeech/'. When none is found nothing
    is computed. Failures are logged and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        tran_tog: transcription toggle; unless it equals 'on' the directory
                  holding the transcription csv is deleted after the
                  features are saved
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

        transcribe_path = glob.glob(join(out_loc, transcribe_ext))
        if len(transcribe_path) > 0:
            transcribe_df = pd.read_csv(transcribe_path[0])
            df_speech = n_util.process_speech(transcribe_df, r_config)

            logger.info('Saving Output file {} '.format(out_loc))
            ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)

            # None is also "not 'on'", so a single comparison covers both
            # cases of the former `== None or != 'on'` test.
            if tran_tog != 'on':
                shutil.rmtree(os.path.dirname(transcribe_path[0]))

    except Exception:
        # Same message as before, now with the traceback attached.
        logger.exception('Failed to process video file')
|
||||
83
opendbm/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
Normal file
83
opendbm/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
file_name: transcribe
|
||||
project_name: DBM
|
||||
created: 2020-10-11
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import librosa
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util, util as ut
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
formant_dir = 'speech/deepspeech'
|
||||
csv_ext = '_transcribe.csv'
|
||||
error_txt = 'error: length less than 0.1'
|
||||
|
||||
def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur):
    """
    Transcribing the audio and saving the transcription csv

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config
        deep_path: deepspeech build path
        aud_dur: audio duration, stored in the total-time column
    """
    transcript = n_util.process_deepspeech(audio_file, deep_path)

    transcribe_frame = pd.DataFrame([transcript], columns=[r_config.nlp_transcribe])
    # An empty transcript is stored as NaN.
    transcribe_frame = transcribe_frame.replace('', np.nan, regex=True)

    transcribe_frame[r_config.nlp_totalTime] = aud_dur
    transcribe_frame[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    transcribe_frame['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(transcribe_frame, out_loc, fl_name, formant_dir, csv_ext)
|
||||
|
||||
def empty_transcribe(video_uri, out_loc, fl_name, r_config):
    """
    Preparing and saving an empty transcription matrix carrying the
    module-level error text

    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config
    """
    placeholder = pd.DataFrame(
        [[np.nan, np.nan, error_txt]],
        columns=[r_config.nlp_transcribe, r_config.nlp_totalTime, r_config.err_reason],
    )
    placeholder['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(placeholder, out_loc, fl_name, formant_dir, csv_ext)
|
||||
|
||||
def run_transcribe(video_uri, out_dir, r_config, deep_path):
    """
    Transcribing the audio track of one video

    The audio is expected as '<file name>.wav' next to the video. When it
    is missing nothing is written. Audio shorter than 0.1 sec produces an
    empty transcription csv. Failures are logged and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        deep_path: deepspeech build path
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))

        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            if float(aud_dur) < 0.1:
                logger.info('Output file {} size is less than 0.1 sec'.format(audio_file))

                empty_transcribe(video_uri, out_loc, fl_name, r_config)
                return

            calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur)

    except Exception:
        # Same message as before, now with the traceback attached.
        logger.exception('Failed to process audio file')
|
||||
|
||||
57
opendbm/dbm_lib/dbm_features/raw_features/util/math_util.py
Normal file
57
opendbm/dbm_lib/dbm_features/raw_features/util/math_util.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
file_name: math_util
|
||||
project_name: cdx_analysis
|
||||
created: 2019-03-16
|
||||
author: Deshana Desai
|
||||
"""
|
||||
import sys, os, glob, cv2
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
def euclidean_distance(point1, point2):
    """
    Compute euclidean distance between two points

    Only the first two coordinates (index 0 and 1) of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x**2 + delta_y**2)
|
||||
|
||||
|
||||
# def detect_peaks()
|
||||
|
||||
|
||||
def expand_landmarks(landmarks):
    """
    util method to expand a landmark list into pairs of column names:
    eg: [1,2] -> [['l1_x', 'l1_y'], ['l2_x', 'l2_y']]
    """
    expanded = []
    for lmk in landmarks:
        expanded.append(['l{}_x'.format(lmk), 'l{}_y'.format(lmk)])
    return expanded
|
||||
|
||||
|
||||
|
||||
def calc_displacement_vec(df, landmarks, num_frames):
    """
    Calculates displacement vector frame by frame

    Args:
        df: dataframe holding an 'l<landmark>_x' and an 'l<landmark>_y'
            column for every requested landmark, one row per frame
        landmarks: list of landmark identifiers
        num_frames: number of leading rows of df to use

    Returns:
        Array of shape (len(landmarks), num_frames); entry [j][i] is the
        euclidean distance landmark j moved between frame i-1 and frame i
        (0 for the first frame)

    Raises:
        IndexError: if num_frames exceeds the number of rows in df
    """
    column_pairs = expand_landmarks(landmarks)
    disp_vec = np.zeros((len(column_pairs), num_frames))

    # Nothing to measure; this also covers an empty dataframe.
    if not column_pairs or num_frames == 0:
        return disp_vec

    if num_frames > len(df):
        raise IndexError('num_frames ({}) exceeds number of rows ({})'.format(num_frames, len(df)))

    for j, (x_col, y_col) in enumerate(column_pairs):
        # Column-wise differences replace the per-frame row lookups; the
        # first frame keeps its initial 0 since it has no predecessor.
        xs = np.asarray(df[x_col], dtype=float)[:num_frames]
        ys = np.asarray(df[y_col], dtype=float)[:num_frames]
        disp_vec[j, 1:] = np.sqrt(np.diff(xs) ** 2 + np.diff(ys) ** 2)

    return disp_vec
|
||||
212
opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py
Normal file
212
opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py
Normal file
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
file_name: nlp_util
|
||||
project_name: DBM
|
||||
created: 2020-10-11
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
import logging
|
||||
|
||||
import nltk
|
||||
import re
|
||||
from lexicalrichness import LexicalRichness
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger=logging.getLogger()
|
||||
|
||||
#Speech to text using Deepspeech 0.9.1
|
||||
def deepspeech(AUDIO_FILE,deep_path):
    """
    Extracting text from audio using Deep Speech neural network trained model

    Runs the 'deepspeech' command line tool with the 0.9.1 model and scorer
    files found in deep_path.

    Args:
        AUDIO_FILE: path of the audio file to transcribe
        deep_path: directory holding the deepspeech model and scorer files

    Returns:
        (stdout, stderr) of the process; stderr is redirected into stdout,
        so the second element is None. ("error", "error") is returned when
        reading the process output fails.
    """
    command = [
        'deepspeech',
        '--model', os.path.join(deep_path, 'deepspeech-0.9.1-models.pbmm'),
        '--scorer', os.path.join(deep_path, 'deepspeech-0.9.1-models.scorer'),
        "--audio", AUDIO_FILE,
    ]

    out = subprocess.Popen(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT)
    logger.info('Deepspeech output...... {}'.format(out))

    try:
        stdout,stderr = out.communicate()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
        # are no longer swallowed; the failure is logged before falling
        # back to the sentinel result.
        logger.exception('Failed to read deepspeech output')
        return "error", "error"

    return stdout,stderr
|
||||
|
||||
def deep_speech_output_clean(result):
    """
    Parsing deep speech output(text)

    The transcript is the line following the first line that contains
    'Inference took'.

    Args:
        result: sequence whose first element is the deepspeech process
                output

    Return:
        Text from speech; "" when result is empty, the marker line is
        missing, or nothing follows the marker line
    """
    if len(result) == 0:
        return ""

    # The output is examined through its str() form, in which line breaks
    # of a bytes value appear as the two characters backslash + 'n'.
    pieces = str(result[0]).split('\\n')

    # The last piece can never be followed by a transcript, so it is not a
    # candidate marker; this avoids indexing past the end of the list.
    for position, piece in enumerate(pieces[:-1]):
        if 'Inference took' in piece:
            return pieces[position + 1]

    return ""
|
||||
|
||||
def process_deepspeech(audio_file,deep_path):
    """
    Transcribing audio to extract text from speech

    Args:
        audio_file: path of the audio file to transcribe
        deep_path: deepspeech build path

    Returns:
        Transcribed text as extracted from the deepspeech output
    """
    raw_output = deepspeech(audio_file, deep_path)
    return deep_speech_output_clean(raw_output)
|
||||
|
||||
def nltk_download():
    """
    Making sure the nltk resources used for speech features are installed

    Checks for the 'punkt' tokenizer and the 'averaged_perceptron_tagger'
    and downloads whichever is missing.
    """
    # (resource path inside the nltk data directories, download package id).
    # nltk.data.find needs the category prefix: the tagger was previously
    # looked up without 'taggers/', so it was never found and the download
    # was triggered on every call.
    required = (
        ('tokenizers/punkt', 'punkt'),
        ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
    )

    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            logger.info('{} is not available'.format(package))
            nltk.download(package)
|
||||
|
||||
def empty_speech(r_config, master_url, error_txt):
    """
    Preparing empty speech matrix with error

    Args:
        r_config: raw config file object
        master_url: value stored in the 'dbm_master_url' column
        error_txt: Error message during transcription

    Returns:
        Empty dataframe for speech features with error
    """
    feature_cols = [r_config.nlp_numSentences, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, r_config.nlp_pastTensePerAns,
                    r_config.nlp_pastTensePerSen, r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen, r_config.nlp_verbsPerAns,
                    r_config.nlp_verbsPerSen, r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen, r_config.nlp_nounsPerAns,
                    r_config.nlp_nounsPerSen, r_config.nlp_sentiment_mean, r_config.nlp_mattr, r_config.nlp_wordsPerMin,
                    r_config.nlp_totalTime]
    col = feature_cols + [r_config.err_reason]

    # One NaN per feature column plus the error text: exactly len(col)
    # values. The row used to hold one NaN too many, which made the
    # DataFrame constructor raise.
    row = [np.nan] * len(feature_cols) + [error_txt]

    df_speech = pd.DataFrame([row], columns = col)
    df_speech['dbm_master_url'] = master_url

    return df_speech
|
||||
|
||||
def divide_var(speech_var1, spech_var2):
    """
    divide variables

    Returns speech_var1 / spech_var2, or NaN when the divisor equals 0.
    """
    if spech_var2 == 0:
        return np.nan
    return speech_var1/spech_var2
|
||||
|
||||
def process_speech(transcribe_df,r_config):
    """
    Preparing speech features

    Only the first row of transcribe_df is used. When its error-reason
    value is not 'Pass', an empty feature frame carrying that reason is
    returned instead of computed features.

    Args:
        transcribe_df: Transcribed dataframe
        r_config: raw config file object

    Returns:
        Dataframe for speech features
    """
    transcribe_df = transcribe_df.replace(np.nan, '', regex=True)
    err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
    transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
    total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
    master_url = transcribe_df['dbm_master_url'].iloc[0]

    if err_transcribe != 'Pass':
        # Forward the transcription's own error reason. The name used here
        # before (`error_txt`) is not defined in this module, so this path
        # always failed with a NameError.
        return empty_speech(r_config, master_url, err_transcribe)

    #clean transcribe
    transcribe = transcribe.replace(",", "")
    transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe))

    nltk_download()

    sentences = nltk.tokenize.sent_tokenize(transcribe)
    words_all = nltk.tokenize.word_tokenize(transcribe)
    num_sentences = len(sentences)
    has_words = len(words_all) > 0

    speech_dict = {}
    speech_dict[r_config.nlp_numSentences] = num_sentences

    def add_count(per_answer_key, per_sentence_key, count):
        # Stores a count per answer (NaN when there are no words at all)
        # and the same count divided by the number of sentences.
        per_answer = count if has_words else np.nan
        speech_dict[per_answer_key] = per_answer
        speech_dict[per_sentence_key] = divide_var(per_answer, num_sentences)

    #nlp_singPron
    # NOTE(review): these are substring counts on the whole transcript, so
    # e.g. 'some' and 'time' add to the 'me' count and a lowercase 'i' is
    # never counted. Left unchanged because switching to token counts would
    # change the feature values - confirm the intended definition.
    sing_count = transcribe.count('I') + transcribe.count('me') + transcribe.count('my')
    add_count(r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, sing_count)

    all_POSs = [pos_tag for _, pos_tag in nltk.pos_tag(transcribe.split())]

    def tag_total(*pos_tags):
        # Number of words tagged with any of the given part-of-speech tags.
        return sum(all_POSs.count(pos_tag) for pos_tag in pos_tags)

    #Past tense per answer
    add_count(r_config.nlp_pastTensePerAns, r_config.nlp_pastTensePerSen,
              tag_total('VBD'))

    #Pronoun per answer
    add_count(r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen,
              tag_total('PRP', 'PRP$'))

    #Verb per answer
    add_count(r_config.nlp_verbsPerAns, r_config.nlp_verbsPerSen,
              tag_total('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'))

    #Adjective per answer
    add_count(r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen,
              tag_total('JJ', 'JJR', 'JJS'))

    #Noun per answer
    add_count(r_config.nlp_nounsPerAns, r_config.nlp_nounsPerSen,
              tag_total('NN', 'NNP', 'NNS'))

    #Sentiment analysis
    vader = SentimentIntensityAnalyzer()
    sentence_valences = [vader.polarity_scores(s)['compound'] for s in sentences]
    speech_dict[r_config.nlp_sentiment_mean] = np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan

    non_punc = [value for value in words_all if value not in ['.','!','?']]

    # Join the words themselves. Previously str(non_punc) - the list's repr -
    # was joined character by character, so lexical richness was computed
    # over single characters instead of words.
    lex = LexicalRichness(" ".join(non_punc))
    speech_dict[r_config.nlp_mattr] = lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan

    #Number of words per minute
    speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time)*60
    speech_dict[r_config.nlp_totalTime] = total_time
    speech_dict['dbm_master_url'] = master_url

    df_speech = pd.DataFrame([speech_dict])
    return df_speech
|
||||
112
opendbm/dbm_lib/dbm_features/raw_features/util/util.py
Normal file
112
opendbm/dbm_lib/dbm_features/raw_features/util/util.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
file_name: util
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import numpy as np
|
||||
import subprocess
|
||||
|
||||
def filter_path(video_url, out_dir):
    """
    Filtering video uri path to prepare input and output location

    Args:
        video_url: path of the video file
        out_dir: Output directory path

    Returns:
        (input_loc, out_loc, fl_name): directory of the video, out_dir
        joined with the file name, and the file name without its extension
    """
    base_name = os.path.basename(video_url)
    fl_name = os.path.splitext(base_name)[0]

    return os.path.dirname(video_url), os.path.join(out_dir, fl_name), fl_name
|
||||
|
||||
def save_output(df, out_loc, fl_name, f_dir, f_ext):
    """
    Saving a feature dataframe as '<out_loc>/<f_dir>/<fl_name><f_ext>',
    creating the directory when needed

    Args:
        df: (dataframe) feature dataframe[ex: Formant freq, pitch]
        out_loc: (dir) Output location where we want to save raw output
        fl_name: file name
        f_dir: directory name for a feature
        f_ext: extension for a feature [ex: '_pose.csv']
    """
    dir_path = os.path.join(out_loc, f_dir)

    # exist_ok avoids the check-then-create race when several workers
    # write into the same feature directory at once.
    os.makedirs(dir_path, exist_ok=True)

    sav_path = os.path.join(dir_path, fl_name + f_ext)
    df.to_csv(sav_path, index=False)
|
||||
|
||||
def audio_process(base_dir,video_url):
    """
    Finding the '*_split.wav' audio files located next to a video

    Args:
        base_dir: Base path for raw data; used as a plain string prefix
        video_url: Raw video file path; its elements from position 2 on are
                   joined with '/' (presumably a list of path segments -
                   verify against the caller)

    Returns:
        List of '*_split.wav' paths found in the video's directory
    """
    relative_path = '/'.join(video_url[2:])
    path_parts = (base_dir + relative_path).split('/')

    # Everything but the last segment is the directory of the video.
    wav_dir = '/'.join(path_parts[:-1])
    return glob.glob(wav_dir + '/*_split.wav')
|
||||
|
||||
def compute_open_face_features(input_filepath,
                               output_directory,
                               open_face_executable,
                               au_static=False,
                               tracked_visualization=False,
                               clobber=False,
                               verbose=True):
    """
    Runs OpenFace on an input video.
    See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments

    Args:
        input_filepath: path of the video file
        output_directory: directory for the OpenFace output; when empty,
                          '<video name>_openface' next to the video is used
        open_face_executable: path of the OpenFace executable
        au_static: (bool) adds the '-au_static' flag
        tracked_visualization: (bool) adds the '-tracked' flag
        clobber: (bool) if True existing files will be overwritten
        verbose: (bool) prints progress messages

    Returns:
        (str) path to output csv file

    Raises:
        IOError if OpenFace executable is missing
    """
    if not os.path.isfile(open_face_executable):
        raise IOError("OpenFace executable {} could not be found.".format(open_face_executable))

    bn = os.path.splitext(os.path.basename(input_filepath))[0]
    if not output_directory:
        output_directory = os.path.join(os.path.dirname(input_filepath), bn + '_openface')
    output_csv = os.path.join(output_directory, bn + '.csv')

    # Reuse an existing result unless asked to overwrite it.
    if os.path.isfile(output_csv) and not clobber:
        if verbose:
            print('Output file {} already exists'.format(output_csv))
        return output_csv

    command = [open_face_executable]
    if au_static:
        command.append('-au_static')
    if tracked_visualization:
        command.append('-tracked')
    command.extend(['-q', '-2Dfp', '-3Dfp', '-pdmparams', '-pose', '-aus', '-gaze'])
    command.extend(['-f', input_filepath, '-out_dir', output_directory])

    if verbose:
        print('Computing OpenFace features {} from video file'.format(input_filepath))
    subprocess.check_output(command)
    if verbose:
        print('OpenFace features saved to {}'.format(output_directory))

    return output_csv
|
||||
221
opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
Normal file
221
opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""
|
||||
file_name: vad_utilities
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
# code from https://github.com/wiseman/py-webrtcvad/blob/master/example.py
|
||||
import collections
|
||||
import contextlib
|
||||
import sys
|
||||
import wave
|
||||
|
||||
def read_wave(path):
    """Reads a .wav file.

    Takes the path, and returns (PCM audio data, sample rate).

    The file must be mono, 2 bytes per sample, and sampled at 8000, 16000,
    32000 or 48000 Hz; anything else fails an assertion.
    """
    with contextlib.closing(wave.open(path, 'rb')) as wav_file:
        assert wav_file.getnchannels() == 1
        assert wav_file.getsampwidth() == 2

        rate = wav_file.getframerate()
        assert rate in (8000, 16000, 32000, 48000)

        frame_count = wav_file.getnframes()
        return wav_file.readframes(frame_count), rate
|
||||
|
||||
|
||||
class Frame(object):
    """Represents a "frame" of audio data.

    A plain value holder; the three constructor arguments are stored
    unchanged as the attributes `bytes`, `timestamp` and `duration`.
    """

    def __init__(self, bytes, timestamp, duration):
        # `bytes` shadows the builtin, but the name is part of the
        # constructor's keyword interface and is kept as is.
        self.timestamp, self.duration = timestamp, duration
        self.bytes = bytes
|
||||
|
||||
def frame_generator(frame_duration_ms, audio, sample_rate):
|
||||
"""Generates audio frames from PCM audio data.
|
||||
Takes the desired frame duration in milliseconds, the PCM data, and
|
||||
the sample rate.
|
||||
Yields Frames of the requested duration.
|
||||
"""
|
||||
n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
|
||||
offset = 0
|
||||
timestamp = 0.0
|
||||
duration = (float(n) / sample_rate) / 2.0
|
||||
while offset + n < len(audio):
|
||||
yield Frame(audio[offset:offset + n], timestamp, duration)
|
||||
timestamp += duration
|
||||
offset += n
|
||||
|
||||
|
||||
def vad_collector(sample_rate, frame_duration_ms,
                  padding_duration_ms, vad, frames):
    """Yield the voiced stretches of an audio stream as PCM byte strings.

    A sliding window of ``padding_duration_ms / frame_duration_ms`` frames
    is kept over the input. While idle, the collector starts a segment as
    soon as the number of voiced frames in the window exceeds 90% of the
    window size; the frames already in the window become the start of the
    segment. While collecting, every frame is added to the segment and the
    segment is closed (and yielded) once the number of unvoiced frames in
    the window exceeds 90% of the window size. The window is emptied on
    both transitions.

    A trace is written to ``sys.stdout``: '1'/'0' per frame, '+(t)' when a
    segment starts, '-(t)' when it ends, and a newline at end of input.

    Arguments:
    sample_rate - The audio sample rate, in Hz.
    frame_duration_ms - The frame duration in milliseconds.
    padding_duration_ms - The amount to pad the window, in milliseconds.
    vad - Object exposing ``is_speech(frame_bytes, sample_rate)``
          (e.g. a webrtcvad.Vad instance).
    frames - a source of audio frames (sequence or generator).

    Returns: A generator that yields PCM audio data.
    """
    window = collections.deque(
        maxlen=int(padding_duration_ms / frame_duration_ms))
    collecting = False
    segment = []

    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)
        sys.stdout.write('1' if is_speech else '0')

        if collecting:
            # Inside a segment: keep the frame and watch for trailing silence.
            segment.append(frame)
            window.append((frame, is_speech))
            silent_count = sum(1 for _, voiced in window if not voiced)
            if silent_count > 0.9 * window.maxlen:
                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
                collecting = False
                yield b''.join([f.bytes for f in segment])
                window.clear()
                segment = []
        else:
            # Idle: buffer the frame and watch for enough speech to start.
            window.append((frame, is_speech))
            voiced_count = sum(1 for _, voiced in window if voiced)
            if voiced_count > 0.9 * window.maxlen:
                collecting = True
                sys.stdout.write('+(%s)' % (window[0][0].timestamp,))
                # The buffered frames are the lead-in of the new segment.
                segment.extend(f for f, _ in window)
                window.clear()

    if collecting:
        # Input ended mid-segment: report the end of the last frame.
        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
    sys.stdout.write('\n')
    # Flush whatever is left of an unfinished segment.
    if segment:
        yield b''.join([f.bytes for f in segment])
|
||||
|
||||
|
||||
|
||||
def vad_get_segment_times(sample_rate, frame_duration_ms,
                          padding_duration_ms, vad, frames):
    """Return start and end times of the voiced segments in a frame stream.

    Based on vad_collector, but instead of yielding audio it records times.
    A sliding window of ``padding_duration_ms / frame_duration_ms`` frames
    is kept over the input:

    * While idle, a segment starts once the voiced frames in the window
      exceed 90% of the window size. The start time is the timestamp of the
      oldest frame in the window, and the window is then emptied.
    * While in a segment, the segment ends once the unvoiced frames in the
      window exceed 90% of the window size. The end time is the timestamp of
      the oldest frame in the window plus one frame duration. The window is
      NOT emptied at this point.
    * If the input ends inside a segment, the end time is the timestamp of
      the oldest frame still in the window, or, when the window is empty
      (the segment started on the very last frame), the end of that frame.

    Arguments:
    sample_rate - The audio sample rate, in Hz.
    frame_duration_ms - The frame duration in milliseconds.
    padding_duration_ms - The amount to pad the window, in milliseconds.
    vad - Object exposing ``is_speech(frame_bytes, sample_rate)``
          (e.g. a webrtcvad.Vad instance).
    frames - a source of audio frames (sequence or generator).

    Returns: tuple (start_times, end_times) of lists, in seconds.
    """
    window = collections.deque(
        maxlen=int(padding_duration_ms / frame_duration_ms))
    in_segment = False
    start_times, end_times = [], []

    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)
        # Both states buffer the frame before looking at the window.
        window.append((frame, is_speech))

        if in_segment:
            silent_count = sum(1 for _, voiced in window if not voiced)
            if silent_count > 0.9 * window.maxlen:
                end_times.append(window[0][0].timestamp + frame.duration)
                in_segment = False
        else:
            voiced_count = sum(1 for _, voiced in window if voiced)
            if voiced_count > 0.9 * window.maxlen:
                in_segment = True
                start_times.append(window[0][0].timestamp)
                window.clear()

    if in_segment:
        # Input ran out while still inside a segment: close it here.
        if len(window) > 0:
            end_times.append(window[0][0].timestamp)
        else:
            end_times.append(frame.timestamp + frame.duration)

    return (start_times, end_times)
|
||||
|
||||
|
||||
def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 ):
    """
    Select the part of the detected segments that is used for analysis.

    Rule: take the first segment that is strictly longer than
    (pad_at_start + len_to_keep) seconds, discard its first pad_at_start
    seconds and keep the next len_to_keep seconds. Later segments are
    ignored. If no segment is long enough, all three lists are empty.

    Args:
        seg_starts: segment start times in seconds.
        seg_ends: segment end times in seconds, parallel to seg_starts.
        pad_at_start: seconds skipped at the beginning of the segment.
        len_to_keep: seconds kept after the skipped part.

    Returns:
        sel_start, sel_end, sel_end_longer: lists holding zero or one value.
        sel_end_longer is the later of sel_end and the segment end minus
        pad_at_start.
    """
    sel_start, sel_end, sel_end_longer = [], [], []

    still_searching = True
    for position, seg_begin in enumerate(seg_starts):
        seg_finish = seg_ends[position]
        seg_dur = seg_finish - seg_begin
        if still_searching and seg_dur > (pad_at_start + len_to_keep):
            kept_from = seg_begin + pad_at_start
            kept_to = kept_from + len_to_keep
            sel_start.append(kept_from)
            sel_end.append(kept_to)
            sel_end_longer.append(max(kept_to, seg_finish - pad_at_start))
            still_searching = False

    return sel_start, sel_end, sel_end_longer
|
||||
191
opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py
Normal file
191
opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py
Normal file
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
file_name: video_util
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
|
||||
def smooth(x,window_len=11,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with reflected copies of itself
    (window_len - 1 samples each) so that transients at the beginning and
    end of the output are reduced. The padded signal is convolved with the
    window scaled to unit sum, and the padding is trimmed again.

    Args:
        x: the input signal, a 1-D numpy array.
        window_len: the dimension of the smoothing window; should be an odd
            integer. Values below 3 return ``x`` unchanged.
        window: the type of window: 'flat', 'hanning', 'hamming',
            'bartlett' or 'blackman'. 'flat' produces a moving average.

    Returns:
        The smoothed signal. For an odd window_len it has the same length
        as ``x``; for an even window_len it is one sample shorter.

    Raises:
        ValueError: if ``x`` is not one-dimensional, is shorter than
            window_len, or ``window`` is not one of the supported names.

    Example:
        t = np.linspace(-2, 2, 41)
        x = np.sin(t) + np.random.randn(len(t)) * 0.1
        y = smooth(x)

    See also:
        numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman,
        numpy.convolve, scipy.signal.lfilter
    """
    # The original used the Python 2 form ``raise (ValueError, "...")``,
    # which is a TypeError on Python 3; raise real exception instances.
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len<3:
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Mirror window_len - 1 samples around each end of the signal.
    s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Name was validated above, so a plain attribute lookup replaces eval.
        w = getattr(np, window)(window_len)
    y = np.convolve(w/w.sum(), s, mode='valid')
    # Drop the part of the output that stems from the mirrored padding.
    return y[int(window_len/2):-int(window_len/2)]
|
||||
|
||||
def filter_by_confidence_and_thresh(x, fea, thresh):
    """Return a feature value only when it is trustworthy and in range.

    Args:
        x: mapping-like row (e.g. a pandas Series) providing 's_confidence'
            and the feature named by ``fea``.
        fea: key of the feature to read from ``x``.
        thresh: exclusive upper bound on the absolute feature value.

    Returns:
        ``x[fea]`` when 's_confidence' is above 0.2 and ``|x[fea]|`` is
        below ``thresh``; otherwise NaN.
    """
    if x['s_confidence'] > 0.2 and np.fabs(x[fea]) < thresh:
        return x[fea]
    # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
    return np.nan
|
||||
|
||||
def add_au_emotion(x, emotion,emotion_type,exp_type):
    """
    Compute the expressivity of one emotion for a single frame.

    Args:
        x: mapping-like row (e.g. a pandas Series) with 's_confidence', the
            current value under ``emotion_type`` and, for every action unit
            in ``emotion``, the columns ' AUnn_c' and ' AUnn_r'.
        emotion: iterable of action unit numbers that make up the emotion.
        emotion_type: key in ``x`` whose current value the score is added to.
        exp_type: 'full' requires every action unit to be present
            (' AUnn_c' != 0), otherwise the contribution is zero; any other
            value (callers pass 'partial') averages over the action units
            that are present.

    Returns:
        Tuple (value, error_reason). When 's_confidence' is above 0.8 the
        value is ``x[emotion_type]`` plus the summed ' AUnn_r' of the
        present action units divided by 6 per contributing unit, and the
        reason is 'Pass'. Otherwise the value is NaN and the reason is
        'confidence less than 80%'.
    """
    if not x['s_confidence'] > 0.8:  # if using smooth, no need for 'success'
        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
        return np.nan, 'confidence less than 80%'

    sum_r = 0
    cnt = 0
    for au in emotion:
        au_c_label = " AU{:02d}_c".format(au)
        au_r_label = " AU{:02d}_r".format(au)
        # There are frames with a face in them but au_c == 0; skip those.
        if x[au_c_label] == 1 and (not np.isnan(x[au_r_label])):
            sum_r += x[au_r_label]
            # Each contributing unit adds 6 to the divisor
            # (NOTE(review): presumably an intensity normaliser - confirm).
            cnt += 6
        # 'full' expressivity only counts when all action units are present.
        if exp_type == 'full' and x[au_c_label] == 0:
            cnt = 0
            break

    sum_r = sum_r / cnt if cnt > 0 else 0
    return x[emotion_type] + sum_r, 'Pass'
|
||||
|
||||
def add_au_occ(x, emotion,emotion_type):
    """
    Compute the presence flag of one emotion for a single frame.

    Args:
        x: mapping-like row (e.g. a pandas Series) with 's_confidence' and,
            for every action unit in ``emotion``, the column ' AUnn_c'.
        emotion: sized collection of action unit numbers of the emotion.
        emotion_type: unused; kept so the signature matches its callers.

    Returns:
        Tuple (presence, error_reason). With 's_confidence' above 0.8 the
        presence is 1 when every listed action unit has ' AUnn_c' == 1 and
        0 otherwise, and the reason is 'Pass'. With lower confidence the
        presence is NaN and the reason is 'confidence less than 80%'.
    """
    if not x['s_confidence'] > 0.8:  # if using smooth, no need for 'success'
        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
        return np.nan, 'confidence less than 80%'

    # There are frames with a face in them but au_c == 0; they do not count.
    present = sum(1 for au in emotion if x[" AU{:02d}_c".format(au)] == 1)
    em_pres = 1 if present == len(emotion) else 0
    return em_pres, 'Pass'
|
||||
|
||||
def emotion_exp(em_au,of,em_col,err_col):
    """
    Compute one emotion's expressivity per frame and store it in ``of``.

    For every action unit group in ``em_au`` the dataframe is processed row
    by row with add_au_emotion, first in 'partial' mode into ``em_col[0]``
    and then in 'full' mode into ``em_col[1]``. Each pass also overwrites
    ``err_col`` with the reason reported for the row.

    Args:
        em_au: iterable of action unit groups (each an iterable of AU numbers).
        of: dataframe to update in place.
        em_col: two column names: [partial expressivity, full expressivity].
        err_col: name of the error reason column.
    """
    for au_group in em_au:
        for position, exp_type in enumerate(('partial', 'full')):
            target_col = em_col[position]
            per_row = of.apply(add_au_emotion,
                               args=(au_group, target_col, exp_type,),
                               axis=1, result_type='expand')
            of[[target_col, err_col]] = per_row
|
||||
|
||||
def emotion_pres(em_au,of,em_col,err_col):
    """
    Compute one emotion's presence flag per frame and store it in ``of``.

    For every action unit group in ``em_au`` the dataframe is processed row
    by row with add_au_occ; the flag goes to ``em_col`` and the reported
    reason overwrites ``err_col``.

    Args:
        em_au: iterable of action unit groups (each a collection of AU numbers).
        of: dataframe to update in place.
        em_col: name of the presence column.
        err_col: name of the error reason column.
    """
    target_cols = [em_col, err_col]
    for au_group in em_au:
        per_row = of.apply(add_au_occ, args=(au_group, em_col,),
                           axis=1, result_type='expand')
        of[target_cols] = per_row
|
||||
|
||||
def calc_of_for_video(of,face_cfg,fe_cfg):
    """
    Add the emotion expressivity and presence columns to ``of`` in place.

    All output columns are first created with value 0 and the error reason
    column with 'Pass'. Then emotion_exp fills the partial/full expressivity
    columns of every emotion or composite, and emotion_pres fills the
    presence columns of the seven basic emotions.

    Args:
        of: dataframe of per-frame face data, updated in place.
        face_cfg: object whose attributes hold the action unit groups
            (happiness, sadness, ..., ACTION_UNITS, ...).
        fe_cfg: object whose attributes hold the output column names.
    """
    new_cols = [
        fe_cfg.hap_exp, fe_cfg.sad_exp, fe_cfg.sur_exp, fe_cfg.fea_exp,
        fe_cfg.ang_exp, fe_cfg.dis_exp, fe_cfg.con_exp,
        fe_cfg.pai_exp, fe_cfg.neg_exp, fe_cfg.pos_exp, fe_cfg.neu_exp,
        fe_cfg.com_lower_exp, fe_cfg.com_upper_exp,
        fe_cfg.cai_exp, fe_cfg.com_exp,
        fe_cfg.happ_occ, fe_cfg.sad_occ, fe_cfg.sur_occ, fe_cfg.fea_occ,
        fe_cfg.ang_occ, fe_cfg.dis_occ, fe_cfg.con_occ,
        fe_cfg.hap_exp_full, fe_cfg.sad_exp_full, fe_cfg.sur_exp_full,
        fe_cfg.fea_exp_full, fe_cfg.ang_exp_full, fe_cfg.dis_exp_full,
        fe_cfg.con_exp_full, fe_cfg.pai_exp_full, fe_cfg.neg_exp_full,
        fe_cfg.pos_exp_full, fe_cfg.neu_exp_full, fe_cfg.cai_exp_full,
        fe_cfg.com_lower_exp_full, fe_cfg.com_upper_exp_full,
        fe_cfg.com_exp_full,
    ]
    of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index)
    of[fe_cfg.err_reason] = 'Pass'

    # (face_cfg attribute, fe_cfg partial column, fe_cfg full column),
    # processed strictly in this order.
    expressivity_plan = (
        ('happiness', 'hap_exp', 'hap_exp_full'),                       # happiness
        ('sadness', 'sad_exp', 'sad_exp_full'),                         # sadness
        ('surprise', 'sur_exp', 'sur_exp_full'),                        # surprise
        ('fear', 'fea_exp', 'fea_exp_full'),                            # fear
        ('anger', 'ang_exp', 'ang_exp_full'),                           # anger
        ('disgust', 'dis_exp', 'dis_exp_full'),                         # disgust
        ('contempt', 'con_exp', 'con_exp_full'),                        # contempt
        ('NEG_ACTION_UNITS', 'neg_exp', 'neg_exp_full'),                # negative
        ('POS_ACTION_UNITS', 'pos_exp', 'pos_exp_full'),                # positive
        ('NET_ACTION_UNITS', 'neu_exp', 'neu_exp_full'),                # neutral
        ('cai', 'cai_exp', 'cai_exp_full'),                             # activation
        ('ACTION_UNITS', 'com_exp', 'com_exp_full'),                    # composite
        ('LOWER_ACTION_UNITS', 'com_lower_exp', 'com_lower_exp_full'),  # lower face
        ('UPPER_ACTION_UNITS', 'com_upper_exp', 'com_upper_exp_full'),  # upper face
        ('pain', 'pai_exp', 'pai_exp_full'),                            # pain
    )
    for au_attr, exp_attr, full_attr in expressivity_plan:
        emotion_exp(getattr(face_cfg, au_attr), of,
                    [getattr(fe_cfg, exp_attr), getattr(fe_cfg, full_attr)],
                    fe_cfg.err_reason)

    # (face_cfg attribute, fe_cfg presence column), processed in this order.
    presence_plan = (
        ('happiness', 'happ_occ'),
        ('sadness', 'sad_occ'),
        ('surprise', 'sur_occ'),
        ('fear', 'fea_occ'),
        ('anger', 'ang_occ'),
        ('disgust', 'dis_occ'),
        ('contempt', 'con_occ'),
    )
    for au_attr, occ_attr in presence_plan:
        emotion_pres(getattr(face_cfg, au_attr), of,
                     getattr(fe_cfg, occ_attr), fe_cfg.err_reason)
|
||||
14
opendbm/dbm_lib/dbm_features/raw_features/video/__init__.py
Normal file
14
opendbm/dbm_lib/dbm_features/raw_features/video/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
file_name: __init__
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
# Directory that contains this package's __init__ module.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of the face utility configuration (face_util.yml), located
# four directory levels above this package under resources/services.
DBMLIB_FACE_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../../../../resources/services/face_util.yml'))
|
||||
@@ -0,0 +1,354 @@
|
||||
"""
|
||||
file_name: face_asymmetry.py
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
from mpl_toolkits import mplot3d
|
||||
from matplotlib import pyplot as plt
|
||||
import time
|
||||
import numpy as np
|
||||
import os
|
||||
import datetime
|
||||
import glob
|
||||
import cv2
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
import subprocess
|
||||
import pandas as pd
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu, util as ut
|
||||
|
||||
# Root logger set to INFO at import time; this module logs through the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()

# Output sub-directory and file suffix handed to ut.save_output by
# run_face_asymmetry.
face_asym_dir = 'facial/face_asymmetry'
csv_ext = '_facasym.csv'

# Colour used by visualize_vid to draw landmark dots with cv2.circle.
cv2_color_purple = (254,19,188)
# Colour triples with components in the 0..1 range.
color_blue = (0,0,1.0)
color_green = (0,1.0,0)
color_red = (1.0,0,0)
color_y = (1.0,1.0,0)

# Numeric error codes stored in the last column of the asymmetry score
# array, and the message each code is translated to in the output.
error_code_message = {
    0: 'pass',
    1: 'confidence less than 80%',
}
# Reverse lookup: message -> numeric code.
error_message_code = {y:x for x,y in error_code_message.items()}
|
||||
|
||||
def visualize_vid(fn, attr=None, write_out=False):
    """
    Play a video frame by frame with optional landmark and asymmetry overlays.

    Every frame is shown in the upper subplot; when asymmetry scores are
    supplied, the scores up to the current frame are plotted in the lower
    subplot. Each rendered figure is saved to 'tmp.png' and, if requested,
    appended to 'out.mp4'.

    Args:
        fn: path of the video file to read.
        attr: optional dictionary with any of
            'lmks_frms': frame id -> landmark array; columns 0 and 1 are
                drawn as pixel x/y on the frame,
            'score_asym': 2D array with the frame id in column 0 and one
                score per remaining column.
            ``None`` is treated as an empty dictionary.
        write_out: when True, write the rendered figures to 'out.mp4'.
    """
    # The default attr=None used to crash on the first "in attr" test.
    if attr is None:
        attr = {}

    vid = cv2.VideoCapture(fn)
    fps = vid.get(cv2.CAP_PROP_FPS)

    if write_out:
        # Size of the saved figure image, not of the source video; the
        # values were tuned by hand for specific recordings.
        fig_w = 680
        fig_h = 659
        out_vid = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc(*'MP4V'), fps, (fig_w, fig_h))

    plt.figure(figsize=(8, 8))
    try:
        frameid = 0
        while True:
            ret, frame = vid.read()
            if not ret:
                # Release the Video Device if ret is false
                vid.release()
                print('Released Video Resource')
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frameid += 1
            # logger.info(frameid, frame.shape) used the frame id as the
            # format string, which makes logging report a formatting error.
            logger.info('%s %s', frameid, frame.shape)

            if 'lmks_frms' in attr:
                lmks_frms = attr['lmks_frms']
                for i in range(lmks_frms[frameid].shape[0]):
                    cv2.circle(frame, (int(lmks_frms[frameid][i, 0]), int(lmks_frms[frameid][i, 1])), 2, cv2_color_purple, -1)

            if write_out:
                cv2.putText(frame, 'Frame: ' + str(frameid), (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3)

            plt.subplot(211)
            plt.imshow(frame)
            plt.axis('off')
            plt.pause(0.2)

            if 'score_asym' in attr:
                ax = plt.subplot(212)
                ax.cla()
                ax.set_xlim(0, 140)
                ax.set_ylim(0, 10)

                sa = attr['score_asym']
                # Rows whose frame id (column 0) is not past the current frame.
                s = sa[np.where(sa[:, 0] <= frameid), :][0, :, :]

                for i in range(1, s.shape[1]):
                    plt.plot(s[:, 0], s[:, i])

                plt.legend(['mouth', 'eyebrow', 'eye', 'mouth+eye+eyebrow'])
                plt.minorticks_on()
                # Positional flag: the keyword was renamed from ``b`` to
                # ``visible`` in newer Matplotlib, so neither name is portable.
                plt.grid(True, which='major', color='r', linestyle='-')
                plt.grid(True, which='minor', color='r', linestyle='--')

            plt.savefig('tmp.png', bbox_inches='tight')
            print(cv2.imread('tmp.png').shape)

            plt.clf()
            if write_out:
                out_vid.write(cv2.imread('tmp.png'))

    except KeyboardInterrupt:
        # Release the Video Device
        vid.release()
        if write_out:
            out_vid.release()
        logger.info('Exception, and Video Resource Released')

    if write_out:
        out_vid.release()
|
||||
|
||||
|
||||
def retrieve_attr(of_df):
    '''
    Retrieve landmarks and pose parameters for each frame from openface output

    Args:
        of_df: dataframe output from openface. Needs the columns 'frame',
            ' confidence', ' pose_Tx/Ty/Tz', ' pose_Rx/Ry/Rz' and, for each of
            the 68 landmarks i, ' x_i', ' y_i', ' X_i', ' Y_i', ' Z_i'.
    Returns:
        lmks_frms: dictionary, with frame id as key and a (68, 6) array as
            value; the columns are x, y, X, Y, Z and the frame confidence.
        pose_p: dictionary, with frame id as key and the list
            [Tx, Ty, Tz, Rx, Ry, Rz] as value.
        Both dictionaries are ordered by ascending frame id. If a frame id
        occurs on several rows, the first such row is used. When the
        dataframe does not hold exactly 68 ' x_' columns, two empty
        dictionaries are returned.
    '''
    tot_lmks = 68  # openface specific
    if len([c for c in of_df.columns.to_list() if ' x_' in c]) != tot_lmks:
        # Callers unpack two values, so return a pair here as well (the
        # original returned a single {} and made the unpacking fail).
        return {}, {}

    coord_prefixes = (' x_', ' y_', ' X_', ' Y_', ' Z_')
    pose_cols = [' pose_Tx', ' pose_Ty', ' pose_Tz',
                 ' pose_Rx', ' pose_Ry', ' pose_Rz']

    # One row per frame id (the first one), in ascending frame order.
    rows = of_df.drop_duplicates(subset='frame', keep='first').sort_values('frame')

    # Pull every column family out once instead of once per frame and
    # landmark; the result has shape (n_frames, 68, 5).
    coords = np.stack(
        [rows[[prefix + str(i) for i in range(tot_lmks)]].to_numpy(dtype=float)
         for prefix in coord_prefixes],
        axis=-1)
    # Scalar confidence per frame (the original put a whole Series into the
    # row, which current NumPy refuses to assign).
    confidence = rows[' confidence'].to_numpy(dtype=float)
    pose = rows[pose_cols].to_numpy(dtype=float)

    lmks_frms = {}
    pose_p = {}
    for k, fi in enumerate(rows['frame'].to_list()):
        lmks = np.empty((tot_lmks, 6))
        lmks[:, :5] = coords[k]
        lmks[:, 5] = confidence[k]
        lmks_frms[fi] = lmks
        pose_p[fi] = pose[k].tolist()

    return lmks_frms, pose_p
|
||||
|
||||
|
||||
def mirror_point(a, b, c, d, x1, y1, z1):
    """
    Mirror the point (x1, y1, z1) w.r.t. the 3D plane a*x + b*y + c*z + d = 0.

    Args:
        a, b, c: components of the plane normal (need not be unit length).
        d: plane offset.
        x1, y1, z1: coordinates of the point to reflect.

    Returns:
        List [x, y, z] with the coordinates of the mirrored point.
    """
    # Step along the normal that takes the point onto the plane.
    k = (-a * x1 - b * y1 - c * z1 - d) / float((a * a + b * b + c * c))

    mirrored = []
    for normal_comp, point_comp in zip((a, b, c), (x1, y1, z1)):
        # Foot of the perpendicular on the plane, then the same step again.
        foot = normal_comp * k + point_comp
        mirrored.append(2 * foot - point_comp)
    return mirrored
|
||||
|
||||
|
||||
def dist_vec2plane(vec, nrm):
    """
    Length of a vector after projecting it onto a plane.

    Args:
        vec: the vector to project.
        nrm: normal of the plane; the formula assumes unit length.

    Returns:
        sqrt(|vec|^2 - (vec . nrm)^2), i.e. the length of the component of
        ``vec`` that lies in the plane. Never negative-rooted: see below.
    """
    squared = np.dot(vec, vec) - np.dot(vec, nrm)**2
    # For a vector (almost) parallel to the normal, round-off can push the
    # difference slightly below zero, which would turn the result into NaN.
    return np.sqrt(np.maximum(squared, 0.0))
|
||||
|
||||
|
||||
def vis_lmks3d(lmks_frms, vis_idx):
    """
    Animate groups of 3D facial landmarks, one scatter plot per frame.

    Args:
        lmks_frms: dictionary, frame id -> landmark array whose columns 2, 3
            and 4 are plotted as X, Y and Z.
        vis_idx: list of landmark index groups; every group is drawn in its
            own colour (fewer than five groups are supported).
    """
    fig = plt.figure()
    color_type = ['b','g','r','y','c']
    assert len(color_type) > len(vis_idx)

    frame_ids = sorted(list(lmks_frms.keys()))
    for frame_id in frame_ids:
        ax = plt.axes(projection="3d")
        frame_lmks = lmks_frms[frame_id]
        for group_no, group in enumerate(vis_idx):
            ax.scatter(frame_lmks[group, 2], frame_lmks[group, 3],
                       frame_lmks[group, 4], c=color_type[group_no])

        # Fixed viewing volume and camera so consecutive frames are comparable.
        ax.axes.set_xlim3d(left=-75, right=100)
        ax.axes.set_ylim3d(bottom=-200, top=25)
        ax.axes.set_zlim3d(bottom=440, top=560)
        ax.view_init(-89, -90)  # elevation, azimuth
        plt.title(str(frame_id))
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')
        plt.pause(0.2)
        plt.cla()
        plt.draw()
|
||||
|
||||
def calc_fac_asymmetry(attr, is_vis=False):
    '''
    Quantify facial asymmetry

    For every frame the left-side landmarks are mirrored across the plane
    through landmarks 30, 27 and 8, and each mirrored point is compared with
    its right-side counterpart. The distance used is the length of the
    difference vector after projection onto the plane whose normal is the
    head pose direction. Distances are averaged per facial region.

    Args:
        attr: attribute dictionary containing necessary features for calculation, e.g.,
            lmks_frms: dictionary, with frame id as key and 68 landmark set (OpenFace) as value
            pose_param: dictionary, with frame id as key and pose param as value
        is_vis: when True, show the landmarks and their mirrored copies
            with matplotlib while processing.
    Returns:
        score_asym: 2D array with one row per frame (ascending frame id) and
            the columns frame id, mouth, eyebrow, eye, composite, error code.
            Frames whose confidence is below 0.8 get NaN scores and the
            'confidence less than 80%' code. An empty (0, 0) array is
            returned when ``attr`` lacks 'lmks_frms' or 'pose_param'.
    '''
    # openface landmark indices
    lmks_ref_idx = list(range(0,17)) + list(range(27,36))
    lmks_mid_idx = [27,28,29,30,33,51,62,66,57,8]
    lmks_rgt_idx = [0,1,2,3,4,5,6,7,
                    17,18,19,20,21,
                    36,37,38,39,40,41,
                    48,49,50,
                    59,58,
                    60,61,
                    67]
    # Same order as lmks_rgt_idx: entry k is the counterpart of entry k.
    lmks_lft_idx = [16,15,14,13,12,11,10,9,
                    26,25,24,23,22,
                    45,44,43,42,47,46,
                    54,53,52,
                    55,56,
                    64,63,
                    65]

    lmks_mth_idx = list(range(48,68))
    lmks_ebr_idx = list(range(17,27))
    lmks_eye_idx = list(range(36,48))
    assert len(lmks_lft_idx)==len(lmks_rgt_idx)

    fea_list = ['mouth', 'eyebrow', 'eye', 'composite']
    score_asym = np.empty(shape=(0, 0))

    if ('lmks_frms' in attr) and ('pose_param' in attr):
        lmks_frms = attr['lmks_frms']
        pose_p = attr['pose_param']

        if is_vis:
            vis_lmks3d(lmks_frms, [lmks_lft_idx, lmks_rgt_idx, lmks_mid_idx, lmks_ref_idx])

        # +1 for the frame id column, +1: extra column for error code
        score_asym = np.zeros((len(lmks_frms),len(fea_list)+1+1))
        if is_vis:
            plt.figure()
            ax = plt.axes(projection="3d")

        for s,fi in enumerate(sorted(list(lmks_frms.keys()))):
            lmks_3d = lmks_frms[fi][:,2:5]
            pose = pose_p[fi]
            err_code = error_message_code['pass']

            # Column 5 carries the frame confidence on every landmark row.
            if lmks_frms[fi][0,5] < 0.8:
                err_code = error_message_code['confidence less than 80%']
                # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
                score_asym[s,:] = [fi,np.nan,np.nan,np.nan,np.nan,err_code]
                continue

            # Head direction: the z axis rotated by the pose angles.
            rx = R.from_euler('x', pose[3])
            ry = R.from_euler('y', pose[4])
            rz = R.from_euler('z', pose[5])

            vec_pose = rz.apply(ry.apply(rx.apply([0,0,1])))
            anc_idx = [30, 27, 8] # for central plane estimation
            nrm = np.cross(lmks_3d[anc_idx[2],:] - lmks_3d[anc_idx[0],:],
                           lmks_3d[anc_idx[1],:] - lmks_3d[anc_idx[0],:])

            nrm = nrm / np.linalg.norm(nrm)
            a,b,c = nrm
            # Plane through the first anchor: nrm . p - d = 0.
            d = np.dot(nrm, lmks_3d[anc_idx[0],:])

            dist_L2R_mth = []
            dist_L2R_ebr = []
            dist_L2R_eye = []
            dist_com = []

            lmks_rfl = np.empty((0,3))
            src_idx = lmks_lft_idx

            for k,idx in enumerate(src_idx):
                p_rfl = np.array(mirror_point(a, b, c, -d, lmks_3d[idx,0], lmks_3d[idx,1], lmks_3d[idx,2]))
                lmks_rfl = np.vstack((lmks_rfl, p_rfl))
                dist = dist_vec2plane((p_rfl-lmks_3d[lmks_rgt_idx[k],:]), vec_pose)

                if idx in lmks_mth_idx:
                    dist_L2R_mth.append(dist)
                if idx in lmks_ebr_idx:
                    dist_L2R_ebr.append(dist)
                if idx in lmks_eye_idx:
                    dist_L2R_eye.append(dist)
                # Composite = mouth + eyebrow + eye (jaw points are left out).
                if (idx in lmks_mth_idx) or (idx in lmks_ebr_idx) or (idx in lmks_eye_idx):
                    dist_com.append(dist)
            score_asym[s,:] = [fi,np.mean(dist_L2R_mth),np.mean(dist_L2R_ebr),np.mean(dist_L2R_eye),np.mean(dist_com),err_code]

            if is_vis:
                ax.scatter(lmks_3d[:,0], lmks_3d[:,1], lmks_3d[:,2])
                ax.scatter(lmks_rfl[:,0], lmks_rfl[:,1], lmks_rfl[:,2], c='y')
                ax.scatter(pose_p[fi][0], pose_p[fi][1], pose_p[fi][2], c='c')
                plt.title('mirrored landmarks, frame: '+str(fi))
                ax.set_xlabel('X')
                ax.set_ylabel('Y')
                ax.set_zlabel('Z')
                plt.pause(0.2)
                plt.cla()
                plt.draw()

    return score_asym
|
||||
|
||||
|
||||
def calc_asym_feature(open_face_csv, f_cfg):
    """
    Calculating facial asymmetry features and preparing final df

    Args:
        open_face_csv: path of the openface output csv of one video.
        f_cfg: object whose attributes hold the output column names
            (fac_AsymMaskMouth, fac_AsymMaskEyebrow, fac_AsymMaskEye,
            fac_AsymMaskCom, err_reason).

    Returns:
        List with a single dataframe: the asymmetry scores per frame, the
        error reason as text, and the frame, face_id, timestamp, confidence
        and success columns copied from the openface output.
    """
    df_list = []

    try:
        # 'warn' keeps the old behaviour: skip malformed lines with a warning.
        of_df = pd.read_csv(open_face_csv, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 only knows the older keyword.
        of_df = pd.read_csv(open_face_csv, error_bad_lines=False)
    lmks_frms, pose_p = retrieve_attr(of_df)

    attr = {'lmks_frms': lmks_frms, 'pose_param': pose_p}
    score_asym = calc_fac_asymmetry(attr)

    df_score_asym = pd.DataFrame(score_asym, columns=['frame', f_cfg.fac_AsymMaskMouth, f_cfg.fac_AsymMaskEyebrow,
                                                      f_cfg.fac_AsymMaskEye, f_cfg.fac_AsymMaskCom, f_cfg.err_reason])
    # Translate the numeric error code into its message.
    df_score_asym[f_cfg.err_reason] = df_score_asym[f_cfg.err_reason].apply(lambda x: error_code_message[x])

    # Bookkeeping columns come straight from the openface output.
    df_score_asym['frame'] = of_df['frame']
    df_score_asym['face_id'] = of_df[' face_id']
    df_score_asym['timestamp'] = of_df[' timestamp']
    df_score_asym['confidence'] = of_df[' confidence']
    df_score_asym['success'] = of_df[' success']

    df_list.append(df_score_asym)
    return df_list
|
||||
|
||||
|
||||
def run_face_asymmetry(video_uri, out_dir, f_cfg):
    """
    Processing all patient's for calculating facial asymmetry
    ---------------
    ---------------
    Args:
        video_uri: video path; f_cfg: face config object
        out_dir: (str) Output directory for processed output

    The openface csv is looked up under '<out_loc>/<fl_name>_openface/';
    when none is found nothing is written. Any exception is logged (with
    traceback) and not re-raised, so one bad video does not stop a batch.
    """
    try:

        #Baseline logic
        # Result unused here; kept because constructing it reads the face
        # configuration file, so a missing config is still reported.
        cfr = ConfigFaceReader()
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if len(of_csv_path)>0:

            of_csv = of_csv_path[0]
            asym_df_list = calc_asym_feature(of_csv, f_cfg)

            asym_final_df = pd.concat(asym_df_list, ignore_index=True)
            asym_final_df['dbm_master_url'] = video_uri

            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
            ut.save_output(asym_final_df, out_loc, fl_name, face_asym_dir, csv_ext)

    except Exception:
        # logger.exception keeps the message and adds the traceback, so the
        # actual cause is no longer lost.
        logger.exception('Failed to process video file')
|
||||
97
opendbm/dbm_lib/dbm_features/raw_features/video/face_au.py
Normal file
97
opendbm/dbm_lib/dbm_features/raw_features/video/face_au.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""
|
||||
file_name: face_au.py
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import datetime
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu, util as ut
|
||||
|
||||
# Root logger set to INFO at import time; this module logs through the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()

# Output sub-directory and file suffix handed to ut.save_output by run_face_au.
face_au_dir = 'facial/face_au'
csv_ext = '_facau.csv'
|
||||
|
||||
|
||||
def extract_col_nm_au(cols):
    """
    Pick the action unit (au) column names out of openface output (csv)

    Args:
        cols: iterable of column names from open face output (csv)
    Returns:
        (list) the names containing ' AU', in their original order
    """
    au_marker = ' AU'
    return list(filter(lambda name: au_marker in name, cols))
|
||||
|
||||
|
||||
def au_col_nm_map(df):
    """
    Rename dataframe action unit column names to match functional specifications v1.0

    A column containing ' AU' and an underscore is renamed to
    'fac_AU<id>int' when it contains '_r' and to 'fac_AU<id>pres' when it
    contains '_c', where <id> is the two characters before the last
    underscore. Other columns keep their names.

    Args:
        df: dataframe; renamed in place
    Returns:
        the same dataframe with mapped variables
    """
    renames = {}
    for name in list(df):
        if ' AU' not in name:
            continue
        sep = name.rfind('_')
        if not sep > -1:
            continue
        au_id = name[sep - 2:sep]
        # Checked in this order, so '_c' wins if a name holds both markers.
        for marker, suffix in (('_r', 'int'), ('_c', 'pres')):
            if marker in name:
                renames[name] = 'fac_AU' + au_id + suffix
    df.rename(columns=renames, inplace=True)
    return df
|
||||
|
||||
|
||||
def run_face_au(video_uri, out_dir, f_cfg):
    """
    Processing all patient's for fetching action units
    ---------------
    ---------------
    Args:
        video_uri: video path; f_cfg: face config object
        out_dir: (str) Output directory for processed output

    The openface csv is looked up under '<out_loc>/<fl_name>_openface/';
    when none is found nothing is written. Any exception is logged (with
    traceback) and not re-raised, so one bad video does not stop a batch.
    """
    try:

        #Baseline logic
        # Result unused here; kept because constructing it reads the face
        # configuration file, so a missing config is still reported.
        cfr = ConfigFaceReader()
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if len(of_csv_path)>0:

            try:
                # 'warn' keeps the old behaviour: skip malformed lines with
                # a warning.
                df_of = pd.read_csv(of_csv_path[0], on_bad_lines='warn')
            except TypeError:
                # pandas < 1.3 only knows the older keyword.
                df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
            # Copy so the added columns do not write into a view of df_of.
            df_au = df_of[extract_col_nm_au(df_of)].copy()

            df_au['frame'] = df_of['frame']
            df_au['face_id'] = df_of[' face_id']
            df_au['timestamp'] = df_of[' timestamp']
            df_au['confidence'] = df_of[' confidence']
            df_au['success'] = df_of[' success']

            df_au = au_col_nm_map(df_au)
            df_au['dbm_master_url'] = video_uri

            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
            ut.save_output(df_au, out_loc, fl_name, face_au_dir, csv_ext)

    except Exception:
        # logger.exception keeps the message and adds the traceback, so the
        # actual cause is no longer lost.
        logger.exception('Failed to process video file')
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
"""
|
||||
file_name: face_config_reader
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import yaml
|
||||
import boto3
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video import DBMLIB_FACE_CONFIG
|
||||
|
||||
class ConfigFaceReader(object):
    """
    Read the face feature configuration (section 'cdx_face_config') from a yml file
    and expose its entries as attributes.
    """
    def __init__(self,
                 service_config_yml=None):
        """
        Load the face configuration.

        Args:
            service_config_yml (None, optional): path of the yml file holding the face
                configuration; DBMLIB_FACE_CONFIG is used when None
        """
        if service_config_yml is None:
            service_config = DBMLIB_FACE_CONFIG
        else:
            service_config = service_config_yml

        with open(service_config, 'r') as ymlfile:
            # safe_load instead of yaml.load(stream): calling load without a Loader
            # is deprecated in PyYAML 5.1+ and is an error in PyYAML 6.
            config = yaml.safe_load(ymlfile)

        face_cfg = config['cdx_face_config']

        # Action unit groups
        self.ACTION_UNITS = face_cfg['ACTION_UNITS']
        self.NEG_ACTION_UNITS = face_cfg['NEG_ACTION_UNITS']
        self.POS_ACTION_UNITS = face_cfg['POS_ACTION_UNITS']
        self.NET_ACTION_UNITS = face_cfg['NET_ACTION_UNITS']
        self.LOWER_ACTION_UNITS = face_cfg['LOWER_ACTION_UNITS']
        self.UPPER_ACTION_UNITS = face_cfg['UPPER_ACTION_UNITS']

        # Per-emotion entries
        self.happiness = face_cfg['happiness']
        self.sadness = face_cfg['sadness']
        self.surprise = face_cfg['surprise']
        self.fear = face_cfg['fear']
        self.anger = face_cfg['anger']
        self.disgust = face_cfg['disgust']
        self.contempt = face_cfg['contempt']
        self.pain = face_cfg['pain']
        self.cai = face_cfg['CAI']

        # SELECTED_FEATURES is stored as one comma-separated string in the yml.
        self.SELECTED_FEATURES = face_cfg['SELECTED_FEATURES'].split(',')
        self.face_expr_dir = face_cfg['face_expr_dir']
        self.face_asym_dir = face_cfg['face_asym_dir']
        self.AU_fl = face_cfg['AU_filters']
        self.au_int = face_cfg['au_intensity']
        self.au_prs = face_cfg['au_presence']

    def get_action_unit(self):
        """
        Returns:
            the 'ACTION_UNITS' entry of the face config
        """
        return self.ACTION_UNITS

    def get_neg_action_unit(self):
        """
        Returns:
            the 'NEG_ACTION_UNITS' entry of the face config
        """
        return self.NEG_ACTION_UNITS

    def get_pos_action_unit(self):
        """
        Returns:
            the 'POS_ACTION_UNITS' entry of the face config
        """
        return self.POS_ACTION_UNITS

    def get_net_action_unit(self):
        """
        Returns:
            the 'NET_ACTION_UNITS' entry of the face config
        """
        return self.NET_ACTION_UNITS

    def get_selected_feature(self):
        """
        Returns:
            (list) the 'SELECTED_FEATURES' entry split on commas
        """
        return self.SELECTED_FEATURES

    def get_happiness(self):
        """
        Returns:
            the 'happiness' entry of the face config
        """
        return self.happiness

    def get_sadness(self):
        """
        Returns:
            the 'sadness' entry of the face config
        """
        return self.sadness

    def get_surprise(self):
        """
        Returns:
            the 'surprise' entry of the face config
        """
        return self.surprise

    def get_fear(self):
        """
        Returns:
            the 'fear' entry of the face config
        """
        return self.fear

    def get_anger(self):
        """
        Returns:
            the 'anger' entry of the face config
        """
        return self.anger

    def get_disgust(self):
        """
        Returns:
            the 'disgust' entry of the face config
        """
        return self.disgust

    def get_contempt(self):
        """
        Returns:
            the 'contempt' entry of the face config
        """
        return self.contempt

    def get_cai(self):
        """
        Returns:
            the 'CAI' entry of the face config
        """
        return self.cai
|
||||
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
file_name: process_emotion_expressivity
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import datetime
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu, util as ut
|
||||
|
||||
# Configure the root logger at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger.
logger=logging.getLogger()

# Output sub-directory and file-name suffix passed to ut.save_output by run_face_expressivity.
face_expr_dir = 'facial/face_expressivity'
csv_ext = '_facemo.csv'
|
||||
|
||||
#Openface feature extraction
|
||||
def of_feature(df_of, cfr, f_cfg):
    """
    Build the face expressivity dataframe from OpenFace output.

    Adds an 's_confidence' column (vu.smooth of ' confidence' with a flat
    window). When 'AU' is among cfr.SELECTED_FEATURES, vu.calc_of_for_video is
    called on the dataframe and the negative / composite expressivity columns
    are then divided by 5 and 7 respectively.

    Args:
        df_of: OpenFace dataframe; columns are added/overwritten on it directly
        cfr: face config reader providing SELECTED_FEATURES
        f_cfg: raw variable config object providing the neg_exp, neg_exp_full,
            com_exp and com_exp_full column names
    Returns:
        (list) single-element list holding df_of
    """
    smoothed = vu.smooth(df_of[' confidence'].values, window='flat')
    df_of['s_confidence'] = smoothed.tolist()

    if 'AU' in cfr.SELECTED_FEATURES:
        # presumably fills the expressivity columns on df_of in place -- TODO confirm
        vu.calc_of_for_video(df_of, cfr, f_cfg)

        #Normalizing facial expressivity for Composite and Negative expr(Range 0 to 1)
        for attr, divisor in (('neg_exp', 5), ('neg_exp_full', 5),
                              ('com_exp', 7), ('com_exp_full', 7)):
            col = getattr(f_cfg, attr)
            if len(df_of[col]) > 0:
                df_of[col] = df_of[col]/divisor

    return [df_of]
|
||||
|
||||
|
||||
def run_face_expressivity(video_uri, out_dir, f_cfg):
    """
    Compute facial expressivity from a video's OpenFace output and save it.

    Looks for '<file name>_openface/*.csv' under the output location returned
    by ut.filter_path. When at least one csv exists, the first match is read,
    reduced to the columns listed in the face config's AU_fl, passed through
    of_feature and written with ut.save_output. Nothing is written when no
    OpenFace csv is found. Any exception is logged and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        f_cfg: raw variable config object
    """
    try:
        cfr = ConfigFaceReader()
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        # NOTE(review): error_bad_lines is deprecated in recent pandas in favour
        # of on_bad_lines -- confirm against the pandas version this project pins.
        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        df_of = df_of[cfr.AU_fl]
        expr_df_list = of_feature(df_of, cfr, f_cfg)

        exp_final_df = pd.concat(expr_df_list, ignore_index=True)
        exp_final_df['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        ut.save_output(exp_final_df, out_loc, fl_name, face_expr_dir, csv_ext)

    except Exception as e:
        # Include the video and the cause so failures can be diagnosed from the log.
        logger.error('Failed to process video file {}\n{}'.format(video_uri, e))
|
||||
121
opendbm/dbm_lib/dbm_features/raw_features/video/face_landmark.py
Normal file
121
opendbm/dbm_lib/dbm_features/raw_features/video/face_landmark.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
file_name: face_landmark
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import datetime
|
||||
import glob
|
||||
from os.path import join
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu, util as ut
|
||||
|
||||
# Configure the root logger at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger.
logger=logging.getLogger()

# Output sub-directory and file-name suffix passed to ut.save_output by run_face_landmark.
face_lmk_dir = 'facial/face_landmark'
csv_ext = '_faclmk.csv'
|
||||
|
||||
def extract_col_nm_lmk(cols):
    """
    Pick the landmark column names out of the OpenFace output (csv) columns.

    Args:
        cols: column names from open face output (csv)
    Returns:
        (list) names containing one of the 2D (' x_', ' y_') or 3D
        (' X_', ' Y_', ' Z_') landmark tags, in their original order
    """
    lmk_tags = (' y_', ' x_', ' X_', ' Y_', ' Z_')
    return [name for name in cols if any(tag in name for tag in lmk_tags)]
|
||||
|
||||
|
||||
def lmk_col_nm_map(df):
    """
    Rename the landmark columns of a dataframe to match functional
    specifications v1.0.

    A column '<tag><n>' becomes 'fac_LMK<nn><axis>', where <nn> is the text
    after the last underscore (prefixed with '0' when it is a single
    character) and <axis> is 'r' for ' y_', 'c' for ' x_' and 'X'/'Y'/'Z' for
    the 3D tags. Columns without an underscore keep their name.

    Args:
        df: dataframe; its columns are renamed in place
    Returns:
        the same dataframe with mapped column names
    """
    # Checked in this order; a later matching tag overrides an earlier one.
    tag_to_axis = (
        (' y_', 'r'),
        (' x_', 'c'),
        (' X_', 'X'),
        (' Y_', 'Y'),
        (' Z_', 'Z'),
    )
    renamed = {}
    for name in df.columns:
        start = name.rfind('_') + 1
        if start <= 0:
            continue
        tail = name[start:]
        lmk_id = tail if len(tail) > 1 else '0' + tail
        for tag, axis in tag_to_axis:
            if tag in name:
                renamed[name] = 'fac_LMK' + lmk_id + axis
    df.rename(columns=renamed, inplace=True)
    return df
|
||||
|
||||
|
||||
def add_disp_3D(df, tot_lmk=68):
    """
    Add a 3D displacement column for each landmark.

    Rows are sorted by 'frame' in descending order, every 'fac_LMK*' column is
    differenced against the previous row of that ordering, and for each
    landmark id the Euclidean norm of the (X, Y, Z) differences is stored in
    'fac_LMK<id>disp'. The first row of the sorted frame has no predecessor;
    its NaN differences are skipped by the sum, giving a displacement of 0.

    Args:
        df: landmark dataframe with a 'frame' column and
            'fac_LMK<id>X' / 'Y' / 'Z' columns for every id in range(tot_lmk)
        tot_lmk: (int) number of landmarks; defaults to the 68 landmark model
    Returns:
        a new dataframe, sorted by frame (descending), with the displacement
        columns added; the input dataframe is not modified
    """
    df = df.sort_values(by=['frame'], ascending=False)
    cols_lmk = [col for col in df.columns if 'fac_LMK' in col]
    # Squared row-to-row difference of every landmark coordinate.
    sq_diff = df[cols_lmk].diff().pow(2)

    for i in range(tot_lmk):
        prefix = 'fac_LMK' + '{:02d}'.format(i)
        axes = [prefix + 'X', prefix + 'Y', prefix + 'Z']
        df[prefix + 'disp'] = np.sqrt(sq_diff[axes].sum(axis=1))

    return df
|
||||
|
||||
|
||||
def run_face_landmark(video_uri, out_dir, f_cfg):
    """
    Extract the facial landmark columns from a video's OpenFace output and save them.

    Looks for '<file name>_openface/*.csv' under the output location returned
    by ut.filter_path. When at least one csv exists, the first match is read,
    its landmark columns plus frame/face_id/timestamp/confidence/success are
    kept, the landmark columns are renamed with lmk_col_nm_map, per-landmark
    displacement is added with add_disp_3D and the result is handed to
    ut.save_output. Nothing is written when no OpenFace csv is found. Any
    exception is logged and not re-raised.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        f_cfg: raw variable config object (not used by this function)
    """
    try:
        # NOTE(review): the reader is not used below; presumably kept so that a
        # broken face config is reported here -- confirm before removing.
        cfr = ConfigFaceReader()
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        # NOTE(review): error_bad_lines is deprecated in recent pandas in favour
        # of on_bad_lines -- confirm against the pandas version this project pins.
        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        df_lmk = df_of[extract_col_nm_lmk(df_of)].copy()

        # Carry over the per-frame bookkeeping columns without the leading space.
        df_lmk['frame'] = df_of['frame']
        df_lmk['face_id'] = df_of[' face_id']
        df_lmk['timestamp'] = df_of[' timestamp']
        df_lmk['confidence'] = df_of[' confidence']
        df_lmk['success'] = df_of[' success']

        df_lmk = lmk_col_nm_map(df_lmk)
        df_lmk = add_disp_3D(df_lmk)
        df_lmk['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(join(out_loc, fl_name)))
        ut.save_output(df_lmk, out_loc, fl_name, face_lmk_dir, csv_ext)

    except Exception as e:
        # Include the video and the cause so failures can be diagnosed from the log.
        logger.error('Failed to process video file {}\n{}'.format(video_uri, e))
|
||||
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
file_name: process_features
|
||||
project_name: DBM
|
||||
created: 2020-20-07
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import glob
|
||||
import logging
|
||||
|
||||
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
|
||||
|
||||
# Configure the root logger at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger.
logger=logging.getLogger()
|
||||
|
||||
def batch_open_face(filepaths,video_url, input_dir, out_dir, of_path, video_tracking=False):
    """ Computes open_face features for the files in filepaths

    Args:
    -----
        filepaths: (iterable[str]) files to run OpenFace on
        video_url: Raw video location; used with out_dir to derive the output
            location and file name through ut.filter_path
        input_dir: Path to the input videos (not used by this function)
        out_dir: Path to the processed output
        of_path: OpenFace source code path
        video_tracking: To specify whether openface's video tracking module (FaceLandmarkVid)
                        is being used or the default (FeatureExtract)

    Returns:
    --------
        (list) one ut.compute_open_face_features result per successfully
        processed file (presumably csv paths -- TODO confirm); files that raise
        are logged and skipped
    """
    suffix = '_openface_lmk' if video_tracking else '_openface'
    csv_files = []

    for fp in filepaths:
        try:
            _, out_loc, fl_name = ut.filter_path(video_url, out_dir)
            output_directory = os.path.join(out_loc, fl_name + suffix)

            if video_tracking:
                # exist_ok avoids the check-then-create race of exists() + makedirs().
                os.makedirs(os.path.abspath(output_directory), exist_ok=True)
            csv_files.append(ut.compute_open_face_features(fp,output_directory,of_path))

        except Exception as e:
            logger.error('Failed to run OpenFace on {}\n{}'.format(fp, e))

    return csv_files
|
||||
|
||||
def process_open_face(video_uri, input_dir, out_dir, of_path, dbm_group,video_tracking):
    """
    Run OpenFace on a single video when the requested feature groups need it.

    When dbm_group is given and contains neither 'facial' nor 'movement', the
    function returns without running OpenFace. Otherwise batch_open_face is
    called for the video. Any exception is logged and not re-raised.

    Args:
        video_uri: video path
        input_dir: input directory for videos
        out_dir: (str) Output directory for processed output
        of_path: OpenFace source code path
        dbm_group: iterable of feature group names, or None to always run
        video_tracking: passed through to batch_open_face
    """
    try:
        if dbm_group is not None:
            # add group here: if you want to use openface output for raw variable calculation
            check_group = ['facial', 'movement']
            if set(check_group).isdisjoint(dbm_group):
                return

        batch_open_face([video_uri], video_uri, input_dir, out_dir, of_path, video_tracking)

    except Exception as e:
        # Include the video and the cause so failures can be diagnosed from the log.
        logger.error('Failed to process video file {}\n{}'.format(video_uri, e))
|
||||
BIN
opendbm/pkg/shape_detector/shape_predictor_68_face_landmarks.dat
Normal file
BIN
opendbm/pkg/shape_detector/shape_predictor_68_face_landmarks.dat
Normal file
Binary file not shown.
BIN
opendbm/pkg/v_tremor/xgb_bin_vtrem.sav
Normal file
BIN
opendbm/pkg/v_tremor/xgb_bin_vtrem.sav
Normal file
Binary file not shown.
319
opendbm/resources/features/derived_feature.yml
Normal file
319
opendbm/resources/features/derived_feature.yml
Normal file
@@ -0,0 +1,319 @@
|
||||
derive_feature:
|
||||
|
||||
#DBM Feature Group
|
||||
FEATURE_GROUP: ['FAC_ASYM', 'FAC_AU', 'FAC_EXP', 'FAC_LMK', 'ACO_INT', 'ACO_FF', 'ACO_HNR', 'ACO_GNE', 'ACO_FM',
|
||||
'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK', 'NLP_SPEECH',
|
||||
'EYE_GAZE', 'MOV_VT', 'MOV_FT']
|
||||
|
||||
#Feature group output file extensions
|
||||
FAC_ASYM_LOC: _facasym
|
||||
FAC_AU_LOC: _facau
|
||||
FAC_EXP_LOC: _facemo
|
||||
FAC_LMK_LOC: _faclmk
|
||||
ACO_INT_LOC: _intensity
|
||||
ACO_FF_LOC: _pitch
|
||||
ACO_HNR_LOC: _hnr
|
||||
ACO_GNE_LOC: _gne
|
||||
ACO_FM_LOC: _formant
|
||||
ACO_JITTER_LOC: _jitter
|
||||
ACO_SHIMMER_LOC: _shimmer
|
||||
ACO_PAUSE_LOC: _pausechar
|
||||
ACO_VFS_LOC: _voiceprev
|
||||
ACO_MFCC_LOC: _mfcc
|
||||
MOV_HM_LOC: _headmov
|
||||
MOV_HP_LOC: _headpose
|
||||
EYE_BLINK_LOC: _eyeblinks
|
||||
NLP_SPEECH_LOC: _nlp
|
||||
EYE_GAZE_LOC: _eyegaze
|
||||
MOV_VT_LOC: _vtremor
|
||||
MOV_FT_LOC: _fac_tremor
|
||||
|
||||
#Facial category feature group
|
||||
FAC_ASYM: ['fac_AsymMaskMouth', 'fac_AsymMaskEyebrow', 'fac_AsymMaskEye', 'fac_AsymMaskCom']
|
||||
FAC_AU: ['fac_AU01int', 'fac_AU02int', 'fac_AU04int', 'fac_AU05int', 'fac_AU06int', 'fac_AU07int', 'fac_AU09int',
|
||||
'fac_AU10int', 'fac_AU12int', 'fac_AU14int', 'fac_AU15int', 'fac_AU17int', 'fac_AU20int', 'fac_AU23int',
|
||||
'fac_AU25int', 'fac_AU26int', 'fac_AU45int', 'fac_AU01pres', 'fac_AU02pres', 'fac_AU04pres', 'fac_AU05pres',
|
||||
'fac_AU06pres', 'fac_AU07pres', 'fac_AU09pres', 'fac_AU10pres', 'fac_AU12pres', 'fac_AU14pres', 'fac_AU15pres',
|
||||
'fac_AU17pres', 'fac_AU20pres', 'fac_AU23pres', 'fac_AU25pres', 'fac_AU26pres', 'fac_AU28pres', 'fac_AU45pres']
|
||||
FAC_EXP: ['hap_exp', 'sad_exp', 'sur_exp', 'fea_exp', 'ang_exp', 'dis_exp', 'con_exp', 'happ_occ', 'sad_occ',
|
||||
'sur_occ', 'fea_occ', 'ang_occ', 'dis_occ', 'con_occ', 'pos_exp', 'neg_exp', 'com_exp', 'hap_exp_full',
|
||||
'sad_exp_full', 'sur_exp_full','fea_exp_full', 'ang_exp_full', 'dis_exp_full', 'con_exp_full', 'pos_exp_full',
|
||||
'neg_exp_full', 'com_exp_full', 'com_lower_exp','com_upper_exp', 'pai_exp', 'pai_exp_full']
|
||||
FAC_LMK: ['fac_LMK00disp', 'fac_LMK01disp', 'fac_LMK02disp', 'fac_LMK03disp', 'fac_LMK04disp',
|
||||
'fac_LMK05disp', 'fac_LMK06disp', 'fac_LMK07disp', 'fac_LMK08disp', 'fac_LMK09disp', 'fac_LMK10disp',
|
||||
'fac_LMK11disp', 'fac_LMK12disp', 'fac_LMK13disp', 'fac_LMK14disp', 'fac_LMK15disp', 'fac_LMK16disp',
|
||||
'fac_LMK17disp', 'fac_LMK18disp', 'fac_LMK19disp', 'fac_LMK20disp', 'fac_LMK21disp', 'fac_LMK22disp',
|
||||
'fac_LMK23disp', 'fac_LMK24disp', 'fac_LMK25disp', 'fac_LMK26disp', 'fac_LMK27disp', 'fac_LMK28disp',
|
||||
'fac_LMK29disp', 'fac_LMK30disp', 'fac_LMK31disp', 'fac_LMK32disp', 'fac_LMK33disp', 'fac_LMK34disp',
|
||||
'fac_LMK35disp', 'fac_LMK36disp', 'fac_LMK37disp', 'fac_LMK38disp', 'fac_LMK39disp', 'fac_LMK40disp',
|
||||
'fac_LMK41disp', 'fac_LMK42disp', 'fac_LMK43disp', 'fac_LMK44disp', 'fac_LMK45disp', 'fac_LMK46disp',
|
||||
'fac_LMK47disp', 'fac_LMK48disp', 'fac_LMK49disp', 'fac_LMK50disp', 'fac_LMK51disp', 'fac_LMK52disp',
|
||||
'fac_LMK53disp', 'fac_LMK54disp', 'fac_LMK55disp', 'fac_LMK56disp', 'fac_LMK57disp', 'fac_LMK58disp',
|
||||
'fac_LMK59disp', 'fac_LMK60disp', 'fac_LMK61disp', 'fac_LMK62disp', 'fac_LMK63disp', 'fac_LMK64disp',
|
||||
'fac_LMK65disp', 'fac_LMK66disp', 'fac_LMK67disp']
|
||||
|
||||
#Acoustic category feature group
|
||||
ACO_INT: ['aco_int']
|
||||
ACO_FF: ['aco_ff']
|
||||
ACO_HNR: ['aco_hnr']
|
||||
ACO_GNE: ['aco_gne']
|
||||
ACO_FM: ['aco_fm1','aco_fm2','aco_fm3','aco_fm4']
|
||||
ACO_JITTER: ['aco_jitter']
|
||||
ACO_SHIMMER: ['aco_shimmer']
|
||||
ACO_PAUSE: ['aco_pausetime','aco_totaltime','aco_pausefrac','aco_numpauses']
|
||||
ACO_VFS: ['aco_voicePct']
|
||||
ACO_MFCC: ['aco_mfcc1','aco_mfcc2','aco_mfcc3','aco_mfcc4','aco_mfcc5','aco_mfcc6','aco_mfcc7','aco_mfcc8','aco_mfcc9',
|
||||
'aco_mfcc10','aco_mfcc11','aco_mfcc12']
|
||||
|
||||
#Movement category feature group
|
||||
MOV_HM: ['head_vel']
|
||||
MOV_HP: ['mov_Hpose_Dist','mov_Hpose_Pitch','mov_Hpose_Yaw','mov_Hpose_Roll']
|
||||
EYE_BLINK: ['mov_blink_ear', 'vid_dur', 'mov_blinkdur']
|
||||
MOV_VT: ['mov_freq_trem_freq', 'mov_freq_trem_index', 'mov_freq_trem_pindex', 'mov_amp_trem_freq',
|
||||
'mov_amp_trem_index', 'mov_amp_trem_pindex']
|
||||
MOV_FT: ['fac_tremor_median_5','fac_tremor_median_12','fac_tremor_median_8','fac_tremor_median_48','fac_tremor_median_54','fac_tremor_median_28','fac_tremor_median_51','fac_tremor_median_66','fac_tremor_median_57']
|
||||
|
||||
EYE_GAZE: ['mov_leye_x', 'mov_leye_y', 'mov_leye_z', 'mov_reye_x', 'mov_reye_y', 'mov_reye_z', 'mov_eleft_disp',
|
||||
'mov_eright_disp']
|
||||
|
||||
#NLP category feature group
|
||||
NLP_SPEECH: ['nlp_numSentences', 'nlp_singPronPerAns', 'nlp_singPronPerSen', 'nlp_pastTensePerAns', 'nlp_pastTensePerSen',
|
||||
'nlp_pronounsPerAns', 'nlp_pronounsPerSen', 'nlp_verbsPerAns', 'nlp_verbsPerSen', 'nlp_adjectivesPerAns',
|
||||
'nlp_adjectivesPerSen', 'nlp_nounsPerAns', 'nlp_nounsPerSen', 'nlp_sentiment_mean', 'nlp_mattr', 'nlp_wordsPerMin',
|
||||
'nlp_totalTime']
|
||||
|
||||
#Calculation for variables
|
||||
# Facial Asymmetry
|
||||
fac_AsymMaskMouth: ['mean', 'std']
|
||||
fac_AsymMaskEyebrow: ['mean', 'std']
|
||||
fac_AsymMaskEye: ['mean', 'std']
|
||||
fac_AsymMaskCom: ['mean', 'std']
|
||||
|
||||
#Facial Action Unit
|
||||
fac_AU01int: ['mean', 'std']
|
||||
fac_AU02int: ['mean', 'std']
|
||||
fac_AU04int: ['mean', 'std']
|
||||
fac_AU05int: ['mean', 'std']
|
||||
fac_AU06int: ['mean', 'std']
|
||||
fac_AU07int: ['mean', 'std']
|
||||
fac_AU09int: ['mean', 'std']
|
||||
fac_AU10int: ['mean', 'std']
|
||||
fac_AU12int: ['mean', 'std']
|
||||
fac_AU14int: ['mean', 'std']
|
||||
fac_AU15int: ['mean', 'std']
|
||||
fac_AU17int: ['mean', 'std']
|
||||
fac_AU20int: ['mean', 'std']
|
||||
fac_AU23int: ['mean', 'std']
|
||||
fac_AU25int: ['mean', 'std']
|
||||
fac_AU26int: ['mean', 'std']
|
||||
fac_AU45int: ['mean', 'std']
|
||||
fac_AU01pres: ['pct']
|
||||
fac_AU02pres: ['pct']
|
||||
fac_AU04pres: ['pct']
|
||||
fac_AU05pres: ['pct']
|
||||
fac_AU06pres: ['pct']
|
||||
fac_AU07pres: ['pct']
|
||||
fac_AU09pres: ['pct']
|
||||
fac_AU10pres: ['pct']
|
||||
fac_AU12pres: ['pct']
|
||||
fac_AU14pres: ['pct']
|
||||
fac_AU15pres: ['pct']
|
||||
fac_AU17pres: ['pct']
|
||||
fac_AU20pres: ['pct']
|
||||
fac_AU23pres: ['pct']
|
||||
fac_AU25pres: ['pct']
|
||||
fac_AU26pres: ['pct']
|
||||
fac_AU28pres: ['pct']
|
||||
fac_AU45pres: ['pct']
|
||||
|
||||
#Facial Expressivity
|
||||
hap_exp: ['mean', 'std']
|
||||
sad_exp: ['mean', 'std']
|
||||
sur_exp: ['mean', 'std']
|
||||
fea_exp: ['mean', 'std']
|
||||
ang_exp: ['mean', 'std']
|
||||
dis_exp: ['mean', 'std']
|
||||
con_exp: ['mean', 'std']
|
||||
happ_occ: ['pct']
|
||||
sad_occ: ['pct']
|
||||
sur_occ: ['pct']
|
||||
fea_occ: ['pct']
|
||||
ang_occ: ['pct']
|
||||
dis_occ: ['pct']
|
||||
con_occ: ['pct']
|
||||
pos_exp: ['mean', 'std', 'pct']
|
||||
neg_exp: ['mean', 'std', 'pct']
|
||||
neu_exp: ['mean', 'std', 'pct']
|
||||
com_exp: ['mean', 'std', 'pct']
|
||||
com_lower_exp: ['mean','std','pct']
|
||||
com_upper_exp: ['mean','std','pct']
|
||||
pai_exp: ['mean','std','pct']
|
||||
hap_exp_full: ['mean', 'std']
|
||||
sad_exp_full: ['mean', 'std']
|
||||
sur_exp_full: ['mean', 'std']
|
||||
fea_exp_full: ['mean', 'std']
|
||||
ang_exp_full: ['mean', 'std']
|
||||
dis_exp_full: ['mean', 'std']
|
||||
con_exp_full: ['mean', 'std']
|
||||
pos_exp_full: ['mean', 'std']
|
||||
neg_exp_full: ['mean', 'std']
|
||||
neu_exp_full: ['mean', 'std']
|
||||
com_exp_full: ['mean', 'std']
|
||||
com_lower_exp_full: ['mean','std']
|
||||
com_upper_exp_full: ['mean', 'std']
|
||||
pai_exp_full: ['mean','std']
|
||||
|
||||
#Facial Landmarks
|
||||
fac_LMK00disp: ['mean', 'std']
|
||||
fac_LMK01disp: ['mean', 'std']
|
||||
fac_LMK02disp: ['mean', 'std']
|
||||
fac_LMK03disp: ['mean', 'std']
|
||||
fac_LMK04disp: ['mean', 'std']
|
||||
fac_LMK05disp: ['mean', 'std']
|
||||
fac_LMK06disp: ['mean', 'std']
|
||||
fac_LMK07disp: ['mean', 'std']
|
||||
fac_LMK08disp: ['mean', 'std']
|
||||
fac_LMK09disp: ['mean', 'std']
|
||||
fac_LMK10disp: ['mean', 'std']
|
||||
fac_LMK11disp: ['mean', 'std']
|
||||
fac_LMK12disp: ['mean', 'std']
|
||||
fac_LMK13disp: ['mean', 'std']
|
||||
fac_LMK14disp: ['mean', 'std']
|
||||
fac_LMK15disp: ['mean', 'std']
|
||||
fac_LMK16disp: ['mean', 'std']
|
||||
fac_LMK17disp: ['mean', 'std']
|
||||
fac_LMK18disp: ['mean', 'std']
|
||||
fac_LMK19disp: ['mean', 'std']
|
||||
fac_LMK20disp: ['mean', 'std']
|
||||
fac_LMK21disp: ['mean', 'std']
|
||||
fac_LMK22disp: ['mean', 'std']
|
||||
fac_LMK23disp: ['mean', 'std']
|
||||
fac_LMK24disp: ['mean', 'std']
|
||||
fac_LMK25disp: ['mean', 'std']
|
||||
fac_LMK26disp: ['mean', 'std']
|
||||
fac_LMK27disp: ['mean', 'std']
|
||||
fac_LMK28disp: ['mean', 'std']
|
||||
fac_LMK29disp: ['mean', 'std']
|
||||
fac_LMK30disp: ['mean', 'std']
|
||||
fac_LMK31disp: ['mean', 'std']
|
||||
fac_LMK32disp: ['mean', 'std']
|
||||
fac_LMK33disp: ['mean', 'std']
|
||||
fac_LMK34disp: ['mean', 'std']
|
||||
fac_LMK35disp: ['mean', 'std']
|
||||
fac_LMK36disp: ['mean', 'std']
|
||||
fac_LMK37disp: ['mean', 'std']
|
||||
fac_LMK38disp: ['mean', 'std']
|
||||
fac_LMK39disp: ['mean', 'std']
|
||||
fac_LMK40disp: ['mean', 'std']
|
||||
fac_LMK41disp: ['mean', 'std']
|
||||
fac_LMK42disp: ['mean', 'std']
|
||||
fac_LMK43disp: ['mean', 'std']
|
||||
fac_LMK44disp: ['mean', 'std']
|
||||
fac_LMK45disp: ['mean', 'std']
|
||||
fac_LMK46disp: ['mean', 'std']
|
||||
fac_LMK47disp: ['mean', 'std']
|
||||
fac_LMK48disp: ['mean', 'std']
|
||||
fac_LMK49disp: ['mean', 'std']
|
||||
fac_LMK50disp: ['mean', 'std']
|
||||
fac_LMK51disp: ['mean', 'std']
|
||||
fac_LMK52disp: ['mean', 'std']
|
||||
fac_LMK53disp: ['mean', 'std']
|
||||
fac_LMK54disp: ['mean', 'std']
|
||||
fac_LMK55disp: ['mean', 'std']
|
||||
fac_LMK56disp: ['mean', 'std']
|
||||
fac_LMK57disp: ['mean', 'std']
|
||||
fac_LMK58disp: ['mean', 'std']
|
||||
fac_LMK59disp: ['mean', 'std']
|
||||
fac_LMK60disp: ['mean', 'std']
|
||||
fac_LMK61disp: ['mean', 'std']
|
||||
fac_LMK62disp: ['mean', 'std']
|
||||
fac_LMK63disp: ['mean', 'std']
|
||||
fac_LMK64disp: ['mean', 'std']
|
||||
fac_LMK65disp: ['mean', 'std']
|
||||
fac_LMK66disp: ['mean', 'std']
|
||||
fac_LMK67disp: ['mean', 'std']
|
||||
|
||||
#Acoustic feature
|
||||
aco_int: ['mean', 'std', 'range']
|
||||
aco_ff: ['mean', 'std', 'range']
|
||||
aco_hnr: ['mean', 'std', 'range']
|
||||
aco_gne: ['mean', 'std', 'range']
|
||||
aco_fm1: ['mean', 'std', 'range']
|
||||
aco_fm2: ['mean', 'std', 'range']
|
||||
aco_fm3: ['mean', 'std', 'range']
|
||||
aco_fm4: ['mean', 'std', 'range']
|
||||
aco_jitter: ['mean', 'std', 'range']
|
||||
aco_shimmer: ['mean', 'std', 'range']
|
||||
aco_pausetime: ['mean']
|
||||
aco_pausefrac: ['mean']
|
||||
aco_voicePct: ['mean']
|
||||
aco_totaltime: ['mean']
|
||||
aco_numpauses: ['mean']
|
||||
aco_mfcc1: ['mean']
|
||||
aco_mfcc2: ['mean']
|
||||
aco_mfcc3: ['mean']
|
||||
aco_mfcc4: ['mean']
|
||||
aco_mfcc5: ['mean']
|
||||
aco_mfcc6: ['mean']
|
||||
aco_mfcc7: ['mean']
|
||||
aco_mfcc8: ['mean']
|
||||
aco_mfcc9: ['mean']
|
||||
aco_mfcc10: ['mean']
|
||||
aco_mfcc11: ['mean']
|
||||
aco_mfcc12: ['mean']
|
||||
|
||||
#Movement feature
|
||||
head_vel: ['mean', 'std']
|
||||
mov_Hpose_Dist: ['mean', 'std']
|
||||
mov_Hpose_Pitch: ['mean', 'std']
|
||||
mov_Hpose_Yaw: ['mean', 'std']
|
||||
mov_Hpose_Roll: ['mean', 'std']
|
||||
mov_blink_ear: ['mean', 'std']
|
||||
vid_dur: ['count']
|
||||
mov_blinkdur: ['mean', 'std']
|
||||
|
||||
mov_freq_trem_freq: ['mean']
|
||||
mov_freq_trem_index: ['mean']
|
||||
mov_freq_trem_pindex: ['mean']
|
||||
mov_amp_trem_freq: ['mean']
|
||||
mov_amp_trem_index: ['mean']
|
||||
mov_amp_trem_pindex: ['mean']
|
||||
|
||||
fac_tremor_median_5: ['mean']
|
||||
fac_tremor_median_12: ['mean']
|
||||
fac_tremor_median_8: ['mean']
|
||||
fac_tremor_median_48: ['mean']
|
||||
fac_tremor_median_54: ['mean']
|
||||
fac_tremor_median_28: ['mean']
|
||||
fac_tremor_median_51: ['mean']
|
||||
fac_tremor_median_66: ['mean']
|
||||
fac_tremor_median_57: ['mean']
|
||||
|
||||
mov_leye_x: ['mean', 'std']
|
||||
mov_leye_y: ['mean', 'std']
|
||||
mov_leye_z: ['mean', 'std']
|
||||
mov_reye_x: ['mean', 'std']
|
||||
mov_reye_y: ['mean', 'std']
|
||||
mov_reye_z: ['mean', 'std']
|
||||
mov_eleft_disp: ['mean', 'std']
|
||||
mov_eright_disp: ['mean', 'std']
|
||||
|
||||
#NLP feature
|
||||
nlp_numSentences: ['mean']
|
||||
nlp_singPronPerAns: ['mean']
|
||||
nlp_singPronPerSen: ['mean']
|
||||
nlp_pastTensePerAns: ['mean']
|
||||
nlp_pastTensePerSen: ['mean']
|
||||
nlp_pronounsPerAns: ['mean']
|
||||
nlp_pronounsPerSen: ['mean']
|
||||
nlp_verbsPerAns: ['mean']
|
||||
nlp_verbsPerSen: ['mean']
|
||||
nlp_adjectivesPerAns: ['mean']
|
||||
nlp_adjectivesPerSen: ['mean']
|
||||
nlp_nounsPerAns: ['mean']
|
||||
nlp_nounsPerSen: ['mean']
|
||||
nlp_sentiment_mean: ['mean']
|
||||
nlp_mattr: ['mean']
|
||||
nlp_wordsPerMin: ['mean']
|
||||
nlp_totalTime: ['mean']
|
||||
|
||||
1
opendbm/resources/features/facial/config.json
Normal file
1
opendbm/resources/features/facial/config.json
Normal file
@@ -0,0 +1 @@
|
||||
{"ref_lmk": 28, "ref_area": 350000, "face_width_left": "l15_x", "face_width_right": "l1_x", "face_height_left": "l8_y", "face_height_right": "l27_y", "landmarks": [5, 12, 8, 48, 54, 28, 51, 66, 57], "model_path": "resources/facial/svm_bin_fac_tremor.sav", "feature_order": ["fac_features_mean_5", "fac_features_mean_12", "fac_features_mean_8", "fac_features_mean_48", "fac_features_mean_54", "fac_features_mean_28", "fac_features_mean_51", "fac_features_mean_66", "fac_features_mean_57", "fac_features_median_5", "fac_features_median_12", "fac_features_median_8", "fac_features_median_48", "fac_features_median_54", "fac_features_median_28", "fac_features_median_51", "fac_features_median_66", "fac_features_median_57"]}
|
||||
253
opendbm/resources/features/raw_feature.yml
Normal file
253
opendbm/resources/features/raw_feature.yml
Normal file
@@ -0,0 +1,253 @@
|
||||
raw_feature:
|
||||
#error reason
|
||||
error_reason: error_reason
|
||||
|
||||
#Output range
|
||||
mov_headvel_start: 0
|
||||
mov_headvel_end: 200
|
||||
|
||||
#Facial markers
|
||||
hap_exp: fac_hapintsoft
|
||||
sad_exp: fac_sadintsoft
|
||||
sur_exp: fac_surintsoft
|
||||
fea_exp: fac_feaintsoft
|
||||
ang_exp: fac_angintsoft
|
||||
dis_exp: fac_disintsoft
|
||||
con_exp: fac_conintsoft
|
||||
happ_occ: fac_happres
|
||||
sad_occ: fac_sadpres
|
||||
sur_occ: fac_surpres
|
||||
fea_occ: fac_feapres
|
||||
ang_occ: fac_angpres
|
||||
dis_occ: fac_dispres
|
||||
con_occ: fac_conpres
|
||||
pos_exp: fac_posintsoft
|
||||
neg_exp: fac_negintsoft
|
||||
neu_exp: neu_exp
|
||||
cai_exp: cai_exp
|
||||
com_exp: fac_comintsoft
|
||||
com_lower_exp: fac_comlowintsoft
|
||||
com_upper_exp: fac_comuppintsoft
|
||||
pai_exp: fac_paiintsoft
|
||||
hap_exp_full: fac_hapinthard
|
||||
sad_exp_full: fac_sadinthard
|
||||
sur_exp_full: fac_surinthard
|
||||
fea_exp_full: fac_feainthard
|
||||
ang_exp_full: fac_anginthard
|
||||
dis_exp_full: fac_disinthard
|
||||
con_exp_full: fac_coninthard
|
||||
pos_exp_full: fac_posinthard
|
||||
neg_exp_full: fac_neginthard
|
||||
neu_exp_full: neu_exp_full
|
||||
cai_exp_full: cai_exp_full
|
||||
com_exp_full: fac_cominthard
|
||||
com_lower_exp_full: fac_comlowinthard
|
||||
com_upper_exp_full: fac_comuppinthard
|
||||
pai_exp_full: fac_paiinthard
|
||||
|
||||
#Facial asymmetry
|
||||
fac_AsymMaskMouth: fac_asymmaskmouth
|
||||
fac_AsymMaskEye: fac_asymmaskeye
|
||||
fac_AsymMaskEyebrow: fac_asymmaskeyebrow
|
||||
fac_AsymMaskCom: fac_asymmaskcom
|
||||
|
||||
#Facial landmark
|
||||
fac_LMK00disp: fac_LMK00disp
|
||||
fac_LMK01disp: fac_LMK01disp
|
||||
fac_LMK02disp: fac_LMK02disp
|
||||
fac_LMK03disp: fac_LMK03disp
|
||||
fac_LMK04disp: fac_LMK04disp
|
||||
fac_LMK05disp: fac_LMK05disp
|
||||
fac_LMK06disp: fac_LMK06disp
|
||||
fac_LMK07disp: fac_LMK07disp
|
||||
fac_LMK08disp: fac_LMK08disp
|
||||
fac_LMK09disp: fac_LMK09disp
|
||||
fac_LMK10disp: fac_LMK10disp
|
||||
fac_LMK11disp: fac_LMK11disp
|
||||
fac_LMK12disp: fac_LMK12disp
|
||||
fac_LMK13disp: fac_LMK13disp
|
||||
fac_LMK14disp: fac_LMK14disp
|
||||
fac_LMK15disp: fac_LMK15disp
|
||||
fac_LMK16disp: fac_LMK16disp
|
||||
fac_LMK17disp: fac_LMK17disp
|
||||
fac_LMK18disp: fac_LMK18disp
|
||||
fac_LMK19disp: fac_LMK19disp
|
||||
fac_LMK20disp: fac_LMK20disp
|
||||
fac_LMK21disp: fac_LMK21disp
|
||||
fac_LMK22disp: fac_LMK22disp
|
||||
fac_LMK23disp: fac_LMK23disp
|
||||
fac_LMK24disp: fac_LMK24disp
|
||||
fac_LMK25disp: fac_LMK25disp
|
||||
fac_LMK26disp: fac_LMK26disp
|
||||
fac_LMK27disp: fac_LMK27disp
|
||||
fac_LMK28disp: fac_LMK28disp
|
||||
fac_LMK29disp: fac_LMK29disp
|
||||
fac_LMK30disp: fac_LMK30disp
|
||||
fac_LMK31disp: fac_LMK31disp
|
||||
fac_LMK32disp: fac_LMK32disp
|
||||
fac_LMK33disp: fac_LMK33disp
|
||||
fac_LMK34disp: fac_LMK34disp
|
||||
fac_LMK35disp: fac_LMK35disp
|
||||
fac_LMK36disp: fac_LMK36disp
|
||||
fac_LMK37disp: fac_LMK37disp
|
||||
fac_LMK38disp: fac_LMK38disp
|
||||
fac_LMK39disp: fac_LMK39disp
|
||||
fac_LMK40disp: fac_LMK40disp
|
||||
fac_LMK41disp: fac_LMK41disp
|
||||
fac_LMK42disp: fac_LMK42disp
|
||||
fac_LMK43disp: fac_LMK43disp
|
||||
fac_LMK44disp: fac_LMK44disp
|
||||
fac_LMK45disp: fac_LMK45disp
|
||||
fac_LMK46disp: fac_LMK46disp
|
||||
fac_LMK47disp: fac_LMK47disp
|
||||
fac_LMK48disp: fac_LMK48disp
|
||||
fac_LMK49disp: fac_LMK49disp
|
||||
fac_LMK50disp: fac_LMK50disp
|
||||
fac_LMK51disp: fac_LMK51disp
|
||||
fac_LMK52disp: fac_LMK52disp
|
||||
fac_LMK53disp: fac_LMK53disp
|
||||
fac_LMK54disp: fac_LMK54disp
|
||||
fac_LMK55disp: fac_LMK55disp
|
||||
fac_LMK56disp: fac_LMK56disp
|
||||
fac_LMK57disp: fac_LMK57disp
|
||||
fac_LMK58disp: fac_LMK58disp
|
||||
fac_LMK59disp: fac_LMK59disp
|
||||
fac_LMK60disp: fac_LMK60disp
|
||||
fac_LMK61disp: fac_LMK61disp
|
||||
fac_LMK62disp: fac_LMK62disp
|
||||
fac_LMK63disp: fac_LMK63disp
|
||||
fac_LMK64disp: fac_LMK64disp
|
||||
fac_LMK65disp: fac_LMK65disp
|
||||
fac_LMK66disp: fac_LMK66disp
|
||||
fac_LMK67disp: fac_LMK67disp
|
||||
|
||||
#Facial action unit
|
||||
fac_AU01int: fac_AU01int
|
||||
fac_AU02int: fac_AU02int
|
||||
fac_AU04int: fac_AU04int
|
||||
fac_AU05int: fac_AU05int
|
||||
fac_AU06int: fac_AU06int
|
||||
fac_AU07int: fac_AU07int
|
||||
fac_AU09int: fac_AU09int
|
||||
fac_AU10int: fac_AU10int
|
||||
fac_AU12int: fac_AU12int
|
||||
fac_AU14int: fac_AU14int
|
||||
fac_AU15int: fac_AU15int
|
||||
fac_AU17int: fac_AU17int
|
||||
fac_AU20int: fac_AU20int
|
||||
fac_AU23int: fac_AU23int
|
||||
fac_AU25int: fac_AU25int
|
||||
fac_AU26int: fac_AU26int
|
||||
fac_AU45int: fac_AU45int
|
||||
fac_AU01pres: fac_AU01pres
|
||||
fac_AU02pres: fac_AU02pres
|
||||
fac_AU04pres: fac_AU04pres
|
||||
fac_AU05pres: fac_AU05pres
|
||||
fac_AU06pres: fac_AU06pres
|
||||
fac_AU07pres: fac_AU07pres
|
||||
fac_AU09pres: fac_AU09pres
|
||||
fac_AU10pres: fac_AU10pres
|
||||
fac_AU12pres: fac_AU12pres
|
||||
fac_AU14pres: fac_AU14pres
|
||||
fac_AU15pres: fac_AU15pres
|
||||
fac_AU17pres: fac_AU17pres
|
||||
fac_AU20pres: fac_AU20pres
|
||||
fac_AU23pres: fac_AU23pres
|
||||
fac_AU25pres: fac_AU25pres
|
||||
fac_AU26pres: fac_AU26pres
|
||||
fac_AU28pres: fac_AU28pres
|
||||
fac_AU45pres: fac_AU45pres
|
||||
|
||||
#Verbal markers
|
||||
aco_int: aco_int
|
||||
aco_ff: aco_ff
|
||||
aco_voiceLabel: aco_voicelabel
|
||||
aco_hnr: aco_hnr
|
||||
aco_gne: aco_gne
|
||||
aco_fm1: aco_fm1
|
||||
aco_fm2: aco_fm2
|
||||
aco_fm3: aco_fm3
|
||||
aco_fm4: aco_fm4
|
||||
aco_jitter: aco_jitter
|
||||
aco_shimmer: aco_shimmer
|
||||
aco_mfcc1: aco_mfcc1
|
||||
aco_mfcc2: aco_mfcc2
|
||||
aco_mfcc3: aco_mfcc3
|
||||
aco_mfcc4: aco_mfcc4
|
||||
aco_mfcc5: aco_mfcc5
|
||||
aco_mfcc6: aco_mfcc6
|
||||
aco_mfcc7: aco_mfcc7
|
||||
aco_mfcc8: aco_mfcc8
|
||||
aco_mfcc9: aco_mfcc9
|
||||
aco_mfcc10: aco_mfcc10
|
||||
aco_mfcc11: aco_mfcc11
|
||||
aco_mfcc12: aco_mfcc12
|
||||
aco_voiceFrame: aco_voiceframe
|
||||
aco_totVoiceFrame: aco_totvoiceframe
|
||||
aco_voicePct: aco_voicepct
|
||||
aco_pausetime: aco_pausetime
|
||||
aco_totaltime: aco_totaltime
|
||||
aco_speakingtime: aco_speakingtime
|
||||
aco_numpauses: aco_numpauses
|
||||
aco_pausefrac: aco_pausefrac
|
||||
|
||||
#Movement markers
|
||||
head_vel: mov_headvel
|
||||
mov_blink_ear: mov_blink_ear
|
||||
vid_dur: vid_dur
|
||||
fps: fps
|
||||
mov_blinkframes: mov_blinkframes
|
||||
mov_blinkdur: mov_blinkdur
|
||||
mov_Hpose_Pitch: mov_hposepitch
|
||||
mov_Hpose_Yaw: mov_hposeyaw
|
||||
mov_Hpose_Roll: mov_hposeroll
|
||||
mov_Hpose_Dist: mov_hposedist
|
||||
|
||||
mov_freq_trem_freq: mov_freqtremfreq
|
||||
mov_freq_trem_index: mov_freqtremindex
|
||||
mov_freq_trem_pindex: mov_freqtrempindex
|
||||
mov_amp_trem_freq: mov_amptremfreq
|
||||
mov_amp_trem_index: mov_amptremindex
|
||||
mov_amp_trem_pindex: mov_amptrempindex
|
||||
|
||||
fac_tremor_median_5: fac_tremor_median_5
|
||||
fac_tremor_median_12: fac_tremor_median_12
|
||||
fac_tremor_median_8: fac_tremor_median_8
|
||||
fac_tremor_median_48: fac_tremor_median_48
|
||||
fac_tremor_median_54: fac_tremor_median_54
|
||||
fac_tremor_median_28: fac_tremor_median_28
|
||||
fac_tremor_median_51: fac_tremor_median_51
|
||||
fac_tremor_median_66: fac_tremor_median_66
|
||||
fac_tremor_median_57: fac_tremor_median_57
|
||||
|
||||
mov_leye_x: mov_lefteyex
|
||||
mov_leye_y: mov_lefteyey
|
||||
mov_leye_z: mov_lefteyez
|
||||
mov_reye_x: mov_righteyex
|
||||
mov_reye_y: mov_righteyey
|
||||
mov_reye_z: mov_righteyez
|
||||
mov_eleft_disp: mov_leyedisp
|
||||
mov_eright_disp: mov_reyedisp
|
||||
|
||||
#NLP markers
|
||||
nlp_transcribe: nlp_transcribe
|
||||
nlp_numSentences: nlp_numSentences
|
||||
nlp_singPronPerAns: nlp_singPronPerAns
|
||||
nlp_singPronPerSen: nlp_singPronPerSen
|
||||
nlp_pastTensePerAns: nlp_pastTensePerAns
|
||||
nlp_pastTensePerSen: nlp_pastTensePerSen
|
||||
nlp_pronounsPerAns: nlp_pronounsPerAns
|
||||
nlp_pronounsPerSen: nlp_pronounsPerSen
|
||||
nlp_verbsPerAns: nlp_verbsPerAns
|
||||
nlp_verbsPerSen: nlp_verbsPerSen
|
||||
nlp_adjectivesPerAns: nlp_adjectivesPerAns
|
||||
nlp_adjectivesPerSen: nlp_adjectivesPerSen
|
||||
nlp_nounsPerAns: nlp_nounsPerAns
|
||||
nlp_nounsPerSen: nlp_nounsPerSen
|
||||
nlp_sentiment_mean: nlp_sentiment_mean
|
||||
nlp_mattr: nlp_mattr
|
||||
nlp_wordsPerMin: nlp_wordsPerMin
|
||||
nlp_totalTime: nlp_totalTime
|
||||
|
||||
|
||||
|
||||
422
opendbm/resources/libraries/voice_tremor.praat
Normal file
422
opendbm/resources/libraries/voice_tremor.praat
Normal file
@@ -0,0 +1,422 @@
|
||||
|
||||
######################################
|
||||
# Global Settings
|
||||
######################################
|
||||
# Global analysis settings followed by the script's entry point.
# The settings below are plain script variables that the ftrem and atrem
# procedures read directly (minPi, maxPi, ts, tremthresh, minTr, maxTr).
# NOTE(review): sourcedirec$ is assigned here but is not read anywhere else
# in this script -- confirm whether it can be dropped.
sourcedirec$ = "./"; directory of sounds to be analyzed
|
||||
minPi = 60; minimal Pitch [Hz]
|
||||
maxPi = 350; maximal Pitch [Hz]
|
||||
ts = 0.015; analysis time step [s]
|
||||
tremthresh = 0.15; minimal autocorr.-coefficient to assume "tremor"
|
||||
minTr = 1.5; minimal tremor frequency [Hz]
|
||||
maxTr = 15; maximal tremor frequency [Hz]
|
||||
|
||||
|
||||
|
||||
|
||||
######################################
|
||||
# Sound (.wav) in, results (.txt) out
|
||||
######################################
|
||||
|
||||
# record/load and select the sound to be analyzed!!!
|
||||
|
||||
# The commands below act on the current selection: the name of the selected
# object is parsed out of its Info text and stored in name$, and its total
# duration is stored in slength. Both are used by ftrem and atrem.
info$ = Info
|
||||
name$ = extractWord$(info$, "Object name: ")
|
||||
|
||||
slength = Get total duration
|
||||
# ftrem must run before atrem: atrem selects Pitch 'name$', which is only
# created inside ftrem (see the "uncomment if only ..." hints in both).
call ftrem
|
||||
call atrem
|
||||
|
||||
# Write the six measures set by ftrem/atrem as one JSON-like line to the
# Info window (echo plus its "..." continuation line form a single command,
# so nothing may be inserted between them).
# NOTE(review): the FTrF substitution is written 'ftrf:2#' while the other
# five use the plain 'name:digits' form -- confirm the trailing # is intended.
echo
|
||||
...{"FTrF": 'ftrf:2#', "ATrF":'atrf:2',"FTrI":'ftri:3',"ATrI":'atri:3',"FTrP":'ftrp:3',"ATrP":'atrp:3'}
|
||||
|
||||
|
||||
|
||||
######################################
|
||||
# Frequency Tremor Analysis
|
||||
######################################
|
||||
# ftrem: frequency (F0) tremor analysis.
# Expects the Sound to analyse to be the current selection on entry: the
# first command below is applied to whatever is selected.
# Reads the script variables name$, slength, ts, minPi, maxPi, minTr, maxTr
# and tremthresh.
# Sets ftrf (mean of the tremor Pitch object, in Hertz), ftri (average of the
# maxima-based and minima-based deviation percentages) and
# ftrp (= ftri * ftrf/(ftrf+1)).
# All intermediate objects are removed at the end; Pitch 'name$' is kept
# because its "plus" line in the clean-up list is commented out.
procedure ftrem
|
||||
To Pitch (cc)... ts minPi 15 yes 0.03 0.3 0.01 0.35 0.14 maxPi
|
||||
|
||||
#Edit
|
||||
#pause
|
||||
|
||||
# because PRAAT only runs "Subtract linear fit" if the last frame is "voiceless" (!?):
|
||||
# numberOfFrames+1 (1)
|
||||
numberOfFrames = Get number of frames
|
||||
x1 = Get time from frame number... 1
|
||||
am_F0 = Get mean... 0 0 Hertz
|
||||
|
||||
# copy the F0 contour into a one-row Matrix that has one extra, zero-valued
# column at the end (the Matrix is created with the constant formula 0)
Create Matrix... ftrem_0 0 slength numberOfFrames+1 ts x1 1 1 1 1 1 0
|
||||
for i from 1 to numberOfFrames
|
||||
select Pitch 'name$'
|
||||
f0 = Get value in frame... i Hertz
|
||||
select Matrix ftrem_0
|
||||
# write zeros to matrix where frames are voiceless
|
||||
if f0 = undefined
|
||||
Set value... 1 i 0
|
||||
else
|
||||
Set value... 1 i f0
|
||||
endif
|
||||
endfor
|
||||
|
||||
# remove the linear F0 trend (F0 declination)
|
||||
To Pitch
|
||||
Subtract linear fit... Hertz
|
||||
Rename... ftrem_0_lin
|
||||
|
||||
# undo (1)
|
||||
Create Matrix... ftrem 0 slength numberOfFrames ts x1 1 1 1 1 1 0
|
||||
for i from 1 to numberOfFrames
|
||||
select Pitch ftrem_0_lin
|
||||
f0 = Get value in frame... i Hertz
|
||||
select Matrix ftrem
|
||||
# write zeros to matrix where frames are voiceless
|
||||
if f0 = undefined
|
||||
Set value... 1 i 0
|
||||
else
|
||||
Set value... 1 i f0
|
||||
endif
|
||||
endfor
|
||||
|
||||
# Pitch ftrem is created before the Matrix is normalized; the loop further
# down queries it to find the voiceless frames
To Pitch
|
||||
|
||||
# normalize F0-contour by mean F0
|
||||
select Matrix ftrem
|
||||
Formula... (self-am_F0)/am_F0
|
||||
|
||||
# since zeros in the Matrix (unvoiced frames) become normalized to -1 but
|
||||
# unvoiced frames should be zero (if anything)
|
||||
# write zeros to matrix where frames are voiceless
|
||||
for i from 1 to numberOfFrames
|
||||
select Pitch ftrem
|
||||
f0 = Get value in frame... i Hertz
|
||||
if f0 = undefined
|
||||
select Matrix ftrem
|
||||
Set value... 1 i 0
|
||||
endif
|
||||
endfor
|
||||
|
||||
# to calculate autocorrelation (cc-method):
|
||||
select Matrix ftrem
|
||||
To Sound (slice)... 1
|
||||
# calculate Frequency of Frequency Tremor [Hz]
|
||||
To Pitch (cc)... slength minTr 15 yes 0.01 tremthresh 0.01 0.35 0.14 maxTr
|
||||
Rename... ftrem_norm
|
||||
|
||||
ftrf = Get mean... 0 0 Hertz
|
||||
|
||||
# calculate Intensity Index of Frequency Tremor [%]
|
||||
select Sound ftrem
|
||||
plus Pitch ftrem_norm
|
||||
To PointProcess (peaks)... yes no
|
||||
Rename... Maxima
|
||||
numberofMaxPoints = Get number of points
|
||||
ftri_max = 0
|
||||
noFMax = 0
|
||||
# sum the absolute contour values at the maxima; points whose value is
# undefined add 0 to the sum and are counted in noFMax
for iPoint from 1 to numberofMaxPoints
|
||||
select PointProcess Maxima
|
||||
ti = Get time from index... iPoint
|
||||
select Sound ftrem
|
||||
ftri_Point = Get value at time... Average ti Sinc70
|
||||
if ftri_Point = undefined
|
||||
ftri_Point = 0
|
||||
noFMax += 1
|
||||
endif
|
||||
ftri_max += abs(ftri_Point)
|
||||
endfor
|
||||
|
||||
# selects the objects for the (disabled) Edit/pause inspection below
select Sound ftrem
|
||||
plus PointProcess Maxima
|
||||
#Edit
|
||||
#pause
|
||||
|
||||
# ftri_max:= (mean) percentage deviation of F0-maxima from mean F0 at ftrf
|
||||
# NOTE(review): numberofMaxima is 0 when no maximum has a defined value, so
# the division below is then by zero -- confirm how the result is handled
# by whatever parses the echoed output.
numberofMaxima = numberofMaxPoints - noFMax
|
||||
ftri_max = 100 * ftri_max/numberofMaxima
|
||||
|
||||
select Sound ftrem
|
||||
plus Pitch ftrem_norm
|
||||
To PointProcess (peaks)... no yes
|
||||
Rename... Minima
|
||||
numberofMinPoints = Get number of points
|
||||
ftri_min = 0
|
||||
noFMin = 0
|
||||
# same accumulation as for the maxima, now over the minima
for iPoint from 1 to numberofMinPoints
|
||||
select PointProcess Minima
|
||||
ti = Get time from index... iPoint
|
||||
select Sound ftrem
|
||||
ftri_Point = Get value at time... Average ti Sinc70
|
||||
if ftri_Point = undefined
|
||||
ftri_Point = 0
|
||||
noFMin += 1
|
||||
endif
|
||||
ftri_min += abs(ftri_Point)
|
||||
endfor
|
||||
|
||||
select Sound ftrem
|
||||
plus PointProcess Minima
|
||||
#Edit
|
||||
#pause
|
||||
|
||||
|
||||
# ftri_min:= (mean) percentage deviation of F0-minima from mean F0 at ftrf
|
||||
# NOTE(review): same division-by-zero case as for numberofMaxima above.
numberofMinima = numberofMinPoints - noFMin
|
||||
ftri_min = 100 * ftri_min/numberofMinima
|
||||
|
||||
ftri = (ftri_max + ftri_min) / 2
|
||||
|
||||
ftrp = ftri * ftrf/(ftrf+1)
|
||||
|
||||
# uncomment to inspect frequency tremor objects:
|
||||
# pause
|
||||
|
||||
# clean-up: select every intermediate object created above and remove it
select Pitch ftrem
|
||||
# uncomment if only frequency tremor is to be analyzed:
|
||||
# plus Pitch 'name$'
|
||||
plus Matrix ftrem_0
|
||||
plus Pitch ftrem_0
|
||||
plus Pitch ftrem_0_lin
|
||||
plus Matrix ftrem
|
||||
plus Sound ftrem
|
||||
plus Pitch ftrem_norm
|
||||
plus PointProcess Maxima
|
||||
plus PointProcess Minima
|
||||
Remove
|
||||
|
||||
endproc
|
||||
|
||||
|
||||
######################################
|
||||
# Amplitude Tremor Analysis
|
||||
######################################
|
||||
# atrem: amplitude tremor analysis.
# Expects Sound 'name$' and Pitch 'name$' to exist on entry (the Pitch is
# normally left behind by ftrem; see the "uncomment if only amplitude tremor
# is to be analyzed" hint below).
# Reads the script variables name$, slength, ts, minTr, maxTr and tremthresh.
# Sets atrf (mean of the tremor Pitch object, in Hertz), atri (average of the
# maxima-based and minima-based deviation percentages) and
# atrp (= atri * atrf/(atrf+1)).
# All intermediate objects, including Pitch 'name$', are removed at the end;
# Sound 'name$' is kept.
procedure atrem
    select Sound 'name$'
    # uncomment if only amplitude tremor is to be analyzed:
    # To Pitch (cc)... ts minPi 15 yes 0.03 0.3 0.01 0.35 0.14 maxPi
    # select Sound 'name$'
    plus Pitch 'name$'
    To PointProcess (cc)
    select Sound 'name$'
    plus PointProcess 'name$'_'name$'

    # amplitudes are integrals of intensity over periods -- not intensity maxima
    To AmplitudeTier (period)... 0 0 0.0001 0.02 1.7

    #Edit
    #pause

    # from here on out: prepare to autocorrelate AmplitudeTier-data
    # sample AmplitudeTier at (constant) rate ts
    numbOfAmpPoints = Get number of points
    first_ampP = Get time from index... 1
    last_ampP = Get time from index... numbOfAmpPoints

    # to be able to -- automatically -- read Amp. values...
    # (column 1 of the table is read as time, column 2 as amplitude below)
    Down to TableOfReal

    # first Pitch frame at/after the first amplitude point and last Pitch frame
    # at/before the last amplitude point delimit the resampled contour
    select Pitch 'name$'
    frameNo1 = Get frame number from time... first_ampP
    hiframe1 = ceiling(frameNo1)
    t_hiframe1 = Get time from frame number... hiframe1

    frameNoN = Get frame number from time... last_ampP
    loframeN = floor(frameNoN)

    # number of Amp. points if (re-)sampled at ts
    numbOfPoints_neu = loframeN - hiframe1 + 1

    # to enable autocorrelation of the Amp.-contour: ->Matrix->Sound

    # NOTE(review): the Matrix is filled with the constant 2 and has one column
    # more than the loop below overwrites, so that extra column still holds 2
    # when am_Int is computed after the loop -- confirm this is intended.
    Create Matrix... atrem_nlc 0 slength numbOfPoints_neu+1 ts t_hiframe1 1 1 1 1 1 2
    # get the mean of the amplitude contour in time windows of constant duration
    for point_neu from 1 to numbOfPoints_neu
        # window [tl, tu] of width ts centred on the resampling instant t
        t = (point_neu-1) * ts + t_hiframe1
        tl = t - ts/2
        tu = t + ts/2

        # indices of the amplitude points just below/above each window edge
        select AmplitudeTier 'name$'_'name$'_'name$'
        loil = Get low index from time... tl
        hiil = Get high index from time... tl
        loiu = Get low index from time... tu
        hiiu = Get high index from time... tu

        select TableOfReal 'name$'_'name$'_'name$'
        if loil = 0
            lotl = 0; time before the first amp. point
            druck_lol = Get value... hiil 2; amplitude value before the first amp. point
        else
            lotl = Get value... loil 1; time value of Amp.Point before tl in the PointProcess [s]
            druck_lol = Get value... loil 2; amplitude value before tl in the PointProcess [Pa, ranged from 0 to 1]
        endif

        hitl = Get value... hiil 1
        druck_hil = Get value... hiil 2; amplitude value after tl in the PointProcess

        lotu = Get value... loiu 1
        druck_lou = Get value... loiu 2; amplitude value before tu in the PointProcess

        if hiiu = numbOfAmpPoints + 1
            hitu = slength; time after the last amp. point
            # There is no point after tu here, so the amplitude is held constant
            # at the last existing point (row loiu), mirroring the loil = 0 branch
            # above, which holds the first existing point. The original read row
            # hiil (the first point after tl), which is a different point
            # whenever more than one point lies inside the window.
            druck_hiu = Get value... loiu 2; amplitude value after the last amp. point
        else
            hitu = Get value... hiiu 1; time value after tu in the PointProcess
            druck_hiu = Get value... hiiu 2; amplitude value after tu in the PointProcess
        endif

        nPinter = loiu - loil; = hiiu - hiil; number of amp.-points between tl and tu
        if nPinter > 0
            itinter = 0
            tinter = 0
            druck_tin = 0
            deltat = 0
            # accumulate the amplitudes of the points inside the window, each
            # weighted by its time distance to the previously visited point
            # (hilft starts at 0 for the first point), plus the sum of the times
            for iinter from 1 to nPinter
                hilft = itinter
                itinter = Get value... loil+iinter 1
                idruck_tin = Get value... loil+iinter 2

                ideltat = itinter - hilft
                druck_tin += idruck_tin * ideltat
                tinter += itinter
                deltat += ideltat
            endfor

            # mean time and weighted mean amplitude of the points in the window
            tin = tinter/nPinter
            druck_tin = druck_tin/deltat
        endif

        # linear interpolation of the amplitude at the two window edges
        druck_tl = ((hitl-tl)*druck_lol + (tl-lotl)*druck_hil) / (hitl-lotl)
        druck_tu = ((hitu-tu)*druck_lou + (tu-lotu)*druck_hiu) / (hitu-lotu)

        if nPinter = 0; loil = loiu; hiil = hiiu
            druck_mean = (druck_tl + druck_tu) / 2
        else
            druck_mean = ((tin-tl)*(druck_tl + druck_tin)/2 + (tu-tin)*(druck_tin + druck_tu)/2) / (tu-tl)
        endif

        select Matrix atrem_nlc
        Set value... 1 point_neu druck_mean
    endfor

    To Pitch
    am_Int = Get mean... 0 0 Hertz

    # because PRAAT classifies frequencies in Pitch objects <=0 as "voiceless" and
    # therefore parts with extreme INTENSITIES would be considered as "voiceless"
    # (irrelevant) after "Subtract linear fit" (1)
    # "1" is added to the original Pa-values (ranged from 0 to 1)
    select Matrix atrem_nlc
    Formula... self+1

    # because PRAAT only runs "Subtract linear fit" if the last frame is "voiceless"...?(2)
    Set value... 1 numbOfPoints_neu+1 0

    # remove the linear amp.-trend (amplitude declination)
    #Formula... self*1000; better for viewing
    To Pitch
    Rename... hilf_lincorr
    Subtract linear fit... Hertz
    Rename... atrem

    # undo (1)...
    To Matrix
    Formula... self-1

    # normalize Amp. contour by mean Amp.
    Formula... (self-am_Int)/am_Int

    # remove last frame, undo (2)
    Create Matrix... atrem_besser 0 slength numbOfPoints_neu ts t_hiframe1 1 1 1 1 1 0
    for point_neu from 1 to numbOfPoints_neu
        select Matrix atrem
        spring = Get value in cell... 1 point_neu
        select Matrix atrem_besser
        Set value... 1 point_neu spring
    endfor

    # to calculate autocorrelation (cc-method)
    To Sound (slice)... 1
    # calculate Frequency of Amplitude Tremor [Hz]
    To Pitch (cc)... slength minTr 15 yes 0.01 tremthresh 0.01 0.35 0.14 maxTr
    Rename... atrem_norm

    atrf = Get mean... 0 0 Hertz

    # calculate Intensity Index of Amplitude Tremor [%]
    select Sound atrem_besser
    plus Pitch atrem_norm
    To PointProcess (peaks)... yes no
    Rename... Maxima
    numberofMaxPoints = Get number of points
    atri_max = 0
    noAMax = 0
    # sum the absolute contour values at the maxima; points whose value is
    # undefined add 0 to the sum and are counted in noAMax
    for iPoint from 1 to numberofMaxPoints
        select PointProcess Maxima
        ti = Get time from index... iPoint
        select Sound atrem_besser
        atri_Point = Get value at time... 0 ti Sinc70
        if atri_Point = undefined
            atri_Point = 0
            noAMax += 1
        endif
        atri_max += abs(atri_Point)
    endfor

    # selects the objects for the (disabled) Edit/pause inspection below
    select Sound atrem_besser
    plus PointProcess Maxima
    #Edit
    #pause

    # atri_max:= (mean) percentage deviation of Amp. maxima from mean Amp.[Pa] at atrf
    # NOTE(review): numberofMaxima is 0 when no maximum has a defined value, so
    # the division below is then by zero -- confirm how the result is handled
    # by whatever parses the echoed output.
    numberofMaxima = numberofMaxPoints - noAMax
    atri_max = 100 * atri_max / numberofMaxima

    select Sound atrem_besser
    plus Pitch atrem_norm
    To PointProcess (peaks)... no yes
    Rename... Minima
    numberofMinPoints = Get number of points
    atri_min = 0
    noAMin = 0
    # same accumulation as for the maxima, now over the minima
    for iPoint from 1 to numberofMinPoints
        select PointProcess Minima
        ti = Get time from index... iPoint
        select Sound atrem_besser
        atri_Point = Get value at time... 0 ti Sinc70
        if atri_Point = undefined
            atri_Point = 0
            noAMin += 1
        endif
        atri_min += abs(atri_Point)
    endfor

    select Sound atrem_besser
    plus PointProcess Minima
    #Edit
    #pause

    # atri_min:= (mean) percentage deviation of Amp. minima from mean Amp.[Pa] at atrf
    # NOTE(review): same division-by-zero case as for numberofMaxima above.
    numberofMinima = numberofMinPoints - noAMin
    atri_min = 100 * atri_min / numberofMinima

    atri = (atri_max + atri_min) / 2

    atrp = atri * atrf/(atrf+1)

    # uncomment to inspect amplitude tremor objects:
    # pause

    # clean-up: select every intermediate object created above (plus the Pitch
    # of the analysed sound) and remove it
    select Pitch 'name$'
    plus PointProcess 'name$'_'name$'
    plus AmplitudeTier 'name$'_'name$'_'name$'
    plus TableOfReal 'name$'_'name$'_'name$'
    plus Matrix atrem_nlc
    plus Pitch atrem_nlc
    plus Pitch hilf_lincorr
    plus Pitch atrem
    plus Matrix atrem
    plus Matrix atrem_besser
    plus Sound atrem_besser
    plus Pitch atrem_norm
    plus PointProcess Maxima
    plus PointProcess Minima
    Remove
endproc
|
||||
28
opendbm/resources/services/face_util.yml
Normal file
28
opendbm/resources/services/face_util.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
cdx_face_config:
|
||||
ACTION_UNITS: [[6, 12],[1, 4, 15],[1, 2, 5, 26],[1, 2, 4, 5, 7, 20, 26],[4, 5, 7, 23],[9, 15],[12, 14]]
|
||||
LOWER_ACTION_UNITS: [[12], [15], [26], [20, 26], [23], [15], [12, 14]]
|
||||
UPPER_ACTION_UNITS: [[6], [1, 4], [1, 2, 5], [1, 2, 4, 5, 7], [4, 5, 7], [9]]
|
||||
NEG_ACTION_UNITS: [[1, 4, 15], [1, 2, 4, 5, 7, 20, 26], [4, 5, 7, 23], [9, 15], [12, 14]]
|
||||
POS_ACTION_UNITS: [[6, 12]]
|
||||
NET_ACTION_UNITS: [[1, 2, 5, 26]]
|
||||
happiness: [[6, 12]]
|
||||
sadness: [[1, 4, 15]]
|
||||
surprise: [[1, 2, 5, 26]]
|
||||
fear: [[1, 2, 4, 5, 7, 20, 26]]
|
||||
anger: [[4, 5, 7, 23]]
|
||||
disgust: [[9, 15]]
|
||||
contempt: [[12, 14]]
|
||||
pain: [[4, 6, 7, 9, 10, 12, 20, 26]]
|
||||
CAI: [[6, 12],[1, 4, 15],[2, 5, 26],[7, 20, 26],[23],[9],[12, 14]]
|
||||
SELECTED_FEATURES: AU,POSE
|
||||
face_expr_dir: /video/face_expressivity
|
||||
face_asym_dir: /video/face_asymmetry
|
||||
AU_filters: ['frame', ' face_id', ' timestamp', ' confidence', ' success', ' AU01_r',' AU02_r',' AU04_r',' AU05_r',
|
||||
' AU06_r', ' AU07_r', ' AU09_r', ' AU10_r', ' AU12_r', ' AU14_r', ' AU15_r', ' AU17_r', ' AU20_r',
|
||||
' AU25_r', ' AU26_r', ' AU45_r', ' AU01_c', ' AU02_c', ' AU04_c', ' AU05_c', ' AU06_c', ' AU07_c',
|
||||
' AU10_c', ' AU12_c', ' AU14_c', ' AU15_c', ' AU17_c', ' AU20_c', ' AU23_c', ' AU25_c', ' AU26_c',
|
||||
' AU28_c', ' AU45_c',' AU09_c',' AU23_r' ]
|
||||
au_intensity: [' AU01_r',' AU02_r',' AU04_r',' AU05_r', ' AU06_r', ' AU07_r', ' AU09_r', ' AU10_r', ' AU12_r',
|
||||
' AU14_r', ' AU15_r', ' AU17_r', ' AU20_r',' AU23_r', ' AU25_r', ' AU26_r', ' AU45_r']
|
||||
au_presence: [' AU01_c', ' AU02_c', ' AU04_c', ' AU05_c', ' AU06_c', ' AU07_c', ' AU09_c', ' AU10_c', ' AU12_c',
|
||||
' AU14_c', ' AU15_c', ' AU17_c', ' AU20_c', ' AU23_c', ' AU25_c', ' AU26_c', ' AU45_c']
|
||||
8
opendbm/resources/services/services.yml
Normal file
8
opendbm/resources/services/services.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
cdx_configuration:
|
||||
input_dir: data/result_CDX/
|
||||
output_dir: data/result_CDX/dbm_client_output/
|
||||
out_derived_dir: data/result_CDX_derived_output/dbm_client_derived_output/
|
||||
open_face_path: pkg/open_dbm/OpenFace/build/bin/FeatureExtraction
|
||||
facial_landmarks: pkg/shape_detector/shape_predictor_68_face_landmarks.dat
|
||||
feature_group: ['facial', 'acoustic', 'movement']
|
||||
|
||||
Reference in New Issue
Block a user