util code refactoring

This commit is contained in:
jordi.hasianta
2022-09-15 20:49:50 +07:00
parent 9223bce123
commit f8818a4047
5 changed files with 560 additions and 301 deletions

View File

@@ -4,9 +4,13 @@ project_name: cdx_analysis
created: 2019-03-16 created: 2019-03-16
author: Deshana Desai author: Deshana Desai
""" """
import sys, os, glob, cv2 import glob
import pandas as pd import os
import sys
import cv2
import numpy as np import numpy as np
import pandas as pd
def euclidean_distance(point1, point2): def euclidean_distance(point1, point2):
@@ -25,8 +29,7 @@ def expand_landmarks(landmarks):
util method to expand landmark list: util method to expand landmark list:
eg: [1,2] -> [['l1_x', 'l1_y'], ['l2_x', 'l2_y']] eg: [1,2] -> [['l1_x', 'l1_y'], ['l2_x', 'l2_y']]
""" """
return [['l{}_x'.format(l), 'l{}_y'.format(l)] for l in landmarks] return [["l{}_x".format(point), "l{}_y".format(point)] for point in landmarks]
def calc_displacement_vec(df, landmarks, num_frames): def calc_displacement_vec(df, landmarks, num_frames):
@@ -44,7 +47,6 @@ def calc_displacement_vec(df, landmarks, num_frames):
first_row = df.iloc[0] first_row = df.iloc[0]
prev_point[j] = (first_row[pair[0]], first_row[pair[1]]) prev_point[j] = (first_row[pair[0]], first_row[pair[1]])
for i in range(num_frames): for i in range(num_frames):
frame_row = df.iloc[i] frame_row = df.iloc[i]
for j, pair in enumerate(landmarks): for j, pair in enumerate(landmarks):

View File

@@ -4,15 +4,15 @@ project_name: DBM
created: 2020-10-11 created: 2020-10-11
""" """
import subprocess
import json import json
import numpy as np
import pandas as pd
import os
import logging import logging
import os
import re
import subprocess
import nltk import nltk
import re import numpy as np
import pandas as pd
from lexicalrichness import LexicalRichness from lexicalrichness import LexicalRichness
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
@@ -26,17 +26,27 @@ def deepspeech(AUDIO_FILE,deep_path):
Returns: Returns:
Text: text which is extracted from audio Text: text which is extracted from audio
""" """
api = 'deepspeech' api = "deepspeech"
arg_speech0 = '--model' arg_speech0 = "--model"
arg_speech_path0 = os.path.join(deep_path, 'deepspeech-0.9.1-models.pbmm') arg_speech_path0 = os.path.join(deep_path, "deepspeech-0.9.1-models.pbmm")
arg_speech1 = '--scorer' arg_speech1 = "--scorer"
arg_speech_path1 = os.path.join(deep_path, 'deepspeech-0.9.1-models.scorer') arg_speech_path1 = os.path.join(deep_path, "deepspeech-0.9.1-models.scorer")
arg_audio = "--audio" arg_audio = "--audio"
out = subprocess.Popen([api, arg_speech0, arg_speech_path0, arg_speech1, arg_speech_path1, arg_audio, AUDIO_FILE], out = subprocess.Popen(
[
api,
arg_speech0,
arg_speech_path0,
arg_speech1,
arg_speech_path1,
arg_audio,
AUDIO_FILE,
],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT) stderr=subprocess.STDOUT,
logger.info('Deepspeech output...... {}'.format(out)) )
logger.info("Deepspeech output...... {}".format(out))
try: try:
stdout, stderr = out.communicate() stdout, stderr = out.communicate()
except: except:
@@ -44,6 +54,7 @@ def deepspeech(AUDIO_FILE,deep_path):
# print(stderr) # print(stderr)
return stdout, stderr return stdout, stderr
def deep_speech_output_clean(result): def deep_speech_output_clean(result):
""" """
Parsing deep speech output(text) Parsing deep speech output(text)
@@ -52,15 +63,16 @@ def deep_speech_output_clean(result):
""" """
text = "" text = ""
if len(result) > 0: if len(result) > 0:
res_split = str(result[0]).split('\\n') res_split = str(result[0]).split("\\n")
if len(res_split) > 0: if len(res_split) > 0:
for i in range(len(res_split)): for i in range(len(res_split)):
if 'Inference took' in res_split[i]: if "Inference took" in res_split[i]:
text = res_split[i + 1] text = res_split[i + 1]
return text return text
return text return text
def process_deepspeech(audio_file, deep_path): def process_deepspeech(audio_file, deep_path):
""" """
Transcribing audio to extract text from speech Transcribing audio to extract text from speech
@@ -70,21 +82,23 @@ def process_deepspeech(audio_file,deep_path):
return deep_text return deep_text
def nltk_download():
    """
    Make sure the NLTK resources this module relies on are installed.

    Each resource is looked up in the local NLTK data path first and is only
    downloaded when that lookup raises LookupError.
    """
    # (path used for the local lookup, package id passed to nltk.download)
    required = (
        ("tokenizers/punkt", "punkt"),
        # The tagger lives under the "taggers/" category; looking it up
        # without that prefix never matches an installed copy, which made
        # every call fall through to a download attempt.
        ("taggers/averaged_perceptron_tagger", "averaged_perceptron_tagger"),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            logger.info("{} is not available".format(package))
            nltk.download(package)
def empty_speech(r_config, master_url, error_txt): def empty_speech(r_config, master_url, error_txt):
""" """
@@ -97,17 +111,33 @@ def empty_speech(r_config, master_url, error_txt):
Empty dataframe for speech features with error Empty dataframe for speech features with error
""" """
col = [r_config.nlp_numSentences, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, r_config.nlp_pastTensePerAns, col = [
r_config.nlp_pastTensePerSen, r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen, r_config.nlp_verbsPerAns, r_config.nlp_numSentences,
r_config.nlp_verbsPerSen, r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen, r_config.nlp_nounsPerAns, r_config.nlp_singPronPerAns,
r_config.nlp_nounsPerSen, r_config.nlp_sentiment_mean, r_config.nlp_mattr, r_config.nlp_wordsPerMin, r_config.nlp_singPronPerSen,
r_config.nlp_totalTime, r_config.err_reason] r_config.nlp_pastTensePerAns,
r_config.nlp_pastTensePerSen,
r_config.nlp_pronounsPerAns,
r_config.nlp_pronounsPerSen,
r_config.nlp_verbsPerAns,
r_config.nlp_verbsPerSen,
r_config.nlp_adjectivesPerAns,
r_config.nlp_adjectivesPerSen,
r_config.nlp_nounsPerAns,
r_config.nlp_nounsPerSen,
r_config.nlp_sentiment_mean,
r_config.nlp_mattr,
r_config.nlp_wordsPerMin,
r_config.nlp_totalTime,
r_config.err_reason,
]
df_speech = pd.DataFrame([[np.nan] * len(col) + [error_txt]], columns=col) df_speech = pd.DataFrame([[np.nan] * len(col) + [error_txt]], columns=col)
df_speech['dbm_master_url'] = master_url df_speech["dbm_master_url"] = master_url
return df_speech return df_speech
def divide_var(speech_var1, spech_var2): def divide_var(speech_var1, spech_var2):
""" """
divide variables divide variables
@@ -117,6 +147,7 @@ def divide_var(speech_var1, spech_var2):
speech_var = speech_var1 / spech_var2 speech_var = speech_var1 / spech_var2
return speech_var return speech_var
def process_speech(transcribe_df, r_config): def process_speech(transcribe_df, r_config):
""" """
Preparing speech features Preparing speech features
@@ -126,18 +157,18 @@ def process_speech(transcribe_df,r_config):
Returns: Returns:
Dataframe for speech features Dataframe for speech features
""" """
transcribe_df = transcribe_df.replace(np.nan, '', regex=True) transcribe_df = transcribe_df.replace(np.nan, "", regex=True)
err_transcribe = transcribe_df[r_config.err_reason].iloc[0] err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0] transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
total_time = transcribe_df[r_config.nlp_totalTime].iloc[0] total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
master_url = transcribe_df['dbm_master_url'].iloc[0] master_url = transcribe_df["dbm_master_url"].iloc[0]
# clean transcribe # clean transcribe
transcribe = transcribe.replace(",", "") transcribe = transcribe.replace(",", "")
transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe)) transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe))
if err_transcribe != 'Pass': if err_transcribe != "Pass":
df_speech = empty_speech(r_config, master_url, error_txt) df_speech = empty_speech(r_config, master_url, "error")
return df_speech return df_speech
@@ -151,42 +182,68 @@ def process_speech(transcribe_df,r_config):
speech_dict[r_config.nlp_numSentences] = num_sentences speech_dict[r_config.nlp_numSentences] = num_sentences
# nlp_singPron # nlp_singPron
i_s = transcribe.count('I') i_s = transcribe.count("I")
me_s = transcribe.count('me') me_s = transcribe.count("me")
my_s = transcribe.count('my') my_s = transcribe.count("my")
sing_count = i_s + me_s + my_s sing_count = i_s + me_s + my_s
speech_dict[r_config.nlp_singPronPerAns] = sing_count if len(words_all)>0 else np.nan speech_dict[r_config.nlp_singPronPerAns] = (
speech_dict[r_config.nlp_singPronPerSen] = divide_var(speech_dict[r_config.nlp_singPronPerAns], num_sentences) sing_count if len(words_all) > 0 else np.nan
)
speech_dict[r_config.nlp_singPronPerSen] = divide_var(
speech_dict[r_config.nlp_singPronPerAns], num_sentences
)
tagged = nltk.pos_tag(transcribe.split()) tagged = nltk.pos_tag(transcribe.split())
tagged_df = pd.DataFrame(tagged, columns=['word', 'pos_tag']) tagged_df = pd.DataFrame(tagged, columns=["word", "pos_tag"])
# Past tense per answer # Past tense per answer
all_POSs = tagged_df['pos_tag'].tolist() all_POSs = tagged_df["pos_tag"].tolist()
speech_dict[r_config.nlp_pastTensePerAns] = all_POSs.count('VBD') if len(words_all)>0 else np.nan speech_dict[r_config.nlp_pastTensePerAns] = (
speech_dict[r_config.nlp_pastTensePerSen] = divide_var(speech_dict[r_config.nlp_pastTensePerAns], num_sentences) all_POSs.count("VBD") if len(words_all) > 0 else np.nan
)
speech_dict[r_config.nlp_pastTensePerSen] = divide_var(
speech_dict[r_config.nlp_pastTensePerAns], num_sentences
)
# Pronoun per answer # Pronoun per answer
pronounsPerAns = all_POSs.count('PRP') + all_POSs.count('PRP$') pronounsPerAns = all_POSs.count("PRP") + all_POSs.count("PRP$")
speech_dict[r_config.nlp_pronounsPerAns] = pronounsPerAns if len(words_all)>0 else np.nan speech_dict[r_config.nlp_pronounsPerAns] = (
speech_dict[r_config.nlp_pronounsPerSen] = divide_var(speech_dict[r_config.nlp_pronounsPerAns], num_sentences) pronounsPerAns if len(words_all) > 0 else np.nan
)
speech_dict[r_config.nlp_pronounsPerSen] = divide_var(
speech_dict[r_config.nlp_pronounsPerAns], num_sentences
)
# Verb per answer # Verb per answer
verbPerAns = all_POSs.count('VB') + all_POSs.count('VBD') + all_POSs.count('VBG') \ verbPerAns = (
+ all_POSs.count('VBN') + all_POSs.count('VBP') + all_POSs.count('VBZ') all_POSs.count("VB")
+ all_POSs.count("VBD")
+ all_POSs.count("VBG")
+ all_POSs.count("VBN")
+ all_POSs.count("VBP")
+ all_POSs.count("VBZ")
)
speech_dict[r_config.nlp_verbsPerAns] = verbPerAns if len(words_all) > 0 else np.nan speech_dict[r_config.nlp_verbsPerAns] = verbPerAns if len(words_all) > 0 else np.nan
speech_dict[r_config.nlp_verbsPerSen] = divide_var(speech_dict[r_config.nlp_verbsPerAns], num_sentences) speech_dict[r_config.nlp_verbsPerSen] = divide_var(
speech_dict[r_config.nlp_verbsPerAns], num_sentences
)
# Adjective per answer # Adjective per answer
adjectivesAns = all_POSs.count('JJ') + all_POSs.count('JJR') + all_POSs.count('JJS') adjectivesAns = all_POSs.count("JJ") + all_POSs.count("JJR") + all_POSs.count("JJS")
speech_dict[r_config.nlp_adjectivesPerAns] = adjectivesAns if len(words_all) > 0 else np.nan speech_dict[r_config.nlp_adjectivesPerAns] = (
speech_dict[r_config.nlp_adjectivesPerSen] = divide_var(speech_dict[r_config.nlp_adjectivesPerAns], num_sentences) adjectivesAns if len(words_all) > 0 else np.nan
)
speech_dict[r_config.nlp_adjectivesPerSen] = divide_var(
speech_dict[r_config.nlp_adjectivesPerAns], num_sentences
)
# Noun per answer # Noun per answer
nounsAns = all_POSs.count('NN') + all_POSs.count('NNP') + all_POSs.count('NNS') nounsAns = all_POSs.count("NN") + all_POSs.count("NNP") + all_POSs.count("NNS")
speech_dict[r_config.nlp_nounsPerAns] = nounsAns if len(words_all) > 0 else np.nan speech_dict[r_config.nlp_nounsPerAns] = nounsAns if len(words_all) > 0 else np.nan
speech_dict[r_config.nlp_nounsPerSen] = divide_var(speech_dict[r_config.nlp_nounsPerAns], num_sentences) speech_dict[r_config.nlp_nounsPerSen] = divide_var(
speech_dict[r_config.nlp_nounsPerAns], num_sentences
)
# Sentiment analysis # Sentiment analysis
vader = SentimentIntensityAnalyzer() vader = SentimentIntensityAnalyzer()
@@ -194,19 +251,23 @@ def process_speech(transcribe_df,r_config):
for s in sentences: for s in sentences:
sentiment_dict = vader.polarity_scores(s) sentiment_dict = vader.polarity_scores(s)
sentence_valences.append(sentiment_dict['compound']) sentence_valences.append(sentiment_dict["compound"])
speech_dict[r_config.nlp_sentiment_mean] = np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan speech_dict[r_config.nlp_sentiment_mean] = (
non_punc = list(value for value in words_all if value not in ['.','!','?']) np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan
)
non_punc = list(value for value in words_all if value not in [".", "!", "?"])
non_punc_as_str = " ".join(str(non_punc)) non_punc_as_str = " ".join(str(non_punc))
lex = LexicalRichness(non_punc_as_str) lex = LexicalRichness(non_punc_as_str)
speech_dict[r_config.nlp_mattr] = lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan speech_dict[r_config.nlp_mattr] = (
lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan
)
# Number of words per minute # Number of words per minute
speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time) * 60 speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time) * 60
speech_dict[r_config.nlp_totalTime] = total_time speech_dict[r_config.nlp_totalTime] = total_time
speech_dict['dbm_master_url'] = master_url speech_dict["dbm_master_url"] = master_url
df_speech = pd.DataFrame([speech_dict]) df_speech = pd.DataFrame([speech_dict])
return df_speech return df_speech

View File

@@ -4,11 +4,65 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import os
import glob import glob
import numpy as np import os
import subprocess import subprocess
import more_itertools as mit
import numpy as np
import pandas as pd
def get_length(filename):
    """
    Return the duration of a media file as reported by ffprobe.

    Args:
        filename: Path of the media file handed to ffprobe.
    Returns:
        The "format=duration" value printed by ffprobe, converted to float.
    Raises:
        ValueError: ffprobe exited with a non-zero status, or its output
            could not be parsed as a number.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    # stderr is captured separately: merging it into stdout would let any
    # ffprobe diagnostic end up inside the text handed to float().
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.DEVNULL,
    )
    if result.returncode != 0:
        # ValueError is what the float() conversion used to raise in this
        # situation, so callers catching it keep working.
        raise ValueError(
            "ffprobe failed for {}: {}".format(
                filename, result.stderr.decode(errors="replace").strip()
            )
        )
    try:
        return float(result.stdout)
    except ValueError as err:
        raise ValueError(
            "ffprobe returned no usable duration for {}: {!r}".format(
                filename, result.stdout
            )
        ) from err
def process_segment_pitch(ff_df, r_config):
    """
    Group frame positions into consecutive runs by voice label.

    Args:
        ff_df: Frame-level data containing the column named by
            r_config.aco_voiceLabel.
        r_config: Config object providing the aco_voiceLabel column name.
    Returns:
        (all runs sorted by their first position, runs labelled "yes",
        runs labelled "no"); each run is a list of consecutive positions.
    """
    labels = ff_df[r_config.aco_voiceLabel]

    def _runs(target):
        # Positions whose label equals target, split into consecutive runs.
        positions = [pos for pos, label in enumerate(labels) if label == target]
        return [list(run) for run in mit.consecutive_groups(positions)]

    voiced_yes = _runs("yes")
    voiced_no = _runs("no")
    com_speech_sort = sorted(voiced_yes + voiced_no, key=lambda run: run[0])
    return com_speech_sort, voiced_yes, voiced_no
def segment_pitch(dir_path, r_config, ff_df=None):
    """
    segmenting pitch freq for each voice segment

    Args:
        dir_path: Directory searched for a "*_pitch.csv" file when ff_df is
            not supplied.
        r_config: Config object forwarded to process_segment_pitch.
        ff_df: Optional, already loaded pitch dataframe; when given, the
            directory is not read.
    Returns:
        (com_speech_sort, voiced_yes, voiced_no) as produced by
        process_segment_pitch, or three separate empty lists when no pitch
        data could be loaded or processed.
    """
    if ff_df is None:
        for file in os.listdir(dir_path):
            if not file.endswith("_pitch.csv"):
                continue
            try:
                ff_df = pd.read_csv(os.path.join(dir_path, file))
            except (OSError, ValueError):
                # Unreadable or malformed csv: try the next candidate.
                continue
            break

    if ff_df is None:
        # Three distinct lists, so a caller mutating one result does not
        # silently change the other two.
        return [], [], []

    try:
        return process_segment_pitch(ff_df, r_config)
    except Exception:
        # Deliberately best-effort, as before: a dataframe that cannot be
        # segmented yields empty results rather than an error.
        return [], [], []
def filter_path(video_url, out_dir): def filter_path(video_url, out_dir):
""" """
@@ -25,6 +79,7 @@ def filter_path(video_url, out_dir):
out_loc = os.path.join(out_dir, fl_name) out_loc = os.path.join(out_dir, fl_name)
return input_loc, out_loc, fl_name return input_loc, out_loc, fl_name
def save_output(df, out_loc, fl_name, f_dir, f_ext): def save_output(df, out_loc, fl_name, f_dir, f_ext):
""" """
creating output directory for Audio features creating output directory for Audio features
@@ -44,6 +99,7 @@ def save_output(df, out_loc, fl_name, f_dir, f_ext):
sav_path = os.path.join(dir_path, full_f_name) sav_path = os.path.join(dir_path, full_f_name)
df.to_csv(sav_path, index=False) df.to_csv(sav_path, index=False)
def audio_process(base_dir, video_url): def audio_process(base_dir, video_url):
""" """
Parsing cleaned audio files(Audio files without IMA voice) Parsing cleaned audio files(Audio files without IMA voice)
@@ -51,19 +107,22 @@ def audio_process(base_dir,video_url):
base_dir: Base path for raw data base_dir: Base path for raw data
video_url: Raw video file path video_url: Raw video file path
""" """
new_video_url = base_dir+'/'.join(video_url[2:]) new_video_url = base_dir + "/".join(video_url[2:])
split_val = new_video_url.split('/') split_val = new_video_url.split("/")
wav_path = '/'.join(split_val[0:len(split_val)-1]) wav_path = "/".join(split_val[0 : len(split_val) - 1])
audio_split_check = glob.glob(wav_path + '/*_split.wav') audio_split_check = glob.glob(wav_path + "/*_split.wav")
return audio_split_check return audio_split_check
def compute_open_face_features(input_filepath,
def compute_open_face_features(
input_filepath,
output_directory, output_directory,
open_face_executable, open_face_executable,
au_static=False, au_static=False,
tracked_visualization=False, tracked_visualization=False,
clobber=False, clobber=False,
verbose=True): verbose=True,
):
""" """
Runs OpenFace on an input video. Runs OpenFace on an input video.
See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments
@@ -82,31 +141,43 @@ def compute_open_face_features(input_filepath,
""" """
if not os.path.isfile(open_face_executable): if not os.path.isfile(open_face_executable):
raise IOError("OpenFace executable {} could not be found.".format(open_face_executable)) raise IOError(
"OpenFace executable {} could not be found.".format(open_face_executable)
)
bn, _ = os.path.splitext(os.path.basename(input_filepath)) bn, _ = os.path.splitext(os.path.basename(input_filepath))
if not output_directory: if not output_directory:
output_directory = os.path.join(os.path.dirname(input_filepath), bn + '_openface') output_directory = os.path.join(
os.path.dirname(input_filepath), bn + "_openface"
)
output_csv = os.path.join(output_directory, bn + '.csv') output_csv = os.path.join(output_directory, bn + ".csv")
if not os.path.isfile(output_csv) or clobber: if not os.path.isfile(output_csv) or clobber:
call = [open_face_executable, ] call = [
open_face_executable,
]
if au_static: if au_static:
call += ['-au_static', ] call += [
"-au_static",
]
if tracked_visualization: if tracked_visualization:
call += ['-tracked', ] call += [
"-tracked",
]
call += ['-q', '-2Dfp', '-3Dfp', '-pdmparams', '-pose', '-aus', '-gaze'] call += ["-q", "-2Dfp", "-3Dfp", "-pdmparams", "-pose", "-aus", "-gaze"]
call += ['-f', input_filepath, '-out_dir', output_directory] call += ["-f", input_filepath, "-out_dir", output_directory]
if verbose: if verbose:
print('Computing OpenFace features {} from video file'.format(input_filepath)) print(
"Computing OpenFace features {} from video file".format(input_filepath)
)
subprocess.check_output(call) subprocess.check_output(call)
if verbose: if verbose:
print('OpenFace features saved to {}'.format(output_directory)) print("OpenFace features saved to {}".format(output_directory))
else: else:
if verbose: if verbose:
print('Output file {} already exists'.format(output_csv)) print("Output file {} already exists".format(output_csv))
return os.path.join(output_directory, bn + '.csv') return os.path.join(output_directory, bn + ".csv")

View File

@@ -10,11 +10,12 @@ import contextlib
import sys import sys
import wave import wave
def read_wave(path): def read_wave(path):
"""Reads a .wav file. """Reads a .wav file.
Takes the path, and returns (PCM audio data, sample rate). Takes the path, and returns (PCM audio data, sample rate).
""" """
with contextlib.closing(wave.open(path, 'rb')) as wf: with contextlib.closing(wave.open(path, "rb")) as wf:
num_channels = wf.getnchannels() num_channels = wf.getnchannels()
assert num_channels == 1 assert num_channels == 1
sample_width = wf.getsampwidth() sample_width = wf.getsampwidth()
@@ -27,11 +28,13 @@ def read_wave(path):
class Frame(object):
    """Container for one slice of audio: its raw bytes, start time and length."""

    def __init__(self, bytes, timestamp, duration):
        # Plain value holder; the three arguments are stored unchanged.
        self.bytes, self.timestamp, self.duration = bytes, timestamp, duration
def frame_generator(frame_duration_ms, audio, sample_rate): def frame_generator(frame_duration_ms, audio, sample_rate):
"""Generates audio frames from PCM audio data. """Generates audio frames from PCM audio data.
Takes the desired frame duration in milliseconds, the PCM data, and Takes the desired frame duration in milliseconds, the PCM data, and
@@ -48,8 +51,7 @@ def frame_generator(frame_duration_ms, audio, sample_rate):
offset += n offset += n
def vad_collector(sample_rate, frame_duration_ms, def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
padding_duration_ms, vad, frames):
"""Filters out non-voiced audio frames. """Filters out non-voiced audio frames.
Given a webrtcvad.Vad and a source of audio frames, yields only Given a webrtcvad.Vad and a source of audio frames, yields only
the voiced audio. the voiced audio.
@@ -80,7 +82,7 @@ def vad_collector(sample_rate, frame_duration_ms,
for frame in frames: for frame in frames:
is_speech = vad.is_speech(frame.bytes, sample_rate) is_speech = vad.is_speech(frame.bytes, sample_rate)
sys.stdout.write('1' if is_speech else '0') sys.stdout.write("1" if is_speech else "0")
if not triggered: if not triggered:
ring_buffer.append((frame, is_speech)) ring_buffer.append((frame, is_speech))
num_voiced = len([f for f, speech in ring_buffer if speech]) num_voiced = len([f for f, speech in ring_buffer if speech])
@@ -89,7 +91,7 @@ def vad_collector(sample_rate, frame_duration_ms,
# TRIGGERED state. # TRIGGERED state.
if num_voiced > 0.9 * ring_buffer.maxlen: if num_voiced > 0.9 * ring_buffer.maxlen:
triggered = True triggered = True
sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,)) sys.stdout.write("+(%s)" % (ring_buffer[0][0].timestamp,))
# We want to yield all the audio we see from now until # We want to yield all the audio we see from now until
# we are NOTTRIGGERED, but we have to start with the # we are NOTTRIGGERED, but we have to start with the
# audio that's already in the ring buffer. # audio that's already in the ring buffer.
@@ -106,23 +108,23 @@ def vad_collector(sample_rate, frame_duration_ms,
# unvoiced, then enter NOTTRIGGERED and yield whatever # unvoiced, then enter NOTTRIGGERED and yield whatever
# audio we've collected. # audio we've collected.
if num_unvoiced > 0.9 * ring_buffer.maxlen: if num_unvoiced > 0.9 * ring_buffer.maxlen:
sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration))
triggered = False triggered = False
yield b''.join([f.bytes for f in voiced_frames]) yield b"".join([f.bytes for f in voiced_frames])
ring_buffer.clear() ring_buffer.clear()
voiced_frames = [] voiced_frames = []
if triggered: # BT if were in triggered state at end of signal, set output time if triggered: # BT if were in triggered state at end of signal, set output time
sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration))
sys.stdout.write('\n') sys.stdout.write("\n")
# If we have any leftover voiced audio when we run out of input, # If we have any leftover voiced audio when we run out of input,
# yield it. # yield it.
if voiced_frames: if voiced_frames:
yield b''.join([f.bytes for f in voiced_frames]) yield b"".join([f.bytes for f in voiced_frames])
def vad_get_segment_times(
def vad_get_segment_times(sample_rate, frame_duration_ms, sample_rate, frame_duration_ms, padding_duration_ms, vad, frames
padding_duration_ms, vad, frames): ):
"""Filters out non-voiced audio frames. """Filters out non-voiced audio frames.
BT: based on vad_collector, but returns start and end times for voiced segs BT: based on vad_collector, but returns start and end times for voiced segs
@@ -211,11 +213,13 @@ def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 )
not_found = True not_found = True
for iseg in range(len(seg_starts)): for iseg in range(len(seg_starts)):
seg_dur = seg_ends[iseg] - seg_starts[iseg] seg_dur = seg_ends[iseg] - seg_starts[iseg]
if (not_found & (seg_dur > (pad_at_start + len_to_keep))): if not_found & (seg_dur > (pad_at_start + len_to_keep)):
t_start = seg_starts[iseg] + pad_at_start t_start = seg_starts[iseg] + pad_at_start
sel_start.append(t_start) sel_start.append(t_start)
sel_end.append(t_start + len_to_keep) sel_end.append(t_start + len_to_keep)
sel_end_longer.append(max(t_start + len_to_keep, seg_ends[iseg]-pad_at_start)) sel_end_longer.append(
max(t_start + len_to_keep, seg_ends[iseg] - pad_at_start)
)
not_found = False not_found = False
return sel_start, sel_end, sel_end_longer return sel_start, sel_end, sel_end_longer

View File

@@ -4,13 +4,15 @@ project_name: DBM
created: 2020-20-07 created: 2020-20-07
""" """
import pandas as pd
import numpy as np
import glob import glob
import numpy as np
import pandas as pd
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
def smooth(x,window_len=11,window='hanning'): def smooth(x, window_len=11, window="hanning"):
"""smooth the data using a window with requested size. """smooth the data using a window with requested size.
This method is based on the convolution of a scaled window with the signal. This method is based on the convolution of a scaled window with the signal.
@@ -47,40 +49,49 @@ def smooth(x,window_len=11,window='hanning'):
raise (ValueError, "Input vector needs to be bigger than window size.") raise (ValueError, "Input vector needs to be bigger than window size.")
if window_len < 3: if window_len < 3:
return x return x
if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']: if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
raise (ValueError, "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'") raise (
ValueError,
"Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'",
)
s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]] s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]]
# print(len(s)) # print(len(s))
if window == 'flat': #moving average if window == "flat": # moving average
w=np.ones(window_len,'d') w = np.ones(window_len, "d")
else: else:
w=eval('np.'+window+'(window_len)') w = eval("np." + window + "(window_len)")
y=np.convolve(w/w.sum(),s,mode='valid') y = np.convolve(w / w.sum(), s, mode="valid")
return y[int(window_len / 2) : -int(window_len / 2)] return y[int(window_len / 2) : -int(window_len / 2)]
def filter_by_confidence_and_thresh(x, fea, thresh):
    """
    Keep a feature value only when the row is trustworthy and in range.

    Args:
        x: Row-like mapping providing "s_confidence" and the feature column.
        fea: Name of the feature column to read from x.
        thresh: Exclusive upper bound on the absolute feature value.
    Returns:
        x[fea] when x["s_confidence"] > 0.2 and |x[fea]| < thresh,
        otherwise NaN.
    """
    if x["s_confidence"] > 0.2 and np.fabs(x[fea]) < thresh:
        return x[fea]
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    return np.nan
def add_au_emotion(x, emotion, emotion_type, exp_type): def add_au_emotion(x, emotion, emotion_type, exp_type):
""" """
computing individula emotion expressivity matrix computing individula emotion expressivity matrix
Args: Args:
emotion: Action Unit emotion: Action Unit
""" """
error_reason = 'Pass' error_reason = "Pass"
if x['s_confidence'] > 0.8: #if using smooth, no need for 'success' if x["s_confidence"] > 0.8: # if using smooth, no need for 'success'
sum_r = 0 sum_r = 0
cnt = 0 cnt = 0
for au in emotion: for au in emotion:
au_c_label = " AU{:02d}_c".format(au) au_c_label = " AU{:02d}_c".format(au)
au_r_label = " AU{:02d}_r".format(au) au_r_label = " AU{:02d}_r".format(au)
if x[au_c_label]==1 and (not np.isnan(x[au_r_label])): #there are data with face in, but au_c=0 if x[au_c_label] == 1 and (
not np.isnan(x[au_r_label])
): # there are data with face in, but au_c=0
sum_r += x[au_r_label] sum_r += x[au_r_label]
cnt += 6 cnt += 6
if exp_type=='full' and x[au_c_label]==0: #Logic to compute emotion expressivity when all AU's are present if (
exp_type == "full" and x[au_c_label] == 0
): # Logic to compute emotion expressivity when all AU's are present
cnt = 0 cnt = 0
break break
if cnt > 0: if cnt > 0:
@@ -90,10 +101,11 @@ def add_au_emotion(x, emotion,emotion_type,exp_type):
v_emo = x[emotion_type] + sum_r v_emo = x[emotion_type] + sum_r
else: else:
v_emo = np.NaN v_emo = np.NaN
error_reason = 'confidence less than 80%' error_reason = "confidence less than 80%"
return v_emo, error_reason return v_emo, error_reason
def add_au_occ(x, emotion, emotion_type): def add_au_occ(x, emotion, emotion_type):
""" """
computing individula emotion presence computing individula emotion presence
@@ -102,8 +114,8 @@ def add_au_occ(x, emotion,emotion_type):
""" """
au_pres = [] au_pres = []
em_pres = 0 em_pres = 0
error_reason = 'Pass' error_reason = "Pass"
if x['s_confidence'] > 0.8: #if using smooth, no need for 'success' if x["s_confidence"] > 0.8: # if using smooth, no need for 'success'
for au in emotion: for au in emotion:
au_c_label = " AU{:02d}_c".format(au) au_c_label = " AU{:02d}_c".format(au)
if x[au_c_label] == 1: # there are data with face in, but au_c=0 if x[au_c_label] == 1: # there are data with face in, but au_c=0
@@ -113,68 +125,177 @@ def add_au_occ(x, emotion,emotion_type):
em_pres = 1 em_pres = 1
else: else:
em_pres = np.NaN em_pres = np.NaN
error_reason = 'confidence less than 80%' error_reason = "confidence less than 80%"
return em_pres, error_reason return em_pres, error_reason
def emotion_exp(em_au, of, em_col, err_col):
    """
    Apply add_au_emotion row-wise for every entry of em_au, in two modes.

    For each entry, the "partial" result is written to em_col[0] and the
    "full" result to em_col[1]; err_col is overwritten by every call.
    The dataframe `of` is modified in place.
    """
    for emotion in em_au:
        for position, exp_type in enumerate(("partial", "full")):
            target_col = em_col[position]
            of[[target_col, err_col]] = of.apply(
                add_au_emotion,
                args=(emotion, target_col, exp_type),
                axis=1,
                result_type="expand",
            )
def emotion_pres(em_au, of, em_col, err_col):
    """
    Apply add_au_occ row-wise for every entry of em_au.

    Each pass writes the two values returned by add_au_occ into the em_col
    and err_col columns of `of`, which is modified in place.
    """
    target_cols = [em_col, err_col]
    for emotion in em_au:
        of[target_cols] = of.apply(
            lambda row: add_au_occ(row, emotion, em_col),
            axis=1,
            result_type="expand",
        )
def calc_of_for_video(of, face_cfg, fe_cfg): def calc_of_for_video(of, face_cfg, fe_cfg):
""" """
Creating dataframe for emotion expressivity Creating dataframe for emotion expressivity
""" """
new_cols = [fe_cfg.hap_exp,fe_cfg.sad_exp,fe_cfg.sur_exp,fe_cfg.fea_exp,fe_cfg.ang_exp,fe_cfg.dis_exp,fe_cfg.con_exp, new_cols = [
fe_cfg.pai_exp,fe_cfg.neg_exp,fe_cfg.pos_exp,fe_cfg.neu_exp,fe_cfg.com_lower_exp,fe_cfg.com_upper_exp, fe_cfg.hap_exp,
fe_cfg.cai_exp,fe_cfg.com_exp,fe_cfg.happ_occ,fe_cfg.sad_occ,fe_cfg.sur_occ,fe_cfg.fea_occ,fe_cfg.ang_occ, fe_cfg.sad_exp,
fe_cfg.dis_occ,fe_cfg.con_occ,fe_cfg.hap_exp_full,fe_cfg.sad_exp_full,fe_cfg.sur_exp_full,fe_cfg.fea_exp_full, fe_cfg.sur_exp,
fe_cfg.ang_exp_full,fe_cfg.dis_exp_full,fe_cfg.con_exp_full,fe_cfg.pai_exp_full,fe_cfg.neg_exp_full, fe_cfg.fea_exp,
fe_cfg.pos_exp_full,fe_cfg.neu_exp_full,fe_cfg.cai_exp_full,fe_cfg.com_lower_exp_full,fe_cfg.com_upper_exp_full, fe_cfg.ang_exp,
fe_cfg.com_exp_full] fe_cfg.dis_exp,
fe_cfg.con_exp,
fe_cfg.pai_exp,
fe_cfg.neg_exp,
fe_cfg.pos_exp,
fe_cfg.neu_exp,
fe_cfg.com_lower_exp,
fe_cfg.com_upper_exp,
fe_cfg.cai_exp,
fe_cfg.com_exp,
fe_cfg.happ_occ,
fe_cfg.sad_occ,
fe_cfg.sur_occ,
fe_cfg.fea_occ,
fe_cfg.ang_occ,
fe_cfg.dis_occ,
fe_cfg.con_occ,
fe_cfg.hap_exp_full,
fe_cfg.sad_exp_full,
fe_cfg.sur_exp_full,
fe_cfg.fea_exp_full,
fe_cfg.ang_exp_full,
fe_cfg.dis_exp_full,
fe_cfg.con_exp_full,
fe_cfg.pai_exp_full,
fe_cfg.neg_exp_full,
fe_cfg.pos_exp_full,
fe_cfg.neu_exp_full,
fe_cfg.cai_exp_full,
fe_cfg.com_lower_exp_full,
fe_cfg.com_upper_exp_full,
fe_cfg.com_exp_full,
]
of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index) of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index)
of[fe_cfg.err_reason] = 'Pass' of[fe_cfg.err_reason] = "Pass"
# Composite happiness expressivity # Composite happiness expressivity
emotion_exp(face_cfg.happiness,of,[fe_cfg.hap_exp,fe_cfg.hap_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.happiness, of, [fe_cfg.hap_exp, fe_cfg.hap_exp_full], fe_cfg.err_reason
)
# Composite sadness expressivity # Composite sadness expressivity
emotion_exp(face_cfg.sadness,of,[fe_cfg.sad_exp,fe_cfg.sad_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.sadness, of, [fe_cfg.sad_exp, fe_cfg.sad_exp_full], fe_cfg.err_reason
)
# Composite surprise expressivity # Composite surprise expressivity
emotion_exp(face_cfg.surprise,of,[fe_cfg.sur_exp,fe_cfg.sur_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.surprise, of, [fe_cfg.sur_exp, fe_cfg.sur_exp_full], fe_cfg.err_reason
)
# Composite fear expressivity # Composite fear expressivity
emotion_exp(face_cfg.fear,of,[fe_cfg.fea_exp,fe_cfg.fea_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.fear, of, [fe_cfg.fea_exp, fe_cfg.fea_exp_full], fe_cfg.err_reason
)
# Composite anger expressivity # Composite anger expressivity
emotion_exp(face_cfg.anger,of,[fe_cfg.ang_exp,fe_cfg.ang_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.anger, of, [fe_cfg.ang_exp, fe_cfg.ang_exp_full], fe_cfg.err_reason
)
# Composite disgust expressivity # Composite disgust expressivity
emotion_exp(face_cfg.disgust,of,[fe_cfg.dis_exp,fe_cfg.dis_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.disgust, of, [fe_cfg.dis_exp, fe_cfg.dis_exp_full], fe_cfg.err_reason
)
# Composite contempt expressivity # Composite contempt expressivity
emotion_exp(face_cfg.contempt,of,[fe_cfg.con_exp,fe_cfg.con_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.contempt, of, [fe_cfg.con_exp, fe_cfg.con_exp_full], fe_cfg.err_reason
)
# Composite Negative Expressivity # Composite Negative Expressivity
emotion_exp(face_cfg.NEG_ACTION_UNITS,of,[fe_cfg.neg_exp,fe_cfg.neg_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.NEG_ACTION_UNITS,
of,
[fe_cfg.neg_exp, fe_cfg.neg_exp_full],
fe_cfg.err_reason,
)
# Composite Positive Expressivity # Composite Positive Expressivity
emotion_exp(face_cfg.POS_ACTION_UNITS,of,[fe_cfg.pos_exp,fe_cfg.pos_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.POS_ACTION_UNITS,
of,
[fe_cfg.pos_exp, fe_cfg.pos_exp_full],
fe_cfg.err_reason,
)
# Composite Neutral Expressivity # Composite Neutral Expressivity
emotion_exp(face_cfg.NET_ACTION_UNITS,of,[fe_cfg.neu_exp,fe_cfg.neu_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.NET_ACTION_UNITS,
of,
[fe_cfg.neu_exp, fe_cfg.neu_exp_full],
fe_cfg.err_reason,
)
# Composite Activation Expressivity # Composite Activation Expressivity
emotion_exp(face_cfg.cai,of,[fe_cfg.cai_exp,fe_cfg.cai_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.cai, of, [fe_cfg.cai_exp, fe_cfg.cai_exp_full], fe_cfg.err_reason
)
# Composite Expressivity # Composite Expressivity
emotion_exp(face_cfg.ACTION_UNITS,of,[fe_cfg.com_exp,fe_cfg.com_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.ACTION_UNITS,
of,
[fe_cfg.com_exp, fe_cfg.com_exp_full],
fe_cfg.err_reason,
)
# Composite lower face expressivity # Composite lower face expressivity
emotion_exp(face_cfg.LOWER_ACTION_UNITS,of,[fe_cfg.com_lower_exp,fe_cfg.com_lower_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.LOWER_ACTION_UNITS,
of,
[fe_cfg.com_lower_exp, fe_cfg.com_lower_exp_full],
fe_cfg.err_reason,
)
# Composite upper face Expressivity # Composite upper face Expressivity
emotion_exp(face_cfg.UPPER_ACTION_UNITS,of,[fe_cfg.com_upper_exp,fe_cfg.com_upper_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.UPPER_ACTION_UNITS,
of,
[fe_cfg.com_upper_exp, fe_cfg.com_upper_exp_full],
fe_cfg.err_reason,
)
# Composite pain expressivity # Composite pain expressivity
emotion_exp(face_cfg.pain,of,[fe_cfg.pai_exp,fe_cfg.pai_exp_full],fe_cfg.err_reason) emotion_exp(
face_cfg.pain, of, [fe_cfg.pai_exp, fe_cfg.pai_exp_full], fe_cfg.err_reason
)
# AU happiness presence # AU happiness presence
emotion_pres(face_cfg.happiness, of, fe_cfg.happ_occ, fe_cfg.err_reason) emotion_pres(face_cfg.happiness, of, fe_cfg.happ_occ, fe_cfg.err_reason)
# AU Sad presence # AU Sad presence