util code refactoring

2022-09-15 20:49:50 +07:00
parent 9223bce123
commit f8818a4047
5 changed files with 560 additions and 301 deletions
--- a/opendbm/dbm_lib/dbm_features/raw_features/util/math_util.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/util/math_util.py
@@ -4,9 +4,13 @@ project_name: cdx_analysis
 created: 2019-03-16
 author: Deshana Desai
 """
-import sys, os, glob, cv2
-import pandas as pd
+import glob
+import os
+import sys
+
+import cv2
 import numpy as np
+import pandas as pd


 def euclidean_distance(point1, point2):
@@ -14,7 +18,7 @@ def euclidean_distance(point1, point2):
    Compute euclidean distance between points
    """

-    return np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
+    return np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)


 # def detect_peaks()
@@ -25,8 +29,7 @@ def expand_landmarks(landmarks):
    util method to expand landmark list:
    eg: [1,2] -> [['l1_x', 'l1_y'], ['l2_x', 'l2_y']]
    """
-    return [['l{}_x'.format(l), 'l{}_y'.format(l)] for l in landmarks]
-
+    return [["l{}_x".format(point), "l{}_y".format(point)] for point in landmarks]


 def calc_displacement_vec(df, landmarks, num_frames):
@@ -44,13 +47,12 @@ def calc_displacement_vec(df, landmarks, num_frames):
        first_row = df.iloc[0]
        prev_point[j] = (first_row[pair[0]], first_row[pair[1]])

-
    for i in range(num_frames):
        frame_row = df.iloc[i]
        for j, pair in enumerate(landmarks):
            x, y = pair[0], pair[1]
            current = (frame_row[x], frame_row[y])
-            deviation = euclidean_distance( current, prev_point[j])
+            deviation = euclidean_distance(current, prev_point[j])
            disp_vec[j][i] = deviation
            prev_point[j] = current

--- a/opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py
@@ -4,45 +4,56 @@ project_name: DBM
 created: 2020-10-11
 """

-import subprocess
 import json
-import numpy as np
-import pandas as pd
-import os
 import logging
+import os
+import re
+import subprocess

 import nltk
-import re
+import numpy as np
+import pandas as pd
 from lexicalrichness import LexicalRichness
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

 logging.basicConfig(level=logging.INFO)
-logger=logging.getLogger()
+logger = logging.getLogger()

-#Speech to text using Deepspeech 0.9.1
-def deepspeech(AUDIO_FILE,deep_path):
+# Speech to text using Deepspeech 0.9.1
+def deepspeech(AUDIO_FILE, deep_path):
    """
    Extracting text from audio using Deep Speech neural network trained model
    Returns:
        Text: text which is extracted from audio
    """
-    api = 'deepspeech'
-    arg_speech0 = '--model'
-    arg_speech_path0 = os.path.join(deep_path, 'deepspeech-0.9.1-models.pbmm')
-    arg_speech1 = '--scorer'
-    arg_speech_path1 = os.path.join(deep_path, 'deepspeech-0.9.1-models.scorer')
+    api = "deepspeech"
+    arg_speech0 = "--model"
+    arg_speech_path0 = os.path.join(deep_path, "deepspeech-0.9.1-models.pbmm")
+    arg_speech1 = "--scorer"
+    arg_speech_path1 = os.path.join(deep_path, "deepspeech-0.9.1-models.scorer")
    arg_audio = "--audio"

-    out = subprocess.Popen([api, arg_speech0, arg_speech_path0, arg_speech1, arg_speech_path1, arg_audio, AUDIO_FILE],
+    out = subprocess.Popen(
+        [
+            api,
+            arg_speech0,
+            arg_speech_path0,
+            arg_speech1,
+            arg_speech_path1,
+            arg_audio,
+            AUDIO_FILE,
+        ],
        stdout=subprocess.PIPE,
-                           stderr=subprocess.STDOUT)
-    logger.info('Deepspeech output...... {}'.format(out))
+        stderr=subprocess.STDOUT,
+    )
+    logger.info("Deepspeech output...... {}".format(out))
    try:
-        stdout,stderr = out.communicate()
+        stdout, stderr = out.communicate()
    except:
        return "error", "error"
-    #print(stderr)
-    return stdout,stderr
+    # print(stderr)
+    return stdout, stderr
+

 def deep_speech_output_clean(result):
    """
@@ -51,40 +62,43 @@ def deep_speech_output_clean(result):
        Text from speech
    """
    text = ""
-    if len(result)>0:
-        res_split = str(result[0]).split('\\n')
+    if len(result) > 0:
+        res_split = str(result[0]).split("\\n")

-        if len(res_split)>0:
+        if len(res_split) > 0:
            for i in range(len(res_split)):
-                if 'Inference took' in res_split[i]:
+                if "Inference took" in res_split[i]:
                    text = res_split[i + 1]
                    return text
    return text

-def process_deepspeech(audio_file,deep_path):
+
+def process_deepspeech(audio_file, deep_path):
    """
    Transcribing audio to extract text from speech
    """
-    deep_output = deepspeech(audio_file,deep_path)
-    deep_text= deep_speech_output_clean(deep_output)
+    deep_output = deepspeech(audio_file, deep_path)
+    deep_text = deep_speech_output_clean(deep_output)

    return deep_text

+
 def nltk_download():

    try:
-        nltk.data.find('tokenizers/punkt')
+        nltk.data.find("tokenizers/punkt")

    except LookupError:
-        logger.info('punkt is not available')
-        nltk.download('punkt')
+        logger.info("punkt is not available")
+        nltk.download("punkt")

    try:
-        nltk.data.find('averaged_perceptron_tagger')
+        nltk.data.find("averaged_perceptron_tagger")

    except LookupError:
-        logger.info('averaged_perceptron_tagger is not available')
-        nltk.download('averaged_perceptron_tagger')
+        logger.info("averaged_perceptron_tagger is not available")
+        nltk.download("averaged_perceptron_tagger")
+

 def empty_speech(r_config, master_url, error_txt):
    """
@@ -97,27 +111,44 @@ def empty_speech(r_config, master_url, error_txt):
            Empty dataframe for speech features with error
    """

-    col = [r_config.nlp_numSentences, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, r_config.nlp_pastTensePerAns,
-          r_config.nlp_pastTensePerSen, r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen, r_config.nlp_verbsPerAns,
-          r_config.nlp_verbsPerSen, r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen, r_config.nlp_nounsPerAns,
-          r_config.nlp_nounsPerSen, r_config.nlp_sentiment_mean, r_config.nlp_mattr, r_config.nlp_wordsPerMin,
-          r_config.nlp_totalTime, r_config.err_reason]
+    col = [
+        r_config.nlp_numSentences,
+        r_config.nlp_singPronPerAns,
+        r_config.nlp_singPronPerSen,
+        r_config.nlp_pastTensePerAns,
+        r_config.nlp_pastTensePerSen,
+        r_config.nlp_pronounsPerAns,
+        r_config.nlp_pronounsPerSen,
+        r_config.nlp_verbsPerAns,
+        r_config.nlp_verbsPerSen,
+        r_config.nlp_adjectivesPerAns,
+        r_config.nlp_adjectivesPerSen,
+        r_config.nlp_nounsPerAns,
+        r_config.nlp_nounsPerSen,
+        r_config.nlp_sentiment_mean,
+        r_config.nlp_mattr,
+        r_config.nlp_wordsPerMin,
+        r_config.nlp_totalTime,
+        r_config.err_reason,
+    ]

-    df_speech = pd.DataFrame([[np.nan] * len(col) + [error_txt]], columns = col)
-    df_speech['dbm_master_url'] = master_url
+    df_speech = pd.DataFrame([[np.nan] * len(col) + [error_txt]], columns=col)
+    df_speech["dbm_master_url"] = master_url

    return df_speech

+
 def divide_var(speech_var1, spech_var2):
    """
    divide variables
    """
    speech_var = np.nan
-    if spech_var2!=0:
-        speech_var = speech_var1/spech_var2
+    if spech_var2 != 0:
+        speech_var = speech_var1 / spech_var2
    return speech_var

-def process_speech(transcribe_df,r_config):
+
+def process_speech(transcribe_df, r_config):
    """
    Preparing speech features
    Args:
@@ -126,18 +157,18 @@ def process_speech(transcribe_df,r_config):
    Returns:
        Dataframe for speech features
    """
-    transcribe_df = transcribe_df.replace(np.nan, '', regex=True)
+    transcribe_df = transcribe_df.replace(np.nan, "", regex=True)
    err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
    transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
    total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
-    master_url = transcribe_df['dbm_master_url'].iloc[0]
+    master_url = transcribe_df["dbm_master_url"].iloc[0]

-    #clean transcribe
+    # clean transcribe
    transcribe = transcribe.replace(",", "")
    transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe))

-    if err_transcribe != 'Pass':
-        df_speech = empty_speech(r_config, master_url, error_txt)
+    if err_transcribe != "Pass":
+        df_speech = empty_speech(r_config, master_url, "error")

        return df_speech

@@ -150,63 +181,93 @@ def process_speech(transcribe_df,r_config):

    speech_dict[r_config.nlp_numSentences] = num_sentences

-    #nlp_singPron
-    i_s = transcribe.count('I')
-    me_s = transcribe.count('me')
-    my_s = transcribe.count('my')
+    # nlp_singPron
+    i_s = transcribe.count("I")
+    me_s = transcribe.count("me")
+    my_s = transcribe.count("my")
    sing_count = i_s + me_s + my_s

-    speech_dict[r_config.nlp_singPronPerAns] = sing_count if len(words_all)>0 else np.nan
-    speech_dict[r_config.nlp_singPronPerSen] = divide_var(speech_dict[r_config.nlp_singPronPerAns], num_sentences)
+    speech_dict[r_config.nlp_singPronPerAns] = (
+        sing_count if len(words_all) > 0 else np.nan
+    )
+    speech_dict[r_config.nlp_singPronPerSen] = divide_var(
+        speech_dict[r_config.nlp_singPronPerAns], num_sentences
+    )

    tagged = nltk.pos_tag(transcribe.split())
-    tagged_df = pd.DataFrame(tagged, columns=['word', 'pos_tag'])
+    tagged_df = pd.DataFrame(tagged, columns=["word", "pos_tag"])

-    #Past tense per answer
-    all_POSs = tagged_df['pos_tag'].tolist()
-    speech_dict[r_config.nlp_pastTensePerAns] = all_POSs.count('VBD') if len(words_all)>0 else np.nan
-    speech_dict[r_config.nlp_pastTensePerSen] = divide_var(speech_dict[r_config.nlp_pastTensePerAns], num_sentences)
+    # Past tense per answer
+    all_POSs = tagged_df["pos_tag"].tolist()
+    speech_dict[r_config.nlp_pastTensePerAns] = (
+        all_POSs.count("VBD") if len(words_all) > 0 else np.nan
+    )
+    speech_dict[r_config.nlp_pastTensePerSen] = divide_var(
+        speech_dict[r_config.nlp_pastTensePerAns], num_sentences
+    )

-    #Pronoun per answer
-    pronounsPerAns = all_POSs.count('PRP') + all_POSs.count('PRP$')
-    speech_dict[r_config.nlp_pronounsPerAns] = pronounsPerAns if len(words_all)>0 else np.nan
-    speech_dict[r_config.nlp_pronounsPerSen] = divide_var(speech_dict[r_config.nlp_pronounsPerAns], num_sentences)
+    # Pronoun per answer
+    pronounsPerAns = all_POSs.count("PRP") + all_POSs.count("PRP$")
+    speech_dict[r_config.nlp_pronounsPerAns] = (
+        pronounsPerAns if len(words_all) > 0 else np.nan
+    )
+    speech_dict[r_config.nlp_pronounsPerSen] = divide_var(
+        speech_dict[r_config.nlp_pronounsPerAns], num_sentences
+    )

-    #Verb per answer
-    verbPerAns = all_POSs.count('VB') + all_POSs.count('VBD') + all_POSs.count('VBG') \
-                      + all_POSs.count('VBN') + all_POSs.count('VBP') + all_POSs.count('VBZ')
+    # Verb per answer
+    verbPerAns = (
+        all_POSs.count("VB")
+        + all_POSs.count("VBD")
+        + all_POSs.count("VBG")
+        + all_POSs.count("VBN")
+        + all_POSs.count("VBP")
+        + all_POSs.count("VBZ")
+    )
    speech_dict[r_config.nlp_verbsPerAns] = verbPerAns if len(words_all) > 0 else np.nan
-    speech_dict[r_config.nlp_verbsPerSen] = divide_var(speech_dict[r_config.nlp_verbsPerAns], num_sentences)
+    speech_dict[r_config.nlp_verbsPerSen] = divide_var(
+        speech_dict[r_config.nlp_verbsPerAns], num_sentences
+    )

-    #Adjective per answer
-    adjectivesAns = all_POSs.count('JJ') + all_POSs.count('JJR') + all_POSs.count('JJS')
-    speech_dict[r_config.nlp_adjectivesPerAns] = adjectivesAns if len(words_all) > 0 else np.nan
-    speech_dict[r_config.nlp_adjectivesPerSen] = divide_var(speech_dict[r_config.nlp_adjectivesPerAns], num_sentences)
+    # Adjective per answer
+    adjectivesAns = all_POSs.count("JJ") + all_POSs.count("JJR") + all_POSs.count("JJS")
+    speech_dict[r_config.nlp_adjectivesPerAns] = (
+        adjectivesAns if len(words_all) > 0 else np.nan
+    )
+    speech_dict[r_config.nlp_adjectivesPerSen] = divide_var(
+        speech_dict[r_config.nlp_adjectivesPerAns], num_sentences
+    )

-    #Noun per answer
-    nounsAns = all_POSs.count('NN') + all_POSs.count('NNP') + all_POSs.count('NNS')
+    # Noun per answer
+    nounsAns = all_POSs.count("NN") + all_POSs.count("NNP") + all_POSs.count("NNS")
    speech_dict[r_config.nlp_nounsPerAns] = nounsAns if len(words_all) > 0 else np.nan
-    speech_dict[r_config.nlp_nounsPerSen] = divide_var(speech_dict[r_config.nlp_nounsPerAns], num_sentences)
+    speech_dict[r_config.nlp_nounsPerSen] = divide_var(
+        speech_dict[r_config.nlp_nounsPerAns], num_sentences
+    )

-    #Sentiment analysis
+    # Sentiment analysis
    vader = SentimentIntensityAnalyzer()
    sentence_valences = []

    for s in sentences:
        sentiment_dict = vader.polarity_scores(s)
-        sentence_valences.append(sentiment_dict['compound'])
+        sentence_valences.append(sentiment_dict["compound"])

-    speech_dict[r_config.nlp_sentiment_mean] = np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan
-    non_punc = list(value for value in words_all if value not in ['.','!','?'])
+    speech_dict[r_config.nlp_sentiment_mean] = (
+        np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan
+    )
+    non_punc = list(value for value in words_all if value not in [".", "!", "?"])

    non_punc_as_str = " ".join(str(non_punc))
    lex = LexicalRichness(non_punc_as_str)
-    speech_dict[r_config.nlp_mattr] = lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan
+    speech_dict[r_config.nlp_mattr] = (
+        lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan
+    )

-    #Number of words per minute
-    speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time)*60
+    # Number of words per minute
+    speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time) * 60
    speech_dict[r_config.nlp_totalTime] = total_time
-    speech_dict['dbm_master_url'] = master_url
+    speech_dict["dbm_master_url"] = master_url

    df_speech = pd.DataFrame([speech_dict])
    return df_speech
--- a/opendbm/dbm_lib/dbm_features/raw_features/util/util.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/util/util.py
@@ -4,11 +4,65 @@ project_name: DBM
 created: 2020-20-07
 """

-import os
 import glob
-import numpy as np
+import os
 import subprocess

+import more_itertools as mit
+import numpy as np
+import pandas as pd
+
+
+def get_length(filename):  # duration of `filename` as printed by ffprobe, as a float
+    result = subprocess.run(
+        [
+            "ffprobe",
+            "-v",
+            "error",  # log level passed to ffprobe
+            "-show_entries",
+            "format=duration",  # request only the duration entry
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",  # output format options
+            filename,
+        ],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,  # stderr is merged into the captured stdout
+        stdin=subprocess.DEVNULL,
+    )
+    return float(result.stdout)  # raises ValueError if the output is not a number
+
+
+def process_segment_pitch(ff_df, r_config):  # -> (all runs, "yes" runs, "no" runs)
+    voice_label = ff_df[r_config.aco_voiceLabel]
+    # positions labelled "yes", grouped into runs of consecutive positions
+    indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
+    voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]
+    # same grouping for the positions labelled "no"; other labels are ignored
+    indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
+    voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]
+    # merge both kinds of run and order them by the first position of each run
+    com_speech = voiced_yes + voiced_no
+    com_speech_sort = sorted(com_speech, key=lambda x: x[0])
+    return com_speech_sort, voiced_yes, voiced_no
+
+
+def segment_pitch(dir_path, r_config, ff_df=None):
+    """
+    Segment the first usable "*_pitch.csv" under dir_path into voice runs (see
+    process_segment_pitch); empty lists if none works or ff_df is passed in.
+    """
+    com_speech_sort, voiced_yes, voiced_no = [], [], []  # three separate lists
+    for file in os.listdir(dir_path):
+        try:
+            if file.endswith("_pitch.csv") and ff_df is None:
+                ff_df = pd.read_csv(os.path.join(dir_path, file))
+                segments = process_segment_pitch(ff_df, r_config)
+                com_speech_sort, voiced_yes, voiced_no = segments
+        except Exception:  # best effort: skip a file that cannot be segmented
+            ff_df = None  # let the next pitch file be tried
+    return com_speech_sort, voiced_yes, voiced_no
+
+
 def filter_path(video_url, out_dir):

    """
@@ -20,11 +74,12 @@ def filter_path(video_url, out_dir):

    """

-    fl_name,_ = os.path.splitext(os.path.basename(video_url))
+    fl_name, _ = os.path.splitext(os.path.basename(video_url))
    input_loc = os.path.dirname(video_url)
    out_loc = os.path.join(out_dir, fl_name)
    return input_loc, out_loc, fl_name

+
 def save_output(df, out_loc, fl_name, f_dir, f_ext):
    """
    creating output directory for Audio features
@@ -41,29 +96,33 @@ def save_output(df, out_loc, fl_name, f_dir, f_ext):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

-    sav_path = os.path.join(dir_path,full_f_name)
+    sav_path = os.path.join(dir_path, full_f_name)
    df.to_csv(sav_path, index=False)

-def audio_process(base_dir,video_url):
+
+def audio_process(base_dir, video_url):
    """
    Parsing cleaned audio files(Audio files without IMA voice)
    Args:
        base_dir: Base path for raw data
        video_url: Raw video file path
    """
-    new_video_url = base_dir+'/'.join(video_url[2:])
-    split_val = new_video_url.split('/')
-    wav_path = '/'.join(split_val[0:len(split_val)-1])
-    audio_split_check = glob.glob(wav_path + '/*_split.wav')
+    new_video_url = base_dir + "/".join(video_url[2:])
+    split_val = new_video_url.split("/")
+    wav_path = "/".join(split_val[0 : len(split_val) - 1])
+    audio_split_check = glob.glob(wav_path + "/*_split.wav")
    return audio_split_check

-def compute_open_face_features(input_filepath,
+
+def compute_open_face_features(
+    input_filepath,
    output_directory,
    open_face_executable,
    au_static=False,
    tracked_visualization=False,
    clobber=False,
-                         verbose=True):
+    verbose=True,
+):
    """
    Runs OpenFace on an input video.
    See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments
@@ -82,31 +141,43 @@ def compute_open_face_features(input_filepath,
    """

    if not os.path.isfile(open_face_executable):
-        raise IOError("OpenFace executable {} could not be found.".format(open_face_executable))
+        raise IOError(
+            "OpenFace executable {} could not be found.".format(open_face_executable)
+        )

    bn, _ = os.path.splitext(os.path.basename(input_filepath))
    if not output_directory:
-        output_directory = os.path.join(os.path.dirname(input_filepath), bn + '_openface')
+        output_directory = os.path.join(
+            os.path.dirname(input_filepath), bn + "_openface"
+        )

-    output_csv = os.path.join(output_directory, bn + '.csv')
+    output_csv = os.path.join(output_directory, bn + ".csv")
    if not os.path.isfile(output_csv) or clobber:
-        call = [open_face_executable, ]
+        call = [
+            open_face_executable,
+        ]
        if au_static:
-            call += ['-au_static', ]
+            call += [
+                "-au_static",
+            ]

        if tracked_visualization:
-            call += ['-tracked', ]
+            call += [
+                "-tracked",
+            ]

-        call += ['-q', '-2Dfp', '-3Dfp', '-pdmparams', '-pose', '-aus', '-gaze']
-        call += ['-f', input_filepath, '-out_dir', output_directory]
+        call += ["-q", "-2Dfp", "-3Dfp", "-pdmparams", "-pose", "-aus", "-gaze"]
+        call += ["-f", input_filepath, "-out_dir", output_directory]

        if verbose:
-            print('Computing OpenFace features {} from video file'.format(input_filepath))
+            print(
+                "Computing OpenFace features {} from video file".format(input_filepath)
+            )
        subprocess.check_output(call)
        if verbose:
-            print('OpenFace features saved to {}'.format(output_directory))
+            print("OpenFace features saved to {}".format(output_directory))
    else:
        if verbose:
-            print('Output file {} already exists'.format(output_csv))
+            print("Output file {} already exists".format(output_csv))

-    return os.path.join(output_directory, bn + '.csv')
+    return os.path.join(output_directory, bn + ".csv")
--- a/opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
@@ -10,11 +10,12 @@ import contextlib
 import sys
 import wave

+
 def read_wave(path):
    """Reads a .wav file.
    Takes the path, and returns (PCM audio data, sample rate).
    """
-    with contextlib.closing(wave.open(path, 'rb')) as wf:
+    with contextlib.closing(wave.open(path, "rb")) as wf:
        num_channels = wf.getnchannels()
        assert num_channels == 1
        sample_width = wf.getsampwidth()
@@ -27,11 +28,13 @@ def read_wave(path):

 class Frame(object):
    """Represents a "frame" of audio data."""
+
    def __init__(self, bytes, timestamp, duration):
        self.bytes = bytes
        self.timestamp = timestamp
        self.duration = duration

+
 def frame_generator(frame_duration_ms, audio, sample_rate):
    """Generates audio frames from PCM audio data.
    Takes the desired frame duration in milliseconds, the PCM data, and
@@ -43,13 +46,12 @@ def frame_generator(frame_duration_ms, audio, sample_rate):
    timestamp = 0.0
    duration = (float(n) / sample_rate) / 2.0
    while offset + n < len(audio):
-        yield Frame(audio[offset:offset + n], timestamp, duration)
+        yield Frame(audio[offset : offset + n], timestamp, duration)
        timestamp += duration
        offset += n


-def vad_collector(sample_rate, frame_duration_ms,
-                  padding_duration_ms, vad, frames):
+def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
    """Filters out non-voiced audio frames.
    Given a webrtcvad.Vad and a source of audio frames, yields only
    the voiced audio.
@@ -80,7 +82,7 @@ def vad_collector(sample_rate, frame_duration_ms,
    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)

-        sys.stdout.write('1' if is_speech else '0')
+        sys.stdout.write("1" if is_speech else "0")
        if not triggered:
            ring_buffer.append((frame, is_speech))
            num_voiced = len([f for f, speech in ring_buffer if speech])
@@ -89,7 +91,7 @@ def vad_collector(sample_rate, frame_duration_ms,
            # TRIGGERED state.
            if num_voiced > 0.9 * ring_buffer.maxlen:
                triggered = True
-                sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
+                sys.stdout.write("+(%s)" % (ring_buffer[0][0].timestamp,))
                # We want to yield all the audio we see from now until
                # we are NOTTRIGGERED, but we have to start with the
                # audio that's already in the ring buffer.
@@ -106,23 +108,23 @@ def vad_collector(sample_rate, frame_duration_ms,
            # unvoiced, then enter NOTTRIGGERED and yield whatever
            # audio we've collected.
            if num_unvoiced > 0.9 * ring_buffer.maxlen:
-                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
+                sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration))
                triggered = False
-                yield b''.join([f.bytes for f in voiced_frames])
+                yield b"".join([f.bytes for f in voiced_frames])
                ring_buffer.clear()
                voiced_frames = []
    if triggered:  # BT if were in triggered state at end of signal, set output time
-        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
-    sys.stdout.write('\n')
+        sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration))
+    sys.stdout.write("\n")
    # If we have any leftover voiced audio when we run out of input,
    # yield it.
    if voiced_frames:
-        yield b''.join([f.bytes for f in voiced_frames])
+        yield b"".join([f.bytes for f in voiced_frames])


-
-def vad_get_segment_times(sample_rate, frame_duration_ms,
-                  padding_duration_ms, vad, frames):
+def vad_get_segment_times(
+    sample_rate, frame_duration_ms, padding_duration_ms, vad, frames
+):
    """Filters out non-voiced audio frames.
    BT: based on vad_collector, but returns start and end times for voiced segs

@@ -158,7 +160,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
    for frame in frames:
        is_speech = vad.is_speech(frame.bytes, sample_rate)

-        #sys.stdout.write('1' if is_speech else '0')
+        # sys.stdout.write('1' if is_speech else '0')
        if not triggered:
            ring_buffer.append((frame, is_speech))
            num_voiced = len([f for f, speech in ring_buffer if speech])
@@ -167,7 +169,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
            # TRIGGERED state.
            if num_voiced > 0.9 * ring_buffer.maxlen:
                triggered = True
-                #sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
+                # sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
                start_times.append(ring_buffer[0][0].timestamp)  # BT
                ring_buffer.clear()
        else:
@@ -179,23 +181,23 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
            # unvoiced, then enter NOTTRIGGERED and yield whatever
            # audio we've collected.
            if num_unvoiced > 0.9 * ring_buffer.maxlen:
-                #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
+                # sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
                end_times.append(ring_buffer[0][0].timestamp + frame.duration)  # BT
                triggered = False

    if triggered:  # BT if were in triggered state at end of signal, set output time
-        #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
-        if len(ring_buffer)>0:
-            end_times.append(ring_buffer[0][0].timestamp )  # BT
+        # sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
+        if len(ring_buffer) > 0:
+            end_times.append(ring_buffer[0][0].timestamp)  # BT
        else:
            # only get here in very rare case that we triggered on 2nd-to-last frame
            end_times.append(frame.timestamp + frame.duration)
-    #sys.stdout.write('\n')
+    # sys.stdout.write('\n')

-    return(start_times, end_times)
+    return (start_times, end_times)


-def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 ):
+def filter_seg_times(seg_starts, seg_ends, pad_at_start=0.5, len_to_keep=2.5):
    """
    do some filtering on the segments found to select part for analysis
    rule: find the first segment that is at least (pad_at_start+len_to_keep sec long.
@@ -210,12 +212,14 @@ def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 )

    not_found = True
    for iseg in range(len(seg_starts)):
-        seg_dur = seg_ends[iseg]-seg_starts[iseg]
-        if (not_found & (seg_dur > (pad_at_start + len_to_keep))):
+        seg_dur = seg_ends[iseg] - seg_starts[iseg]
+        if not_found & (seg_dur > (pad_at_start + len_to_keep)):
            t_start = seg_starts[iseg] + pad_at_start
            sel_start.append(t_start)
            sel_end.append(t_start + len_to_keep)
-            sel_end_longer.append(max(t_start + len_to_keep, seg_ends[iseg]-pad_at_start))
+            sel_end_longer.append(
+                max(t_start + len_to_keep, seg_ends[iseg] - pad_at_start)
+            )
            not_found = False

    return sel_start, sel_end, sel_end_longer
--- a/opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py
+++ b/opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py
@@ -4,13 +4,15 @@ project_name: DBM
 created: 2020-20-07
 """

-import pandas as pd
-import numpy as np
 import glob
+
+import numpy as np
+import pandas as pd
+
 from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut


-def smooth(x,window_len=11,window='hanning'):
+def smooth(x, window_len=11, window="hanning"):
    """smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
@@ -45,42 +47,51 @@ def smooth(x,window_len=11,window='hanning'):
        raise (ValueError, "smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        raise (ValueError, "Input vector needs to be bigger than window size.")
-    if window_len<3:
+    if window_len < 3:
        return x
-    if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
-        raise (ValueError, "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
-    s=np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]]
-    #print(len(s))
-    if window == 'flat': #moving average
-        w=np.ones(window_len,'d')
+    if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
+        raise (
+            ValueError,
+            "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'",
+        )
+    s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]]
+    # print(len(s))
+    if window == "flat":  # moving average
+        w = np.ones(window_len, "d")
    else:
-        w=eval('np.'+window+'(window_len)')
-    y=np.convolve(w/w.sum(),s,mode='valid')
-    return y[int(window_len/2):-int(window_len/2)]
+        w = eval("np." + window + "(window_len)")
+    y = np.convolve(w / w.sum(), s, mode="valid")
+    return y[int(window_len / 2) : -int(window_len / 2)]
+

 def filter_by_confidence_and_thresh(x, fea, thresh):
-    if x['s_confidence'] > 0.2 and np.fabs(x[fea]) < thresh:
+    if x["s_confidence"] > 0.2 and np.fabs(x[fea]) < thresh:
        return x[fea]
    else:
        return np.NaN

-def add_au_emotion(x, emotion,emotion_type,exp_type):
+
+def add_au_emotion(x, emotion, emotion_type, exp_type):
    """
    computing individula emotion expressivity matrix
    Args:
        emotion: Action Unit
    """
-    error_reason = 'Pass'
-    if x['s_confidence'] > 0.8: #if using smooth, no need for 'success'
+    error_reason = "Pass"
+    if x["s_confidence"] > 0.8:  # if using smooth, no need for 'success'
        sum_r = 0
        cnt = 0
        for au in emotion:
            au_c_label = " AU{:02d}_c".format(au)
            au_r_label = " AU{:02d}_r".format(au)
-            if x[au_c_label]==1 and (not np.isnan(x[au_r_label])): #there are data with face in, but au_c=0
+            if x[au_c_label] == 1 and (
+                not np.isnan(x[au_r_label])
+            ):  # there are data with face in, but au_c=0
                sum_r += x[au_r_label]
                cnt += 6
-            if exp_type=='full' and x[au_c_label]==0: #Logic to compute emotion expressivity when all AU's are present 
+            if (
+                exp_type == "full" and x[au_c_label] == 0
+            ):  # Logic to compute emotion expressivity when all AU's are present
                cnt = 0
                break
        if cnt > 0:
@@ -90,11 +101,12 @@ def add_au_emotion(x, emotion,emotion_type,exp_type):
        v_emo = x[emotion_type] + sum_r
    else:
        v_emo = np.NaN
-        error_reason = 'confidence less than 80%'
+        error_reason = "confidence less than 80%"

    return v_emo, error_reason

-def add_au_occ(x, emotion,emotion_type):
+
+def add_au_occ(x, emotion, emotion_type):
    """
    computing individula emotion presence
    Args:
@@ -102,90 +114,199 @@ def add_au_occ(x, emotion,emotion_type):
    """
    au_pres = []
    em_pres = 0
-    error_reason = 'Pass'
-    if x['s_confidence'] > 0.8: #if using smooth, no need for 'success'
+    error_reason = "Pass"
+    if x["s_confidence"] > 0.8:  # if using smooth, no need for 'success'
        for au in emotion:
            au_c_label = " AU{:02d}_c".format(au)
-            if x[au_c_label]==1: #there are data with face in, but au_c=0
+            if x[au_c_label] == 1:  # there are data with face in, but au_c=0
                au_pres.append(1)

        if len(au_pres) == len(emotion):
            em_pres = 1
    else:
        em_pres = np.NaN
-        error_reason = 'confidence less than 80%'
+        error_reason = "confidence less than 80%"
    return em_pres, error_reason

-def emotion_exp(em_au,of,em_col,err_col):
+
+def emotion_exp(em_au, of, em_col, err_col):
    """
    Computing individual emotion expressivity and adding it to dataframe
    """
    for emotion in em_au:
-        of[[em_col[0],err_col]]=of.apply(add_au_emotion, args=(emotion,em_col[0],'partial',), axis=1, result_type='expand')
-        of[[em_col[1],err_col]]=of.apply(add_au_emotion, args=(emotion,em_col[1],'full',), axis=1, result_type='expand')
+        of[[em_col[0], err_col]] = of.apply(
+            add_au_emotion,
+            args=(
+                emotion,
+                em_col[0],
+                "partial",
+            ),
+            axis=1,
+            result_type="expand",
+        )
+        of[[em_col[1], err_col]] = of.apply(
+            add_au_emotion,
+            args=(
+                emotion,
+                em_col[1],
+                "full",
+            ),
+            axis=1,
+            result_type="expand",
+        )

-def emotion_pres(em_au,of,em_col,err_col):
+
+def emotion_pres(em_au, of, em_col, err_col):
    """
    Computing individual emotion presence and adding it to dataframe
    """
    for emotion in em_au:
-        of[[em_col,err_col]]=of.apply(add_au_occ, args=(emotion,em_col,), axis=1, result_type='expand')
+        of[[em_col, err_col]] = of.apply(
+            add_au_occ,
+            args=(
+                emotion,
+                em_col,
+            ),
+            axis=1,
+            result_type="expand",
+        )

-def calc_of_for_video(of,face_cfg,fe_cfg):
+
+def calc_of_for_video(of, face_cfg, fe_cfg):
    """
    Adding emotion expressivity and emotion presence columns to the dataframe
    """
-    new_cols = [fe_cfg.hap_exp,fe_cfg.sad_exp,fe_cfg.sur_exp,fe_cfg.fea_exp,fe_cfg.ang_exp,fe_cfg.dis_exp,fe_cfg.con_exp,
-               fe_cfg.pai_exp,fe_cfg.neg_exp,fe_cfg.pos_exp,fe_cfg.neu_exp,fe_cfg.com_lower_exp,fe_cfg.com_upper_exp,
-                fe_cfg.cai_exp,fe_cfg.com_exp,fe_cfg.happ_occ,fe_cfg.sad_occ,fe_cfg.sur_occ,fe_cfg.fea_occ,fe_cfg.ang_occ,
-                fe_cfg.dis_occ,fe_cfg.con_occ,fe_cfg.hap_exp_full,fe_cfg.sad_exp_full,fe_cfg.sur_exp_full,fe_cfg.fea_exp_full,
-                fe_cfg.ang_exp_full,fe_cfg.dis_exp_full,fe_cfg.con_exp_full,fe_cfg.pai_exp_full,fe_cfg.neg_exp_full,
-                fe_cfg.pos_exp_full,fe_cfg.neu_exp_full,fe_cfg.cai_exp_full,fe_cfg.com_lower_exp_full,fe_cfg.com_upper_exp_full,
-                fe_cfg.com_exp_full]
+    new_cols = [
+        fe_cfg.hap_exp,
+        fe_cfg.sad_exp,
+        fe_cfg.sur_exp,
+        fe_cfg.fea_exp,
+        fe_cfg.ang_exp,
+        fe_cfg.dis_exp,
+        fe_cfg.con_exp,
+        fe_cfg.pai_exp,
+        fe_cfg.neg_exp,
+        fe_cfg.pos_exp,
+        fe_cfg.neu_exp,
+        fe_cfg.com_lower_exp,
+        fe_cfg.com_upper_exp,
+        fe_cfg.cai_exp,
+        fe_cfg.com_exp,
+        fe_cfg.happ_occ,
+        fe_cfg.sad_occ,
+        fe_cfg.sur_occ,
+        fe_cfg.fea_occ,
+        fe_cfg.ang_occ,
+        fe_cfg.dis_occ,
+        fe_cfg.con_occ,
+        fe_cfg.hap_exp_full,
+        fe_cfg.sad_exp_full,
+        fe_cfg.sur_exp_full,
+        fe_cfg.fea_exp_full,
+        fe_cfg.ang_exp_full,
+        fe_cfg.dis_exp_full,
+        fe_cfg.con_exp_full,
+        fe_cfg.pai_exp_full,
+        fe_cfg.neg_exp_full,
+        fe_cfg.pos_exp_full,
+        fe_cfg.neu_exp_full,
+        fe_cfg.cai_exp_full,
+        fe_cfg.com_lower_exp_full,
+        fe_cfg.com_upper_exp_full,
+        fe_cfg.com_exp_full,
+    ]
    of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index)
-    of[fe_cfg.err_reason] = 'Pass'
+    of[fe_cfg.err_reason] = "Pass"

-    #Composite happiness expressivity
-    emotion_exp(face_cfg.happiness,of,[fe_cfg.hap_exp,fe_cfg.hap_exp_full],fe_cfg.err_reason)
-    #Composite sadness expressivity
-    emotion_exp(face_cfg.sadness,of,[fe_cfg.sad_exp,fe_cfg.sad_exp_full],fe_cfg.err_reason)
-    #Composite surprise expressivity
-    emotion_exp(face_cfg.surprise,of,[fe_cfg.sur_exp,fe_cfg.sur_exp_full],fe_cfg.err_reason)
-    #Composite fear expressivity
-    emotion_exp(face_cfg.fear,of,[fe_cfg.fea_exp,fe_cfg.fea_exp_full],fe_cfg.err_reason)
-    #Composite anger expressivity
-    emotion_exp(face_cfg.anger,of,[fe_cfg.ang_exp,fe_cfg.ang_exp_full],fe_cfg.err_reason)
-    #Composite disgust expressivity
-    emotion_exp(face_cfg.disgust,of,[fe_cfg.dis_exp,fe_cfg.dis_exp_full],fe_cfg.err_reason)
-    #Composite contempt expressivity
-    emotion_exp(face_cfg.contempt,of,[fe_cfg.con_exp,fe_cfg.con_exp_full],fe_cfg.err_reason)
-    #Composite Negative Expressivity
-    emotion_exp(face_cfg.NEG_ACTION_UNITS,of,[fe_cfg.neg_exp,fe_cfg.neg_exp_full],fe_cfg.err_reason)
-    #Composite Positive Expressivity
-    emotion_exp(face_cfg.POS_ACTION_UNITS,of,[fe_cfg.pos_exp,fe_cfg.pos_exp_full],fe_cfg.err_reason)
-    #Composite Neutral Expressivity
-    emotion_exp(face_cfg.NET_ACTION_UNITS,of,[fe_cfg.neu_exp,fe_cfg.neu_exp_full],fe_cfg.err_reason)
-    #Composite Activation Expressivity
-    emotion_exp(face_cfg.cai,of,[fe_cfg.cai_exp,fe_cfg.cai_exp_full],fe_cfg.err_reason)
-    #Composite Expressivity
-    emotion_exp(face_cfg.ACTION_UNITS,of,[fe_cfg.com_exp,fe_cfg.com_exp_full],fe_cfg.err_reason)
-    #Composite lower face expressivity
-    emotion_exp(face_cfg.LOWER_ACTION_UNITS,of,[fe_cfg.com_lower_exp,fe_cfg.com_lower_exp_full],fe_cfg.err_reason)
-    #Composite upper face Expressivity
-    emotion_exp(face_cfg.UPPER_ACTION_UNITS,of,[fe_cfg.com_upper_exp,fe_cfg.com_upper_exp_full],fe_cfg.err_reason)
-    #Composite pain expressivity
-    emotion_exp(face_cfg.pain,of,[fe_cfg.pai_exp,fe_cfg.pai_exp_full],fe_cfg.err_reason) 
-    #AU happiness presence
-    emotion_pres(face_cfg.happiness,of,fe_cfg.happ_occ,fe_cfg.err_reason)
-    #AU Sad presence
-    emotion_pres(face_cfg.sadness,of,fe_cfg.sad_occ,fe_cfg.err_reason)
-    #AU Surprise presence
-    emotion_pres(face_cfg.surprise,of,fe_cfg.sur_occ,fe_cfg.err_reason)
-    #AU fear presence
-    emotion_pres(face_cfg.fear,of,fe_cfg.fea_occ,fe_cfg.err_reason)
-    #AU anger presence
-    emotion_pres(face_cfg.anger,of,fe_cfg.ang_occ,fe_cfg.err_reason)
-    #AU disgust presence
-    emotion_pres(face_cfg.disgust,of,fe_cfg.dis_occ,fe_cfg.err_reason)
-    #AU contempt presence
-    emotion_pres(face_cfg.contempt,of,fe_cfg.con_occ,fe_cfg.err_reason)
+    # Composite happiness expressivity
+    emotion_exp(
+        face_cfg.happiness, of, [fe_cfg.hap_exp, fe_cfg.hap_exp_full], fe_cfg.err_reason
+    )
+    # Composite sadness expressivity
+    emotion_exp(
+        face_cfg.sadness, of, [fe_cfg.sad_exp, fe_cfg.sad_exp_full], fe_cfg.err_reason
+    )
+    # Composite surprise expressivity
+    emotion_exp(
+        face_cfg.surprise, of, [fe_cfg.sur_exp, fe_cfg.sur_exp_full], fe_cfg.err_reason
+    )
+    # Composite fear expressivity
+    emotion_exp(
+        face_cfg.fear, of, [fe_cfg.fea_exp, fe_cfg.fea_exp_full], fe_cfg.err_reason
+    )
+    # Composite anger expressivity
+    emotion_exp(
+        face_cfg.anger, of, [fe_cfg.ang_exp, fe_cfg.ang_exp_full], fe_cfg.err_reason
+    )
+    # Composite disgust expressivity
+    emotion_exp(
+        face_cfg.disgust, of, [fe_cfg.dis_exp, fe_cfg.dis_exp_full], fe_cfg.err_reason
+    )
+    # Composite contempt expressivity
+    emotion_exp(
+        face_cfg.contempt, of, [fe_cfg.con_exp, fe_cfg.con_exp_full], fe_cfg.err_reason
+    )
+    # Composite Negative Expressivity
+    emotion_exp(
+        face_cfg.NEG_ACTION_UNITS,
+        of,
+        [fe_cfg.neg_exp, fe_cfg.neg_exp_full],
+        fe_cfg.err_reason,
+    )
+    # Composite Positive Expressivity
+    emotion_exp(
+        face_cfg.POS_ACTION_UNITS,
+        of,
+        [fe_cfg.pos_exp, fe_cfg.pos_exp_full],
+        fe_cfg.err_reason,
+    )
+    # Composite Neutral Expressivity
+    emotion_exp(
+        face_cfg.NET_ACTION_UNITS,
+        of,
+        [fe_cfg.neu_exp, fe_cfg.neu_exp_full],
+        fe_cfg.err_reason,
+    )
+    # Composite Activation Expressivity
+    emotion_exp(
+        face_cfg.cai, of, [fe_cfg.cai_exp, fe_cfg.cai_exp_full], fe_cfg.err_reason
+    )
+    # Composite Expressivity
+    emotion_exp(
+        face_cfg.ACTION_UNITS,
+        of,
+        [fe_cfg.com_exp, fe_cfg.com_exp_full],
+        fe_cfg.err_reason,
+    )
+    # Composite lower face expressivity
+    emotion_exp(
+        face_cfg.LOWER_ACTION_UNITS,
+        of,
+        [fe_cfg.com_lower_exp, fe_cfg.com_lower_exp_full],
+        fe_cfg.err_reason,
+    )
+    # Composite upper face Expressivity
+    emotion_exp(
+        face_cfg.UPPER_ACTION_UNITS,
+        of,
+        [fe_cfg.com_upper_exp, fe_cfg.com_upper_exp_full],
+        fe_cfg.err_reason,
+    )
+    # Composite pain expressivity
+    emotion_exp(
+        face_cfg.pain, of, [fe_cfg.pai_exp, fe_cfg.pai_exp_full], fe_cfg.err_reason
+    )
+    # AU happiness presence
+    emotion_pres(face_cfg.happiness, of, fe_cfg.happ_occ, fe_cfg.err_reason)
+    # AU Sad presence
+    emotion_pres(face_cfg.sadness, of, fe_cfg.sad_occ, fe_cfg.err_reason)
+    # AU Surprise presence
+    emotion_pres(face_cfg.surprise, of, fe_cfg.sur_occ, fe_cfg.err_reason)
+    # AU fear presence
+    emotion_pres(face_cfg.fear, of, fe_cfg.fea_occ, fe_cfg.err_reason)
+    # AU anger presence
+    emotion_pres(face_cfg.anger, of, fe_cfg.ang_occ, fe_cfg.err_reason)
+    # AU disgust presence
+    emotion_pres(face_cfg.disgust, of, fe_cfg.dis_occ, fe_cfg.err_reason)
+    # AU contempt presence
+    emotion_pres(face_cfg.contempt, of, fe_cfg.con_occ, fe_cfg.err_reason)