Code refactoring for the speech DBM group

This commit is contained in:
jordi.hasianta
2022-09-15 20:47:56 +07:00
parent 609c752cfa
commit 8b02866483
2 changed files with 87 additions and 68 deletions

View File

@@ -4,24 +4,26 @@ project_name: DBM
created: 2020-13-11 created: 2020-13-11
""" """
import os
import numpy as np
import pandas as pd
import glob import glob
from os.path import join
import logging import logging
import os
import shutil import shutil
from os.path import join
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util, util as ut import pandas as pd
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
speech_dir = 'speech/speech_feature' speech_dir = "speech/speech_feature"
speech_ext = '_nlp.csv' speech_ext = "_nlp.csv"
transcribe_ext = 'speech/deepspeech/*_transcribe.csv' transcribe_ext = "speech/deepspeech/*_transcribe.csv"
def run_speech_feature(video_uri, out_dir, r_config, tran_tog):
def run_speech_feature(video_uri, out_dir, r_config, tran_tog, save=True):
""" """
Processing all patients to fetch NLP features Processing all patients to fetch NLP features
------------------- -------------------
@@ -30,21 +32,27 @@ def run_speech_feature(video_uri, out_dir, r_config, tran_tog):
video_uri: video path; r_config: raw variable config object video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output out_dir: (str) Output directory for processed output
""" """
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
transcribe_path = glob.glob(join(out_loc, transcribe_ext)) transcribe_path = glob.glob(join(out_loc, transcribe_ext))
if len(transcribe_path)>0: transcribe_df = pd.read_csv(transcribe_path[0])
df_speech = n_util.process_speech(transcribe_df, r_config)
transcribe_df = pd.read_csv(transcribe_path[0]) if save:
df_speech= n_util.process_speech(transcribe_df, r_config) logger.info("Saving Output file {} ".format(out_loc))
logger.info("filename {} ".format(fl_name))
ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)
logger.info('Saving Output file {} '.format(out_loc)) if (tran_tog is None) or (tran_tog != "on"):
ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext) if os.getcwd() == "/app": # docker version
shutil.rmtree(os.path.dirname(transcribe_path[0]))
else: # api_lib version
if fl_name.endswith("mp4"):
shutil.rmtree((out_dir + "/" + fl_name).replace("//", "/"))
else:
shutil.rmtree(
(out_dir + "/" + fl_name.strip(".mp4")).replace("//", "/")
)
if (tran_tog == None) or (tran_tog != 'on'): return df_speech
shutil.rmtree(os.path.dirname(transcribe_path[0]))
except Exception as e:
logger.error('Failed to process video file')

View File

@@ -4,23 +4,27 @@ project_name: DBM
created: 2020-10-11 created: 2020-10-11
""" """
import pandas as pd
import numpy as np
import librosa
import glob import glob
from os.path import join
import logging import logging
from os.path import join
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util, util as ut import numpy as np
import pandas as pd
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger=logging.getLogger() logger = logging.getLogger()
formant_dir = 'speech/deepspeech' formant_dir = "speech/deepspeech"
csv_ext = '_transcribe.csv' csv_ext = "_transcribe.csv"
error_txt = 'error: length less than 0.1' error_txt = "error: length less than 0.1"
def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur):
def calc_transcribe(
video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur, save=True
):
""" """
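Preparing the transcription dataframe (DeepSpeech transcript, total time, error reason) Preparing the transcription dataframe (DeepSpeech transcript, total time, error reason)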
Args: Args:
@@ -31,28 +35,36 @@ def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path
text = n_util.process_deepspeech(audio_file, deep_path) text = n_util.process_deepspeech(audio_file, deep_path)
df_formant = pd.DataFrame([text], columns=[r_config.nlp_transcribe]) df_formant = pd.DataFrame([text], columns=[r_config.nlp_transcribe])
df_formant.replace('', np.nan, regex=True,inplace=True) df_formant.replace("", np.nan, regex=True, inplace=True)
df_formant[r_config.nlp_totalTime] = aud_dur df_formant[r_config.nlp_totalTime] = aud_dur
df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release df_formant[
df_formant['dbm_master_url'] = video_uri r_config.err_reason
] = "Pass" # will replace with threshold in future release
df_formant["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
return df_formant
def empty_transcribe(video_uri, out_loc, fl_name, r_config):
def empty_transcribe(video_uri, out_loc, fl_name, r_config, save=True):
""" """
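Preparing an empty transcription dataframe (NaN values plus an error reason) when the audio is too short to process Preparing an empty transcription dataframe (NaN values plus an error reason) when the audio is too short to process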
""" """
cols = [r_config.nlp_transcribe, r_config.nlp_totalTime, r_config.err_reason] cols = [r_config.nlp_transcribe, r_config.nlp_totalTime, r_config.err_reason]
out_val = [[np.nan, np.nan, error_txt]] out_val = [[np.nan, np.nan, error_txt]]
df_fm = pd.DataFrame(out_val, columns = cols) df_fm = pd.DataFrame(out_val, columns=cols)
df_fm['dbm_master_url'] = video_uri df_fm["dbm_master_url"] = video_uri
logger.info('Saving Output file {} '.format(out_loc)) if save:
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
return df_fm
def run_transcribe(video_uri, out_dir, r_config, deep_path):
def run_transcribe(video_uri, out_dir, r_config, deep_path, save=True):
""" """
Processing all patients to fetch the speech transcription Processing all patients to fetch the speech transcription
@@ -60,24 +72,23 @@ def run_transcribe(video_uri, out_dir, r_config, deep_path):
--------------- ---------------
Args: Args:
video_uri: video path; r_config: raw variable config object video_uri: video path; r_config: raw variable config object
out_dir: (str) Output directory for processed output; deep_path: deepspeech build path out_dir: (str) Output directory for processed output;
deep_path: deepspeech build path
""" """
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter)>0: if len(aud_filter) > 0:
audio_file = aud_filter[0] audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file) aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.1:
logger.info("Output file {} size is less than 0.1 sec".format(audio_file))
if float(aud_dur) < 0.1: df = empty_transcribe(video_uri, out_loc, fl_name, r_config)
logger.info('Output file {} size is less than 0.1 sec'.format(audio_file)) return df
empty_transcribe(video_uri, out_loc, fl_name, r_config)
return
calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur)
except Exception as e:
logger.error('Failed to process audio file')
df = calc_transcribe(
video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur
)
return df