change directory name and run scripts to account for name change

This commit is contained in:
ADParedes
2022-09-20 13:27:37 -05:00
parent 8829c31b7b
commit 3b8f5ee363
10242 changed files with 2 additions and 2027522 deletions

View File

@@ -0,0 +1,203 @@
import pandas as pd
import json
from os.path import exists
def read_derivedAttr(ar):
    """Summarise the derived-variables CSV: row ids plus attribute names per group.

    Args:
        ar: Root directory of the processed data. The CSV is looked up at
            ``<ar>/derived_variables/derived_output.csv``.

    Returns:
        When the CSV exists, a dict with the keys ``ids`` (values of the
        ``Filename`` column), ``facialAttr``, ``acousticAttr``,
        ``movementAttr`` and ``speechAttr`` (column names containing
        ``fac_``, ``aco_``, ``mov_`` and ``nlp_`` respectively).
        When the CSV is missing, an empty ``pandas.DataFrame`` so that
        ``len(result) == 0`` still holds for callers.
    """
    derived_path = ar + "/derived_variables/derived_output.csv"
    if not exists(derived_path):
        return pd.DataFrame()
    derived_df = pd.read_csv(derived_path)

    def columns_tagged(tag):
        # Substring match, not prefix match: any column containing the tag counts.
        return [col for col in derived_df.columns if tag in col]

    return {
        "ids": derived_df["Filename"].tolist(),
        "facialAttr": columns_tagged("fac_"),
        "acousticAttr": columns_tagged("aco_"),
        "movementAttr": columns_tagged("mov_"),
        "speechAttr": columns_tagged("nlp_"),
    }
def read_medatada(ar, ar2):
    """Load the metadata CSV at ``<ar>/<ar2>`` and return it as a JSON string.

    Returns an empty dict when the file does not exist; otherwise the CSV
    rows serialised with ``DataFrame.to_json(orient="records")``.
    """
    metadata_path = ar + "/" + ar2
    if not exists(metadata_path):
        return {}
    return pd.read_csv(metadata_path).to_json(orient="records")
def read_derivedDf(ar):
    """Return ``<ar>/derived_variables/derived_output.csv`` as a DataFrame.

    An empty DataFrame is returned when the file is absent.
    """
    csv_path = ar + "/derived_variables/derived_output.csv"
    return pd.read_csv(csv_path) if exists(csv_path) else pd.DataFrame()
def read_rawFacialDf(ar, id):
    """Merge the per-recording raw facial CSVs into one DataFrame.

    Args:
        ar: Root directory of the processed data.
        id: Recording identifier; it names both the sub-directory under
            ``<ar>/raw_variables/`` and the CSV file prefix.

    Returns:
        A DataFrame that starts from the landmark CSV and then receives the
        columns of the expressivity, action-unit and asymmetry CSVs (in that
        order, later files overwriting same-named columns). Bookkeeping
        columns listed in ``skip_cols`` are dropped, expressivity columns
        containing ``"AU"`` are ignored, the column order is reversed and
        NaNs are replaced by 0. An empty DataFrame is returned when any of
        the four CSVs is missing.
    """
    skip_cols = ["frame", "face_id", "error_reason", "timestamp", "confidence", "success",
                 "dbm_master_url", " confidence", " face_id", " success", " timestamp",
                 "s_confidence"]
    facial_dir = ar + "/raw_variables/" + id + "/facial/"
    asym_path = facial_dir + "face_asymmetry/" + id + "_facasym.csv"
    au_path = facial_dir + "face_au/" + id + "_facau.csv"
    expr_path = facial_dir + "face_expressivity/" + id + "_facemo.csv"
    landmark_path = facial_dir + "face_landmark/" + id + "_faclmk.csv"

    # The landmark file provides the base frame, so it is as mandatory as the
    # other three; without this check a missing landmark CSV raised
    # FileNotFoundError instead of yielding the usual empty result.
    if not all(exists(path) for path in (asym_path, au_path, expr_path, landmark_path)):
        return pd.DataFrame()

    landmark = pd.read_csv(landmark_path)
    face_df = landmark.loc[:, ~landmark.columns.isin(skip_cols)].copy()

    facial_expr = pd.read_csv(expr_path)
    for col in facial_expr.columns:
        # Columns containing "AU" are left to the dedicated action-unit file below.
        if col not in skip_cols and "AU" not in col:
            face_df[col] = facial_expr[col]

    for source_path in (au_path, asym_path):
        source = pd.read_csv(source_path)
        for col in source.columns:
            if col not in skip_cols:
                face_df[col] = source[col]

    return face_df[face_df.columns[::-1]].fillna(0)
def read_rawMovementDf(ar, id):
    """Merge the per-recording raw movement CSVs into one DataFrame.

    Args:
        ar: Root directory of the processed data.
        id: Recording identifier; it names both the sub-directory under
            ``<ar>/raw_variables/`` and the CSV file prefix.

    Returns:
        A DataFrame that starts from the head-pose CSV and then receives the
        columns of the head-movement, gaze, eye-blink, facial-tremor and
        voice-tremor CSVs (in that order, later files overwriting same-named
        columns). Columns listed in ``skip_cols`` are dropped and NaNs are
        replaced by 0. An empty DataFrame is returned when the head-pose,
        head-movement or gaze CSV is missing. The blink and tremor CSVs are
        merged only when present.
    """
    skip_cols = ["error_reason", "dbm_master_url", "Frames", " Frames", "vid_dur", "fps"]
    movement_dir = ar + "/raw_variables/" + id + "/movement/"

    def csv_path(folder, suffix):
        return movement_dir + folder + "/" + id + suffix

    head_pose_path = csv_path("head_pose", "_headpose.csv")
    required = [
        csv_path("head_movement", "_headmov.csv"),
        csv_path("gaze", "_eyegaze.csv"),
    ]
    # These three were read unconditionally before, so a recording without
    # blink or tremor output raised FileNotFoundError; now they are skipped.
    optional = [
        csv_path("eye_blink", "_eyeblinks.csv"),
        csv_path("facial_tremor", "_fac_tremor.csv"),
        csv_path("voice_tremor", "_vtremor.csv"),
    ]

    if not all(exists(path) for path in [head_pose_path] + required):
        return pd.DataFrame()

    head_pose = pd.read_csv(head_pose_path)
    movement_df = head_pose.loc[:, ~head_pose.columns.isin(skip_cols)].copy()

    for path in required + [p for p in optional if exists(p)]:
        source = pd.read_csv(path)
        for col in source.columns:
            if col not in skip_cols:
                # Assignment aligns on the row index of the head-pose frame.
                movement_df[col] = source[col]

    return movement_df.fillna(0)
def read_rawAcousticDf(ar, id):
    """Merge the per-recording raw acoustic CSVs into one DataFrame.

    Args:
        ar: Root directory of the processed data.
        id: Recording identifier; it names both the sub-directory under
            ``<ar>/raw_variables/`` and the CSV file prefix.

    Returns:
        A DataFrame that starts from the formant CSV and then receives the
        columns of the glottal-noise, harmonic-noise, intensity, MFCC, pitch,
        pause-segment, jitter, shimmer and voice-frame-score CSVs (in that
        order, later files overwriting same-named columns). Columns listed
        in ``skip_cols`` are dropped and NaNs are replaced by 0. An empty
        DataFrame is returned when any of the first six CSVs is missing.
        The last four are merged only when present.
    """
    skip_cols = ["error_reason", "dbm_master_url", "Frames", " Frames", "aco_voicelabel"]
    acoustic_dir = ar + "/raw_variables/" + id + "/acoustic/"

    def csv_path(folder, suffix):
        return acoustic_dir + folder + "/" + id + suffix

    formant_path = csv_path("formant_freq", "_formant.csv")
    required = [
        csv_path("glottal_noise", "_gne.csv"),
        csv_path("harmonic_noise", "_hnr.csv"),
        csv_path("intensity", "_intensity.csv"),
        csv_path("mfcc", "_mfcc.csv"),
        csv_path("pitch", "_pitch.csv"),
    ]
    # These were read without an existence check before, so a recording
    # lacking any of them raised FileNotFoundError; now they are skipped.
    optional = [
        csv_path("pause_segment", "_pausechar.csv"),
        csv_path("jitter", "_jitter.csv"),
        csv_path("shimmer", "_shimmer.csv"),
        csv_path("voice_frame_score", "_voiceprev.csv"),
    ]

    if not all(exists(path) for path in [formant_path] + required):
        # Must be an *instance*: returning the DataFrame class itself breaks
        # callers that index the result with ``.loc`` or read ``.columns``.
        return pd.DataFrame()

    formant = pd.read_csv(formant_path)
    acoustic_df = formant.loc[:, ~formant.columns.isin(skip_cols)].copy()

    for path in required + [p for p in optional if exists(p)]:
        source = pd.read_csv(path)
        for col in source.columns:
            if col not in skip_cols:
                # Assignment aligns on the row index of the formant frame.
                acoustic_df[col] = source[col]

    return acoustic_df.fillna(0)
def load(ar=None):
    """Smoke-load the derived-variables CSV and record the recording ids.

    The previous body called ``read_derivedAttr()`` and ``read_derivedDf()``
    without their required directory argument, so it always raised TypeError.

    Args:
        ar: Root directory of the processed data. When omitted, the first
            command-line argument is used.

    Raises:
        SystemExit: if ``ar`` is omitted and no command-line argument exists.
    """
    global videoIds
    if ar is None:
        import sys  # local import: this module has no top-level ``import sys``

        if len(sys.argv) < 2:
            raise SystemExit("usage: python <script> <data_root>")
        ar = sys.argv[1]

    derived_attrs = read_derivedAttr(ar)
    read_derivedDf(ar)
    # ``videoIds`` was declared global but never assigned; populating it with
    # the ids is presumably the intent -- TODO confirm against any consumer.
    # read_derivedAttr returns an empty frame (len 0) when the CSV is missing.
    videoIds = derived_attrs["ids"] if len(derived_attrs) else []


if __name__ == "__main__":
    load()

View File

@@ -0,0 +1,381 @@
from flask import Flask
from flask import request
import pandas as pd
import json
import numpy as np
from sklearn.decomposition import PCA
import sys
import process_input_data
app = Flask(__name__)
@app.route('/fetchIndividualFacialRawData', methods=["POST"])
def fetchIndividualFacialRawData():
    """Return the raw facial frame of one recording as JSON records.

    Reads ``id`` from the POSTed JSON body; a falsy id falls back to the
    first entry of ``inputData['Filename']``. Responds with ``{}`` when the
    reader yields an empty frame.
    """
    video_id = request.json['id'] or inputData['Filename'][0]
    raw_frame = process_input_data.read_rawFacialDf(sys.argv[1], video_id)
    return {} if raw_frame.empty else raw_frame.fillna(0).to_json(orient="records")
@app.route('/fetchIndividualMovementRawData', methods=["POST"])
def fetchIndividualMovementRawData():
    """Return the raw movement frame of one recording as JSON records.

    Reads ``id`` from the POSTed JSON body; a falsy id falls back to the
    first entry of ``inputData['Filename']``. Responds with ``{}`` when the
    reader yields an empty frame.
    """
    video_id = request.json['id'] or inputData['Filename'][0]
    raw_frame = process_input_data.read_rawMovementDf(sys.argv[1], video_id)
    return {} if raw_frame.empty else raw_frame.fillna(0).to_json(orient="records")
@app.route('/fetchIndividualAcousticRawData', methods=["POST"])
def fetchIndividualAcousticRawData():
    """Return the raw acoustic frame of one recording as JSON records.

    Reads ``id`` from the POSTed JSON body; a falsy id falls back to the
    first entry of ``inputData['Filename']``. Responds with ``{}`` when the
    reader's result reports itself as empty.
    """
    video_id = request.json['id'] or inputData['Filename'][0]
    raw_frame = process_input_data.read_rawAcousticDf(sys.argv[1], video_id)
    return {} if raw_frame.empty else raw_frame.fillna(0).to_json(orient="records")
@app.route('/fetchIndividualDerivedData', methods=["POST"])
def fetchIndividualDerivedData():
    """Return the derived variables of one recording as JSON records.

    Responds with ``{}`` when ``inputData`` has fewer than two columns.
    Otherwise selects the rows whose ``Filename`` equals the POSTed ``id``
    (or just the first row when the id is falsy), drops the ``Filename``
    column and replaces NaNs by 0.
    """
    if len(inputData.columns) < 2:
        return {}
    value_columns = ~inputData.columns.isin(['Filename'])
    video_id = request.json['id']
    if not video_id:
        selection = inputData.iloc[:1, :].loc[:, value_columns]
    else:
        selection = inputData.loc[inputData['Filename'] == video_id, value_columns]
    return selection.fillna(0).to_json(orient="records")
def _timelineBucketMeans(frame, attributes, timepoints):
    """Average each attribute over ``timepoints`` consecutive, near-equal row chunks.

    Returns a dict mapping every attribute name to a list of ``timepoints``
    floats. Chunks never overlap and together cover every row exactly once.
    A chunk without rows (fewer rows than timepoints) contributes 0.0.
    """
    chunk_size, leftover = divmod(len(frame), timepoints)
    series = {name: [] for name in attributes}
    start = 0
    for t in range(timepoints):
        # The first ``leftover`` chunks take one extra row each.
        stop = start + chunk_size + (1 if t < leftover else 0)
        chunk = frame.iloc[start:stop]
        for name in attributes:
            series[name].append(float(chunk[name].mean()) if stop > start else 0.0)
        start = stop
    return series


@app.route('/fetchIndividualFacialTimelineData', methods=["POST"])
def fetchIndividualFacialTimelineData():
    """Return a 20-point timeline of selected facial and head-pose attributes.

    Reads ``id`` from the POSTed JSON body; a falsy id falls back to the
    first entry of ``inputData['Filename']``. Each attribute is averaged over
    20 consecutive chunks of the raw per-frame data.

    Returns:
        ``{}`` when no facial data is available; otherwise a dict mapping
        attribute name to a list of 20 floats. The three head-pose attributes
        are included only when movement data is available.

    Notes:
        The previous chunking divided the facial frame by 19 while emitting
        20 points, tested ``t <= reminder`` instead of ``t < reminder``, and
        ignored the remainder offset for later chunks, so rows were counted
        twice or dropped. It also divided by zero for clips shorter than the
        divisor.
    """
    timepoints = 20
    video_id = request.json['id'] or inputData['Filename'][0]

    dfFace = process_input_data.read_rawFacialDf(sys.argv[1], video_id)
    if dfFace.empty:
        return {}
    attrOfInterest = ["fac_angintsoft", "fac_feaintsoft", "fac_disintsoft", "fac_sadintsoft",
                      "fac_conintsoft", "fac_surintsoft", "fac_hapintsoft",
                      "fac_AU01int", "fac_AU02int", "fac_AU04int", "fac_AU05int", "fac_AU06int",
                      "fac_AU07int", "fac_AU09int", "fac_AU10int", "fac_AU12int", "fac_AU14int",
                      "fac_AU15int", "fac_AU17int", "fac_AU20int", "fac_AU23int", "fac_AU25int",
                      "fac_AU26int", "fac_asymmaskcom", "fac_asymmaskeye", "fac_asymmaskeyebrow",
                      "fac_asymmaskmouth", "fac_paiintsoft", "fac_comintsoft",
                      "fac_comlowintsoft", "fac_comuppintsoft"]
    timelineObject = _timelineBucketMeans(dfFace.fillna(0), attrOfInterest, timepoints)

    dfMovement = process_input_data.read_rawMovementDf(sys.argv[1], video_id)
    if dfMovement.empty:
        return timelineObject
    movementAttr = ["mov_hposepitch", "mov_hposeyaw", "mov_hposeroll"]
    timelineObject.update(_timelineBucketMeans(dfMovement.fillna(0), movementAttr, timepoints))
    return timelineObject
@app.route('/getRawAttributesAndIds')
def getRawAttributesAndIds():
    """List the raw column names per modality plus the known recording ids.

    Column names come from the module-level raw frames
    (``individualFacialRawData`` and its acoustic/movement siblings); a frame
    that reports itself empty contributes an empty list. ``ids`` is taken
    from ``rawDataArgs`` when it is non-empty.
    """
    cached_frames = (
        ('facial', individualFacialRawData),
        ('acoustic', individualAcousticRawData),
        ('movement', individualMovementRawData),
    )
    result = {}
    for modality, frame in cached_frames:
        result[modality] = [] if frame.empty else list(frame.columns)
    result['ids'] = rawDataArgs['ids'] if len(rawDataArgs) > 0 else []
    return result
def _downsampleRows(source, n_rows):
    """Reduce ``source`` to ``n_rows`` rows by averaging contiguous row chunks.

    ``n_rows`` must be positive. Chunks are near-equal in size, never overlap
    and together cover every source row exactly once. A frame that already
    has ``n_rows`` rows is returned unchanged.
    """
    if len(source) == n_rows:
        return source
    chunk_size, leftover = divmod(len(source), n_rows)
    # The first ``leftover`` chunks take one extra row each.
    sizes = [chunk_size + 1] * leftover + [chunk_size] * (n_rows - leftover)
    chunk_of_row = np.repeat(np.arange(n_rows), sizes)
    return source.groupby(chunk_of_row).mean()


def individualCorrMatrixData(id, corrArgs):
    """Build one frame holding the requested raw attributes of a recording.

    Args:
        id: Recording identifier; a falsy value falls back to the first entry
            of ``inputData['Filename']``.
        corrArgs: Attribute (column) names to collect. The facial, movement
            and acoustic raw frames are read only when at least one name
            contains ``fac_``, ``mov_`` or ``aco_`` respectively.

    Returns:
        A DataFrame with the requested columns found in the loaded frames,
        NaNs replaced by 0. The shortest non-empty modality defines the row
        count; longer modalities are averaged down to it in contiguous
        chunks. An empty DataFrame is returned when nothing matches.

    Notes:
        The no-id path used to decide whether to read acoustic data by
        inspecting the movement frame's columns instead of ``corrArgs``; both
        paths now apply the same ``aco_`` rule. The averaging also ignored
        the remainder offset for later chunks, so some rows were counted
        twice and trailing rows were dropped.
    """
    video_id = id if id else inputData['Filename'][0]
    readers = (
        ("fac_", process_input_data.read_rawFacialDf),
        ("mov_", process_input_data.read_rawMovementDf),
        ("aco_", process_input_data.read_rawAcousticDf),
    )

    # Collected in facial, movement, acoustic order; that order also decides
    # which frame wins a tie for "shortest" and the final column order.
    selected = []
    for tag, reader in readers:
        if not any(tag in name for name in corrArgs):
            continue
        raw = reader(sys.argv[1], video_id)
        # Defensive: anything that is not a DataFrame instance means "no data".
        if not isinstance(raw, pd.DataFrame):
            continue
        picked = raw.loc[:, raw.columns.isin(corrArgs)]
        if not picked.empty:
            selected.append(picked)

    if not selected:
        return pd.DataFrame()

    target_rows = min(len(frame) for frame in selected)
    base_pos = next(pos for pos, frame in enumerate(selected) if len(frame) == target_rows)
    all_df = selected[base_pos].copy()
    for pos, frame in enumerate(selected):
        if pos == base_pos:
            continue
        reduced = _downsampleRows(frame, target_rows)
        for col in reduced.columns:
            # Assignment aligns on the row index of the base frame.
            all_df[col] = reduced[col]
    return all_df.fillna(0)
@app.route("/performPCA")
def performPCA(PCAargs=None):
    """Project the derived variables onto their first two principal components.

    Args:
        PCAargs: Optional list of column names to restrict the PCA to. When
            empty or omitted, every column except ``Filename`` is used.

    Returns:
        ``{}`` when ``inputData`` has fewer than two columns or no feature
        column is selected; otherwise a dict mapping each ``Filename`` to
        ``[pc1, pc2]``, each component standardised to zero mean and unit
        standard deviation.

    Notes:
        A component that PCA does not produce (fewer than two components) or
        whose standard deviation is zero is reported as all zeros. This
        replaces an IndexError and NaN/inf values respectively.
    """
    if len(inputData.columns) < 2:
        return {}
    if PCAargs:
        keep = inputData.columns.isin(PCAargs)
    else:
        keep = ~inputData.columns.isin(['Filename'])
    features = inputData.loc[:, keep].fillna(0)
    if features.shape[1] == 0:
        return {}

    scores = PCA().fit_transform(features)

    def standardized(k):
        if k >= scores.shape[1]:
            return [0.0] * len(scores)
        component = scores[:, k]
        spread = np.std(component)
        # ``not spread > 0`` also catches NaN, not only an exact zero.
        if not spread > 0:
            return [0.0] * len(component)
        return ((component - component.mean()) / spread).tolist()

    pc1 = standardized(0)
    pc2 = standardized(1)
    pca_results = [[first, second] for first, second in zip(pc1, pc2)]
    return dict(zip(inputData['Filename'], pca_results))
@app.route('/updatePCA', methods=["POST"])
def updatePCA():
    """Run ``performPCA`` on the columns named by ``PCA_args`` in the POSTed JSON body."""
    return performPCA(request.json['PCA_args'])
@app.route('/defaultDistribution', methods=["POST"])
def defaultDistribution():
    """Wrap ``updateDistribution`` for the POSTed ``distributionArgs`` under a ``data`` key."""
    requested = request.json['distributionArgs']
    distribution = updateDistribution(requested)
    return {"data": distribution}
@app.route('/updateDistribution', methods=["POST"])
def updateDistribution(attr=None):
    """Return the requested derived attributes for every recording.

    Args:
        attr: Optional list of attribute (column) names. When empty or
            omitted, the names are read from ``distributionArgs`` in the
            POSTed JSON body.

    Returns:
        ``{}`` when ``inputData`` has fewer than two columns or no attribute
        is requested; otherwise a dict keyed by ``Filename`` whose values are
        dicts holding ``id`` plus one entry per requested attribute, with
        NaNs replaced by 0.
    """
    if len(inputData.columns) < 2:
        return {}
    distributionArgs = attr if attr else request.json['distributionArgs']
    # Bail out before slicing: with nothing requested the result is empty anyway.
    if not distributionArgs:
        return {}

    wanted = ['Filename'] + list(distributionArgs)
    df = inputData.loc[:, inputData.columns.isin(wanted)].fillna(0)

    result = {}
    for _, row in df.iterrows():
        name = row['Filename']
        entry = {'id': name}
        for key in distributionArgs:
            entry[key] = row[key]
        result[name] = entry
    return result
@app.route('/defaultCorrMatrix', methods=["POST"])
def defaultCorrMatrix():
    """Forward ``corrMatrix_args``, ``individual`` and ``id`` from the POSTed JSON to ``updateCorrMatrix``."""
    payload = request.json
    corrMatrixArgs = payload['corrMatrix_args']
    individual_corr = payload['individual']
    video_id = payload['id']
    return updateCorrMatrix(corrMatrixArgs, individual_corr, video_id)
@app.route('/updateCorrMatrix', methods=["POST"])
def updateCorrMatrix(attr=[], individual = False, id=None):
    """Return the Spearman correlation matrix of the requested attributes.

    When ``attr`` is empty, the attribute list and the ``individual`` flag
    are read from the POSTed JSON body. Responds with ``{}`` unless between
    2 and 24 attributes are requested. With ``individual`` set, the data is
    the per-recording raw frame from ``individualCorrMatrixData`` (the id
    comes from the request when not passed in); otherwise it is the matching
    columns of ``inputData``. NaNs in the matrix are replaced by 0.
    """
    if len(attr) == 0:
        payload = request.json
        corrMatrixArgs = payload['corrMatrix_args']
        individual = payload['individual']
    else:
        corrMatrixArgs = attr

    if not 2 <= len(corrMatrixArgs) <= 24:
        return {}

    if individual:
        video_id = id or request.json['id']
        df = individualCorrMatrixData(video_id, corrMatrixArgs)
    else:
        df = inputData.loc[:, inputData.columns.isin(corrMatrixArgs)]

    return df.corr(method="spearman").fillna(0).to_dict()
@app.route("/getDerivedAttributes")
def getDerivedAttributes():
    """Expose the derived attribute names per group plus the recording ids.

    Every list is empty when ``rawDataArgs`` is empty.
    """
    source_keys = {
        "facial": 'facialAttr',
        "acoustic": 'acousticAttr',
        "movement": 'movementAttr',
        "speech": "speechAttr",
        "ids": 'ids',
    }
    if len(rawDataArgs) == 0:
        return {label: [] for label in source_keys}
    return {label: rawDataArgs[key] for label, key in source_keys.items()}
@app.route("/getMetadata")
def getMetadata():
    """Return the module-level ``metadata`` value, or ``{}`` when it is empty."""
    return metadata if len(metadata) else {}
def load():
    """Populate the module-level data used by the routes from the command line.

    ``sys.argv[1]`` is the data root and the optional ``sys.argv[2]`` the
    metadata file name relative to it. Sets ``rawDataArgs``, ``metadata``
    (``[]`` without a second argument), ``inputData`` and the three raw
    frames of the first recording. ``individualDerivedData`` is declared
    global but not assigned here.
    """
    global rawDataArgs, metadata, inputData
    global individualFacialRawData, individualDerivedData, individualAcousticRawData, individualMovementRawData

    data_root = sys.argv[1]
    rawDataArgs = process_input_data.read_derivedAttr(data_root)
    metadata = process_input_data.read_medatada(data_root, sys.argv[2]) if len(sys.argv) > 2 else []

    inputData = process_input_data.read_derivedDf(data_root)
    if len(inputData) == 0:
        # Keep a single blank id so the ['Filename'][0] lookups below succeed.
        inputData['Filename'] = [""]
    else:
        # Keep the second '/'-separated path component, minus ".mp4".
        inputData['Filename'] = inputData['Filename'].apply(
            lambda path: path.split('/')[1].replace(".mp4", ""))

    first_id = inputData['Filename'][0]
    individualFacialRawData = process_input_data.read_rawFacialDf(data_root, first_id)
    individualMovementRawData = process_input_data.read_rawMovementDf(data_root, first_id)
    individualAcousticRawData = process_input_data.read_rawAcousticDf(data_root, first_id)


if __name__ == "__main__":
    load()
    # NOTE(review): debug=True starts Flask's development server with the
    # interactive debugger enabled; use this for local development only.
    # A waitress-based launch was previously sketched here as an alternative.
    app.run(debug=True)