-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
111 lines (89 loc) · 3.48 KB
/
main.py
File metadata and controls
111 lines (89 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import glob
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
def normalize_model_features(models_dir="data/model_csvs", save=False):
    """
    Load every per-model CSV in *models_dir*, tag each row with its model
    name, and min-max scale the shared numeric feature columns into [-1, 1].

    Parameters
    ----------
    models_dir : str
        Directory containing one CSV of block metrics per model.
    save : bool
        If True, also write the combined normalized CSV back into
        *models_dir* and persist the fitted scaler with joblib so the same
        scaling can be reapplied at inference time.

    Returns
    -------
    pd.DataFrame
        All rows from all CSVs concatenated, with a "model_name" column
        added and the columns in ``numeric_fields`` scaled to [-1, 1].

    Raises
    ------
    FileNotFoundError
        If *models_dir* contains no ``.csv`` files. (Without this guard,
        ``pd.concat([])`` would raise an opaque ValueError.)
    """
    csv_files = sorted([f for f in os.listdir(models_dir) if f.endswith(".csv")])
    if not csv_files:
        raise FileNotFoundError(f"No .csv files found in directory: {models_dir}")
    dfs = []
    for file in csv_files:
        # The model name is the CSV filename without its extension.
        model_name = os.path.splitext(file)[0]
        df = pd.read_csv(os.path.join(models_dir, file))
        df["model_name"] = model_name
        dfs.append(df)
    combined_df = pd.concat(dfs, ignore_index=True)
    # Columns scaled jointly across ALL models so values stay comparable
    # between models after normalization.
    numeric_fields = [
        "FLOPs (G)",
        "Param Memory (MB)",
        "Activation Size (MB)",
        "pi_execution_time"
    ]
    scaler = MinMaxScaler(feature_range=(-1, 1))
    combined_df[numeric_fields] = scaler.fit_transform(combined_df[numeric_fields])
    if save:
        combined_df.to_csv(os.path.join(models_dir, "combined_normalized.csv"), index=False)
        import joblib  # local import: only needed on the save path
        joblib.dump(scaler, os.path.join(models_dir, "system_feature_scaler.pkl"))
        print("✅ Saved scaler for future inference")
    return combined_df
def split_by_model(combined_df, save_dir="data/normalized_model_csvs"):
    """
    Write one CSV per model_name found in *combined_df* into *save_dir*.

    All columns are preserved as-is. Returns a mapping of
    {model_name: per-model DataFrame}.
    """
    os.makedirs(save_dir, exist_ok=True)
    per_model = {}
    for name, frame in combined_df.groupby("model_name"):
        out_path = os.path.join(save_dir, f"{name}_normalized.csv")
        frame.to_csv(out_path, index=False)
        per_model[name] = frame
        print(f"✅ Saved {name} to {out_path}")
    return per_model
def load_models(models_dir="data/normalized_model_csvs"):
    """
    Load normalized per-block metrics for every model in *models_dir*.

    Parameters
    ----------
    models_dir : str
        Directory containing files named
        ``<model>_block_metrics_batch8_normalized.csv``.

    Returns
    -------
    dict
        {model_name: blocks_list}. Each block dict has the keys:
        {
            "cpu_time": ...,         # from "pi_execution_time" column
            "gpu_time": ...,         # from "gpu_execution_time" column
            "activation_size": ...,  # from "Activation Size (MB)" column
            "mem_req": ...,          # from "Param Memory (MB)" column
            "model": ...             # from "model_name" column
        }
    """
    model_data = {}
    files = sorted(glob.glob(os.path.join(models_dir, "*_block_metrics_batch8_normalized.csv")))
    for filepath in files:
        df = pd.read_csv(filepath)
        model_name = os.path.basename(filepath).replace("_block_metrics_batch8_normalized.csv", "")
        # --- Standardize model names (filenames may be lowercased) ---
        lowered = model_name.lower()  # hoisted: compared against several prefixes
        if lowered.startswith("vgg"):
            model_name = "VGG" + model_name[3:]
        elif lowered.startswith("mobilenetv"):
            model_name = "MobileNetV" + model_name[10:]
        elif lowered.startswith("alexnet"):
            model_name = "AlexNet"
        elif lowered.startswith("inceptionv"):
            model_name = "InceptionV" + model_name[10:]
        elif lowered.startswith("resnet18"):
            model_name = "ResNet18"
        # --- Convert rows to block dicts ---
        blocks = []
        for _, row in df.iterrows():
            blocks.append({
                "cpu_time": row["pi_execution_time"],
                "gpu_time": row["gpu_execution_time"],
                "activation_size": row["Activation Size (MB)"],
                "mem_req": row["Param Memory (MB)"],
                "model": row["model_name"]
            })
        model_data[model_name] = blocks
    return model_data
# Example usage: normalize features, split them per model, then reload
# the per-model CSVs and print a summary.
if __name__ == "__main__":
    normalized = normalize_model_features()
    split_by_model(normalized, save_dir="data/normalized_model_csvs")
    loaded = load_models()
    print(f"Found {len(loaded)} models:")
    for model, block_list in loaded.items():
        print(f"{model}: {len(block_list)} blocks")