Files
2025-09-07 00:20:34 +02:00

230 lines
10 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Taguchi analysis pipeline for FDM experiment (per user's thesis)
- Reads a CSV with columns similar to:
'Eksperiment','Orijentacija','Visina sloja','Širina ekstruzije','Postotak ispune',
'Broj slojeva stijenke','A_ekv [mm^2]','Fm kN]','Sigma [Mpa]','SNR [dB]'
- Cleans units to numeric, recomputes Sigma (optional) and SNR (LB, n=1),
- Builds response tables (means, Δ), ranks factors, selects optimal levels by SNR,
- Predicts response at optimal combination (additive model),
- Runs Taguchi-style ANOVA on Sigma,
- Saves CSV outputs + main-effects plots + LaTeX snippet.
Usage:
python taguchi_from_csv.py --input ispitni_rezultati.csv --outdir out_tlak
"""
import argparse, os, re, json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def norm_num(x):
    """Coerce a raw CSV cell to float.

    Handles decimal commas and strips the unit decorations that appear in
    the thesis CSV (' mm', 'MPa'/'Mpa', 'kN', '%', square brackets).
    Returns NaN for missing values and for anything still unparseable.
    """
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    s = s.replace(',', '.')  # decimal comma -> decimal point
    s = s.replace('%', '')
    s = s.replace(' mm', '')
    s = s.replace('MPa', '').replace('Mpa', '')
    # Note: a separate replace('kN]', '') would be dead code once 'kN' is
    # gone; brackets are stripped on their own below.
    s = s.replace('kN', '').replace('[', '').replace(']', '')
    try:
        return float(s)
    except ValueError:  # narrow catch: unparseable text becomes NaN
        return np.nan
def compute_snr_lb(y):
    """Larger-the-better S/N ratio in dB for single-replicate (n=1) data.

    With one replicate the LB formula -10*log10(mean(1/y^2)) reduces to
    20*log10(y). Values are floored at 1e-12 so log10 stays finite.
    """
    values = pd.to_numeric(y, errors='coerce')
    floored = values.clip(lower=1e-12)
    return np.log10(floored) * 20.0
def response_table(df, factor, col):
    """Per-level mean of *col* for one *factor*, plus the range of those means.

    Returns one row per factor level with a constant "Delta (max-min)"
    column (spread of the level means) and a "Faktor" tag column so tables
    for several factors can be concatenated.
    """
    table = df.groupby(factor, as_index=False)[col].mean()
    level_means = table[col]
    table["Delta (max-min)"] = level_means.max() - level_means.min()
    table["Faktor"] = factor
    return table
def taguchi_anova(df, response, factors):
    """Taguchi-style ANOVA of *response* over the main effects of *factors*.

    Each factor's SS is the between-level sum of squares; whatever remains
    of the total SS is assigned to the "Error" row. Returns the tuple
    (anova_table, grand_mean, total_SS). MS is NaN (not inf) for any term
    with zero degrees of freedom, matching the existing Error-row guard.
    """
    y = df[response].astype(float)
    mu = y.mean()
    total_ss = ((y - mu)**2).sum()
    rows = []
    dof_used = 0
    ss_used = 0.0
    for f in factors:
        grp = df.groupby(f)[response].agg(['mean', 'count'])
        # Between-level SS: n_level * (level_mean - grand_mean)^2, summed.
        ss_f = (grp['count']*(grp['mean'] - mu)**2).sum()
        dof_f = grp.shape[0] - 1
        rows.append([f, ss_f, dof_f])
        dof_used += dof_f
        ss_used += ss_f
    # Residual SS/DOF; clamp at zero against floating-point round-off.
    err_ss = max(total_ss - ss_used, 0.0)
    err_dof = max(len(df) - 1 - dof_used, 0)
    an = pd.DataFrame(rows, columns=["Factor", "SS", "DOF"])
    # Guard DOF == 0 (single-level factor): NaN instead of division by zero.
    an["MS"] = an["SS"] / an["DOF"].where(an["DOF"] > 0)
    an["Pct_contrib_%"] = (an["SS"]/total_ss*100.0) if total_ss > 0 else np.nan
    err_row = pd.DataFrame(
        [["Error", err_ss, err_dof,
          (err_ss/err_dof) if err_dof > 0 else np.nan,
          (err_ss/total_ss*100.0) if total_ss > 0 else np.nan]],
        columns=["Factor", "SS", "DOF", "MS", "Pct_contrib_%"])
    an = pd.concat([an, err_row], ignore_index=True)
    return an, mu, total_ss
def main():
    """CLI entry point: clean the results CSV, run the Taguchi analysis.

    Side effects: creates the output directory and writes CSV tables, PNG
    main-effect plots, a LaTeX snippet and a JSON summary into it.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True, help="Path to CSV with results")
    ap.add_argument("--outdir", default=None, help="Output directory")
    ap.add_argument("--response", default="Sigma [Mpa]", help="Response column to analyze (default Sigma [Mpa])")
    ap.add_argument("--area_col", default="A_ekv [mm^2]", help="Area column if Sigma should be recomputed from Fm/Area")
    ap.add_argument("--fm_col", default="Fm kN]", help="Force column (kN)")
    ap.add_argument("--recompute_sigma", action="store_true", help="If set, recompute Sigma = Fm*1000/Area")
    ap.add_argument("--sn_type", default="LB", choices=["LB"], help="S/N type (only LB supported here)")
    args = ap.parse_args()
    in_path = args.input
    # Default output directory is derived from the input file's base name.
    outdir = args.outdir or (os.path.splitext(os.path.basename(in_path))[0] + "_taguchi_out")
    os.makedirs(outdir, exist_ok=True)
    df = pd.read_csv(in_path)
    # Standard column mapping / cleanup: normalise known header variants
    # (with/without unit suffixes) to the canonical names used below.
    rename_map = {
        "Visina sloja":"Visina sloja [mm]",
        "Širina ekstruzije":"Širina ekstruzije [mm]",
        "Postotak ispune":"Postotak ispune [%]",
        "Broj slojeva stijenke":"Broj stijenki",
        "Sigma [MPa]":"Sigma [Mpa]",
        "Fm [kN]":"Fm kN]",
    }
    df = df.rename(columns={k:v for k,v in rename_map.items() if k in df.columns})
    # Ensure numeric values for every relevant column (units/commas stripped).
    if "Visina sloja [mm]" in df.columns:
        df["Visina sloja [mm]"] = df["Visina sloja [mm]"].apply(norm_num)
    if "Širina ekstruzije [mm]" in df.columns:
        df["Širina ekstruzije [mm]"] = df["Širina ekstruzije [mm]"].apply(norm_num)
    if "Postotak ispune [%]" in df.columns:
        df["Postotak ispune [%]"] = df["Postotak ispune [%]"].apply(norm_num)
    if "Broj stijenki" in df.columns:
        df["Broj stijenki"] = df["Broj stijenki"].apply(norm_num)
    if args.area_col in df.columns:
        df[args.area_col] = df[args.area_col].apply(norm_num)
    if args.fm_col in df.columns:
        df[args.fm_col] = df[args.fm_col].apply(norm_num)
    if args.response in df.columns:
        df[args.response] = df[args.response].apply(norm_num)
    # Compute Sigma when requested, or when the response column is absent or
    # entirely NaN. Fm is in kN and area in mm^2, so *1000 yields MPa (N/mm^2).
    if args.recompute_sigma or args.response not in df.columns or df[args.response].isna().all():
        if args.fm_col in df.columns and args.area_col in df.columns:
            df[args.response] = (df[args.fm_col] * 1000.0) / df[args.area_col]
        else:
            raise SystemExit("Cannot recompute Sigma: missing Fm or Area columns")
    # S/N ratio, larger-the-better criterion (single replicate per run).
    df["SNR_LB [dB]"] = compute_snr_lb(df[args.response])
    # Save the cleaned raw data together with the computed S/N column.
    raw_out = os.path.join(outdir, "0_raw_with_SNR.csv")
    df.to_csv(raw_out, index=False)
    # Factors to analyze: auto-detected subset of the known factor columns.
    candidate_factors = ["Orijentacija","Visina sloja [mm]","Širina ekstruzije [mm]","Postotak ispune [%]","Broj stijenki"]
    factors = [f for f in candidate_factors if f in df.columns]
    if len(factors) == 0:
        raise SystemExit("No known factor columns found. Expected some of: " + ", ".join(candidate_factors))
    # Response tables (per-level means + delta), for Sigma and for S/N.
    resp_mu = pd.concat([response_table(df, f, args.response) for f in factors], ignore_index=True)
    resp_sn = pd.concat([response_table(df, f, "SNR_LB [dB]") for f in factors], ignore_index=True)
    resp_mu.to_csv(os.path.join(outdir, "1_response_means_Sigma.csv"), index=False)
    resp_sn.to_csv(os.path.join(outdir, "2_response_means_SNR.csv"), index=False)
    # Ranking by Delta: larger spread of level means = more influential factor.
    rank_mu = resp_mu.groupby("Faktor")["Delta (max-min)"].max().sort_values(ascending=False).reset_index().rename(columns={"Delta (max-min)":"Rang delta (Sigma)"})
    rank_sn = resp_sn.groupby("Faktor")["Delta (max-min)"].max().sort_values(ascending=False).reset_index().rename(columns={"Delta (max-min)":"Rang delta (SNR)"})
    ranking = pd.merge(rank_mu, rank_sn, on="Faktor")
    ranking.to_csv(os.path.join(outdir, "3_factor_ranking.csv"), index=False)
    # Optimal level per factor = the level with the highest mean S/N.
    opt_levels = {f: df.groupby(f)["SNR_LB [dB]"].mean().sort_values(ascending=False).index[0] for f in factors}
    opt_table = pd.DataFrame({"Faktor": list(opt_levels.keys()), "Optimalna razina (po S/N)": list(opt_levels.values())})
    opt_table.to_csv(os.path.join(outdir, "4_optimal_levels.csv"), index=False)
    # Prediction at the optimal combination (additive main-effects model):
    # sum of the selected level means minus (k-1) * grand mean.
    grand_mean = df[args.response].mean()
    k = len(factors)
    pred_sigma = sum(df.groupby(f)[args.response].mean().loc[opt_levels[f]] for f in factors) - (k-1)*grand_mean
    grand_mean_snr = df["SNR_LB [dB]"].mean()
    pred_snr = sum(df.groupby(f)["SNR_LB [dB]"].mean().loc[opt_levels[f]] for f in factors) - (k-1)*grand_mean_snr
    pred_df = pd.DataFrame({
        "Predikcija": ["Sigma_opt [MPa]","SNR_opt [dB]","Grand mean Sigma [MPa]","Grand mean SNR [dB]"],
        "Vrijednost": [pred_sigma, pred_snr, grand_mean, grand_mean_snr]
    })
    pred_df.to_csv(os.path.join(outdir, "5_prediction.csv"), index=False)
    # ANOVA (Taguchi-style) on the response.
    anova_df, mu_sigma, totss = taguchi_anova(df, args.response, factors)
    anova_df.to_csv(os.path.join(outdir, "6_anova_sigma.csv"), index=False)
    # Plots: one main-effects chart (mean S/N per level) per factor.
    for f in factors:
        means = df.groupby(f)["SNR_LB [dB]"].mean().reset_index()
        # Sort levels numerically when possible so the x-axis is ordered.
        # NOTE(review): errors="ignore" is deprecated in pandas >= 2.2 —
        # confirm the target pandas version.
        try:
            means[f] = pd.to_numeric(means[f], errors="ignore")
            means = means.sort_values(by=f)
        except:
            pass
        plt.figure()
        plt.plot(means[f], means["SNR_LB [dB]"], marker="o")
        plt.xlabel(f)
        plt.ylabel("S/N (LB) [dB]")
        plt.title(f"Main effect (S/N): {f}")
        plt.tight_layout()
        plt.savefig(os.path.join(outdir, f"main_effect_SNR_{f}.png"), dpi=150)
        plt.close()
    # LaTeX snippet assembling all tables for direct inclusion in the thesis.
    latex_lines = []
    latex_lines.append(r"% --- Taguchi rezultati (S = Sigma [MPa], S/N larger-the-better) ---")
    latex_lines.append(r"\subsection{Rezultati Taguchijeve metode}")
    latex_lines.append(r"U skladu s ortogonalnom matricom provedena je analiza s kriterijem \textbf{što-veće-to-bolje}. Za svaku kombinaciju izračunat je S/N omjer \((\mathrm{S/N}=20\log_{10}(\sigma))\) te su određeni glavni učinci po razinama i optimalna kombinacija.")
    # Optimal levels
    latex_lines.append(r"\paragraph{Optimalne razine (po S/N).}")
    latex_lines.append(opt_table.to_latex(index=False, escape=False))
    # Prediction
    latex_lines.append(r"\paragraph{Predikcija odziva na optimalnoj kombinaciji.}")
    latex_lines.append(pred_df.to_latex(index=False, escape=False, float_format='%.2f'))
    # Ranking
    latex_lines.append(r"\paragraph{Rang utjecaja faktora.}")
    latex_lines.append(ranking.to_latex(index=False, escape=False, float_format='%.3f'))
    # ANOVA (rounded copy so the LaTeX table stays readable)
    an_fmt = anova_df.copy()
    for c in ["SS","MS","Pct_contrib_%"]:
        if c in an_fmt.columns:
            an_fmt[c] = an_fmt[c].astype(float).round(3)
    latex_lines.append(r"\paragraph{ANOVA (Taguchi).}")
    latex_lines.append(an_fmt.to_latex(index=False, escape=False))
    latex_lines.append(r"Napomena: budući da je \(n{=}1\), pogreška (Error) procijenjena je iz preostalih stupnjeva slobode (Taguchi pooling).")
    with open(os.path.join(outdir, "taguchi_results.tex"), "w", encoding="utf-8") as f:
        f.write("\n\n".join(latex_lines))
    # Small JSON summary for downstream tooling.
    summary = {
        "outdir": outdir,
        "factors": factors,
        "opt_levels": opt_levels,
        "pred_sigma": pred_sigma,
        "grand_mean_sigma": grand_mean,
    }
    with open(os.path.join(outdir, "summary.json"), "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)
    print("Done. Outputs in:", outdir)
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()