#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Taguchi analysis pipeline for FDM experiment (per user's thesis)

- Reads a CSV with columns similar to:
  'Eksperiment','Orijentacija','Visina sloja','Širina ekstruzije',
  'Postotak ispune','Broj slojeva stijenke','A_ekv [mm^2]','Fm kN]',
  'Sigma [Mpa]','SNR [dB]'
- Cleans units to numeric, recomputes Sigma (optional) and SNR (LB, n=1),
- Builds response tables (means, Δ), ranks factors, selects optimal levels
  by SNR,
- Predicts response at optimal combination (additive model),
- Runs Taguchi-style ANOVA on Sigma,
- Saves CSV outputs + main-effects plots + LaTeX snippet.

Usage:
    python taguchi_from_csv.py --input ispitni_rezultati.csv --outdir out_tlak
"""
import argparse
import json
import os
import re
import sys

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is the only step that needs matplotlib; the numeric helpers and
    # CSV/LaTeX outputs remain usable without it.
    plt = None

# Unit/bracket fragments stripped from cell text before numeric parsing.
# Order matters only in that the decimal comma is handled before these.
_UNIT_TOKENS = ("%", " mm", "MPa", "Mpa", "kN", "[", "]")

SNR_COL = "SNR_LB [dB]"
DELTA_COL = "Delta (max-min)"


def norm_num(x):
    """Parse a CSV cell into a float, tolerating decimal commas and unit text.

    Args:
        x: A cell value (number, string such as ``"0,2 mm"`` or ``"15 %"``,
            or a missing value).

    Returns:
        The parsed value as ``float``, or ``np.nan`` when the cell is missing
        or cannot be interpreted as a number after stripping known units.
    """
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip().replace(",", ".")
    for token in _UNIT_TOKENS:
        s = s.replace(token, "")
    try:
        return float(s)
    except ValueError:
        return np.nan


def compute_snr_lb(y):
    """Return the larger-the-better S/N ratio for single observations (n=1).

    With one replicate the LB formula reduces to ``20*log10(y)``.

    Args:
        y: Series of response values; non-numeric entries are coerced to NaN.

    Returns:
        Series of S/N values in dB. Values below ``1e-12`` (including zero
        and negatives) are clipped to ``1e-12`` before the logarithm.
    """
    y = pd.to_numeric(y, errors="coerce")
    return 20.0 * np.log10(y.clip(lower=1e-12))


def response_table(df, factor, col):
    """Build the per-level mean table of ``col`` for one factor.

    Args:
        df: Experiment data.
        factor: Name of the factor column to group by.
        col: Name of the column to average.

    Returns:
        DataFrame with one row per factor level containing the level, the
        mean of ``col``, the factor's delta (max level mean minus min level
        mean, repeated on every row) and the factor name in ``"Faktor"``.
    """
    t = df.groupby(factor, as_index=False)[col].mean()
    t[DELTA_COL] = t[col].max() - t[col].min()
    t["Faktor"] = factor
    return t


def taguchi_anova(df, response, factors):
    """Compute a Taguchi-style ANOVA table (SS, DOF, MS, % contribution).

    Rows whose response is NaN are excluded everywhere, so the error degrees
    of freedom are based on the number of valid observations rather than on
    the raw row count.

    Args:
        df: Experiment data.
        response: Name of the response column.
        factors: Iterable of factor column names.

    Returns:
        Tuple ``(table, mu, total_ss)`` where ``table`` has one row per factor
        plus a final ``"Error"`` row, ``mu`` is the grand mean of the response
        and ``total_ss`` the total sum of squares about ``mu``.
    """
    data = df[df[response].notna()]
    y = data[response].astype(float)
    mu = y.mean()
    total_ss = ((y - mu) ** 2).sum()

    rows = []
    dof_used = 0
    ss_used = 0.0
    for f in factors:
        grp = data.groupby(f)[response].agg(["mean", "count"])
        ss_f = (grp["count"] * (grp["mean"] - mu) ** 2).sum()
        dof_f = max(grp.shape[0] - 1, 0)
        rows.append({"Factor": f, "SS": ss_f, "DOF": dof_f})
        dof_used += dof_f
        ss_used += ss_f

    # Residual is whatever the factors do not explain; clamp at zero so that
    # floating-point round-off or a saturated design cannot go negative.
    err_ss = max(total_ss - ss_used, 0.0)
    err_dof = max(len(data) - 1 - dof_used, 0)
    rows.append({"Factor": "Error", "SS": err_ss, "DOF": err_dof})

    an = pd.DataFrame(rows, columns=["Factor", "SS", "DOF"])
    # MS is undefined when a row has no degrees of freedom.
    an["MS"] = an["SS"] / an["DOF"].where(an["DOF"] > 0)
    an["Pct_contrib_%"] = (an["SS"] / total_ss * 100.0) if total_ss > 0 else np.nan
    return an, mu, total_ss


def _to_native(value):
    """Convert a NumPy scalar to a built-in Python object (JSON-serializable)."""
    return value.item() if isinstance(value, np.generic) else value


def _save_main_effect_plots(df, factors, outdir):
    """Write one main-effects (S/N vs. level) PNG per factor into ``outdir``."""
    for f in factors:
        means = df.groupby(f)[SNR_COL].mean().reset_index()
        # Sort numerically when the levels are numbers stored as text;
        # genuinely categorical levels keep the groupby order.
        try:
            means[f] = pd.to_numeric(means[f])
            means = means.sort_values(by=f)
        except (ValueError, TypeError):
            pass
        plt.figure()
        plt.plot(means[f], means[SNR_COL], marker="o")
        plt.xlabel(f)
        plt.ylabel("S/N (LB) [dB]")
        plt.title(f"Main effect (S/N): {f}")
        plt.tight_layout()
        plt.savefig(os.path.join(outdir, f"main_effect_SNR_{f}.png"), dpi=150)
        plt.close()


def _build_latex(opt_table, pred_df, ranking, anova_df):
    """Assemble the LaTeX results snippet from the computed tables.

    Table cells and headers are escaped because factor and column names
    contain ``%`` and ``_``, which are special characters in LaTeX.
    """
    an_fmt = anova_df.copy()
    for c in ["SS", "MS", "Pct_contrib_%"]:
        if c in an_fmt.columns:
            an_fmt[c] = an_fmt[c].astype(float).round(3)

    parts = [
        r"% --- Taguchi rezultati (S = Sigma [MPa], S/N larger-the-better) ---",
        r"\subsection{Rezultati Taguchijeve metode}",
        r"U skladu s ortogonalnom matricom provedena je analiza s kriterijem \textbf{što-veće-to-bolje}. Za svaku kombinaciju izračunat je S/N omjer \((\mathrm{S/N}=20\log_{10}(\sigma))\) te su određeni glavni učinci po razinama i optimalna kombinacija.",
        r"\paragraph{Optimalne razine (po S/N).}",
        opt_table.to_latex(index=False, escape=True),
        r"\paragraph{Predikcija odziva na optimalnoj kombinaciji.}",
        pred_df.to_latex(index=False, escape=True, float_format="%.2f"),
        r"\paragraph{Rang utjecaja faktora.}",
        ranking.to_latex(index=False, escape=True, float_format="%.3f"),
        r"\paragraph{ANOVA (Taguchi).}",
        an_fmt.to_latex(index=False, escape=True),
        r"Napomena: budući da je \(n{=}1\), pogreška (Error) procijenjena je iz preostalih stupnjeva slobode (Taguchi pooling).",
    ]
    return "\n\n".join(parts)


def main():
    """Run the full pipeline: read CSV, analyze, and write all outputs.

    Raises:
        SystemExit: If Sigma must be computed but the force/area columns are
            missing, or if none of the known factor columns is present.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True, help="Path to CSV with results")
    ap.add_argument("--outdir", default=None, help="Output directory")
    ap.add_argument("--response", default="Sigma [Mpa]", help="Response column to analyze (default Sigma [Mpa])")
    ap.add_argument("--area_col", default="A_ekv [mm^2]", help="Area column if Sigma should be recomputed from Fm/Area")
    ap.add_argument("--fm_col", default="Fm kN]", help="Force column (kN)")
    ap.add_argument("--recompute_sigma", action="store_true", help="If set, recompute Sigma = Fm*1000/Area")
    ap.add_argument("--sn_type", default="LB", choices=["LB"], help="S/N type (only LB supported here)")
    args = ap.parse_args()

    in_path = args.input
    outdir = args.outdir or (os.path.splitext(os.path.basename(in_path))[0] + "_taguchi_out")
    os.makedirs(outdir, exist_ok=True)

    df = pd.read_csv(in_path)

    # Standard column mapping / cleanup for known names
    rename_map = {
        "Visina sloja": "Visina sloja [mm]",
        "Širina ekstruzije": "Širina ekstruzije [mm]",
        "Postotak ispune": "Postotak ispune [%]",
        "Broj slojeva stijenke": "Broj stijenki",
        "Sigma [MPa]": "Sigma [Mpa]",
        "Fm [kN]": "Fm kN]",
    }
    # Never rename a column the user explicitly pointed at, and never create
    # a duplicate column name.
    user_cols = {args.response, args.area_col, args.fm_col}
    df = df.rename(columns={
        old: new
        for old, new in rename_map.items()
        if old in df.columns and old not in user_cols and new not in df.columns
    })

    # Ensure numeric for relevant columns
    numeric_cols = [
        "Visina sloja [mm]",
        "Širina ekstruzije [mm]",
        "Postotak ispune [%]",
        "Broj stijenki",
        args.area_col,
        args.fm_col,
        args.response,
    ]
    for col in dict.fromkeys(numeric_cols):  # de-duplicated, order kept
        if col in df.columns:
            df[col] = df[col].apply(norm_num)

    # Compute Sigma if asked or missing
    if args.recompute_sigma or args.response not in df.columns or df[args.response].isna().all():
        if args.fm_col in df.columns and args.area_col in df.columns:
            # kN -> N, divided by mm^2, gives MPa
            df[args.response] = (df[args.fm_col] * 1000.0) / df[args.area_col]
        else:
            raise SystemExit("Cannot recompute Sigma: missing Fm or Area columns")

    # Compute SNR (LB)
    df[SNR_COL] = compute_snr_lb(df[args.response])

    # Save cleaned raw
    raw_out = os.path.join(outdir, "0_raw_with_SNR.csv")
    df.to_csv(raw_out, index=False)

    # Factors to analyze (auto detect from known list)
    candidate_factors = ["Orijentacija", "Visina sloja [mm]", "Širina ekstruzije [mm]", "Postotak ispune [%]", "Broj stijenki"]
    factors = [f for f in candidate_factors if f in df.columns]
    if not factors:
        raise SystemExit("No known factor columns found. Expected some of: " + ", ".join(candidate_factors))

    # Response tables and deltas
    resp_mu = pd.concat([response_table(df, f, args.response) for f in factors], ignore_index=True)
    resp_sn = pd.concat([response_table(df, f, SNR_COL) for f in factors], ignore_index=True)
    resp_mu.to_csv(os.path.join(outdir, "1_response_means_Sigma.csv"), index=False)
    resp_sn.to_csv(os.path.join(outdir, "2_response_means_SNR.csv"), index=False)

    # Ranking (by Delta)
    rank_mu = (
        resp_mu.groupby("Faktor")[DELTA_COL].max()
        .sort_values(ascending=False)
        .reset_index()
        .rename(columns={DELTA_COL: "Rang delta (Sigma)"})
    )
    rank_sn = (
        resp_sn.groupby("Faktor")[DELTA_COL].max()
        .sort_values(ascending=False)
        .reset_index()
        .rename(columns={DELTA_COL: "Rang delta (SNR)"})
    )
    ranking = pd.merge(rank_mu, rank_sn, on="Faktor")
    ranking.to_csv(os.path.join(outdir, "3_factor_ranking.csv"), index=False)

    # Per-level means, computed once and reused below
    sn_means = {f: df.groupby(f)[SNR_COL].mean() for f in factors}
    mu_means = {f: df.groupby(f)[args.response].mean() for f in factors}

    # Optimal levels by SNR (stored as native Python values so they can be
    # written to JSON later)
    opt_levels = {
        f: _to_native(sn_means[f].sort_values(ascending=False).index[0])
        for f in factors
    }
    opt_table = pd.DataFrame({
        "Faktor": list(opt_levels.keys()),
        "Optimalna razina (po S/N)": list(opt_levels.values()),
    })
    opt_table.to_csv(os.path.join(outdir, "4_optimal_levels.csv"), index=False)

    # Prediction at optimal combo (additive model) on response
    grand_mean = df[args.response].mean()
    k = len(factors)
    pred_sigma = sum(mu_means[f].loc[opt_levels[f]] for f in factors) - (k - 1) * grand_mean
    grand_mean_snr = df[SNR_COL].mean()
    pred_snr = sum(sn_means[f].loc[opt_levels[f]] for f in factors) - (k - 1) * grand_mean_snr
    pred_df = pd.DataFrame({
        "Predikcija": ["Sigma_opt [MPa]", "SNR_opt [dB]", "Grand mean Sigma [MPa]", "Grand mean SNR [dB]"],
        "Vrijednost": [pred_sigma, pred_snr, grand_mean, grand_mean_snr],
    })
    pred_df.to_csv(os.path.join(outdir, "5_prediction.csv"), index=False)

    # ANOVA (Taguchi-style) on response
    anova_df, _mu_sigma, _totss = taguchi_anova(df, args.response, factors)
    anova_df.to_csv(os.path.join(outdir, "6_anova_sigma.csv"), index=False)

    # Plots: main effects for SNR
    if plt is None:
        print("Warning: matplotlib is not installed; skipping main-effects plots.", file=sys.stderr)
    else:
        _save_main_effect_plots(df, factors, outdir)

    # LaTeX snippet
    with open(os.path.join(outdir, "taguchi_results.tex"), "w", encoding="utf-8") as f:
        f.write(_build_latex(opt_table, pred_df, ranking, anova_df))

    # Small JSON summary
    summary = {
        "outdir": outdir,
        "factors": factors,
        "opt_levels": opt_levels,
        "pred_sigma": float(pred_sigma),
        "grand_mean_sigma": float(grand_mean),
    }
    with open(os.path.join(outdir, "summary.json"), "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)

    print("Done. Outputs in:", outdir)


if __name__ == "__main__":
    main()