gotovo, fali kazalo itd

This commit is contained in:
2025-09-07 22:31:05 +02:00
parent 7114ca772f
commit 73ed7c002f
46 changed files with 1368 additions and 549 deletions

View File

@@ -1 +1 @@
,marockaspark,localhost.localdomain,07.09.2025 00:19,file:///home/marockaspark/.config/libreoffice/4;
,marockaspark,localhost.localdomain,07.09.2025 17:24,file:///home/marockaspark/.config/libreoffice/4;

Binary file not shown.

Binary file not shown.

159
docs/taguchi_ancova.py Normal file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Taguchi ANOVA + ANCOVA helper
# Run: python taguchi_ancova.py --input ispitni_rezultati.ods --out results.xlsx --cov A_min A_max
import argparse, sys, unicodedata, math
from pathlib import Path
import numpy as np
import pandas as pd
# SciPy is an optional dependency: it is only needed to turn an F statistic
# into a p-value.  When it cannot be imported the script still runs and every
# p-value is reported as NaN.
try:
    from scipy.stats import f as fdist
except Exception:
    def f_p_value(F, dfn, dfd):
        """Fallback used without SciPy: the p-value is unknown, return NaN."""
        return float("nan")
else:
    def f_p_value(F, dfn, dfd):
        """Return the upper-tail probability P(X > F) for X ~ F(dfn, dfd)."""
        return float(fdist.sf(F, dfn, dfd))
def norm_label(s: str) -> str:
    """Normalise a column label so that labels can be compared loosely.

    The value is converted to ``str``, trimmed and lower-cased, combining
    marks (accents) are removed after NFKD decomposition, square and round
    brackets are turned into spaces, and runs of whitespace are collapsed
    to a single space.
    """
    lowered = str(s).strip().lower()
    decomposed = unicodedata.normalize("NFKD", lowered)
    # combining() is 0 for base characters and non-zero for accent marks.
    without_marks = "".join(
        ch for ch in decomposed if unicodedata.combining(ch) == 0
    )
    without_brackets = without_marks.translate(str.maketrans("[]()", "    "))
    return " ".join(without_brackets.split())
def find_col(df: pd.DataFrame, *candidates):
    """Return the first column of *df* that matches one of *candidates*.

    Labels are compared after normalisation with ``norm_label``.  Two passes
    are made over the candidates, in the order given:

    1. exact match of the normalised candidate against a normalised column
       label;
    2. fuzzy match: every whitespace-separated token of the normalised
       candidate occurs as a substring of the normalised column label.

    Args:
        df: table whose column labels are searched.
        *candidates: label spellings to look for, most preferred first.

    Returns:
        The original (un-normalised) column label, or ``None`` when nothing
        matches.
    """
    labels = {norm_label(c): c for c in df.columns}

    # Pass 1: exact match on the normalised label.
    for cand in candidates:
        key = norm_label(cand)
        if key in labels:
            return labels[key]

    # Pass 2: all tokens of the candidate appear somewhere in the label.
    for cand in candidates:
        toks = norm_label(cand).split()
        if not toks:
            # all() over zero tokens is vacuously True and would "match" the
            # very first column; an empty candidate must match nothing here.
            continue
        for k, orig in labels.items():
            if all(t in k for t in toks):
                return orig
    return None
def taguchi_anova(df: pd.DataFrame, y_col: str, factor_cols):
    """Compute a main-effects ANOVA table for the response *y_col*.

    Rows whose response cannot be converted to a number are ignored.  For
    every factor the between-level sum of squares is
    ``sum(count_level * (mean_level - mu) ** 2)``; the error term is what
    remains of the total sum of squares after all factor sums of squares
    have been subtracted.

    Args:
        df: input table.
        y_col: name of the response column.
        factor_cols: iterable of factor column names (may be empty).

    Returns:
        A tuple ``(res, level_means, meta)``:

        * ``res`` - DataFrame with one row per factor and the columns
          ``Faktor``, ``SS``, ``df``, ``MS``, ``F`` and ``Udio var [%]``
          (``SS`` as a percentage of ``SST``);
        * ``level_means`` - ``{factor: {level: mean response}}``;
        * ``meta`` - dict with ``N``, ``mu``, ``SST``, ``SS_error``,
          ``df_error`` and ``MS_error``.
    """
    y = pd.to_numeric(df[y_col], errors="coerce")
    mask = y.notna()
    y = y[mask]
    N = y.shape[0]
    mu = float(y.mean())
    SST = float(((y - mu) ** 2).sum())

    rows = []
    level_means = {}
    for f in factor_cols:
        g = df.loc[mask, f]
        # dropna=False keeps a missing factor level as a level of its own.
        stats = (
            pd.DataFrame({"y": y.values, "lvl": g.values})
            .groupby("lvl", dropna=False)["y"]
            .agg(["mean", "count"])
            .reset_index()
        )
        SSf = float((stats["count"] * (stats["mean"] - mu) ** 2).sum())
        a = stats.shape[0]
        df_f = max(a - 1, 0)
        MSf = SSf / df_f if df_f > 0 else float("nan")
        rows.append({"Faktor": f, "SS": SSf, "df": df_f, "MS": MSf})
        level_means[f] = dict(zip(stats["lvl"], stats["mean"]))

    # Explicit columns keep the table well-formed when no factor was given;
    # without them res["SS"] would raise KeyError on an empty frame.
    res = pd.DataFrame(rows, columns=["Faktor", "SS", "df", "MS"])
    if not rows:
        res = res.astype({"SS": float, "df": int, "MS": float})

    SS_factors = float(res["SS"].sum())
    df_factors = int(res["df"].sum())
    df_total = N - 1
    SS_error = SST - SS_factors
    df_error = df_total - df_factors
    MS_error = SS_error / df_error if df_error > 0 else float("nan")

    # A NaN or non-positive error mean square makes the F ratio meaningless.
    res["F"] = res["MS"] / MS_error if MS_error > 0 else float("nan")
    res["Udio var [%]"] = 100.0 * res["SS"] / SST if SST > 0 else float("nan")

    meta = {
        "N": N,
        "mu": mu,
        "SST": SST,
        "SS_error": SS_error,
        "df_error": df_error,
        "MS_error": MS_error,
    }
    return res, level_means, meta
def taguchi_yhat(df, y_col, factor_cols, level_means, mu):
    """Predict the response of every row with the additive main-effects model.

    The prediction is the sum of the level means of all factors minus
    ``(k - 1) * mu``, where ``k`` is the number of factors.  Rows whose
    response in *y_col* is not numeric get NaN.

    Args:
        df: input table.
        y_col: name of the response column.
        factor_cols: factor column names.
        level_means: ``{factor: {level: mean response}}``.
        mu: overall mean of the response.

    Returns:
        A float NumPy array with one prediction per row of *df*.
    """
    n_factors = len(factor_cols)
    total = np.zeros(len(df), dtype=float)
    for name in factor_cols:
        # Replace every level by the mean response observed at that level.
        effect = df[name].map(level_means[name]).astype(float)
        total = total + effect.values
    prediction = total - (n_factors - 1) * mu
    has_response = pd.to_numeric(df[y_col], errors="coerce").notna().values
    return np.where(has_response, prediction, np.nan)
def simple_linregress(x, y):
    """Fit ``y = b0 + b1 * x`` by ordinary least squares.

    Both inputs are coerced to float; pairs in which either value is not
    finite are dropped before fitting.

    Returns:
        ``(b1, b0, n)`` - slope, intercept and the number of pairs used.
        Slope and intercept are NaN when fewer than two pairs remain or when
        all remaining ``x`` values are identical.
    """
    nan = float("nan")
    xs = np.asarray(pd.to_numeric(x, errors="coerce"), dtype=float)
    ys = np.asarray(pd.to_numeric(y, errors="coerce"), dtype=float)

    usable = np.isfinite(xs) & np.isfinite(ys)
    xs, ys = xs[usable], ys[usable]
    n = len(xs)
    if n < 2:
        return nan, nan, n

    x_mean = float(xs.mean())
    y_mean = float(ys.mean())
    dx = xs - x_mean
    Sxx = float((dx ** 2).sum())
    Sxy = float((dx * (ys - y_mean)).sum())

    if Sxx == 0:
        # No spread in x: the slope is undefined.
        return nan, nan, n
    slope = Sxy / Sxx
    if not math.isfinite(slope):
        return slope, nan, n
    return slope, y_mean - slope * x_mean, n
def ancova_single(e, cov, df_err_old, SST):
    """Test whether one covariate explains part of the ANOVA residuals.

    The residuals *e* are regressed on the covariate *cov* with
    ``simple_linregress``.  The drop in residual sum of squares is the
    covariate sum of squares (1 degree of freedom), tested against the new
    error mean square.

    Both sums of squares are taken over the same rows, namely those where
    the residual and the covariate are both finite.  Each row that has a
    residual but no usable covariate value costs one additional error degree
    of freedom.  When the regression cannot be fitted (fewer than two usable
    rows or a constant covariate) all derived statistics are NaN, so the
    covariate is never reported as an improvement.

    Args:
        e: residuals of the main-effects model, one per row.
        cov: covariate values, one per row (coerced to numeric).
        df_err_old: error degrees of freedom before adding the covariate.
        SST: total sum of squares of the response.

    Returns:
        ``(info, e_hat, e_new)`` where ``info`` is a dict with the keys
        ``b0``, ``b1``, ``n_used``, ``SSE_old``, ``SSE_new``, ``SS_cov``,
        ``df_err_old``, ``df_err_new``, ``MS_err_new``, ``F_cov``, ``p`` and
        ``Delta_R2_vs_SST``; ``e_hat`` is the fitted residual per row and
        ``e_new`` the residual that remains after removing it.
    """
    nan = float("nan")
    b1, b0, n_used = simple_linregress(cov, e)
    cov_num = np.asarray(pd.to_numeric(cov, errors="coerce"), dtype=float)
    e_num = np.asarray(pd.to_numeric(e, errors="coerce"), dtype=float)

    # Per-row outputs; NaN wherever the covariate (or the fit) is missing.
    e_hat = b0 + b1 * cov_num
    e_new = e - e_hat

    # Rows that actually took part in the regression.
    used = np.isfinite(e_num) & np.isfinite(cov_num)
    n_dropped = int(np.isfinite(e_num).sum()) - int(used.sum())

    SSE_old = float(np.sum(e_num[used] ** 2))
    if math.isfinite(b1) and math.isfinite(b0):
        remaining = e_num[used] - (b0 + b1 * cov_num[used])
        SSE_new = float(np.sum(remaining ** 2))
        SS_cov = SSE_old - SSE_new
        # One df for the slope, plus one for every row lost to the covariate.
        df_err_new = df_err_old - 1 - n_dropped
        MS_err_new = SSE_new / df_err_new if df_err_new > 0 else nan
    else:
        # No fit: report "unknown" instead of a spurious perfect fit.
        SSE_new = SS_cov = MS_err_new = nan
        df_err_new = df_err_old

    F_cov = (SS_cov / 1.0) / MS_err_new if MS_err_new > 0 else nan
    p = f_p_value(F_cov, 1, df_err_new) if math.isfinite(F_cov) else nan
    dR2 = SS_cov / SST if SST > 0 else nan

    info = {
        "b0": b0,
        "b1": b1,
        "n_used": n_used,
        "SSE_old": SSE_old,
        "SSE_new": SSE_new,
        "SS_cov": SS_cov,
        "df_err_old": df_err_old,
        "df_err_new": df_err_new,
        "MS_err_new": MS_err_new,
        "F_cov": F_cov,
        "p": p,
        "Delta_R2_vs_SST": dR2,
    }
    return info, e_hat, e_new
# Characters that Excel does not allow in worksheet names.
_SHEET_NAME_FORBIDDEN = str.maketrans({ch: "_" for ch in "[]:*?/\\"})


def _safe_sheet_name(name, used):
    """Return an Excel-legal worksheet name derived from *name*.

    Forbidden characters become ``_``, the name is cut to 31 characters and
    must not start or end with an apostrophe.  *used* is a set of lower-cased
    names already taken; a numeric suffix is appended on a clash and the
    result is recorded in *used*.
    """
    base = str(name).translate(_SHEET_NAME_FORBIDDEN)[:31].strip("'") or "Sheet"
    candidate = base
    counter = 1
    while candidate.lower() in used:
        suffix = f"_{counter}"
        candidate = base[: 31 - len(suffix)] + suffix
        counter += 1
    used.add(candidate.lower())
    return candidate


def _read_table(inp, sheet):
    """Load the input table from .ods, .xlsx/.xls or, for anything else, CSV."""
    # sheet_name=None would make pandas return a dict of *all* sheets instead
    # of a DataFrame, so fall back to the first sheet when none was requested.
    sheet_name = 0 if sheet is None else sheet
    suffix = inp.suffix.lower()
    if suffix == ".ods":
        try:
            return pd.read_excel(inp, sheet_name=sheet_name, engine="odf")
        except Exception as e:
            print(f"ERROR: .ods ({type(e).__name__}): {e}", file=sys.stderr)
            sys.exit(2)
    if suffix in (".xlsx", ".xls"):
        return pd.read_excel(inp, sheet_name=sheet_name)
    return pd.read_csv(inp)


def main():
    """Command-line entry point: run the Taguchi ANOVA and optional ANCOVA.

    Reads the table given by ``--input``, locates the response column (or
    derives it from force and equivalent area), detects the factor columns,
    computes the main-effects ANOVA, tests the requested covariates
    (``A_min`` / ``A_max``) on the residuals, writes all tables to the Excel
    workbook given by ``--out`` and prints a summary.

    Exits with status 2 when the input file, the response column or the
    factor columns cannot be found.
    """
    ap = argparse.ArgumentParser(description="Taguchi ANOVA + ANCOVA (A_min / A_max)")
    ap.add_argument("--input", required=True)
    ap.add_argument("--sheet", default=None)
    ap.add_argument("--out", default="Taguchi_ANCOVA_results.xlsx")
    ap.add_argument("--cov", nargs="*", default=["A_min"])
    args = ap.parse_args()

    inp = Path(args.input)
    if not inp.exists():
        print(f"ERROR: Ne postoji {inp}", file=sys.stderr)
        sys.exit(2)
    df = _read_table(inp, args.sheet)

    # --- response column -------------------------------------------------
    y_col = find_col(df, "Sigma [Mpa]", "Sigma [MPa]", "sigma", "σ")
    if y_col is None:
        fm = find_col(df, "Fm kN]", "Fm [kN]", "Force")
        aeq = find_col(df, "A_ekv [mm^2]", "A ekv", "A_ekv")
        if fm is not None and aeq is not None:
            # Force times 1000 divided by area gives the derived stress column.
            df["Sigma [Mpa]"] = (
                pd.to_numeric(df[fm], errors="coerce") * 1000
                / pd.to_numeric(df[aeq], errors="coerce")
            )
            y_col = "Sigma [Mpa]"
        else:
            print("ERROR: σ ni (Fm i A_ekv) nisu pronađeni.", file=sys.stderr)
            sys.exit(2)

    # --- factor columns --------------------------------------------------
    cand = ["Orijentacija", "Visina sloja", "Širina ekstruzije", "Sirina ekstruzije",
            "Postotak ispune", "Broj stijenki", "Broj slojeva stijenke"]
    factors = []
    for c in cand:
        col = find_col(df, c)
        if col is not None and col not in factors:
            factors.append(col)
    # When both spellings of the wall-count factor are present keep only one.
    if find_col(df, "Broj stijenki") is not None:
        bs = find_col(df, "Broj slojeva stijenke")
        if bs is not None and bs in factors:
            factors.remove(bs)
    if not factors:
        print("ERROR: Faktori nisu pronađeni.", file=sys.stderr)
        sys.exit(2)

    # --- ANOVA and residuals ---------------------------------------------
    anova, lvl_means, meta = taguchi_anova(df, y_col, factors)
    yhat = taguchi_yhat(df, y_col, factors, lvl_means, meta["mu"])
    e = pd.to_numeric(df[y_col], errors="coerce").values - yhat
    per_row = pd.DataFrame({"σ [MPa]": df[y_col], "σ̂ (Taguchi)": yhat, "e": e})

    # --- covariates ------------------------------------------------------
    # Supported covariates, processed in this fixed order.
    known = {
        "a_min": ("A_min", ("A_min [mm^2]", "A_min", "A min")),
        "a_max": ("A_max", ("A_max [mm^2]", "A_max", "A max")),
    }
    requested = list(args.cov or [])
    names = {c.lower() for c in requested}
    accepted = set()
    for key in known:
        accepted.update((key, key.replace("_", " ")))
    for c in requested:
        if c.lower() not in accepted:
            print(f"UPOZORENJE: kovarijata '{c}' nije podržana (podržane: A_min, A_max).",
                  file=sys.stderr)

    cov_results = {}
    for key, (label, col_candidates) in known.items():
        if key not in names and key.replace("_", " ") not in names:
            continue
        cov_col = find_col(df, *col_candidates)
        if cov_col is None:
            print(f"UPOZORENJE: stupac za kovarijatu {label} nije pronađen.",
                  file=sys.stderr)
            continue
        info, e_hat, e_new = ancova_single(e, df[cov_col], meta["df_error"], meta["SST"])
        cov_results[label] = info
        per_row[label] = df[cov_col]
        per_row[f"ê({label})"] = e_hat
        per_row[f"e_new({label})"] = e_new

    # --- keep / drop recommendation per covariate ------------------------
    MS_err_old = meta["MS_error"]
    decisions = []
    for nm, info in cov_results.items():
        # Keep when the error mean square drops and the covariate is either
        # significant at 5 % or its p-value is unavailable (NaN).
        p_ok = math.isnan(info["p"]) or info["p"] < 0.05
        keep = (info["MS_err_new"] < MS_err_old) and p_ok
        decisions.append({
            "Kovarijata": nm,
            "MS_err_old": MS_err_old,
            "MS_err_new": info["MS_err_new"],
            "F": info["F_cov"],
            "p": info["p"],
            "ΔR²_vs_SST": info["Delta_R2_vs_SST"],
            "Preporuka_zadrzati": bool(keep),
        })

    # Recompute the factor F ratios with the smallest improved error term.
    anova_updated = anova.copy()
    valid = [d for d in decisions if d["MS_err_new"] < MS_err_old]
    if valid:
        best = min(valid, key=lambda d: d["MS_err_new"])
        anova_updated["F (nakon ANCOVA)"] = anova_updated["MS"] / best["MS_err_new"]

    # --- Excel output ----------------------------------------------------
    out = Path(args.out)
    used_sheets = set()
    with pd.ExcelWriter(out, engine="xlsxwriter") as wr:
        df.to_excel(wr, sheet_name=_safe_sheet_name("Raw", used_sheets), index=False)
        anova.to_excel(wr, sheet_name=_safe_sheet_name("ANOVA_Taguchi", used_sheets), index=False)
        pd.DataFrame([{
            "N": meta["N"], "mu": meta["mu"], "SST": meta["SST"],
            "SS_error": meta["SS_error"], "df_error": meta["df_error"],
            "MS_error": meta["MS_error"],
        }]).to_excel(wr, sheet_name=_safe_sheet_name("Meta", used_sheets), index=False)
        for f in factors:
            # Factor labels may contain brackets or be long; sanitise them.
            pd.DataFrame({
                "Razina": list(lvl_means[f].keys()),
                "mean σ": list(lvl_means[f].values()),
            }).to_excel(wr, sheet_name=_safe_sheet_name(f"Mean_{f}", used_sheets), index=False)
        if cov_results:
            for nm, info in cov_results.items():
                pd.DataFrame([info]).to_excel(
                    wr, sheet_name=_safe_sheet_name(f"ANCOVA_{nm}_info", used_sheets), index=False)
            per_row.to_excel(wr, sheet_name=_safe_sheet_name("ANCOVA_rows", used_sheets), index=False)
            anova_updated.to_excel(
                wr, sheet_name=_safe_sheet_name("ANOVA_updated", used_sheets), index=False)

    # --- console summary -------------------------------------------------
    print("ANOVA glavni učinci:\n", anova.to_string(index=False))
    if cov_results:
        print("\nANCOVA rezultati:")
        for nm, info in cov_results.items():
            print(f" {nm}: F={info['F_cov']:.4g}, p={info['p']:.4g}, MS_err_new={info['MS_err_new']:.4g}, ΔR²={info['Delta_R2_vs_SST']:.4g}")
    if decisions:
        # NOTE: pandas must not be re-imported here; a function-local
        # "import pandas as pd" would make pd a local name for all of main()
        # and break every earlier use of it.
        print("\nPreporuke:\n", pd.DataFrame(decisions).to_string(index=False))
    print(f"\nExcel zapisano u: {out}")


if __name__ == "__main__":
    main()