fza fzt vc lc Ra
1 0.50 0.10 40 emulsion 1.915556
2 1.25 0.10 40 emulsion 3.515556
3 0.50 0.20 40 emulsion 1.065556
4 1.25 0.20 40 emulsion 1.965556
5 0.50 0.15 20 emulsion 1.765556
6 1.25 0.15 20 emulsion 3.265556
Ra modeling with Cubist - Helical milling of Inconel 718 with round carbide inserts
Loading libraries, defining experimental design, and getting measurement results.
The same as done previously.
Tuning best model again with a wider grid
The best model was the SVM radial. Tuning with a wider grid is performed to improve model performance. A regular grid of 10 values of both cost and \(\sigma\) is considered.
<-
normalized_rec recipe(Ra ~ ., data = plan_train) %>%
step_normalize(fza,fzt,vc) %>%
step_dummy(all_nominal_predictors())
<-
cubist_spec cubist_rules(committees = tune(), neighbors = tune()) %>%
set_engine("Cubist")
<-
cubist_wflow workflow() %>%
add_model(cubist_spec) %>%
add_recipe(normalized_rec)
<- parameters(committees(), neighbors(c(0,9)))
p
<- grid_regular(p, levels = 10)
param_grid
<- tune_grid(
tune_res1
cubist_wflow, resamples = plan_folds,
grid = param_grid
)
Plotting the hyperparameter tuning results.
autoplot(tune_res1) + theme_bw()
Sorting models considering RMSE.
collect_metrics(tune_res1) %>%
arrange(mean)
# A tibble: 200 × 8
committees neighbors .metric .estimator mean n std_err .config
<int> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
1 12 9 rmse standard 0.444 20 0.0645 Preprocessor1_Mo…
2 78 9 rmse standard 0.444 20 0.0645 Preprocessor1_Mo…
3 89 9 rmse standard 0.445 20 0.0644 Preprocessor1_Mo…
4 67 9 rmse standard 0.445 20 0.0644 Preprocessor1_Mo…
5 34 9 rmse standard 0.445 20 0.0644 Preprocessor1_Mo…
6 56 9 rmse standard 0.445 20 0.0643 Preprocessor1_Mo…
7 1 9 rmse standard 0.446 20 0.0643 Preprocessor1_Mo…
8 100 9 rmse standard 0.446 20 0.0642 Preprocessor1_Mo…
9 23 9 rmse standard 0.446 20 0.0642 Preprocessor1_Mo…
10 45 9 rmse standard 0.447 20 0.0642 Preprocessor1_Mo…
# ℹ 190 more rows
Locking to best models performance.
<-
best_rmse %>%
tune_res1 select_best(metric = "rmse")
best_rmse
# A tibble: 1 × 3
committees neighbors .config
<int> <int> <chr>
1 12 9 Preprocessor1_Model020
<-
best_rsq %>%
tune_res1 select_best(metric = "rsq")
best_rsq
# A tibble: 1 × 3
committees neighbors .config
<int> <int> <chr>
1 12 8 Preprocessor1_Model019
The best model considering RMSE is with comittees = 12 and neighboors = 9.
<- finalize_workflow(cubist_wflow, best_rmse)
cubist_final
<- fit(cubist_final, data = plan_train) cubist_final_fit
Final model is then defined with these parameters’ levels. The model is also applied in the test data.
augment(cubist_final_fit, new_data = plan_test) %>%
rsq(truth = Ra, estimate = .pred)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rsq standard 0.914
augment(cubist_final_fit, new_data = plan_test) %>%
rmse(truth = Ra, estimate = .pred)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 0.280
Cubist model structure.
summary(cubist_final_fit$fit$fit$fit)
Call:
cubist.default(x = x, y = y, committees = 12L)
Cubist [Release 2.07 GPL Edition] Thu Nov 16 09:31:26 2023
---------------------------------
Target attribute `outcome'
Read 30 cases (5 attributes) from undefined.data
Model 1:
Rule 1/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4025470]
outcome = 2.2732842 + 0.691 fza - 0.443 fzt
Model 2:
Rule 2/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4023428]
outcome = 2.2718962 + 0.69 fza - 0.444 fzt
Model 3:
Rule 3/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4025469]
outcome = 2.2732841 + 0.691 fza - 0.443 fzt
Model 4:
Rule 4/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4023428]
outcome = 2.2718962 + 0.69 fza - 0.444 fzt
Model 5:
Rule 5/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4025469]
outcome = 2.2732841 + 0.691 fza - 0.443 fzt
Model 6:
Rule 6/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4023428]
outcome = 2.2718962 + 0.69 fza - 0.444 fzt
Model 7:
Rule 7/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4025469]
outcome = 2.2732841 + 0.691 fza - 0.443 fzt
Model 8:
Rule 8/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4023428]
outcome = 2.2718962 + 0.69 fza - 0.444 fzt
Model 9:
Rule 9/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4025469]
outcome = 2.2732841 + 0.691 fza - 0.443 fzt
Model 10:
Rule 10/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4023428]
outcome = 2.2718962 + 0.69 fza - 0.444 fzt
Model 11:
Rule 11/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4025469]
outcome = 2.2732841 + 0.691 fza - 0.443 fzt
Model 12:
Rule 12/1: [30 cases, mean 2.3588889, range 0.8655556 to 4.715556, est err 0.4023428]
outcome = 2.2718962 + 0.69 fza - 0.444 fzt
Evaluation on training data (30 cases):
Average |error| 0.3848325
Relative |error| 0.48
Correlation coefficient 0.77
Attribute usage:
Conds Model
100% fza
100% fzt
Time: 0.0 secs
# library(Cubist)
# library(tidyrules)
#
# cubist_Ra <- cubist(x = plan_train[,1:4], y = plan_train[,5], committees = 12)
# summary(cubist_Ra)
#
# cubist_Ra$usage
# cubist_Ra$coefficients
# cubist_Ra$committees
# cubist_Ra$vars
#
# tidyRules(cubist_Ra) %>%
# select(RHS, committee)
Evaluating the model in the whole training set
The model is evaluated in the whole training data set.
<- predict(cubist_final_fit, new_data = plan_train %>% select(-Ra))
cubist_res <- bind_cols(cubist_res, plan_train %>% select(Ra))
cubist_res head(cubist_res)
# A tibble: 6 × 2
.pred Ra
<dbl> <dbl>
1 1.82 1.92
2 3.62 3.52
3 0.643 1.07
4 2.32 1.97
5 2.94 3.27
6 1.19 1.17
ggplot(cubist_res, aes(x = Ra, y = .pred)) +
# Create a diagonal line:
geom_abline(lty = 2) +
geom_point(alpha = 0.5) +
coord_obs_pred() + theme_bw()
Model interpretation
Effects plots to interpret the model according to process aspects.
##########
<- seq(min(plan_train$fza), max(plan_train$fza), length = 50)
x1_grid <- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_em fzt = 0.15,
vc = 40,
lc = "emulsion"))
<- data.frame(fza = x1_grid,
data_p1_em Ra = ypred_fza_em$.pred,
fzt = 0.15,
vc = 40,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_mql fzt = 0.15,
vc = 40,
lc = "mql"))
<- data.frame(fza = x1_grid,
data_p1_mql Ra = ypred_fza_mql$.pred,
fzt = 0.15,
vc = 40,
lc = "mql")
<- rbind(data_p1_em, data_p1_mql)
data_p1
<- ggplot(data = data_p1, mapping = aes(x = fza, y = Ra, group = lc)) +
p1 geom_line(aes(colour = lc, linetype = lc), linewidth = 1.2) +
ylim(1,3.05) +
scale_color_manual(values = c("red", "blue")) +
theme_bw()
##########
<- seq(min(plan_train$fzt), max(plan_train$fzt), length = 50)
x2_grid <- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_fzt_em fzt = x2_grid,
vc = 40,
lc = "emulsion"))
<- data.frame(fza = 0.875,
data_p2_em Ra = ypred_fzt_em$.pred,
fzt = x2_grid,
vc = 40,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_fzt_mql fzt = x2_grid,
vc = 40,
lc = "mql"))
<- data.frame(fza = 0.875,
data_p2_mql Ra = ypred_fzt_mql$.pred,
fzt = x2_grid,
vc = 40,
lc = "mql")
<- rbind(data_p2_em, data_p2_mql)
data_p2
<- ggplot(data = data_p2, mapping = aes(x = fzt, y = Ra, group = lc)) +
p2 geom_line(aes(colour = lc, linetype = lc), linewidth = 1.2) +
ylim(1,3.05) +
scale_color_manual(values = c("red", "blue")) +
theme_bw()
##########
<- seq(min(plan_train$vc), max(plan_train$vc), length = 50)
x3_grid <- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_vc_em fzt = 0.15,
vc = x3_grid,
lc = "emulsion"))
<- data.frame(fza = 0.875,
data_p3_em Ra = ypred_fzt_em$.pred,
fzt = 0.15,
vc = x3_grid,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_vc_mql fzt = 0.15,
vc = x3_grid,
lc = "mql"))
<- data.frame(fza = 0.875,
data_p3_mql Ra = ypred_vc_mql$.pred,
fzt = 0.15,
vc = x3_grid,
lc = "mql")
<- rbind(data_p3_em, data_p3_mql)
data_p3
<- ggplot(data = data_p3, mapping = aes(x = vc, y = Ra, group = lc)) +
p3 geom_line(aes(colour = lc, linetype = lc), linewidth = 1.2) +
ylim(1,3.05) +
scale_color_manual(values = c("red", "blue")) +
theme_bw()
ggarrange(p1 , p2, p3, common.legend = T, nrow = 1)
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_em_a fzt = 0.1,
vc = 40,
lc = "emulsion"))
<- data.frame(fza = x1_grid,
data_p1_em_a Ra = ypred_fza_em_a$.pred,
fzt = 0.1,
vc = 40,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_mql_a fzt = 0.1,
vc = 40,
lc = "mql"))
<- data.frame(fza = x1_grid,
data_p1_mql_a Ra = ypred_fza_mql_a$.pred,
fzt = 0.1,
vc = 40,
lc = "mql")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_em_b fzt = 0.2,
vc = 40,
lc = "emulsion"))
<- data.frame(fza = x1_grid,
data_p1_em_b Ra = ypred_fza_em_b$.pred,
fzt = 0.2,
vc = 40,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_mql_b fzt = 0.2,
vc = 40,
lc = "mql"))
<- data.frame(fza = x1_grid,
data_p1_mql_b Ra = ypred_fza_mql_b$.pred,
fzt = 0.2,
vc = 40,
lc = "mql")
<- rbind(data_p1, data_p1_em_a, data_p1_mql_a,
data_p1_fza_fzt
data_p1_em_b, data_p1_mql_b)
<- ggplot(data_p1_fza_fzt, aes(y = Ra, x = fza, group = fzt)) +
pp12 geom_line(aes(color = fzt), linewidth = 1.2) +
# scale_fill_binned(type = "viridis") +
scale_color_gradient(low="blue", high="red") +
facet_grid(cols = vars(lc), scales = "free") +
ylim(.4,4) +
theme_bw()
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_em_c fzt = 0.15,
vc = 20,
lc = "emulsion"))
<- data.frame(fza = x1_grid,
data_p1_em_c Ra = ypred_fza_em_c$.pred,
fzt = 0.15,
vc = 20,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_mql_c fzt = 0.15,
vc = 20,
lc = "mql"))
<- data.frame(fza = x1_grid,
data_p1_mql_c Ra = ypred_fza_mql_c$.pred,
fzt = 0.15,
vc = 20,
lc = "mql")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_em_d fzt = 0.15,
vc = 60,
lc = "emulsion"))
<- data.frame(fza = x1_grid,
data_p1_em_d Ra = ypred_fza_em_d$.pred,
fzt = 0.15,
vc = 60,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = x1_grid,
ypred_fza_mql_d fzt = 0.15,
vc = 60,
lc = "mql"))
<- data.frame(fza = x1_grid,
data_p1_mql_d Ra = ypred_fza_mql_d$.pred,
fzt = 0.15,
vc = 60,
lc = "mql")
<- rbind(data_p1, data_p1_em_c, data_p1_mql_c,
data_p1_fza_vc
data_p1_em_d, data_p1_mql_d)
<- ggplot(data_p1_fza_vc, aes(y = Ra, x = fza, group = vc)) +
pp13 geom_line(aes(color = vc), linewidth = 1.2) +
scale_color_gradient(low="blue", high="red") +
facet_grid(cols = vars(lc), scales = "free") +
ylim(.4,4) +
theme_bw()
<- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_fzt_em_c fzt = x2_grid,
vc = 20,
lc = "emulsion"))
<- data.frame(fza = 0.875,
data_p2_em_c fzt = x2_grid,
Ra = ypred_fzt_em_c$.pred,
vc = 20,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_fzt_mql_c fzt = x2_grid,
vc = 20,
lc = "mql"))
<- data.frame(fza = 0.875,
data_p2_mql_c fzt = x2_grid,
Ra = ypred_fzt_mql_c$.pred,
vc = 20,
lc = "mql")
<- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_fzt_em_d fzt = x2_grid,
vc = 60,
lc = "emulsion"))
<- data.frame(fza = 0.875,
data_p2_em_d fzt = x2_grid,
Ra = ypred_fzt_em_d$.pred,
vc = 60,
lc = "emulsion")
<- predict(cubist_final_fit, new_data = data.frame(fza = 0.875,
ypred_fzt_mql_d fzt = x2_grid,
vc = 60,
lc = "mql"))
<- data.frame(fza = 0.875,
data_p2_mql_d fzt = x2_grid,
Ra = ypred_fzt_mql_d$.pred,
vc = 60,
lc = "mql")
<- rbind(data_p2, data_p2_em_c, data_p2_mql_c,
data_p2_fzt_vc
data_p2_em_d, data_p2_mql_d)
<- ggplot(data_p2_fzt_vc, aes(y = Ra, x = fzt, group = vc)) +
pp23 geom_line(aes(color = vc), linewidth = 1.2) +
scale_color_gradient(low="blue", high="red") +
facet_grid(cols = vars(lc), scales = "free") +
ylim(.4,4) +
theme_bw()
ggarrange(pp12,pp13,pp23, nrow = 3)
Countour plots.
#######################
<- seq(min(plan_train$fza), max(plan_train$fza), length = 30)
x1_grid <- seq(min(plan_train$fzt), max(plan_train$fzt), length = 30)
x2_grid <- seq(min(plan_train$vc), max(plan_train$vc), length = 30)
x3_grid
#######################
<- expand.grid(fza = x1_grid,
grid_12_em fzt = x2_grid,
vc = x3_grid, lc = "emulsion")
<- predict(cubist_final_fit, new_data = grid_12_em)
y_hat_12_em $Ra <- y_hat_12_em$.pred
grid_12_em
<- expand.grid(fza = x1_grid,
grid_12_mql fzt = x2_grid,
vc = x3_grid, lc = "mql")
<- predict(cubist_final_fit, new_data = grid_12_mql)
y_hat_12_mql $Ra <- y_hat_12_mql$.pred
grid_12_mql
<- rbind(grid_12_em, grid_12_mql)
grid_12
<- ggplot(data = grid_12,
cp12 mapping = aes(x = fza, y = fzt, z = Ra)) +
geom_tile(aes(fill=Ra)) +
facet_grid(cols = vars(lc), scales = "free") +
scale_fill_distiller(palette = "RdBu",
direction = -1) +
geom_contour(color = "black") +
theme_bw()
#######################
<- expand.grid(fza = x1_grid,
grid_13_em fzt = x2_grid,
vc = x3_grid, lc = "emulsion")
<- predict(cubist_final_fit, new_data = grid_13_em)
y_hat_13_em $Ra <- y_hat_13_em$.pred
grid_13_em
<- expand.grid(fza = x1_grid,
grid_13_mql fzt = x2_grid,
vc = x3_grid, lc = "mql")
<- predict(cubist_final_fit, new_data = grid_13_mql)
y_hat_13_mql $Ra <- y_hat_13_mql$.pred
grid_13_mql
<- rbind(grid_13_em, grid_13_mql)
grid_13
<- ggplot(data = grid_13,
cp13 mapping = aes(x = fza, y = vc, z = Ra)) +
geom_tile(aes(fill=Ra)) +
facet_grid(cols = vars(lc), scales = "free") +
scale_fill_distiller(palette = "RdBu",
direction = -1) +
geom_contour(color = "black") +
theme_bw()
#######################
<- expand.grid(fza = x1_grid,
grid_23_em fzt = x2_grid,
vc = x3_grid, lc = "emulsion")
<- predict(cubist_final_fit, new_data = grid_23_em)
y_hat_23_em $Ra <- y_hat_23_em$.pred
grid_23_em
<- expand.grid(fza = x1_grid,
grid_23_mql fzt = x2_grid,
vc = x3_grid, lc = "mql")
<- predict(cubist_final_fit, new_data = grid_23_mql)
y_hat_23_mql $Ra <- y_hat_23_mql$.pred
grid_23_mql
<- rbind(grid_23_em, grid_23_mql)
grid_23
<- ggplot(data = grid_23,
cp23 mapping = aes(x = fzt, y = vc, z = Ra)) +
geom_tile(aes(fill=Ra)) +
facet_grid(cols = vars(lc), scales = "free") +
scale_fill_distiller(palette = "RdBu",
direction = -1) +
geom_contour(color = "black") +
theme_bw()
ggarrange(cp12,cp13,cp23, nrow = 3)
Variance importance is also measured.
<- c("fza", "fzt", "vc", "lc")
vip_features
<-
vip_train %>%
plan_train select(all_of(vip_features))
<-
explainer_cubist explain_tidymodels(
cubist_final_fit, data = plan_train %>% select(-Ra),
y = plan_train$Ra,
verbose = FALSE
)
set.seed(1803)
<- model_parts(explainer_cubist, loss_function = loss_root_mean_square) vip_cubist
<- function(...) {
ggplot_imp <- list(...)
obj <- attr(obj[[1]], "loss_name")
metric_name <- paste(metric_name,
metric_lab "after permutations\n(higher indicates more important)")
<- bind_rows(obj) %>%
full_vip filter(variable != "_baseline_")
<- full_vip %>%
perm_vals filter(variable == "_full_model_") %>%
group_by(label) %>%
summarise(dropout_loss = mean(dropout_loss))
<- full_vip %>%
p filter(variable != "_full_model_") %>%
mutate(variable = fct_reorder(variable, dropout_loss)) %>%
ggplot(aes(dropout_loss, variable))
if(length(obj) > 1) {
<- p +
p facet_wrap(vars(label)) +
geom_vline(data = perm_vals, aes(xintercept = dropout_loss, color = label),
linewidth = 1.4, lty = 2, alpha = 0.7) +
geom_boxplot(aes(color = label, fill = label), alpha = 0.2)
else {
} <- p +
p geom_vline(data = perm_vals, aes(xintercept = dropout_loss),
linewidth = 1.4, lty = 2, alpha = 0.7) +
geom_boxplot(fill = "sandybrown", alpha = 0.4)
}+
p theme(legend.position = "none") +
labs(x = metric_lab,
y = NULL, fill = NULL, color = NULL)
}
ggplot_imp(vip_cubist) + labs(x = "RMSE after permutations") + theme_bw()