library(rsm)
library(MaxPro)
library(dplyr)
library(tidymodels)
library(rules)
library(baguette)
library(finetune)
library(GGally)
Ra modeling - Helical milling of Inconel 718 with round carbide inserts
Libraries loading
The following libraries must be loaded to perform the analysis.
Experimental design
The experimental design is a Box-Behnken design augmented considering the maximum projection criteria.
<- bbd(3,
plan n0 = 2,
coding = list(x1 ~ (fza - 0.875)/0.375, # um/dente
~ (fzt - 0.15)/0.05, # mm/dente
x2 ~ (vc - 40)/20), # m/min
x3 randomize = F)
<- plan[,3:5]
plan01 $x1 <- (plan$x1 - min(plan$x1))/(max(plan$x1) - min(plan$x1))
plan01$x2 <- (plan$x2 - min(plan$x2))/(max(plan$x2) - min(plan$x2))
plan01$x3 <- (plan$x3 - min(plan$x3))/(max(plan$x3) - min(plan$x3))
plan01
set.seed(7)
<- CandPoints(N = 6, p_cont = 3, l_disnum=NULL, l_nom=NULL)
plan_cand
<- MaxProAugment(as.matrix(plan01), plan_cand, nNew = 6,
plan_rand p_disnum=0, l_disnum=NULL, p_nom=0, l_nom=NULL)
<- data.frame(plan_rand$Design)
plan_rand2 colnames(plan_rand2) <- c("x1","x2", "x3")
$x1 <- plan_rand2$x1*(max(plan$x1) - min(plan$x1)) + min(plan$x1)
plan_rand2$x2 <- plan_rand2$x2*(max(plan$x2) - min(plan$x2)) + min(plan$x2)
plan_rand2$x3 <- plan_rand2$x3*(max(plan$x3) - min(plan$x3)) + min(plan$x3)
plan_rand2
<- data.frame(plan_rand2)
plan2 $fza <- plan2$x1*0.375 + 0.875
plan2$fzt <- plan2$x2*0.05 + 0.15
plan2$vc <- plan2$x3*20 + 40
plan2
set.seed(5)
$run.order <- sample(1:nrow(plan2),nrow(plan2),replace=F)
plan2
plan2
x1 x2 x3 fza fzt vc run.order
1 -1.0000000 -1.0000000 0.0000000 0.5000 0.1000000 40.00000 2
2 1.0000000 -1.0000000 0.0000000 1.2500 0.1000000 40.00000 11
3 -1.0000000 1.0000000 0.0000000 0.5000 0.2000000 40.00000 15
4 1.0000000 1.0000000 0.0000000 1.2500 0.2000000 40.00000 19
5 -1.0000000 0.0000000 -1.0000000 0.5000 0.1500000 20.00000 9
6 1.0000000 0.0000000 -1.0000000 1.2500 0.1500000 20.00000 16
7 -1.0000000 0.0000000 1.0000000 0.5000 0.1500000 60.00000 5
8 1.0000000 0.0000000 1.0000000 1.2500 0.1500000 60.00000 7
9 0.0000000 -1.0000000 -1.0000000 0.8750 0.1000000 20.00000 13
10 0.0000000 1.0000000 -1.0000000 0.8750 0.2000000 20.00000 3
11 0.0000000 -1.0000000 1.0000000 0.8750 0.1000000 60.00000 17
12 0.0000000 1.0000000 1.0000000 0.8750 0.2000000 60.00000 6
13 0.0000000 0.0000000 0.0000000 0.8750 0.1500000 40.00000 20
14 0.0000000 0.0000000 0.0000000 0.8750 0.1500000 40.00000 12
15 -0.5000000 -0.5000000 -0.1666667 0.6875 0.1250000 36.66667 14
16 0.8333333 -0.8333333 0.5000000 1.1875 0.1083333 50.00000 4
17 -0.8333333 0.5000000 -0.8333333 0.5625 0.1750000 23.33333 18
18 0.5000000 0.1666667 0.1666667 1.0625 0.1583333 43.33333 8
19 -0.1666667 -0.1666667 -0.5000000 0.8125 0.1416667 30.00000 1
20 0.1666667 0.8333333 0.8333333 0.9375 0.1916667 56.66667 10
Measurement results
Initially measurements are stored in the design for both lubri-cooling types.
<- rbind(plan2, plan2)
plan_
<- c(1.9155556, 3.5155556, 1.0655556, 1.9655556, 1.7655556, 3.2655556, 1.1655556, 2.5655556, 3.1155556, 1.6655556, 3.2655556, 1.3655556, 1.9155556, 2.0155556, 1.9655556, 3.5155556, 0.9655556, 2.5155556, 1.6655556, 2.3155556, 1.6655556, 3.6155556, 0.7655556, 1.9655556, 0.8655556, 2.8155556, 1.0155556, 4.3155556, 2.9155556, 1.4155556, 4.7155556, 1.8655556, 1.8155556, 2.2155556, 1.7655556, 3.4655556, 0.8655556, 2.2155556, 2.3655556, 1.9155556)
rough
<- plan_ %>%
plan_ mutate(lc = rep(c("emulsion", "mql"), each = 20),
Ra = rough)
<- plan_ %>%
plan_ select(fza,fzt,vc,lc,Ra)
plan_
fza fzt vc lc Ra
1 0.5000 0.1000000 40.00000 emulsion 1.9155556
2 1.2500 0.1000000 40.00000 emulsion 3.5155556
3 0.5000 0.2000000 40.00000 emulsion 1.0655556
4 1.2500 0.2000000 40.00000 emulsion 1.9655556
5 0.5000 0.1500000 20.00000 emulsion 1.7655556
6 1.2500 0.1500000 20.00000 emulsion 3.2655556
7 0.5000 0.1500000 60.00000 emulsion 1.1655556
8 1.2500 0.1500000 60.00000 emulsion 2.5655556
9 0.8750 0.1000000 20.00000 emulsion 3.1155556
10 0.8750 0.2000000 20.00000 emulsion 1.6655556
11 0.8750 0.1000000 60.00000 emulsion 3.2655556
12 0.8750 0.2000000 60.00000 emulsion 1.3655556
13 0.8750 0.1500000 40.00000 emulsion 1.9155556
14 0.8750 0.1500000 40.00000 emulsion 2.0155556
15 0.6875 0.1250000 36.66667 emulsion 1.9655556
16 1.1875 0.1083333 50.00000 emulsion 3.5155556
17 0.5625 0.1750000 23.33333 emulsion 0.9655556
18 1.0625 0.1583333 43.33333 emulsion 2.5155556
19 0.8125 0.1416667 30.00000 emulsion 1.6655556
20 0.9375 0.1916667 56.66667 emulsion 2.3155556
21 0.5000 0.1000000 40.00000 mql 1.6655556
22 1.2500 0.1000000 40.00000 mql 3.6155556
23 0.5000 0.2000000 40.00000 mql 0.7655556
24 1.2500 0.2000000 40.00000 mql 1.9655556
25 0.5000 0.1500000 20.00000 mql 0.8655556
26 1.2500 0.1500000 20.00000 mql 2.8155556
27 0.5000 0.1500000 60.00000 mql 1.0155556
28 1.2500 0.1500000 60.00000 mql 4.3155556
29 0.8750 0.1000000 20.00000 mql 2.9155556
30 0.8750 0.2000000 20.00000 mql 1.4155556
31 0.8750 0.1000000 60.00000 mql 4.7155556
32 0.8750 0.2000000 60.00000 mql 1.8655556
33 0.8750 0.1500000 40.00000 mql 1.8155556
34 0.8750 0.1500000 40.00000 mql 2.2155556
35 0.6875 0.1250000 36.66667 mql 1.7655556
36 1.1875 0.1083333 50.00000 mql 3.4655556
37 0.5625 0.1750000 23.33333 mql 0.8655556
38 1.0625 0.1583333 43.33333 mql 2.2155556
39 0.8125 0.1416667 30.00000 mql 2.3655556
40 0.9375 0.1916667 56.66667 mql 1.9155556
Visualization to see lubri-cooling influence on Ra.
<- ggpairs(plan_, columns = 4:5, aes(colour = lc, alpha = 0.1),
gg2 lower = list(continuous = "cor" , combo ="box_no_facet"),
upper = list(continuous = "points", combo = "dot_no_facet")) + theme_bw()
gg2
Modeling and learning workflow
Data spliting
The data is splitted to model training (75%) and validation (25%). The training data is used to tuning the hyperparameters of the models with a 10-fold cross-validation repeated twice.
set.seed(1501)
<- initial_split(plan_, strata = lc)
plan_split
<- training(plan_split)
plan_train <- testing(plan_split)
plan_test
set.seed(1502)
<-
plan_folds vfold_cv(v = 10, plan_train, repeats = 2)
plan_test
fza fzt vc lc Ra
5 0.5000 0.1500000 20.00000 emulsion 1.7655556
10 0.8750 0.2000000 20.00000 emulsion 1.6655556
11 0.8750 0.1000000 60.00000 emulsion 3.2655556
14 0.8750 0.1500000 40.00000 emulsion 2.0155556
17 0.5625 0.1750000 23.33333 emulsion 0.9655556
23 0.5000 0.2000000 40.00000 mql 0.7655556
30 0.8750 0.2000000 20.00000 mql 1.4155556
34 0.8750 0.1500000 40.00000 mql 2.2155556
37 0.5625 0.1750000 23.33333 mql 0.8655556
39 0.8125 0.1416667 30.00000 mql 2.3655556
Recipes
A normalized recipe is conceived to avoid scale and measurement units effects all numerical variables (fza, fzt, and vc) are standardized. The categorical variable (lubri-cooling) was defined as dummy variable.
<-
normalized_rec recipe(Ra ~ ., data = plan_train) %>%
step_normalize(fza,fzt,vc) %>%
step_dummy(all_nominal_predictors())
# poly_recipe <-
# normalized_rec %>%
# step_poly(fza,fzt,vc) %>%
# step_interact(~ all_predictors():all_predictors())
Models
Seven model types were defined: linear regression, neural networks, support vector machine (with radial and polynomial kernels), k-nearest neighbors, extreme gradient boosting, and cubist.
<-
linear_reg_spec linear_reg(penalty = tune(), mixture = tune()) %>%
set_engine("glmnet")
<-
nnet_spec mlp(hidden_units = tune(), penalty = tune(), epochs = tune()) %>%
set_engine("nnet", MaxNWts = 2600) %>%
set_mode("regression")
<-
svm_r_spec svm_rbf(cost = tune(), rbf_sigma = tune()) %>%
set_engine("kernlab") %>%
set_mode("regression")
<-
svm_p_spec svm_poly(cost = tune(), degree = tune()) %>%
set_engine("kernlab") %>%
set_mode("regression")
<-
knn_spec nearest_neighbor(neighbors = tune(), dist_power = tune(), weight_func = tune()) %>%
set_engine("kknn") %>%
set_mode("regression")
<-
xgb_spec boost_tree(tree_depth = tune(), learn_rate = tune(), loss_reduction = tune(),
min_n = tune(), sample_size = tune(), trees = tune()) %>%
set_engine("xgboost") %>%
set_mode("regression")
<-
cubist_spec cubist_rules(committees = tune(), neighbors = tune()) %>%
set_engine("Cubist")
<-
nnet_param %>%
nnet_spec extract_parameter_set_dials() %>%
update(hidden_units = hidden_units(c(1, 27)))
Workflows
Two workflow is defined considering all methods, predictors, and response.
<-
normalized workflow_set(
preproc = list(normalized = normalized_rec),
models = list(linear_reg = linear_reg_spec,
SVM_radial = svm_r_spec,
SVM_poly = svm_p_spec,
KNN = knn_spec,
neural_network = nnet_spec,
XGB = xgb_spec,
Cubist = cubist_spec)
) normalized
# A workflow set/tibble: 7 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 normalized_linear_reg <tibble [1 × 4]> <opts[0]> <list [0]>
2 normalized_SVM_radial <tibble [1 × 4]> <opts[0]> <list [0]>
3 normalized_SVM_poly <tibble [1 × 4]> <opts[0]> <list [0]>
4 normalized_KNN <tibble [1 × 4]> <opts[0]> <list [0]>
5 normalized_neural_network <tibble [1 × 4]> <opts[0]> <list [0]>
6 normalized_XGB <tibble [1 × 4]> <opts[0]> <list [0]>
7 normalized_Cubist <tibble [1 × 4]> <opts[0]> <list [0]>
<-
normalized %>%
normalized option_add(param_info = nnet_param, id = "normalized_neural_network")
normalized
# A workflow set/tibble: 7 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 normalized_linear_reg <tibble [1 × 4]> <opts[0]> <list [0]>
2 normalized_SVM_radial <tibble [1 × 4]> <opts[0]> <list [0]>
3 normalized_SVM_poly <tibble [1 × 4]> <opts[0]> <list [0]>
4 normalized_KNN <tibble [1 × 4]> <opts[0]> <list [0]>
5 normalized_neural_network <tibble [1 × 4]> <opts[1]> <list [0]>
6 normalized_XGB <tibble [1 × 4]> <opts[0]> <list [0]>
7 normalized_Cubist <tibble [1 × 4]> <opts[0]> <list [0]>
<-
all_workflows bind_rows(normalized) %>%
# Make the workflow ID's a little more simple:
mutate(wflow_id = gsub("(simple_)|(normalized_)", "", wflow_id))
all_workflows
# A workflow set/tibble: 7 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 linear_reg <tibble [1 × 4]> <opts[0]> <list [0]>
2 SVM_radial <tibble [1 × 4]> <opts[0]> <list [0]>
3 SVM_poly <tibble [1 × 4]> <opts[0]> <list [0]>
4 KNN <tibble [1 × 4]> <opts[0]> <list [0]>
5 neural_network <tibble [1 × 4]> <opts[1]> <list [0]>
6 XGB <tibble [1 × 4]> <opts[0]> <list [0]>
7 Cubist <tibble [1 × 4]> <opts[0]> <list [0]>
Model tuning
Tuning of the models is performed considering a grid of 25 combinations of the levels of the hyperparameters.
<-
race_ctrl control_race(
save_pred = TRUE,
parallel_over = "everything",
save_workflow = TRUE
)
<-
race_results %>%
all_workflows workflow_map(
"tune_race_anova",
seed = 1503,
resamples = plan_folds,
grid = 25,
control = race_ctrl
)
Results of tuning of all methods in all hyperparameter combinations of the grid.
race_results
# A workflow set/tibble: 7 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 linear_reg <tibble [1 × 4]> <opts[3]> <race[+]>
2 SVM_radial <tibble [1 × 4]> <opts[3]> <race[+]>
3 SVM_poly <tibble [1 × 4]> <opts[3]> <race[+]>
4 KNN <tibble [1 × 4]> <opts[3]> <race[+]>
5 neural_network <tibble [1 × 4]> <opts[4]> <race[+]>
6 XGB <tibble [1 × 4]> <opts[3]> <race[+]>
7 Cubist <tibble [1 × 4]> <opts[3]> <race[+]>
Resuls are sorted considering RMSE.
collect_metrics(race_results) %>%
filter(.metric == "rmse") %>%
arrange(mean)
# A tibble: 21 × 9
wflow_id .config preproc model .metric .estimator mean n std_err
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <int> <dbl>
1 neural_network Preproce… recipe mlp rmse standard 0.414 20 0.0556
2 Cubist Preproce… recipe cubi… rmse standard 0.445 20 0.0643
3 SVM_radial Preproce… recipe svm_… rmse standard 0.445 20 0.0711
4 Cubist Preproce… recipe cubi… rmse standard 0.448 20 0.0638
5 Cubist Preproce… recipe cubi… rmse standard 0.448 20 0.0637
6 Cubist Preproce… recipe cubi… rmse standard 0.449 20 0.0637
7 SVM_poly Preproce… recipe svm_… rmse standard 0.450 20 0.0681
8 neural_network Preproce… recipe mlp rmse standard 0.451 20 0.0582
9 SVM_poly Preproce… recipe svm_… rmse standard 0.458 20 0.0642
10 SVM_poly Preproce… recipe svm_… rmse standard 0.459 20 0.0675
# ℹ 11 more rows
Resuls are sorted considering R^2.
collect_metrics(race_results) %>%
filter(.metric == "rsq") %>%
arrange(desc(mean))
# A tibble: 21 × 9
wflow_id .config preproc model .metric .estimator mean n std_err
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <int> <dbl>
1 neural_network Preproce… recipe mlp rsq standard 0.843 19 0.0543
2 SVM_radial Preproce… recipe svm_… rsq standard 0.841 20 0.0598
3 Cubist Preproce… recipe cubi… rsq standard 0.836 20 0.0534
4 Cubist Preproce… recipe cubi… rsq standard 0.835 20 0.0535
5 Cubist Preproce… recipe cubi… rsq standard 0.835 20 0.0535
6 SVM_poly Preproce… recipe svm_… rsq standard 0.835 20 0.0548
7 Cubist Preproce… recipe cubi… rsq standard 0.833 20 0.0530
8 KNN Preproce… recipe near… rsq standard 0.832 20 0.0558
9 SVM_poly Preproce… recipe svm_… rsq standard 0.819 20 0.0558
10 SVM_poly Preproce… recipe svm_… rsq standard 0.811 20 0.0551
# ℹ 11 more rows
Plotting performance of the methods considering both metrics.
<- collect_metrics(race_results) %>%
IC_rmse filter(.metric == "rmse") %>%
group_by(wflow_id) %>%
filter(mean == min(mean)) %>%
group_by(wflow_id) %>%
arrange(mean) %>%
ungroup()
<- collect_metrics(race_results) %>%
IC_r2 filter(.metric == "rsq") %>%
group_by(wflow_id) %>%
filter(mean == max(mean)) %>%
group_by(wflow_id) %>%
arrange(desc(mean)) %>%
ungroup()
<- bind_rows(IC_rmse, IC_r2)
IC
ggplot(IC, aes(x = factor(wflow_id, levels = unique(wflow_id)), y = mean)) +
facet_wrap(~.metric) +
geom_point(stat="identity", aes(color = wflow_id), pch = 1) +
geom_errorbar(stat="identity", aes(color = wflow_id,
ymin=mean-1.96*std_err,
ymax=mean+1.96*std_err), width=.2) +
labs(y = "", x = "method") + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
Performance in the test data.
<-
best_rmse %>%
race_results extract_workflow_set_result("Cubist") %>%
select_best(metric = "rmse")
best_rmse
# A tibble: 1 × 3
committees neighbors .config
<int> <int> <chr>
1 27 9 Preprocessor1_Model07
<-
Cubist_test_results %>%
race_results extract_workflow("Cubist") %>%
finalize_workflow(best_rmse) %>%
last_fit(split = plan_split)
collect_metrics(Cubist_test_results)
# A tibble: 2 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 rmse standard 0.280 Preprocessor1_Model1
2 rsq standard 0.914 Preprocessor1_Model1
<-
best_rmse2 %>%
race_results extract_workflow_set_result("SVM_radial") %>%
select_best(metric = "rmse")
best_rmse2
# A tibble: 1 × 3
cost rbf_sigma .config
<dbl> <dbl> <chr>
1 2.30 0.0545 Preprocessor1_Model13
<-
SVM_radial_test_results %>%
race_results extract_workflow("SVM_radial") %>%
finalize_workflow(best_rmse2) %>%
last_fit(split = plan_split)
collect_metrics(SVM_radial_test_results)
# A tibble: 2 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 rmse standard 0.294 Preprocessor1_Model1
2 rsq standard 0.923 Preprocessor1_Model1
Plotting predicted versus observed Ra.
<- rbind(Cubist_test_results %>% collect_predictions(),
test_results %>% collect_predictions())
SVM_radial_test_results $method <- c(rep("Cubist", nrow(Cubist_test_results %>% collect_predictions())),
test_resultsrep("SVM_radial", nrow(SVM_radial_test_results %>% collect_predictions())))
%>%
test_results ggplot(aes(x = Ra, y = .pred)) +
facet_grid(cols = vars(method)) +
geom_abline(color = "gray50", lty = 2) +
geom_point(alpha = 0.5) +
coord_obs_pred() +
labs(x = "observed", y = "predicted") +
theme_bw()