In this post, we apply several ML and DL algorithms to a dataset compiled from previous studies in the literature on spray cooling systems. The target variable is the Nusselt number (Nu). The methods applied are: 1- Random Forest Regression (RFR), 2- Support Vector Regression (SVR), 3- Decision Tree Regression (DTR), 4- XGBoost, 5- LightGBM, and 6- Multilayer Perceptron (MLP).
1- Random Forest Regression
Importing needed libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
Loading dataset:
lnk = 'https://raw.githubusercontent.com/mlori77/ANN/refs/heads/main/htttt.csv'
data = pd.read_csv(lnk)
Encoding:
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first = True)
Correlation matrix:
data.drop(['Fluid', 'SAT'], inplace = True, axis = 1)
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16,12))
sns.heatmap(correlation_matrix, annot=True,fmt=".2f", cmap="Greens")
plt.show()
Randomly splitting the dataset into training and testing subsets
Y = pd.DataFrame(data['Nu'], columns = ['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace = True, axis = 1)
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size = 0.2, random_state = 0)
y_test.columns = ['y_test']
Model definition, hyperparameter tuning and prediction
# Random forest: randomized hyperparameter search (100 draws, 3-fold CV).
# A single seeded estimator is used. The original script rebuilt `rf` without
# `random_state` right before the search, which silently dropped the seed and
# made the fitted forests non-reproducible.
rf = RandomForestRegressor(random_state=42)

n_estimators = np.arange(200, 2001, 200)
# 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3. For a
# regressor it meant "consider every feature", which is the float 1.0
# (the int 1 would mean "one feature").
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# y_train is a one-column DataFrame; pass a 1-D array so scikit-learn does not
# emit a DataConversionWarning on every fit.
rf_random.fit(X_train, y_train.to_numpy().ravel())
rf_random.best_params_
predict = rf_random.predict(X_test)
2- Support Vector Regression
Importing needed libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
Loading dataset:
data = pd.read_csv("heatml.csv")
Encoding:
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)
Correlation matrix:
# Take the snapshot this section plots. Without this line `corr_data` is
# either undefined (NameError) or left over from a previously run section.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()
Randomly splitting the dataset into training and testing subsets and scaling
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
Model definition, hyperparameter tuning and prediction
# Support vector regression: randomized hyperparameter search
# (100 draws, 5-fold CV) over the scaled features.
svm = SVR()
C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]
random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# SVR expects a 1-D target. y_train is a one-column DataFrame, which would
# raise a DataConversionWarning on every CV fit, so flatten it first.
svm_random.fit(X_train, y_train.to_numpy().ravel())
svm_random.best_params_
predict = svm_random.predict(X_test)
3- Decision Tree
Importing needed libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
Loading dataset:
data = pd.read_csv("heatml.csv")
Encoding:
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first = True)
data.drop(['Fluid', 'SAT'], inplace = True, axis = 1)
Correlation matrix:
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16,12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()
Randomly splitting the dataset into training and testing subsets
Y = pd.DataFrame(data['Nu'], columns = ['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace = True, axis = 1)
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size = 0.2, random_state = 0)
y_test.columns = ['y_test']
Model definition, hyperparameter tuning and prediction
# Decision tree: randomized hyperparameter search (100 draws, 3-fold CV).
# The seeded estimator is kept. The original rebuilt `dt` without
# `random_state` before the search, discarding the seed.
dt = DecisionTreeRegressor(random_state=42)
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 'auto' (all features for a regressor) was removed in scikit-learn 1.3.
# The float 1.0 is the equivalent spelling and keeps three entries in the list.
max_features = [1.0, 'sqrt', None]
# 'mse' and 'mae' were renamed in scikit-learn 1.0 and the old names were
# removed in 1.2.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']
random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
dt_random.fit(X_train, y_train.to_numpy().ravel())
dt_random.best_params_
predict = dt_random.predict(X_test)
4- XGBoost
Importing and installing needed libraries:
!pip install xgboost
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
Loading dataset:
data = pd.read_csv("heatml.csv")
Encoding:
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)
Correlation matrix:
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16,12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()
Randomly splitting the dataset into training and testing subsets
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']
Model definition, hyperparameter tuning and prediction
# XGBoost regressor tuned by randomized search: 100 sampled configurations,
# each scored with 3-fold cross-validation on the training split.
xg_reg = XGBRegressor(random_state=42)
param_dist = dict(
    n_estimators=np.arange(200, 2001, 200),
    max_depth=list(range(3, 16)),
    learning_rate=[0.001, 0.01, 0.1, 0.2, 0.3],
    subsample=[0.5, 0.7, 1.0],
    colsample_bytree=[0.5, 0.7, 1.0],
    gamma=[0, 0.1, 0.2, 0.3],
    min_child_weight=[1, 5, 10],
)
random_search = RandomizedSearchCV(
    xg_reg,
    param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)
random_search.best_params_
predict = random_search.predict(X_test)
If you get the error “'super' object has no attribute '__sklearn_tags__'”, try running “pip install scikit-learn==1.1.3” to downgrade your scikit-learn.
5- LightGBM
Importing and installing needed libraries:
!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV
Loading dataset:
data = pd.read_csv("heatml.csv")
Encoding:
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)
Correlation matrix:
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16,12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()
Randomly splitting the dataset into training and testing subsets
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop([ 'um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']
Model definition, hyperparameter tuning and prediction
# LightGBM regressor tuned by randomized search (100 draws, 3-fold CV).
# `subsample` (bagging fraction) is only honoured by LightGBM when
# `subsample_freq` is non-zero. With the default of 0, the `subsample` values
# sampled below would have no effect, so bagging is enabled on every iteration.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': np.arange(3, 15, 1),
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
random_search.fit(X_train, y_train.to_numpy().ravel())
random_search.best_params_
predict = random_search.predict(X_test)
Prediction on Gas Atomizer Sprays
1- RFR
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
# Random forest on the gas-atomizer dataset: load, plot correlations, split,
# tune with a randomized search, and pair predictions with the true values.
data = pd.read_csv("heatml-gas.csv")
# NOTE(review): the one-hot encoding of 'Fluid' is computed but never joined
# back into `data`, so it is not a model input. Confirm this is intended.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target is 'Nu'; it and the listed columns are removed from the features.
Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Keep the seeded estimator. The original rebuilt `rf` without `random_state`
# before the search, which discarded the seed.
rf = RandomForestRegressor(random_state=42)
n_estimators = np.arange(200, 2001, 200)
# 'auto' was removed in scikit-learn 1.3. For regressors it meant all
# features, which is the float 1.0.
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
rf_random.fit(X_train, y_train.to_numpy().ravel())
rf_random.best_params_

predict = rf_random.predict(X_test)
yt = y_test.to_numpy()
# The original called `predict.reshape(6,1)` and discarded the result, and the
# hard-coded 6 would raise for any other test-set size. Reshape by inference
# and keep the result.
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('rfrnuhtgas.csv')
2- SVR
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
# SVR on the gas-atomizer dataset: load, plot correlations, split, scale,
# tune with a randomized search, and pair predictions with the true values.
# `pprint` is used below but this section's import list never imports it,
# so it is imported here to avoid a NameError.
from pprint import pprint

data = pd.read_csv("heatml-gas.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# The scaler is fitted on the training split only and then applied to the
# test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]
random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}
pprint(random_grid)

# The original created `SVR()` twice; one instance is enough.
svm = SVR()
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# SVR expects a 1-D target. Flatten the one-column DataFrame to avoid a
# DataConversionWarning on every fit.
svm_random.fit(X_train, y_train.to_numpy().ravel())
svm_random.best_params_

predict = svm_random.predict(X_test)
yt = y_test.to_numpy()
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('svmnuhtgas.csv')
3- DT
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
# Decision tree on the gas-atomizer dataset: load, plot correlations, split,
# tune with a randomized search, and pair predictions with the true values.
data = pd.read_csv("heatml-gas.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Keep the seeded estimator. The original rebuilt `dt` without `random_state`.
dt = DecisionTreeRegressor(random_state=42)
max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
max_depth.append(None)
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 'auto' (all features for a regressor) was removed in scikit-learn 1.3.
# The float 1.0 is the equivalent spelling.
max_features = [1.0, 'sqrt', None]
# 'mse' and 'mae' were renamed in scikit-learn 1.0 and the old names were
# removed in 1.2.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']
random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
dt_random.fit(X_train, y_train.to_numpy().ravel())
dt_random.best_params_

predict = dt_random.predict(X_test)
yt = y_test.to_numpy()
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('dtnuhtgas.csv')
4- XGBoost
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
# XGBoost on the gas-atomizer dataset: load, plot correlations, split,
# tune with a randomized search, and pair predictions with the true values.
# `pprint` is used below but this section's import list never imports it,
# so it is imported here to avoid a NameError.
from pprint import pprint

data = pd.read_csv("heatml-gas.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

xg_reg = XGBRegressor(random_state=42)
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'gamma': [0, 0.1, 0.2, 0.3],
    'min_child_weight': [1, 5, 10],
}
pprint(param_dist)

random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)
random_search.best_params_

predict = random_search.predict(X_test)
yt = y_test.to_numpy()
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('xgboostgas.csv')
5- LightGBM
!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV
# LightGBM on the gas-atomizer dataset: load, plot correlations, split,
# tune with a randomized search, and pair predictions with the true values.
data = pd.read_csv("heatml-gas.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# `subsample` is only honoured by LightGBM when `subsample_freq` is non-zero.
# With the default of 0, the sampled `subsample` values would have no effect.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': np.arange(3, 15, 1),
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
random_search.fit(X_train, y_train.to_numpy().ravel())
random_search.best_params_

predict = random_search.predict(X_test)
yt = y_test.to_numpy()
lgbm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
lgbm_result.to_csv('lgbmgas_results.csv')
Prediction on Pressure Atomizer Sprays
1- RFR
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
# Random forest on the pressure-atomizer dataset: load, plot correlations,
# split, tune with a randomized search, and pair predictions with true values.
data = pd.read_csv("heatml-pressure.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Keep the seeded estimator. The original rebuilt `rf` without `random_state`.
rf = RandomForestRegressor(random_state=42)
n_estimators = np.arange(200, 2001, 200)
# 'auto' was removed in scikit-learn 1.3. For regressors it meant all
# features, which is the float 1.0.
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
rf_random.fit(X_train, y_train.to_numpy().ravel())
rf_random.best_params_

predict = rf_random.predict(X_test)
yt = y_test.to_numpy()
# The original `predict.reshape(101,1)` discarded its result and hard-coded
# the test-set size. Infer the length and keep the reshaped array.
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('rfrnuhtpressure.csv')
2- SVR
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
# SVR on the pressure-atomizer dataset: load, plot correlations, split, scale,
# tune with a randomized search, and pair predictions with the true values.
data = pd.read_csv("heatml-pressure.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# The scaler is fitted on the training split only and then applied to the
# test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]
random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}

# The original created `SVR()` twice; one instance is enough.
svm = SVR()
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# SVR expects a 1-D target. Flatten the one-column DataFrame to avoid a
# DataConversionWarning on every fit.
svm_random.fit(X_train, y_train.to_numpy().ravel())
svm_random.best_params_

predict = svm_random.predict(X_test)
yt = y_test.to_numpy()
predict = predict.reshape(-1, 1)
svm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
svm_result.to_csv('svmnuhtpressure_svm.csv')
3- DT
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
# Decision tree on the pressure-atomizer dataset: load, plot correlations,
# split, tune with a randomized search, and pair predictions with true values.
data = pd.read_csv("heatml-pressure.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Keep the seeded estimator. The original rebuilt `dt` without `random_state`.
dt = DecisionTreeRegressor(random_state=42)
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 'auto' (all features for a regressor) was removed in scikit-learn 1.3.
# The float 1.0 is the equivalent spelling.
max_features = [1.0, 'sqrt', None]
# 'mse' and 'mae' were renamed in scikit-learn 1.0 and the old names were
# removed in 1.2.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']
random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
dt_random.fit(X_train, y_train.to_numpy().ravel())
dt_random.best_params_

predict = dt_random.predict(X_test)
yt = y_test.to_numpy()
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('dtnuhtpressure.csv')
4- XGBoost
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
# XGBoost on the pressure-atomizer dataset: load the CSV, plot the correlation
# heat map, make an 80/20 split, run a randomized search (100 draws, 3-fold
# CV), then place predictions next to the held-out targets.
data = pd.read_csv("heatml-pressure.csv")
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(
    data, Y, test_size=0.2, random_state=0
)
y_test.columns = ['y_test']

xg_reg = XGBRegressor(random_state=42)
# NOTE(review): this depth range (10..110 plus None) differs from the 3..15
# range used in the other XGBoost sections. Confirm it is intended.
param_dist = dict(
    n_estimators=np.arange(200, 2001, 200),
    max_depth=[*range(10, 111, 10), None],
    learning_rate=[0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    subsample=[0.5, 0.7, 1.0],
    colsample_bytree=[0.5, 0.7, 1.0],
    gamma=[0, 0.1, 0.2, 0.3],
    min_child_weight=[1, 5, 10],
)
random_search = RandomizedSearchCV(
    xg_reg,
    param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)
random_search.best_params_

predict = random_search.predict(X_test)
yt = y_test.to_numpy()
predicted_frame = pd.DataFrame(predict)
actual_frame = pd.DataFrame(yt)
result = pd.concat([predicted_frame, actual_frame], axis=1)
result.to_csv('xgboostpressure.csv')
5- LightGBM
!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV
# LightGBM on the pressure-atomizer dataset: load, plot correlations, split,
# and tune with a randomized search (100 draws, 3-fold CV).
data = pd.read_csv("heatml-pressure.csv")
# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# `subsample` is only honoured by LightGBM when `subsample_freq` is non-zero.
# With the default of 0, the sampled `subsample` values would have no effect.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
random_search.fit(X_train, y_train.to_numpy().ravel())
print("Best hyperparameters found: ", random_search.best_params_)
predict = random_search.predict(X_test)
Testing on an excluded subset
1- RFR
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
# Random forest trained on WholeExcludeCiofalo.csv and evaluated on the
# separate Ciofalo.csv file (no random split in this section).
data = pd.read_csv("WholeExcludeCiofalo.csv")  # becomes X_train / y_train
data1 = pd.read_csv("Ciofalo.csv")             # becomes X_test / y_test

# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])

fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
Y_test = data1[['Nu']].copy()
# The original dropped 'P' and 'Pr' from the test file only, so the test
# matrix could have different columns from the training matrix and `predict`
# would fail. Select exactly the training feature columns, in the same order.
data1 = data1[data.columns]

X_train = data
y_train = Y
X_test = data1
y_test = Y_test

# Keep the seeded estimator. The original rebuilt `rf` without `random_state`.
rf = RandomForestRegressor(random_state=42)
n_estimators = np.arange(200, 2001, 200)
# 'auto' was removed in scikit-learn 1.3. For regressors it meant all
# features, which is the float 1.0.
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
rf_random.fit(X_train, y_train.to_numpy().ravel())
rf_random.best_params_

predict = rf_random.predict(X_test)
yt = y_test.to_numpy()
# The original `predict.reshape(63,1)` discarded its result and hard-coded
# the test-set size. Infer the length and keep the reshaped array.
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('rfrnuhtexc.csv')
2- SVR
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
# SVR trained on WholeExcludeCiofalo.csv and evaluated on the separate
# Ciofalo.csv file (no random split in this section).
# `pprint` is used below but this section's import list never imports it,
# so it is imported here to avoid a NameError.
from pprint import pprint

data = pd.read_csv("WholeExcludeCiofalo.csv")  # becomes X_train / y_train
data1 = pd.read_csv("Ciofalo.csv")             # becomes X_test / y_test

# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])

fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
Y_test = data1[['Nu']].copy()
# The original dropped 'P' and 'Pr' from the test file only, so the scaler
# and model could be applied to a matrix with different columns from the one
# they were fitted on. Select exactly the training columns, in the same order.
data1 = data1[data.columns]

X_train = data
y_train = Y
X_test = data1
y_test = Y_test

# The scaler is fitted on the training data only and then applied to the
# test data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]
random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}
pprint(random_grid)

# The original created `SVR()` twice; one instance is enough.
svm = SVR()
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# SVR expects a 1-D target. Flatten the one-column DataFrame to avoid a
# DataConversionWarning on every fit.
svm_random.fit(X_train, y_train.to_numpy().ravel())
svm_random.best_params_

predict = svm_random.predict(X_test)
yt = y_test.to_numpy()
predict = predict.reshape(-1, 1)
svm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
svm_result.to_csv('svmnuhtexcluded.csv')
3- DT
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
# Decision tree trained on WholeExcludeCiofalo.csv and evaluated on the
# separate Ciofalo.csv file (no random split in this section).
data = pd.read_csv("WholeExcludeCiofalo.csv")  # becomes X_train / y_train
data1 = pd.read_csv("Ciofalo.csv")             # becomes X_test / y_test

# NOTE(review): `fluid_dummy` is never joined back into `data`. Confirm intent.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data = data.drop(columns=['Fluid', 'SAT'])

corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = data[['Nu']].copy()
data = data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])

fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
Y_test = data1[['Nu']].copy()
# Select exactly the training feature columns, in the same order, so the
# test matrix always matches what the model was fitted on. The sibling
# RFR/SVR sections drop a different column list from the test file.
data1 = data1[data.columns]

X_train = data
y_train = Y
X_test = data1
y_test = Y_test

dt = DecisionTreeRegressor(random_state=42)
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 'auto' (all features for a regressor) was removed in scikit-learn 1.3.
# The float 1.0 is the equivalent spelling.
max_features = [1.0, 'sqrt', None]
# 'mse' and 'mae' were renamed in scikit-learn 1.0 and the old names were
# removed in 1.2.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']
random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Pass the target as a 1-D array rather than a one-column DataFrame.
dt_random.fit(X_train, y_train.to_numpy().ravel())
dt_random.best_params_

predict = dt_random.predict(X_test)
yt = y_test.to_numpy()
predict = predict.reshape(-1, 1)
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('dtnuhtexc.csv')
4- XGBoost
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
# Load the two CSV files: the first supplies the training rows, the second
# the test rows (see the X_train / X_test assignments at the end).
data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# Dummy-encode the training 'Fluid' column, then remove the two columns
# that are not kept in the numeric table.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(columns=['Fluid', 'SAT'], inplace=True)

# Heat map of the pairwise correlations between the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap="Greens",
)
plt.show()

# Training target (Nu) as a one-column frame; afterwards the target and the
# excluded property columns are removed from the feature table.
Y = data[['Nu']].copy()
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# Identical preparation for the test file.
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
data1.drop(columns=['Fluid', 'SAT'], inplace=True)
Y_test = data1[['Nu']].copy()
data1.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

X_train, y_train = data, Y
X_test, y_test = data1, Y_test
# XGBoost regressor tuned by a randomized search: 100 sampled settings,
# each scored with 3-fold cross-validation on the training set.
xg_reg = XGBRegressor(random_state=42)
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': list(range(3, 16)),
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'gamma': [0, 0.1, 0.2, 0.3],
    'min_child_weight': [1, 5, 10],
}
random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)
random_search.best_params_

# Predictions for the test rows (first column) beside the true values
# (second column).
predict = random_search.predict(X_test)
yt = y_test.to_numpy()
result = pd.concat(
    [pd.DataFrame(predict), pd.DataFrame(yt)],
    axis="columns",
)
result.to_csv('xgboosexc.csv')
5- LightGBM
!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV
# Load the two CSV files: the first supplies the training rows, the second
# the test rows (see the X_train / X_test assignments at the end).
data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# Dummy-encode the training 'Fluid' column, then remove the two columns
# that are not kept in the numeric table.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Heat map of the pairwise correlations between the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Training target (Nu) as a one-column frame; afterwards the target and the
# excluded property columns are removed from the feature table.
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
# The column list previously opened with a doubled quote (''um'), which is
# a syntax error; it now matches the same drop applied to the test file.
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

# Identical preparation for the test file.
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
data1.drop(['Fluid', 'SAT'], inplace=True, axis=1)
Y_test = pd.DataFrame(data1['Nu'], columns=['Nu'])
data1.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train = data
y_train = Y
X_test = data1
y_test = Y_test
# LightGBM regressor tuned by a randomized search: 100 sampled settings,
# each scored with 3-fold cross-validation on the training set.
#
# subsample_freq=1 turns row bagging on at every boosting iteration.
# LightGBM ignores `subsample` while subsample_freq is at its default of 0,
# which made the 'subsample' entry of the search space a no-op.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': np.arange(3, 15, 1),
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)
random_search.best_params_

# Predictions for the test rows (first column) beside the true values
# (second column).
predict = random_search.predict(X_test)
yt = y_test.to_numpy()
lgbm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
lgbm_result.to_csv('lgbmexcluded.csv')
Predictions by deep neural networks
without dropout
!pip3 install scikeras
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsoluteError
from scikeras.wrappers import KerasRegressor
from matplotlib import pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
# Read the data set and strip the two columns that are not numeric inputs.
df = pd.read_csv('htttt.csv')
df_model = df.drop(columns=['Fluid', 'SAT'])

# Feature matrix: everything except the target and the excluded property
# columns. Target vector: the Nu column.
X1 = df_model.drop(
    columns=['um', 'rho', 'k', 'Nu', 'At'],
).values
y1 = df_model['Nu'].values

# Standardise inputs and target with separate scalers; the target scaler is
# kept so predictions can be mapped back to original units later.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X = x_scaler.fit_transform(X1)
y = y_scaler.fit_transform(y1.reshape(-1, 1))

# Input width read by DNN_model when it builds the first layer.
n_features = X.shape[1]
def DNN_model(layers, lr_rate):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    layers : sequence of int
        Width of each hidden layer, in order; must hold at least one entry.
    lr_rate : float
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    Sequential
        Compiled model with a single linear output unit, trained on and
        reporting mean absolute error.
    """
    first_width = layers[0]
    other_widths = layers[1:]

    net = Sequential()
    # The first hidden layer fixes the input width from the module-level
    # n_features value.
    net.add(
        Dense(
            first_width,
            input_dim=n_features,
            kernel_initializer='glorot_uniform',
            activation="relu",
        )
    )
    net.add(BatchNormalization())

    # NOTE(review): the source lost its indentation; batch normalisation is
    # assumed to follow every hidden layer, as it does for the first one.
    for width in other_widths:
        net.add(Dense(width, kernel_initializer='uniform', activation="relu"))
        net.add(BatchNormalization())

    net.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))
    net.compile(
        optimizer=Adam(learning_rate=lr_rate),
        loss="mae",
        metrics=[MeanAbsoluteError()],
    )
    return net
# Candidate hidden-layer layouts; each tuple lists the layer widths in order.
# (512, 256, 128, 64, 32) used to be listed twice, which made the exhaustive
# grid search refit an identical candidate for every other setting.
layers = [
    (64, 32),
    (128, 64, 32),
    (256, 128, 64, 32),
    (512, 256, 128, 64, 32),
    (128, 64, 32, 16, 8),
    (256, 128, 64, 32, 16),
    (1024, 512, 256, 128, 64),
    (256, 128, 64, 32, 16, 8),
    (512, 256, 128, 64, 32, 16),
    (1024, 512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64, 32, 16),
    (2048, 1024, 512, 256, 128, 64, 32),
]
lr_rate = [0.1, 0.01, 0.001, 0.0001]
batch_size = [64, 32]
epochs = [1000, 100]
param_grid = dict(layers=layers, lr_rate=lr_rate, batch_size=batch_size, epochs=epochs)

# The list-valued layers / lr_rate given to the wrapper are only initial
# values; the grid search sets one concrete value per candidate before
# fitting.
model = KerasRegressor(model=DNN_model, layers=layers, lr_rate=lr_rate, verbose=0)

# Exhaustive search over every combination, scored with 5-fold CV.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X, y)
best_params = grid_result.best_params_
# Cross-validated R2 of the selected configuration on shuffled 5-fold splits.
cv = KFold(n_splits=5, shuffle=True, random_state=3)
selected_model = KerasRegressor(
    model=DNN_model,
    layers=best_params['layers'],
    lr_rate=best_params['lr_rate'],
    batch_size=best_params['batch_size'],
    epochs=best_params['epochs'],
    verbose=0,
)
scores = cross_val_score(selected_model, X, y, scoring='r2', cv=cv, n_jobs=-1)

# Final fit on an 80/20 split; training stops once the validation loss has
# not improved for 20 epochs and the best weights are restored.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = DNN_model(layers=best_params['layers'], lr_rate=best_params['lr_rate'])
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    validation_data=(X_test, y_test),
    verbose=0,
    callbacks=[early_stopping],
)

# Training and validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Performance')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.box(False)
plt.show()

# Metrics on the held-out split (still in standardised target units).
y_pred = model.predict(X_test)
print("R2 score:", r2_score(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

# Map predictions and true values back to the original Nu scale and place
# them side by side (predictions first).
y_original = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
y_test_original = y_scaler.inverse_transform(y_test.reshape(-1, 1))
mlp_result = pd.concat(
    [pd.DataFrame(y_original), pd.DataFrame(y_test_original)],
    axis=1,
)
mlp_result
mlp_result.to_csv('ann_no_dropout.csv')
Using dropout
!pip3 install scikeras
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsoluteError, RootMeanSquaredError
from scikeras.wrappers import KerasRegressor
from matplotlib import pyplot as plt
import seaborn as sns
from tensorflow.keras.callbacks import EarlyStopping
# Read the data set and strip the two columns that are not numeric inputs.
df = pd.read_csv('htttt.csv')
df_model = df.drop(columns=['Fluid', 'SAT'])

# Feature matrix: everything except the target and the excluded property
# columns. Target vector: the Nu column.
X1 = df_model.drop(
    columns=['um', 'rho', 'k', 'Nu', 'At'],
).values
y1 = df_model['Nu'].values

# Standardise inputs and target with separate scalers.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X = x_scaler.fit_transform(X1)
y = y_scaler.fit_transform(y1.reshape(-1, 1))

# Input width read by DNN_model when it builds the first layer.
n_features = X.shape[1]
def DNN_model(layers, lr_rate):
    """Build and compile a fully connected regression network with dropout.

    Parameters
    ----------
    layers : sequence of int
        Width of each hidden layer, in order; must hold at least one entry.
    lr_rate : float
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    Sequential
        Compiled model with a single linear output unit, trained on and
        reporting mean absolute error.
    """
    first_width = layers[0]
    other_widths = layers[1:]

    net = Sequential()
    # The first hidden layer fixes the input width from the module-level
    # n_features value.
    net.add(
        Dense(
            first_width,
            input_dim=n_features,
            kernel_initializer='glorot_uniform',
            activation="relu",
        )
    )
    net.add(BatchNormalization())
    net.add(Dropout(0.2))

    # NOTE(review): the source lost its indentation; batch normalisation and
    # dropout are assumed to follow every hidden layer, as for the first one.
    for width in other_widths:
        net.add(Dense(width, kernel_initializer='uniform', activation="relu"))
        net.add(BatchNormalization())
        net.add(Dropout(0.2))

    net.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))
    net.compile(
        optimizer=Adam(learning_rate=lr_rate),
        loss="mae",
        metrics=[MeanAbsoluteError()],
    )
    return net
# Candidate hidden-layer layouts; each tuple lists the layer widths in order.
layers = [
    (64, 32),
    (128, 64, 32),
    (256, 128, 64, 32),
    (512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64),
    (256, 128, 64, 32, 16, 8),
    (512, 256, 128, 64, 32, 16),
    (1024, 512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64, 32, 16),
    (2048, 1024, 512, 256, 128, 64, 32),
]
lr_rate = [0.1, 0.01, 0.001, 0.0001]
batch_size = [64, 32]
epochs = [1000, 100]
param_grid = {
    'layers': layers,
    'lr_rate': lr_rate,
    'batch_size': batch_size,
    'epochs': epochs,
}

# Exhaustive search over every combination, scored with 5-fold CV.
model = KerasRegressor(
    model=DNN_model,
    layers=layers,
    lr_rate=lr_rate,
    verbose=0,
)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X, y)
print("The best parameters are:", grid_result.best_params_)
best_params = grid_result.best_params_
# Cross-validated R2 of the selected configuration on shuffled 5-fold splits.
cv = KFold(n_splits=5, shuffle=True, random_state=3)
selected_model = KerasRegressor(
    model=DNN_model,
    layers=best_params['layers'],
    lr_rate=best_params['lr_rate'],
    batch_size=best_params['batch_size'],
    epochs=best_params['epochs'],
    verbose=0,
)
scores = cross_val_score(selected_model, X, y, scoring='r2', cv=cv, n_jobs=-1)

# Final fit on an 80/20 split; training stops once the validation loss has
# not improved for 20 epochs and the best weights are restored.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = DNN_model(layers=best_params['layers'], lr_rate=best_params['lr_rate'])
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    validation_data=(X_test, y_test),
    verbose=0,
    callbacks=[early_stopping],
)

# Training and validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Performance')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.box(False)
plt.show()

# Metrics on the held-out split (in standardised target units).
y_pred = model.predict(X_test)
print("R2 score:", r2_score(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)
Using RandomizedSearchCV
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasRegressor
from matplotlib import pyplot as plt
# Download the data set and strip the two columns that are not numeric inputs.
url = 'https://raw.githubusercontent.com/mlori77/ANN/main/heatml.csv'
df = pd.read_csv(url)
df_model = df.drop(columns=['Fluid', 'SAT'])

# Feature matrix: everything except the target and the excluded property
# columns. Target vector: the Nu column.
X1 = df_model.drop(
    columns=['um', 'rho', 'k', 'Nu', 'At'],
).values
y1 = df_model['Nu'].values

# Standardise inputs and target with separate scalers; the target scaler is
# kept so predictions can be mapped back to original units later.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X = x_scaler.fit_transform(X1)
y = y_scaler.fit_transform(y1.reshape(-1, 1))

# Input width read by DNN_model when it builds the first layer.
n_features = X.shape[1]
def DNN_model(layers, lr_rate):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    layers : sequence of int
        Width of each hidden layer, in order; must hold at least one entry.
    lr_rate : float
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    Sequential
        Compiled model with a single linear output unit, trained on and
        reporting mean absolute error.
    """
    # Imported here because this section's import list does not bring
    # MeanAbsoluteError into scope; without it the compile call below
    # raises NameError when the section is run on its own.
    from tensorflow.keras.metrics import MeanAbsoluteError

    DNN = Sequential()
    # The first hidden layer fixes the input width from the module-level
    # n_features value.
    DNN.add(Dense(layers[0], input_dim=n_features, kernel_initializer='glorot_uniform', activation="relu"))
    DNN.add(BatchNormalization())  # Batch Normalization
    # NOTE(review): the source lost its indentation; batch normalisation is
    # assumed to follow every hidden layer, as it does for the first one.
    for nodes in layers[1:]:
        DNN.add(Dense(nodes, kernel_initializer='uniform', activation="relu"))
        DNN.add(BatchNormalization())  # Batch Normalization
    DNN.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))
    opt = Adam(learning_rate=lr_rate)
    DNN.compile(optimizer=opt,
                loss="mae",
                metrics=[MeanAbsoluteError()])
    return DNN
# Candidate hidden-layer layouts; each tuple lists the layer widths in order.
layers = [
    (64, 32),
    (128, 64, 32),
    (256, 128, 64, 32),
    (512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64),
    (256, 128, 64, 32, 16, 8),
    (512, 256, 128, 64, 32, 16),
]
lr_rate = [0.01, 0.001]
batch_size = [32, 64]
epochs = [100, 200]
param_grid = {
    'layers': layers,
    'lr_rate': lr_rate,
    'batch_size': batch_size,
    'epochs': epochs,
}

# Randomized search: 20 sampled combinations, each scored with 5-fold CV.
model = KerasRegressor(
    model=DNN_model,
    layers=layers,
    lr_rate=lr_rate,
    verbose=0,
)
grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=20,
    cv=5,
)
grid_result = grid.fit(X, y)
# Final fit with the best sampled configuration on an 80/20 split; training
# stops once the validation loss has not improved for 20 epochs and the best
# weights are restored.
best_params = grid_result.best_params_
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = DNN_model(layers=best_params['layers'], lr_rate=best_params['lr_rate'])
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    validation_data=(X_test, y_test),
    verbose=0,
    callbacks=[early_stopping],
)

# Training and validation loss per epoch.
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Metrics on the held-out split (still in standardised target units).
y_pred = model.predict(X_test)
print("R2 Score: ", r2_score(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("MAE: ", mae)

# Map predictions and true values back to the original Nu scale and write
# them side by side (predictions first).
y_original = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
y_test_original = y_scaler.inverse_transform(y_test.reshape(-1, 1))
mlp_result = pd.concat(
    [pd.DataFrame(y_original), pd.DataFrame(y_test_original)],
    axis=1,
)
mlp_result.to_csv('ann_results.csv')