Spray Cooling Prediction by ML and DL

In this post, we apply several ML and DL algorithms to a dataset compiled from previous studies in the literature on spray cooling systems. The target variable is the Nusselt number (Nu). The applied methods are: 1- Random Forest Regression (RFR) 2- Support Vector Regression (SVR) 3- Decision Tree Regression (DTR) 4- XGBoost 5- LightGBM and 6- Multilayer Perceptron (MLP)

1- Random Forest Regression

Importing needed libraries:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

Loading dataset:

# Raw GitHub URL of the CSV file holding the compiled dataset.
lnk = 'https://raw.githubusercontent.com/mlori77/ANN/refs/heads/main/htttt.csv'
# pandas downloads and parses the file directly from the URL.
data = pd.read_csv(filepath_or_buffer=lnk)

Encoding:



# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code, so the
# encoded fluid type never reaches the model - confirm this is intended.
fluid_dummy = pd.get_dummies(data=data['Fluid'], drop_first=True)

Correlation matrix:

# Remove the raw 'Fluid' column together with 'SAT' from the working frame.
data.drop(columns=['Fluid', 'SAT'], inplace=True)

# Pairwise correlation of the remaining columns, computed on a copy so that
# `data` itself is left untouched.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, cmap="Greens", annot=True, fmt=".2f")
plt.show()

Randomly splitting the dataset into training and testing subsets

# Target: the Nusselt number, kept as a one-column DataFrame.
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
# Everything left in `data` after this drop is used as a model input.
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# 80/20 random split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    Y,
    test_size=0.2,
    random_state=0,
)

# Rename the test target column.
y_test.columns = ['y_test']

Model definition, Hyperparameters tuning and Prediction


# Base estimator. It is created once and seeded so that repeated runs of the
# search grow the same forests (the original re-created it without a seed).
rf = RandomForestRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
# Number of trees: 200, 400, ..., 2000.
n_estimators = np.arange(200, 2001, 200)
# Features considered at each split. 1.0 (all features) is what 'auto' meant
# for regressors; the 'auto' alias was removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt']
# Maximum tree depth; None means no depth limit.
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
# Minimum number of samples required to split an internal node.
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
# Minimum number of samples required at a leaf.
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# Whether each tree is trained on a bootstrap sample.
bootstrap = [True, False]

random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Flatten the one-column target to 1-D; a column vector triggers scikit-learn's
# DataConversionWarning on every fit.
rf_random.fit(X_train, y_train.to_numpy().ravel())

rf_random.best_params_

# Predict on the held-out rows with the refitted best estimator.
predict = rf_random.predict(X_test)

2- Support Vector Regression

Importing needed libraries:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV

Loading dataset:

# Read the dataset from a CSV file located relative to the working directory.
data = pd.read_csv(filepath_or_buffer="heatml.csv")

Encoding:




# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_column = data['Fluid']
fluid_dummy = pd.get_dummies(fluid_column, drop_first=True)
# Remove the raw 'Fluid' column together with 'SAT' from the working frame.
data.drop(columns=['Fluid', 'SAT'], inplace=True)

Correlation matrix:


# Copy the current frame for the correlation plot. This assignment was missing
# in this section, so `corr_data` was either undefined (NameError) or a stale
# frame left over from an earlier cell.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

# Annotated heatmap of the pairwise correlations.
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Randomly splitting the dataset into training and testing subsets and Scaling

# Target: the Nusselt number, kept as a one-column DataFrame.
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
# Everything left in `data` after this drop is used as a model input.
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# 80/20 random split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    Y,
    test_size=0.2,
    random_state=0,
)
y_test.columns = ['y_test']

# Standardize the features. The scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Model definition, Hyperparameters tuning and Prediction


# Base estimator with default settings; the search below overrides them.
svm = SVR()

# --- Hyperparameter search space --------------------------------------------
# Regularization strength.
C = [0.01, 0.1, 1, 10, 100, 1000]
# Width of the epsilon-insensitive tube.
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
# Kernel type.
kernel = ['rbf', 'poly', 'sigmoid']
# Kernel coefficient.
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]

random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}

# Randomized search: 100 sampled settings, 5-fold cross-validation, all cores.
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

# SVR expects a 1-D target; passing the one-column DataFrame triggers a
# DataConversionWarning on every fit, so flatten it once here.
svm_random.fit(X_train, y_train.to_numpy().ravel())

svm_random.best_params_

# Predict on the (scaled) held-out rows with the refitted best estimator.
predict = svm_random.predict(X_test)

3- Decision Tree

Importing needed libraries:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV

Loading dataset:

# Read the dataset from a CSV file located relative to the working directory.
data = pd.read_csv(filepath_or_buffer="heatml.csv")

Encoding:



# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data=data['Fluid'], drop_first=True)
# Remove the raw 'Fluid' column together with 'SAT' from the working frame.
data.drop(columns=['Fluid', 'SAT'], inplace=True)

Correlation matrix:

# Pairwise correlation of the current columns, computed on a copy of `data`.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, cmap="Greens", annot=True, fmt=".2f")
plt.show()

Randomly splitting the dataset into training and testing subsets

# Target: the Nusselt number, kept as a one-column DataFrame.
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
# Everything left in `data` after this drop is used as a model input.
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# 80/20 random split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    Y,
    test_size=0.2,
    random_state=0,
)
y_test.columns = ['y_test']

Model definition, Hyperparameters tuning and Prediction


# Base estimator. It is created once and seeded so the search is reproducible
# (the original re-created it without a seed before the search).
dt = DecisionTreeRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
# Maximum tree depth; None means no depth limit.
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
# Minimum number of samples required to split an internal node.
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
# Minimum number of samples required at a leaf.
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# Features considered at each split. 1.0 (all features) replaces the 'auto'
# alias, which was removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt', None]
# Split quality measures. 'squared_error' and 'absolute_error' are the current
# names of the removed 'mse' and 'mae' aliases (renamed in scikit-learn 1.0).
criterion = ['squared_error', 'friedman_mse', 'absolute_error']

random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

dt_random.fit(X_train, y_train)

dt_random.best_params_

# Predict on the held-out rows with the refitted best estimator.
predict = dt_random.predict(X_test)

4- XGBoost

Importing and installing needed libraries:

!pip install xgboost
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV

Loading dataset:

# Read the dataset from a CSV file located relative to the working directory.
data = pd.read_csv(filepath_or_buffer="heatml.csv")

Encoding:



# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_column = data['Fluid']
fluid_dummy = pd.get_dummies(fluid_column, drop_first=True)

# Remove the raw 'Fluid' column together with 'SAT' from the working frame.
data.drop(columns=['Fluid', 'SAT'], inplace=True)

Correlation matrix:

# Pairwise correlation of the current columns, computed on a copy of `data`.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, cmap="Greens", annot=True, fmt=".2f")
plt.show()

Randomly splitting the dataset into training and testing subsets

# Target: the Nusselt number, kept as a one-column DataFrame.
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
# Everything left in `data` after this drop is used as a model input.
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# 80/20 random split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    Y,
    test_size=0.2,
    random_state=0,
)
y_test.columns = ['y_test']

Model definition, Hyperparameters tuning and Prediction

# Seeded gradient-boosting regressor used as the base estimator of the search.
xg_reg = XGBRegressor(random_state=42)

# Candidate values for the randomized search.
param_dist = {
    # Number of boosting rounds: 200, 400, ..., 2000.
    'n_estimators': np.arange(200, 2001, 200),
    # Tree depths 3 through 15.
    'max_depth': list(range(3, 16)),
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    # Row and column subsampling fractions per tree.
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    # Minimum loss reduction required to make a further split.
    'gamma': [0, 0.1, 0.2, 0.3],
    'min_child_weight': [1, 5, 10],
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

random_search.best_params_

# Predict on the held-out rows with the refitted best estimator.
predict = random_search.predict(X_test)

If you get the error “‘super’ object has no attribute ‘__sklearn_tags__’”, try downgrading scikit-learn by running “pip install scikit-learn==1.1.3”.

5- LightGBM

Importing and installing needed libraries:

!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV

Loading dataset:

# Read the dataset from a CSV file located relative to the working directory.
data = pd.read_csv(filepath_or_buffer="heatml.csv")

Encoding:




# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data=data['Fluid'], drop_first=True)

# Remove the raw 'Fluid' column together with 'SAT' from the working frame.
data.drop(columns=['Fluid', 'SAT'], inplace=True)

Correlation matrix:


# Pairwise correlation of the current columns, computed on a copy of `data`.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, cmap="Greens", annot=True, fmt=".2f")
plt.show()

Randomly splitting the dataset into training and testing subsets


# Target: the Nusselt number, kept as a one-column DataFrame.
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
# Everything left in `data` after this drop is used as a model input.
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# 80/20 random split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    Y,
    test_size=0.2,
    random_state=0,
)
y_test.columns = ['y_test']

Model definition, Hyperparameters tuning and Prediction


# Seeded LightGBM regressor used as the base estimator of the search.
# subsample_freq=1 turns bagging on at every iteration: LightGBM ignores
# `subsample` while subsample_freq is 0 (the default), so without it the
# 'subsample' values searched below would have no effect.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)

# Candidate values for the randomized search.
param_dist = {
    # Number of boosting rounds: 200, 400, ..., 2000.
    'n_estimators': np.arange(200, 2001, 200),
    # Tree depths 3 through 14.
    'max_depth': np.arange(3, 15, 1),
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    # Row and column subsampling fractions.
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    # L1 / L2 regularization strengths.
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

random_search.fit(X_train, y_train)

random_search.best_params_

# Predict on the held-out rows with the refitted best estimator.
predict = random_search.predict(X_test)

Prediction on Gas Atomizer Sprays

1- RFR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV


# Random Forest workflow for the gas-atomizer file: load, inspect, split,
# tune, predict and export.
data = pd.read_csv("heatml-gas.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)

data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Base estimator. It is created once and seeded so the search is reproducible
# (the original re-created it without a seed before the search).
rf = RandomForestRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
n_estimators = np.arange(200, 2001, 200)
# 1.0 (all features) is what 'auto' meant for regressors; the 'auto' alias
# was removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Flatten the one-column target to avoid DataConversionWarning on every fit.
rf_random.fit(X_train, y_train.to_numpy().ravel())

rf_random.best_params_

predict = rf_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions. The original called
# predict.reshape(6, 1) without keeping the result and with a hard-coded size.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)

result.to_csv('rfrnuhtgas.csv')

2- SVR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV


# SVR workflow for the gas-atomizer file: load, inspect, split, scale, tune,
# predict and export.
from pprint import pprint  # used below; the original never imported it

data = pd.read_csv("heatml-gas.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Standardize the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Base estimator with default settings; the search below overrides them.
svm = SVR()

# --- Hyperparameter search space --------------------------------------------
C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]

random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}
pprint(random_grid)

# Randomized search: 100 sampled settings, 5-fold cross-validation, all cores.
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

# SVR expects a 1-D target; flatten the one-column DataFrame to avoid a
# DataConversionWarning on every fit.
svm_random.fit(X_train, y_train.to_numpy().ravel())

svm_random.best_params_

predict = svm_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('svmnuhtgas.csv')

3- DT

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV


# Decision Tree workflow for the gas-atomizer file: load, inspect, split,
# tune, predict and export.
data = pd.read_csv("heatml-gas.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Base estimator. It is created once and seeded so the search is reproducible
# (the original re-created it without a seed before the search).
dt = DecisionTreeRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
# Depths 10, 20, ..., 110 plus None (no depth limit).
max_depth = list(range(10, 111, 10))
max_depth.append(None)
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 1.0 (all features) replaces the 'auto' alias removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt', None]
# Current names of the removed 'mse' / 'mae' aliases.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']

random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

dt_random.fit(X_train, y_train)

dt_random.best_params_

predict = dt_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('dtnuhtgas.csv')

4- XGBoost

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV


# XGBoost workflow for the gas-atomizer file: load, inspect, split, tune,
# predict and export.
from pprint import pprint  # used below; the original never imported it

data = pd.read_csv("heatml-gas.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)

data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Seeded base estimator for the search.
xg_reg = XGBRegressor(random_state=42)

# Candidate values for the randomized search.
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'gamma': [0, 0.1, 0.2, 0.3],
    'min_child_weight': [1, 5, 10],
}

pprint(param_dist)

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

random_search.fit(X_train, y_train)

random_search.best_params_

predict = random_search.predict(X_test)

yt = y_test.to_numpy()

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)

result.to_csv('xgboostgas.csv')

5- LightGBM

!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV


# LightGBM workflow for the gas-atomizer file: load, inspect, split, tune,
# predict and export.
data = pd.read_csv("heatml-gas.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)

data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Seeded base estimator. subsample_freq=1 turns bagging on at every iteration:
# LightGBM ignores `subsample` while subsample_freq is 0 (the default), so
# without it the 'subsample' values searched below would have no effect.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)

# Candidate values for the randomized search.
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': np.arange(3, 15, 1),
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

random_search.fit(X_train, y_train)

random_search.best_params_

predict = random_search.predict(X_test)

yt = y_test.to_numpy()

# Predictions and true values side by side, written to CSV.
lgbm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)

lgbm_result.to_csv('lgbmgas_results.csv')

Prediction on Pressure Atomizer Sprays

1- RFR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV


# Random Forest workflow for the pressure-atomizer file: load, inspect, split,
# tune, predict and export.
data = pd.read_csv("heatml-pressure.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)

data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Base estimator. It is created once and seeded so the search is reproducible
# (the original re-created it without a seed before the search).
rf = RandomForestRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
n_estimators = np.arange(200, 2001, 200)
# 1.0 (all features) is what 'auto' meant for regressors; the 'auto' alias
# was removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Flatten the one-column target to avoid DataConversionWarning on every fit.
rf_random.fit(X_train, y_train.to_numpy().ravel())

rf_random.best_params_

predict = rf_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions. The original called
# predict.reshape(101, 1) without keeping the result and with a hard-coded size.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)

result.to_csv('rfrnuhtpressure.csv')

2- SVR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV


# SVR workflow for the pressure-atomizer file: load, inspect, split, scale,
# tune, predict and export.
data = pd.read_csv("heatml-pressure.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Standardize the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Base estimator with default settings; the search below overrides them.
svm = SVR()

# --- Hyperparameter search space --------------------------------------------
C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]

random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}

# Randomized search: 100 sampled settings, 5-fold cross-validation, all cores.
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

# SVR expects a 1-D target; flatten the one-column DataFrame to avoid a
# DataConversionWarning on every fit.
svm_random.fit(X_train, y_train.to_numpy().ravel())

svm_random.best_params_

predict = svm_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
svm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
svm_result.to_csv('svmnuhtpressure_svm.csv')

3- DT

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV


# Decision Tree workflow for the pressure-atomizer file: load, inspect, split,
# tune, predict and export.
data = pd.read_csv("heatml-pressure.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Base estimator. It is created once and seeded so the search is reproducible
# (the original re-created it without a seed before the search).
dt = DecisionTreeRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 1.0 (all features) replaces the 'auto' alias removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt', None]
# Current names of the removed 'mse' / 'mae' aliases.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']

random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

dt_random.fit(X_train, y_train)

dt_random.best_params_

predict = dt_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('dtnuhtpressure.csv')

4- XGBoost

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV


# XGBoost workflow for the pressure-atomizer file: load, inspect, split, tune,
# predict and export.
data = pd.read_csv(filepath_or_buffer="heatml-pressure.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_column = data['Fluid']
fluid_dummy = pd.get_dummies(fluid_column, drop_first=True)

data.drop(columns=['Fluid', 'SAT'], inplace=True)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, cmap="Greens", annot=True, fmt=".2f")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

X_train, X_test, y_train, y_test = train_test_split(
    data,
    Y,
    test_size=0.2,
    random_state=0,
)
y_test.columns = ['y_test']

# Seeded base estimator for the search.
xg_reg = XGBRegressor(random_state=42)

# Candidate values for the randomized search.
# NOTE(review): this depth grid (10..110 plus None) differs from the 3..15
# grid used by the other XGBoost sections of this file - confirm it is intended.
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'gamma': [0, 0.1, 0.2, 0.3],
    'min_child_weight': [1, 5, 10],
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

random_search.best_params_

predict = random_search.predict(X_test)

yt = y_test.to_numpy()

# Predictions and true values side by side, written to CSV.
prediction_frame = pd.DataFrame(predict)
truth_frame = pd.DataFrame(yt)
result = pd.concat([prediction_frame, truth_frame], axis=1)

result.to_csv('xgboostpressure.csv')

5- LightGBM

!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV


# LightGBM workflow for the pressure-atomizer file: load, inspect, split, tune
# and predict.
data = pd.read_csv("heatml-pressure.csv")

# Dummy-encode the categorical 'Fluid' column (first level dropped).
# NOTE(review): fluid_dummy is not referenced again in the visible code -
# confirm whether it should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)

data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()

plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Target (Nu) and features (everything left after the drop).
Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

X_train, X_test, y_train, y_test = train_test_split(data, Y, test_size=0.2, random_state=0)
y_test.columns = ['y_test']

# Seeded base estimator. subsample_freq=1 turns bagging on at every iteration:
# LightGBM ignores `subsample` while subsample_freq is 0 (the default), so
# without it the 'subsample' values searched below would have no effect.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)

# Candidate values for the randomized search.
# NOTE(review): this depth grid (10..110 plus None) differs from the 3..14
# grid used by the other LightGBM sections of this file - confirm it is intended.
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3],
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
random_search = RandomizedSearchCV(
    estimator=lgb_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

random_search.fit(X_train, y_train)

print("Best hyperparameters found: ", random_search.best_params_)

# Predict on the held-out rows with the refitted best estimator.
predict = random_search.predict(X_test)

Testing on an excluded subset

1- RFR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV



# Random Forest trained on one file and tested on a separate, excluded file.
# Judging by the file names, the training file omits the Ciofalo data and the
# test file contains only that data.
data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# --- Training frame ----------------------------------------------------------
# NOTE(review): fluid_dummy / fluid_dummy1 are not referenced again in the
# visible code - confirm whether they should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the training columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

# --- Test frame ----------------------------------------------------------------
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
data1.drop(['Fluid', 'SAT'], inplace=True, axis=1)

Y_test = pd.DataFrame(data1['Nu'], columns=['Nu'])

X_train = data
y_train = Y
# Take exactly the training feature columns, in training order. The original
# dropped a different column list from the test frame ('P' and 'Pr' in
# addition), which leaves train and test with different features whenever the
# training file contains those columns.
X_test = data1[X_train.columns]
y_test = Y_test

# Base estimator. It is created once and seeded so the search is reproducible
# (the original re-created it without a seed before the search).
rf = RandomForestRegressor(random_state=42)

# --- Hyperparameter search space --------------------------------------------
n_estimators = np.arange(200, 2001, 200)
# 1.0 (all features) is what 'auto' meant for regressors; the 'auto' alias
# was removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt']
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
bootstrap = [True, False]
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}

# Randomized search: 100 sampled settings, 3-fold cross-validation, all cores.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Flatten the one-column target to avoid DataConversionWarning on every fit.
rf_random.fit(X_train, y_train.to_numpy().ravel())

rf_random.best_params_

predict = rf_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions. The original called
# predict.reshape(63, 1) without keeping the result and with a hard-coded size.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)

result.to_csv('rfrnuhtexc.csv')

2- SVR

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV



# SVR trained on one file and tested on a separate, excluded file.
# Judging by the file names, the training file omits the Ciofalo data and the
# test file contains only that data.
from pprint import pprint  # used below; the original never imported it

data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# --- Training frame ----------------------------------------------------------
# NOTE(review): fluid_dummy / fluid_dummy1 are not referenced again in the
# visible code - confirm whether they should be joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(['Fluid', 'SAT'], inplace=True, axis=1)

# Correlation heatmap of the training columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

Y = pd.DataFrame(data['Nu'], columns=['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace=True, axis=1)

# --- Test frame ----------------------------------------------------------------
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
data1.drop(['Fluid', 'SAT'], inplace=True, axis=1)

Y_test = pd.DataFrame(data1['Nu'], columns=['Nu'])

X_train = data
y_train = Y
# Take exactly the training feature columns, in training order. The original
# dropped a different column list from the test frame ('P' and 'Pr' in
# addition), which makes the scaler and model see mismatched features whenever
# the training file contains those columns.
X_test = data1[X_train.columns]
y_test = Y_test

# Standardize the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Base estimator with default settings; the search below overrides them.
svm = SVR()

# --- Hyperparameter search space --------------------------------------------
C = [0.01, 0.1, 1, 10, 100, 1000]
epsilon = [0.01, 0.05, 0.1, 0.2, 0.5]
kernel = ['rbf', 'poly', 'sigmoid']
gamma = ['scale', 'auto', 0.001, 0.01, 0.1]

random_grid = {
    'C': C,
    'epsilon': epsilon,
    'kernel': kernel,
    'gamma': gamma,
}
pprint(random_grid)

# Randomized search: 100 sampled settings, 5-fold cross-validation, all cores.
svm_random = RandomizedSearchCV(
    estimator=svm,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

# SVR expects a 1-D target; flatten the one-column DataFrame to avoid a
# DataConversionWarning on every fit.
svm_random.fit(X_train, y_train.to_numpy().ravel())

svm_random.best_params_

predict = svm_random.predict(X_test)

yt = y_test.to_numpy()

# Column-vector form of the predictions.
predict = predict.reshape(-1, 1)

# Predictions and true values side by side, written to CSV.
svm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
svm_result.to_csv('svmnuhtexcluded.csv')

3- DT

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV

# Decision-tree regression trained on one CSV and evaluated on a separate
# hold-out CSV (presumably the Ciofalo study, judging by the file names).
data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# One-hot encoding of the fluid type.
# NOTE(review): the dummies are computed but never joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first = True)
data.drop(['Fluid', 'SAT'], inplace = True, axis = 1)

# Correlation heatmap of the remaining training columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16,12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Split off the target (Nu) and drop the columns that are not used as features.
Y = pd.DataFrame(data['Nu'], columns = ['Nu'])
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace = True, axis = 1)

# Same preprocessing for the hold-out set.
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first = True)
data1.drop(['Fluid', 'SAT'], inplace = True, axis = 1)

Y_test = pd.DataFrame(data1['Nu'], columns = ['Nu'])
data1.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace = True, axis = 1)

X_train = data
y_train = Y
X_test = data1
y_test = Y_test

dt = DecisionTreeRegressor(random_state=42)

# Hyperparameter candidates for the randomized search.
max_depth = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None]
min_samples_split = [2, 5, 7, 10, 20, 50, 100]
min_samples_leaf = [1, 2, 4, 5, 10, 50, 100, 200]
# 'auto' was removed from scikit-learn; for a regression tree it meant
# "use all features", which is what None already covers.
max_features = ['sqrt', None]
# 'mse' and 'mae' were renamed to 'squared_error' and 'absolute_error';
# the old spellings are rejected by current scikit-learn releases, which
# makes every sampled candidate that uses them fail to fit.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']

random_grid = {
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'max_features': max_features,
    'criterion': criterion
}

dt_random = RandomizedSearchCV(
    estimator=dt,
    param_distributions=random_grid,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1
)

dt_random.fit(X_train, y_train)

dt_random.best_params_

predict = dt_random.predict(X_test)

yt = y_test.to_numpy()

# Column vector so predictions and targets line up side by side.
predict = predict.reshape(-1, 1)

# First column: predictions, second column: true values.
result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)
result.to_csv('dtnuhtexc.csv')

4- XGBoost

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV



# XGBoost regression: fit on one CSV, predict on a separate hold-out CSV.
data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# --- training frame -------------------------------------------------------
# Fluid dummies are built here; they are not merged into the features below.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first=True)
data.drop(columns=['Fluid', 'SAT'], inplace=True)

# Heatmap of pairwise correlations between the remaining columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16, 12))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap="Greens",
)
plt.show()

# Target column is kept as a one-column frame; then it and the unused
# columns are removed from the feature frame.
Y = data['Nu'].to_frame(name='Nu')
data.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

# --- hold-out frame -------------------------------------------------------
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first=True)
data1.drop(columns=['Fluid', 'SAT'], inplace=True)

Y_test = data1['Nu'].to_frame(name='Nu')
data1.drop(columns=['um', 'rho', 'k', 'Nu', 'At'], inplace=True)

X_train, y_train = data, Y
X_test, y_test = data1, Y_test

# --- model and search space -----------------------------------------------
xg_reg = XGBRegressor(random_state=42)

param_dist = dict(
    n_estimators=np.arange(200, 2001, 200),
    max_depth=list(range(3, 16)),
    learning_rate=[0.001, 0.01, 0.1, 0.2, 0.3],
    subsample=[0.5, 0.7, 1.0],
    colsample_bytree=[0.5, 0.7, 1.0],
    gamma=[0, 0.1, 0.2, 0.3],
    min_child_weight=[1, 5, 10],
)

random_search = RandomizedSearchCV(
    estimator=xg_reg,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

random_search.fit(X_train, y_train)

random_search.best_params_

# --- prediction and export ------------------------------------------------
predict = random_search.predict(X_test)
yt = y_test.to_numpy()

# Predictions in the first column, true values in the second.
predicted_frame = pd.DataFrame(predict)
actual_frame = pd.DataFrame(yt)
result = pd.concat([predicted_frame, actual_frame], axis=1)

result.to_csv('xgboosexc.csv')

5- LightGBM

!pip install lightgbm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV


# LightGBM regression trained on one CSV and evaluated on a separate
# hold-out CSV (presumably the Ciofalo study, judging by the file names).
data = pd.read_csv("WholeExcludeCiofalo.csv")
data1 = pd.read_csv("Ciofalo.csv")

# One-hot encoding of the fluid type.
# NOTE(review): the dummies are computed but never joined to the features.
fluid_dummy = pd.get_dummies(data['Fluid'], drop_first = True)
data.drop(['Fluid', 'SAT'], inplace = True, axis = 1)

# Correlation heatmap of the remaining training columns.
corr_data = data.copy()
correlation_matrix = corr_data.corr()
plt.figure(figsize=(16,12))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="Greens")
plt.show()

# Split off the target (Nu) and drop the columns that are not used as features.
Y = pd.DataFrame(data['Nu'], columns = ['Nu'])
# A stray extra quote before 'um' made this line a SyntaxError.
data.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace = True, axis = 1)

# Same preprocessing for the hold-out set.
fluid_dummy1 = pd.get_dummies(data1['Fluid'], drop_first = True)
data1.drop(['Fluid', 'SAT'], inplace = True, axis = 1)

Y_test = pd.DataFrame(data1['Nu'], columns = ['Nu'])
data1.drop(['um', 'rho', 'k', 'Nu', 'At'], inplace = True, axis = 1)

X_train = data
y_train = Y
X_test = data1
y_test = Y_test

# LightGBM only applies row subsampling when subsample_freq is non-zero;
# with the default of 0 the 'subsample' values searched below would have
# no effect at all.
lgb_reg = lgb.LGBMRegressor(random_state=42, subsample_freq=1)

# Hyperparameter candidates for the randomized search.
param_dist = {
    'n_estimators': np.arange(200, 2001, 200),
    'max_depth': np.arange(3, 15,1),
    'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bytree': [0.5, 0.7, 1.0],
    'min_child_samples': [5, 10, 20],
    'reg_alpha': [0, 0.1, 0.2, 0.3],
    'reg_lambda': [0, 0.1, 0.2, 0.3]
}

random_search = RandomizedSearchCV(estimator=lgb_reg, param_distributions=param_dist, n_iter=100,
                                   cv=3, verbose=2, random_state=42, n_jobs=-1)

random_search.fit(X_train, y_train)

random_search.best_params_

predict = random_search.predict(X_test)

yt = y_test.to_numpy()

# First column: predictions, second column: true values.
lgbm_result = pd.concat([pd.DataFrame(predict), pd.DataFrame(yt)], axis=1)

lgbm_result.to_csv('lgbmexcluded.csv')

Predictions by deep neural networks

Without dropout

!pip3 install scikeras

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsoluteError
from scikeras.wrappers import KerasRegressor
from matplotlib import pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

# Load the dataset and discard the two columns that are not modelled.
df = pd.read_csv('htttt.csv')
df_model = df.drop(columns=['Fluid', 'SAT'])

# Feature matrix: everything except the target and the excluded columns.
excluded_columns = ['um', 'rho', 'k', 'Nu', 'At']
X1 = df_model.drop(columns=excluded_columns).to_numpy()
# Target vector: the Nusselt number.
y1 = df_model['Nu'].to_numpy()

# Standardise features and target with separate scalers; the target scaler
# is kept so predictions can be mapped back with inverse_transform later.
x_scaler = StandardScaler().fit(X1)
X = x_scaler.transform(X1)

y_column = y1.reshape(-1, 1)
y_scaler = StandardScaler().fit(y_column)
y = y_scaler.transform(y_column)

# Width of the input layer, read by DNN_model.
n_features = X.shape[1]

def DNN_model(layers, lr_rate):
    """Build and compile a fully connected regression network.

    Every hidden Dense layer uses ReLU and is followed by batch
    normalisation; the output is a single linear unit. The input width is
    taken from the module-level ``n_features``.

    Args:
        layers: Sequence of unit counts, one per hidden Dense layer.
        lr_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (MAE loss, MAE metric).
    """
    network = Sequential()

    # Input-facing layer: the only one that declares the input width and the
    # only one using the glorot_uniform initializer.
    input_block = Dense(layers[0], input_dim=n_features, kernel_initializer='glorot_uniform', activation="relu")
    network.add(input_block)
    network.add(BatchNormalization())

    # Remaining hidden layers, in the order given.
    for width in layers[1:]:
        hidden_block = Dense(width, kernel_initializer='uniform', activation="relu")
        network.add(hidden_block)
        network.add(BatchNormalization())

    # Single linear output for regression.
    network.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))

    network.compile(
        optimizer=Adam(learning_rate=lr_rate),
        loss="mae",
        metrics=[MeanAbsoluteError()],
    )
    return network

# Candidate architectures (units per hidden layer). Each entry is listed
# once: (512, 256, 128, 64, 32) used to appear twice, which made the
# exhaustive grid search train that architecture twice for every
# combination of the other hyperparameters.
layers = [
    (64, 32),
    (128, 64, 32),
    (256, 128, 64, 32),
    (512, 256, 128, 64, 32),
    (128, 64, 32, 16, 8),
    (256, 128, 64, 32, 16),
    (1024, 512, 256, 128, 64),
    (256, 128, 64, 32, 16, 8),
    (512, 256, 128, 64, 32, 16),
    (1024, 512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64, 32, 16),
    (2048, 1024, 512, 256, 128, 64, 32),
]
lr_rate = [0.1, 0.01, 0.001, 0.0001]
batch_size = [64, 32]
epochs = [1000, 100]
param_grid = dict(layers=layers, lr_rate=lr_rate, batch_size=batch_size, epochs=epochs)

# Exhaustive 5-fold grid search over all combinations above.
model = KerasRegressor(model=DNN_model, layers=layers, lr_rate=lr_rate, verbose=0)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X, y)

best_params = grid_result.best_params_

# Re-evaluate the selected configuration with shuffled 5-fold CV.
cv = KFold(n_splits=5, random_state=3, shuffle=True)
selected_model = KerasRegressor(model=DNN_model, layers=best_params['layers'],
                                lr_rate=best_params['lr_rate'],
                                batch_size=best_params['batch_size'],
                                epochs=best_params['epochs'], verbose=0)
scores = cross_val_score(selected_model, X, y, scoring='r2', cv=cv, n_jobs=-1)
# The scores were previously computed but never shown.
print(f"5-fold CV R2: {scores.mean():.4f} +/- {scores.std():.4f}")

# Fixed seed so the hold-out split (and the reported metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DNN_model(layers=best_params['layers'], lr_rate=best_params['lr_rate'])

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Early stopping monitors a validation slice carved out of the training
# data. Monitoring the test set would select the weights that happen to be
# best on that very set and make the test metrics below optimistic.
history = model.fit(X_train, y_train, epochs=best_params['epochs'],
                    batch_size=best_params['batch_size'],
                    validation_split=0.2,
                    verbose=0, callbacks=[early_stopping])

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Performance')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.box(False)
plt.show()

# Metrics on the untouched test split; note that y is standardised here, so
# the MAE is expressed in standardised units.
y_pred = model.predict(X_test)
print("R2 score:", r2_score(y_test, y_pred))

mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

# Map predictions and targets back to the original Nu scale for export.
y_original = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
y_test_original = y_scaler.inverse_transform(y_test.reshape(-1, 1))

# First column: predictions, second column: true values.
mlp_result = pd.concat([pd.DataFrame(y_original), pd.DataFrame(y_test_original)], axis=1)
mlp_result

mlp_result.to_csv('ann_no_dropout.csv')

Using dropout


!pip3 install scikeras


import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsoluteError, RootMeanSquaredError
from scikeras.wrappers import KerasRegressor
from matplotlib import pyplot as plt
import seaborn as sns
from tensorflow.keras.callbacks import EarlyStopping

# Read the dataset; 'Fluid' and 'SAT' are removed before modelling.
df = pd.read_csv('htttt.csv')
df_model = df.drop(columns=['Fluid', 'SAT'])

# Inputs are all remaining columns except the target and four excluded ones.
feature_frame = df_model.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X1 = feature_frame.values
# Target is the Nusselt number column.
y1 = df_model['Nu'].values

# Features and target get independent standard scalers.
x_scaler = StandardScaler()
x_scaler.fit(X1)
X = x_scaler.transform(X1)

y_scaler = StandardScaler()
target_column = y1.reshape(-1, 1)
y_scaler.fit(target_column)
y = y_scaler.transform(target_column)

# Number of input columns, consumed by DNN_model for the first layer.
n_features = X.shape[1]

def DNN_model(layers, lr_rate, dropout_rate=0.2):
    """Build and compile a fully connected regression network with dropout.

    Every hidden Dense layer uses ReLU and is followed by batch
    normalisation and dropout; the output is a single linear unit. The
    input width is taken from the module-level ``n_features``.

    Args:
        layers: Sequence of unit counts, one per hidden Dense layer.
        lr_rate: Learning rate handed to the Adam optimizer.
        dropout_rate: Fraction of units dropped after each hidden layer.
            Defaults to 0.2, the value that used to be hard-coded, so
            existing callers are unaffected; exposing it allows the rate
            to be included in a hyperparameter search.

    Returns:
        The compiled ``Sequential`` model (MAE loss, MAE metric).
    """
    DNN = Sequential()

    # Input-facing layer: declares the input width.
    DNN.add(Dense(layers[0], input_dim=n_features, kernel_initializer='glorot_uniform', activation="relu"))
    DNN.add(BatchNormalization())
    DNN.add(Dropout(dropout_rate))

    # Remaining hidden layers, in the order given.
    for nodes in layers[1:]:
        DNN.add(Dense(nodes, kernel_initializer='uniform', activation="relu"))
        DNN.add(BatchNormalization())
        DNN.add(Dropout(dropout_rate))

    # Single linear output for regression.
    DNN.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))

    opt = Adam(learning_rate=lr_rate)

    DNN.compile(optimizer=opt,
                loss="mae",
                metrics=[MeanAbsoluteError()])

    return DNN

# Candidate architectures (units per hidden layer) and training settings.
layers = [(64, 32), (128, 64, 32), (256, 128, 64, 32), (512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64),
    (256, 128, 64, 32, 16, 8),
    (512, 256, 128, 64, 32, 16),
    (1024, 512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64, 32, 16),
    (2048, 1024, 512, 256, 128, 64, 32)]
lr_rate = [0.1, 0.01, 0.001, 0.0001]
batch_size = [64, 32]
epochs = [1000, 100]
param_grid = dict(layers=layers, lr_rate=lr_rate, batch_size=batch_size, epochs=epochs)

# Exhaustive 5-fold grid search over all combinations above.
model = KerasRegressor(model=DNN_model, layers=layers, lr_rate=lr_rate, verbose=0)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X, y)

print("The best parameters are:", grid_result.best_params_)
best_params = grid_result.best_params_

# Re-evaluate the selected configuration with shuffled 5-fold CV.
cv = KFold(n_splits=5, random_state=3, shuffle=True)

selected_model = KerasRegressor(model=DNN_model, layers=best_params['layers'],
                                lr_rate=best_params['lr_rate'],
                                batch_size=best_params['batch_size'],
                                epochs=best_params['epochs'], verbose=0)
scores = cross_val_score(selected_model, X, y, scoring='r2', cv=cv, n_jobs=-1)
# The scores were previously computed but never shown.
print(f"5-fold CV R2: {scores.mean():.4f} +/- {scores.std():.4f}")

# Fixed seed so the hold-out split (and the reported metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DNN_model(layers=best_params['layers'], lr_rate=best_params['lr_rate'])

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Early stopping monitors a validation slice carved out of the training
# data. Monitoring the test set would select the weights that happen to be
# best on that very set and make the test metrics below optimistic.
history = model.fit(X_train, y_train, epochs=best_params['epochs'],
                    batch_size=best_params['batch_size'],
                    validation_split=0.2,
                    verbose=0, callbacks=[early_stopping])

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Performance')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.box(False)
plt.show()

# Metrics on the untouched test split; y is standardised here, so the MAE
# is expressed in standardised units.
y_pred = model.predict(X_test)
print("R2 score:", r2_score(y_test, y_pred))

mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

Using RandomizedSearchCV




import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from scikeras.wrappers import KerasRegressor
from matplotlib import pyplot as plt

# Download the dataset straight from the repository.
url = 'https://raw.githubusercontent.com/mlori77/ANN/main/heatml.csv'
df = pd.read_csv(url)

# 'Fluid' and 'SAT' are removed first; the target and four further columns
# are then excluded from the inputs.
df_model = df.drop(columns=['Fluid', 'SAT'])
input_frame = df_model.drop(columns=['um', 'rho', 'k', 'Nu', 'At'])
X1 = input_frame.to_numpy()
y1 = df_model['Nu'].to_numpy()

# Standardise inputs.
x_scaler = StandardScaler()
X = x_scaler.fit(X1).transform(X1)

# Standardise the target as a column vector; the scaler is reused later to
# map predictions back to the original scale.
y_scaler = StandardScaler()
nu_column = y1.reshape(-1, 1)
y = y_scaler.fit(nu_column).transform(nu_column)

# Input width for the first Dense layer built by DNN_model.
n_features = X.shape[1]

def DNN_model(layers, lr_rate):
    """Build and compile a fully connected regression network.

    Every hidden Dense layer uses ReLU and is followed by batch
    normalisation; the output is a single linear unit. The input width is
    taken from the module-level ``n_features``.

    Args:
        layers: Sequence of unit counts, one per hidden Dense layer.
        lr_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (MAE loss, MAE metric).
    """
    # This section's import list does not bring in MeanAbsoluteError, so it
    # is imported here; otherwise compile() raises NameError when the
    # section is run on its own.
    from tensorflow.keras.metrics import MeanAbsoluteError

    DNN = Sequential()

    # Input-facing layer: declares the input width.
    DNN.add(Dense(layers[0], input_dim=n_features, kernel_initializer='glorot_uniform', activation="relu"))
    DNN.add(BatchNormalization())  # Batch Normalization

    # Remaining hidden layers, in the order given.
    for nodes in layers[1:]:
        DNN.add(Dense(nodes, kernel_initializer='uniform', activation="relu"))
        DNN.add(BatchNormalization())  # Batch Normalization

    # Single linear output for regression.
    DNN.add(Dense(units=1, kernel_initializer='uniform', activation='linear'))

    opt = Adam(learning_rate=lr_rate)

    DNN.compile(optimizer=opt,
                loss="mae",
                metrics=[MeanAbsoluteError()])

    return DNN

# Candidate architectures (units per hidden layer) and training settings.
layers = [
    (64, 32), (128, 64, 32), (256, 128, 64, 32), (512, 256, 128, 64, 32),
    (1024, 512, 256, 128, 64),
    (256, 128, 64, 32, 16, 8),
    (512, 256, 128, 64, 32, 16)
]
lr_rate = [0.01, 0.001]
batch_size = [32, 64]
epochs = [100, 200]
param_grid = dict(layers=layers, lr_rate=lr_rate, batch_size=batch_size, epochs=epochs)

model = KerasRegressor(model=DNN_model, layers=layers, lr_rate=lr_rate, verbose=0)
# random_state fixes which 20 candidates are sampled, so repeated runs
# search the same configurations.
grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=20, cv=5,
                          random_state=42)
grid_result = grid.fit(X, y)

# Fixed seed so the hold-out split (and the reported metrics) are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

best_params = grid_result.best_params_
model = DNN_model(layers=best_params['layers'], lr_rate=best_params['lr_rate'])

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Early stopping monitors a validation slice carved out of the training
# data. Monitoring the test set would select the weights that happen to be
# best on that very set and make the test metrics below optimistic.
history = model.fit(X_train, y_train, epochs=best_params['epochs'],
                    batch_size=best_params['batch_size'],
                    validation_split=0.2,
                    verbose=0, callbacks=[early_stopping])

plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Metrics on the untouched test split; y is standardised here, so the MAE
# is expressed in standardised units.
y_pred = model.predict(X_test)

print("R2 Score: ", r2_score(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("MAE: ", mae)

# Map predictions and targets back to the original Nu scale for export.
y_original = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
y_test_original = y_scaler.inverse_transform(y_test.reshape(-1, 1))

# First column: predictions, second column: true values.
mlp_result = pd.concat([pd.DataFrame(y_original), pd.DataFrame(y_test_original)], axis=1)
mlp_result.to_csv('ann_results.csv')