AI Powered Hall Recommender

Author: David Ezeani

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MultiLabelBinarizer
import joblib
In [3]:
# Load dataset
df = pd.read_csv('unilag_venue_booking_data.csv')
In [3]:
df.shape
Out[3]:
(1500, 9)
In [13]:
df.head()
Out[13]:
event_type hall_name guest_count cost_of_hall rating booking_status date time amenities
0 Stage Play Jelili Omotola Hall B 691 211963 4 Success 3/4/2023 19:00 Sound System, Stage, Generator, AC
1 Workshop J.F. Ade Ajayi Auditorium 766 201250 5 Success 7/16/2024 9:00 AC, Generator
2 Workshop Jelili Omotola Hall B 112 211963 3 Success 1/31/2021 8:00 Sound System, AC, Stage
3 Religious Event Jelili Omotola Hall B 158 211963 4 Success 1/5/2022 8:00 Parking, Lighting, Projector
4 Religious Event Faculty of Arts Theatre 916 93000 5 Success 10/8/2024 16:00 WiFi, AC, Lighting, Parking
In [19]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   event_type      1500 non-null   object
 1   hall_name       1500 non-null   object
 2   guest_count     1500 non-null   int64 
 3   cost_of_hall    1500 non-null   int64 
 4   rating          1500 non-null   int64 
 5   booking_status  1500 non-null   object
 6   date            1500 non-null   object
 7   time            1500 non-null   object
 8   amenities       1500 non-null   object
dtypes: int64(3), object(6)
memory usage: 105.6+ KB
In [5]:
#Cheching for NaN values
df.isna().sum()
Out[5]:
event_type        0
hall_name         0
guest_count       0
cost_of_hall      0
rating            0
booking_status    0
date              0
time              0
amenities         0
dtype: int64
In [4]:
#Cheching for duplicates
df.duplicated().sum()
Out[4]:
np.int64(0)
In [6]:
df.describe()
Out[6]:
guest_count cost_of_hall rating
count 1500.000000 1500.000000 1500.000000
mean 511.988000 161321.065333 3.350000
std 285.329251 49154.441639 1.470014
min 21.000000 93000.000000 1.000000
25% 261.000000 93000.000000 2.000000
50% 505.000000 176250.000000 4.000000
75% 767.000000 210000.000000 5.000000
max 1000.000000 215375.000000 5.000000
In [5]:
numeric_fields_df = df.select_dtypes(include = ["number"])
In [6]:
numeric_fields_df.corr()
Out[6]:
guest_count cost_of_hall rating
guest_count 1.000000 -0.005847 0.020118
cost_of_hall -0.005847 1.000000 0.088026
rating 0.020118 0.088026 1.000000

No Correllation at all

In [7]:
df["event_type"].value_counts()
Out[7]:
event_type
Seminar            321
Workshop           204
Concert            197
Religious Event    195
Stage Play         178
Tech Talk          178
Conference         177
Birthday            50
Name: count, dtype: int64
In [25]:
df["booking_status"].value_counts()
Out[25]:
booking_status
Success        1335
Rescheduled     138
Cancelled        27
Name: count, dtype: int64
In [26]:
df["booking_status"].value_counts().plot(kind="pie")
plt.title("Booking status")
plt.ylabel("")
plt.show()
No description has been provided for this image
In [14]:
df["hall_name"].value_counts()
Out[14]:
hall_name
Jelili Omotola Hall B                        148
Tolu Odugbemi                                146
Faculty of Social Science Lecture Theatre    146
Julius Berger Hall                           138
Jelili Omotola Hall A                        138
Faculty of Arts Theatre                      137
Rahaman Bello Lecture Theatre                135
Jelili Omotola Hall C                        132
Afe Babalola                                 129
Tayo Aderinokun                              129
J.F. Ade Ajayi Auditorium                    122
Name: count, dtype: int64
In [16]:
df["hall_name"].value_counts().plot(kind="pie")
plt.title("Booked Halls")
plt.ylabel("")
plt.show()
No description has been provided for this image
In [8]:
#find the Mean of Tenure based on the two groups of Churn (yes/no)
df.groupby("hall_name")["rating"].mean()
Out[8]:
hall_name
Afe Babalola                                 3.124031
Faculty of Arts Theatre                      3.321168
Faculty of Social Science Lecture Theatre    3.630137
J.F. Ade Ajayi Auditorium                    4.000000
Jelili Omotola Hall A                        3.521739
Jelili Omotola Hall B                        3.391892
Jelili Omotola Hall C                        3.590909
Julius Berger Hall                           3.318841
Rahaman Bello Lecture Theatre                3.407407
Tayo Aderinokun                              3.503876
Tolu Odugbemi                                3.253425
Name: rating, dtype: float64

Train Model

In [32]:
# Encode categorical variables
le_event = LabelEncoder()
le_hall = LabelEncoder()
mlb_amenities = MultiLabelBinarizer()

df['event_type_encoded'] = le_event.fit_transform(df['event_type'])
df['hall_name_encoded'] = le_hall.fit_transform(df['hall_name'])
In [33]:
# Amenities encoding (split by comma, e.g., "Generator,AC")
df['amenities_list'] = df['amenities'].apply(lambda x: x.split(','))
amenities_encoded = mlb_amenities.fit_transform(df['amenities_list'])

# Add the amenities binary columns to the dataframe
amenities_df = pd.DataFrame(amenities_encoded, columns=mlb_amenities.classes_)
df = pd.concat([df, amenities_df], axis=1)
In [38]:
# Features (X) and Target (y)
X = df[['event_type_encoded', 'guest_count', 'cost_of_hall', 'rating'] + list(mlb_amenities.classes_)]
y = df['hall_name_encoded']
In [39]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [40]:
# Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
Out[40]:
RandomForestClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=42)
In [41]:
# Predictions on test set
y_pred = model.predict(X_test)
In [30]:
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Accuracy: 0.81
In [24]:
# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le_hall.inverse_transform(sorted(y_test.unique()))))
Classification Report:
                                           precision    recall  f1-score   support

                             Afe Babalola       0.33      0.44      0.38        25
                  Faculty of Arts Theatre       0.36      0.40      0.38        25
Faculty of Social Science Lecture Theatre       1.00      1.00      1.00        22
                J.F. Ade Ajayi Auditorium       0.90      1.00      0.95        28
                    Jelili Omotola Hall A       1.00      0.94      0.97        32
                    Jelili Omotola Hall B       1.00      1.00      1.00        22
                    Jelili Omotola Hall C       1.00      1.00      1.00        28
                       Julius Berger Hall       0.42      0.29      0.34        35
            Rahaman Bello Lecture Theatre       1.00      1.00      1.00        26
                          Tayo Aderinokun       1.00      0.96      0.98        27
                            Tolu Odugbemi       1.00      1.00      1.00        30

                                 accuracy                           0.81       300
                                macro avg       0.82      0.82      0.82       300
                             weighted avg       0.81      0.81      0.81       300

In [25]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=le_hall.inverse_transform(sorted(y_test.unique())),
            yticklabels=le_hall.inverse_transform(sorted(y_test.unique())))
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
No description has been provided for this image
In [22]:
# Save the model
joblib.dump(model, 'hall_recommender.pkl')
Out[22]:
['hall_recommender.pkl']
In [18]:
joblib.dump(le_event, 'le_event.pkl')
Out[18]:
['le_event.pkl']
In [19]:
joblib.dump(le_hall, 'le_hall.pkl')
Out[19]:
['le_hall.pkl']
In [21]:
joblib.dump(mlb_amenities, 'mlb_amenities.pkl')
Out[21]:
['mlb_amenities.pkl']