AI Powered Hall Recommender
Author: David Ezeani
In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MultiLabelBinarizer
import joblib
In [3]:
# Load dataset
df = pd.read_csv('unilag_venue_booking_data.csv')
In [3]:
df.shape
Out[3]:
(1500, 9)
In [13]:
df.head()
Out[13]:
| event_type | hall_name | guest_count | cost_of_hall | rating | booking_status | date | time | amenities | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Stage Play | Jelili Omotola Hall B | 691 | 211963 | 4 | Success | 3/4/2023 | 19:00 | Sound System, Stage, Generator, AC |
| 1 | Workshop | J.F. Ade Ajayi Auditorium | 766 | 201250 | 5 | Success | 7/16/2024 | 9:00 | AC, Generator |
| 2 | Workshop | Jelili Omotola Hall B | 112 | 211963 | 3 | Success | 1/31/2021 | 8:00 | Sound System, AC, Stage |
| 3 | Religious Event | Jelili Omotola Hall B | 158 | 211963 | 4 | Success | 1/5/2022 | 8:00 | Parking, Lighting, Projector |
| 4 | Religious Event | Faculty of Arts Theatre | 916 | 93000 | 5 | Success | 10/8/2024 | 16:00 | WiFi, AC, Lighting, Parking |
In [19]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1500 entries, 0 to 1499 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 event_type 1500 non-null object 1 hall_name 1500 non-null object 2 guest_count 1500 non-null int64 3 cost_of_hall 1500 non-null int64 4 rating 1500 non-null int64 5 booking_status 1500 non-null object 6 date 1500 non-null object 7 time 1500 non-null object 8 amenities 1500 non-null object dtypes: int64(3), object(6) memory usage: 105.6+ KB
In [5]:
#Cheching for NaN values
df.isna().sum()
Out[5]:
event_type 0 hall_name 0 guest_count 0 cost_of_hall 0 rating 0 booking_status 0 date 0 time 0 amenities 0 dtype: int64
In [4]:
#Cheching for duplicates
df.duplicated().sum()
Out[4]:
np.int64(0)
In [6]:
df.describe()
Out[6]:
| guest_count | cost_of_hall | rating | |
|---|---|---|---|
| count | 1500.000000 | 1500.000000 | 1500.000000 |
| mean | 511.988000 | 161321.065333 | 3.350000 |
| std | 285.329251 | 49154.441639 | 1.470014 |
| min | 21.000000 | 93000.000000 | 1.000000 |
| 25% | 261.000000 | 93000.000000 | 2.000000 |
| 50% | 505.000000 | 176250.000000 | 4.000000 |
| 75% | 767.000000 | 210000.000000 | 5.000000 |
| max | 1000.000000 | 215375.000000 | 5.000000 |
In [5]:
numeric_fields_df = df.select_dtypes(include = ["number"])
In [6]:
numeric_fields_df.corr()
Out[6]:
| guest_count | cost_of_hall | rating | |
|---|---|---|---|
| guest_count | 1.000000 | -0.005847 | 0.020118 |
| cost_of_hall | -0.005847 | 1.000000 | 0.088026 |
| rating | 0.020118 | 0.088026 | 1.000000 |
No Correllation at all
In [7]:
df["event_type"].value_counts()
Out[7]:
event_type Seminar 321 Workshop 204 Concert 197 Religious Event 195 Stage Play 178 Tech Talk 178 Conference 177 Birthday 50 Name: count, dtype: int64
In [25]:
df["booking_status"].value_counts()
Out[25]:
booking_status Success 1335 Rescheduled 138 Cancelled 27 Name: count, dtype: int64
In [26]:
df["booking_status"].value_counts().plot(kind="pie")
plt.title("Booking status")
plt.ylabel("")
plt.show()
In [14]:
df["hall_name"].value_counts()
Out[14]:
hall_name Jelili Omotola Hall B 148 Tolu Odugbemi 146 Faculty of Social Science Lecture Theatre 146 Julius Berger Hall 138 Jelili Omotola Hall A 138 Faculty of Arts Theatre 137 Rahaman Bello Lecture Theatre 135 Jelili Omotola Hall C 132 Afe Babalola 129 Tayo Aderinokun 129 J.F. Ade Ajayi Auditorium 122 Name: count, dtype: int64
In [16]:
df["hall_name"].value_counts().plot(kind="pie")
plt.title("Booked Halls")
plt.ylabel("")
plt.show()
In [8]:
#find the Mean of Tenure based on the two groups of Churn (yes/no)
df.groupby("hall_name")["rating"].mean()
Out[8]:
hall_name Afe Babalola 3.124031 Faculty of Arts Theatre 3.321168 Faculty of Social Science Lecture Theatre 3.630137 J.F. Ade Ajayi Auditorium 4.000000 Jelili Omotola Hall A 3.521739 Jelili Omotola Hall B 3.391892 Jelili Omotola Hall C 3.590909 Julius Berger Hall 3.318841 Rahaman Bello Lecture Theatre 3.407407 Tayo Aderinokun 3.503876 Tolu Odugbemi 3.253425 Name: rating, dtype: float64
Train Model
In [32]:
# Encode categorical variables
le_event = LabelEncoder()
le_hall = LabelEncoder()
mlb_amenities = MultiLabelBinarizer()
df['event_type_encoded'] = le_event.fit_transform(df['event_type'])
df['hall_name_encoded'] = le_hall.fit_transform(df['hall_name'])
In [33]:
# Amenities encoding (split by comma, e.g., "Generator,AC")
df['amenities_list'] = df['amenities'].apply(lambda x: x.split(','))
amenities_encoded = mlb_amenities.fit_transform(df['amenities_list'])
# Add the amenities binary columns to the dataframe
amenities_df = pd.DataFrame(amenities_encoded, columns=mlb_amenities.classes_)
df = pd.concat([df, amenities_df], axis=1)
In [38]:
# Features (X) and Target (y)
X = df[['event_type_encoded', 'guest_count', 'cost_of_hall', 'rating'] + list(mlb_amenities.classes_)]
y = df['hall_name_encoded']
In [39]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [40]:
# Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
Out[40]:
RandomForestClassifier(random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=42)
In [41]:
# Predictions on test set
y_pred = model.predict(X_test)
In [30]:
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Accuracy: 0.81
In [24]:
# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le_hall.inverse_transform(sorted(y_test.unique()))))
Classification Report:
precision recall f1-score support
Afe Babalola 0.33 0.44 0.38 25
Faculty of Arts Theatre 0.36 0.40 0.38 25
Faculty of Social Science Lecture Theatre 1.00 1.00 1.00 22
J.F. Ade Ajayi Auditorium 0.90 1.00 0.95 28
Jelili Omotola Hall A 1.00 0.94 0.97 32
Jelili Omotola Hall B 1.00 1.00 1.00 22
Jelili Omotola Hall C 1.00 1.00 1.00 28
Julius Berger Hall 0.42 0.29 0.34 35
Rahaman Bello Lecture Theatre 1.00 1.00 1.00 26
Tayo Aderinokun 1.00 0.96 0.98 27
Tolu Odugbemi 1.00 1.00 1.00 30
accuracy 0.81 300
macro avg 0.82 0.82 0.82 300
weighted avg 0.81 0.81 0.81 300
In [25]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
xticklabels=le_hall.inverse_transform(sorted(y_test.unique())),
yticklabels=le_hall.inverse_transform(sorted(y_test.unique())))
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
In [22]:
# Save the model
joblib.dump(model, 'hall_recommender.pkl')
Out[22]:
['hall_recommender.pkl']
In [18]:
joblib.dump(le_event, 'le_event.pkl')
Out[18]:
['le_event.pkl']
In [19]:
joblib.dump(le_hall, 'le_hall.pkl')
Out[19]:
['le_hall.pkl']
In [21]:
joblib.dump(mlb_amenities, 'mlb_amenities.pkl')
Out[21]:
['mlb_amenities.pkl']