# Add the label dummy variables to the vector X_train_vec_lbl.

# Names of the 12 cluster dummy columns selected from train_lbl.
cluster_cols = [
    "cluster_1", "cluster_2", "cluster_3", "cluster_4",
    "cluster_5", "cluster_6", "cluster_7", "cluster_8",
    "cluster_9", "cluster_10", "cluster_11", "cluster_12",
]

# Create a DataFrame with the dummy variables.
dummy_df = train_lbl[cluster_cols]

# Reshape every dummy column into an (n_rows, 1) dense numpy array so it can
# be placed next to the sparse matrix as its own column block.
dummy_blocks = [dummy_df[col].values.reshape(-1, 1) for col in dummy_df.columns]

# Stack all column blocks horizontally onto the sparse matrix in ONE call.
# Calling hstack once per column would rebuild the whole CSR matrix on every
# iteration; a single call yields the same columns in the same order.
X_train_vec_lbl = hstack([X_train_vec_lbl] + dummy_blocks, format='csr')

# Size of X_train_vec_lbl after adding the 12 dummy variables.
print(X_train_vec_lbl.shape)

# Multinomial Naive Bayes.
mnb = MultinomialNB()

# Fit the classifier on the augmented training matrix and its labels.
mnb.fit(X_train_vec_lbl, y_train_lbl)