if __name__ == "__main__":
    # --- Data loading & label binarization ---
    # labels_list is a list of label-sets (multi-label); MultiLabelBinarizer
    # turns it into a binary indicator matrix y of shape (n_samples, n_classes).
    sentences, labels_list = get_data_multilabel()
    mlb = MultiLabelBinarizer()
    y = mlb.fit_transform(labels_list)
    class_names = mlb.classes_

    print(f"Total samples: {len(sentences)}")
    print(f"Number of classes: {len(class_names)}")
    print(f"Classes: {class_names}")
    print(f"Multi-label distribution:\n{pd.Series([len(l) for l in labels_list]).value_counts()}\n")

    # --- Build/load embeddings & FAISS index ---
    # Reuse a persisted index when available; otherwise build it (which also
    # returns the embeddings). When loading, embeddings must be recomputed.
    index = load_vector_index()
    if index is None:
        print("Building FAISS index…")
        index, embeddings = build_vector_index(sentences)
    else:
        embeddings = get_openai_embeddings_with_token(sentences)

    # IMPROVED: Better hyperparameters for multi-label learning
    X_train, X_test, y_train, y_test = train_test_split(
        embeddings, y, test_size=0.2, random_state=42
    )
    print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}\n")

    # SOLUTION 1: Use calibrated, larger model with regularization.
    # OneVsRestClassifier fits one binary MLP per label so predict_proba
    # yields an independent probability per class.
    clf_base = MLPClassifier(
        hidden_layer_sizes=(256, 128),  # Larger capacity for multi-label
        activation="relu",
        max_iter=1000,
        alpha=0.001,                    # L2 regularization
        learning_rate_init=0.001,       # Slower learning
        batch_size=4,                   # Smaller batches
        random_state=42,
        early_stopping=True,
        validation_fraction=0.1,
        verbose=1,
    )
    clf = OneVsRestClassifier(clf_base)
    clf.fit(X_train, y_train)

    # DIAGNOSTIC: Check raw probabilities on a few training samples to spot
    # degenerate (all-low / all-high) probability distributions early.
    train_probs = clf.predict_proba(X_train[:5])
    print("\n— Diagnostic: Sample probabilities from training set —")
    for i, probs in enumerate(train_probs):
        print(f"Sample {i}: {dict(zip(class_names, probs))}")

    # --- Predictions & evaluation ---
    y_pred = clf.predict(X_test)
    print("\n— Model Evaluation —")
    print(f"Exact Match Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"Hamming Loss (label-wise error): {hamming_loss(y_test, y_pred):.4f}")
    print("\nClassification Report (per-label):")
    print(classification_report(y_test, y_pred, target_names=class_names, zero_division=0))

    # SOLUTION 2: Dynamic threshold selection (not fixed 0.5)
    print("\n— Query Testing with Adaptive Thresholding —\n")
    new_queries = [
        "monaco olympic instance approval",
        "oly sg and oly ch",
        "status for both monaco and germany instances",
        "marketing budget proposal q3",
        "check OLY-LU and OLY-DE status",
    ]
    for query in new_queries:
        print(f"Query: '{query}'")

        # FAISS search: top-3 nearest training sentences for context.
        query_emb = get_openai_embeddings_with_token(query)
        D, I = index.search(query_emb, k=3)
        print(" FAISS Top 3:")
        for rank, (score, idx) in enumerate(zip(D[0], I[0])):
            print(f" {rank+1}. {labels_list[idx]} (score: {score:.4f})")

        # Classifier prediction with adaptive threshold.
        query_emb_2d = np.array(query_emb).reshape(1, -1)
        probs = clf.predict_proba(query_emb_2d)[0]

        # SOLUTION 3: Rank-based + threshold approach (more robust).
        prob_series = pd.Series(probs, index=class_names).sort_values(ascending=False)

        # Dynamic threshold from the per-query probability distribution:
        # labels must beat mean + 0.5*std to be selected.
        mean_prob = probs.mean()
        std_prob = probs.std()
        dynamic_threshold = mean_prob + 0.5 * std_prob

        predicted_labels = prob_series[prob_series > dynamic_threshold]
        if predicted_labels.empty:
            # Fallback: take the single top prediction if nothing clears it.
            predicted_labels = prob_series.head(1)

        print(f" Classifier (Dynamic threshold={dynamic_threshold:.4f}):")
        for label, score in predicted_labels.items():
            print(f" - {label}: {score:.4f}")
        print()