Skip to content

Single Document Classification

python
def preprocess_text(text):
    return text.lower().split()

# Initialize and load models
evaluator = ModelEvaluator("./models")
evaluator.load_all_models()

# Classify text
text = "Your document text here..."
result = evaluator.get_ensemble_prediction(
    text,
    preprocess_text,
    threshold=0.3
)

if result:
    print(f"Best prediction from model: {result['model']}")
    print(f"Topic ID: {result['prediction']['topic_id']}")
    print(f"Confidence: {result['prediction']['probability']:.3f}")
    print("Top terms:", ', '.join(result['prediction']['top_terms'][:5]))

Batch Classification

python
texts = [
    "Document 1 text...",
    "Document 2 text...",
    "Document 3 text..."
]

for idx, text in enumerate(texts):
    print(f"\nDocument {idx + 1}:")
    result = evaluator.get_ensemble_prediction(text, preprocess_text)
    if result:
        print(f"Classified as Topic {result['prediction']['topic_id']}")
        print(f"Confidence: {result['prediction']['probability']:.3f}")
        print("Key terms:", ', '.join(result['prediction']['top_terms'][:3]))

Working with Model Results

The classification results contain detailed information about topic distributions:

python
results = evaluator.classify_text(textpreprocess_text)

for model_name, data in results.items():
    print(f"\nModel: {model_name}")
    print(f"Topic ID: {data['topic_id']}")
    print(f"Probability: {data['probability']:.3f}")
    print("Top terms:", data['top_terms'])
    print("Term weights:", data['term_weights'])