# Source code for drcme.prediction

"""
The :mod:`drcme.prediction` module contains wrapper functions for random
forest classification.
"""

import numpy as np
import pandas as pd
import sklearn.ensemble as ensemble
import logging


def rf_predict(train_df, train_labels, test_df, n_trees=500, class_weight=None):
    """Fit a random forest on the training set and label the test set.

    A ``RandomForestClassifier`` is built with out-of-bag scoring enabled
    and a fixed ``random_state`` of 0, fitted on `train_df` with
    `train_labels`, and then used to predict a label for every row of
    `test_df`. The out-of-bag score of the fitted forest is reported
    through the root logger at INFO level.

    Parameters
    ----------
    train_df : DataFrame
        Training data
    train_labels : list or array
        Labels for training data
    test_df : DataFrame
        Test data
    n_trees : int, optional
        Number of trees for random forest classifier
    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
        Class weight parameter for random forest classifier

    Returns
    -------
    array
        Predicted labels for `test_df`
    """
    forest_options = {
        "n_estimators": n_trees,
        "oob_score": True,
        "class_weight": class_weight,
        "random_state": 0,
    }
    forest = ensemble.RandomForestClassifier(**forest_options)

    # Only the underlying ``.values`` arrays are handed to the classifier,
    # so the data frames' column labels are not seen by it.
    forest.fit(train_df.values, train_labels)

    oob_message = "OOB score: {:f}".format(forest.oob_score_)
    logging.info(oob_message)

    predicted_labels = forest.predict(test_df.values)
    return predicted_labels