# Reproducible example: fit a logistic regression, calibrate its predicted
# probabilities with isotonic regression, and compare the AUC before and
# after calibration.
set.seed(123)
data <- caret::twoClassSim(1000)
trainIndex <- caret::createDataPartition(data$Class, p = 0.8, list = FALSE)
trainData <- data[trainIndex, ]
testData <- data[-trainIndex, ]

# Train a logistic regression model
model <- glm(Class ~ ., data = trainData, family = binomial)

# Get predicted probabilities
test_probs <- predict(model, newdata = testData, type = "response")

# Compute AUC BEFORE calibration
auc_before <- pROC::roc(testData$Class, test_probs)$auc

# Fit an isotonic regression model (class labels recoded to 0/1)
# NOTE(review): the calibrator is fitted and evaluated on the same test set;
# for an honest estimate, fit it on a separate calibration split.
iso_model <- isoreg(test_probs, as.numeric(testData$Class) - 1)

# Get calibrated probabilities from isotonic regression.
# isoreg() keeps `x` in its original input order but stores the fitted values
# `yf` in sorted-x order, so pairing iso_model$x with iso_model$yf directly
# (as approx(iso_model$x, iso_model$yf, ...) did) scrambles the mapping and
# destroys the ranking. as.stepfun() builds the fitted monotone step function
# from the correctly ordered knots, which can then be evaluated at any score.
iso_calibrated_probs <- as.stepfun(iso_model)(test_probs)

# Compute AUC AFTER calibration
auc_after <- pROC::roc(testData$Class, iso_calibrated_probs)$auc
auc_before 的结果为 0.9116,但 auc_after 仅为 0.52。我认为保序回归(isotonic regression)拟合出的概率一定有问题。有人能指出我代码中的错误,让校准前后的 AUC 保持一致吗?非常感谢!

(回答)我不确定您为什么要使用 approx 函数。已归档的 rfUtilities 软件包提供了 probability.calibration 函数,可以直接完成这一校准:
library(rfUtilities)
# Calibrate the test-set probabilities against the class labels, recoded
# numerically via as.numeric(...) - 1 (presumably 0/1 for a two-level factor).
iso_calibrated_probs <- probability.calibration(
  as.numeric(testData$Class) - 1,
  test_probs,
  regularization = TRUE
)

# AUC of the calibrated probabilities on the same test labels
auc_after <- pROC::roc(testData$Class, iso_calibrated_probs)[["auc"]]
auc_after
Area under the curve: 0.9119
# Overlay the kernel density of the original model probabilities (red) with
# that of the calibrated probabilities (blue) on a shared [0, 1] x-axis.
plot(
  density(test_probs),
  main = "Calibrated probabilities",
  xlab = "probabilities",
  ylab = "Density",
  xlim = c(0, 1),
  col = "red",
  las = 1
)
lines(density(iso_calibrated_probs), col = "blue")
legend(
  "topright",
  legend = c("original", "calibrated"),
  col = c("red", "blue"),
  lty = c(1, 1)
)