def get_prediction_interval(prediction, y_test, test_predictions, pi=.95):
    """
    Get a prediction interval for a linear regression.

    The half-width of the interval is a normal quantile multiplied by the
    residual standard deviation estimated from the test set. It does not
    depend on the individual prediction, so the same half-width is applied
    to every prediction passed in.

    INPUTS:
    - prediction: a single prediction, or an array-like of predictions
      (list, tuple, NumPy array). Passing all predictions at once returns
      one interval per prediction.
    - y_test: observed target values of the test set
      (len(y_test) must exceed 2 for the estimate to be defined)
    - test_predictions: all test set predictions, aligned with y_test
    - pi: prediction interval threshold (default = .95)
    OUTPUT:
    - (lower, prediction, upper): scalars for a single prediction,
      element-wise arrays for an array-like of predictions
    """
    # Plain Python sequences do not support element-wise arithmetic;
    # scalars and arrays are left untouched so their type is preserved.
    if isinstance(prediction, (list, tuple)):
        prediction = np.asarray(prediction, dtype=float)

    # Residual standard deviation on the test set.
    residuals = (np.asarray(y_test, dtype=float)
                 - np.asarray(test_predictions, dtype=float))
    sum_errs = np.sum(residuals ** 2)
    # Divides by n - 2 rather than n (presumably the degrees of freedom of
    # a one-feature linear fit -- confirm for models with more features).
    stdev = np.sqrt(1 / (len(y_test) - 2) * sum_errs)

    # Two-sided interval: split the excluded probability over both tails.
    one_minus_pi = 1 - pi
    ppf_lookup = 1 - (one_minus_pi / 2)
    z_score = stats.norm.ppf(ppf_lookup)
    interval = z_score * stdev

    # Lower and upper bounds; broadcasts when prediction is an array.
    lower, upper = prediction - interval, prediction + interval
    return lower, prediction, upper
# Example call: interval for the first element of `predictions`, using the
# default pi=.95 (`predictions` and `y_test` are defined outside this snippet).
get_prediction_interval(predictions[0], y_test, predictions)
OUTPUT: (19.24072024369257, 28.996723619824934, 38.752726995957296)
Вопрос: сейчас я передаю в функцию только одно предсказание. Как передать все предсказания сразу, чтобы получить интервал прогноза для каждого из них?