prophet.R (prophet-1.0) | : | prophet.R (prophet-1.1) | ||
---|---|---|---|---|
skipping to change at line 19 | skipping to change at line 19 | |||
"component", "dow", "doy", "holiday", "holidays", "holidays_lower", "generated _holidays", | "component", "dow", "doy", "holiday", "holidays", "holidays_lower", "generated _holidays", | |||
"holidays_upper", "ix", "lower", "n", "stat", "trend", "row_number", "extra_re gressors", "col", | "holidays_upper", "ix", "lower", "n", "stat", "trend", "row_number", "extra_re gressors", "col", | |||
"trend_lower", "trend_upper", "upper", "value", "weekly", "weekly_lower", "wee kly_upper", | "trend_lower", "trend_upper", "upper", "value", "weekly", "weekly_lower", "wee kly_upper", | |||
"x", "yearly", "yearly_lower", "yearly_upper", "yhat", "yhat_lower", "yhat_upp er", | "x", "yearly", "yearly_lower", "yearly_upper", "yhat", "yhat_lower", "yhat_upp er", | |||
"country", "year" | "country", "year" | |||
)) | )) | |||
#' Prophet forecaster. | #' Prophet forecaster. | |||
#' | #' | |||
#' @param df (optional) Dataframe containing the history. Must have columns ds | #' @param df (optional) Dataframe containing the history. Must have columns ds | |||
#' (date type) and y, the time series. If growth is logistic, then df must | #' (date type) and y, the time series. If growth is logistic, then df must | |||
#' also have a column cap that specifies the capacity at each ds. If not | #' also have a column cap that specifies the capacity at each ds. If not | |||
#' provided, then the model object will be instantiated but not fit; use | #' provided, then the model object will be instantiated but not fit; use | |||
#' fit.prophet(m, df) to fit the model. | #' fit.prophet(m, df) to fit the model. | |||
#' @param growth String 'linear', 'logistic', or 'flat' to specify a linear, log | #' @param growth String 'linear', 'logistic', or 'flat' to specify a linear, | |||
istic | #' logistic or flat trend. | |||
#' or flat trend. | ||||
#' @param changepoints Vector of dates at which to include potential | #' @param changepoints Vector of dates at which to include potential | |||
#' changepoints. If not specified, potential changepoints are selected | #' changepoints. If not specified, potential changepoints are selected | |||
#' automatically. | #' automatically. | |||
#' @param n.changepoints Number of potential changepoints to include. Not used | #' @param n.changepoints Number of potential changepoints to include. Not used | |||
#' if input `changepoints` is supplied. If `changepoints` is not supplied, | #' if input `changepoints` is supplied. If `changepoints` is not supplied, | |||
#' then n.changepoints potential changepoints are selected uniformly from the | #' then n.changepoints potential changepoints are selected uniformly from the | |||
#' first `changepoint.range` proportion of df$ds. | #' first `changepoint.range` proportion of df$ds. | |||
#' @param changepoint.range Proportion of history in which trend changepoints | #' @param changepoint.range Proportion of history in which trend changepoints | |||
#' will be estimated. Defaults to 0.8 for the first 80%. Not used if | #' will be estimated. Defaults to 0.8 for the first 80%. Not used if | |||
#' `changepoints` is specified. | #' `changepoints` is specified. | |||
#' @param yearly.seasonality Fit yearly seasonality. Can be 'auto', TRUE, | #' @param yearly.seasonality Fit yearly seasonality. Can be 'auto', TRUE, FALSE, | |||
#' FALSE, or a number of Fourier terms to generate. | #' or a number of Fourier terms to generate. | |||
#' @param weekly.seasonality Fit weekly seasonality. Can be 'auto', TRUE, | #' @param weekly.seasonality Fit weekly seasonality. Can be 'auto', TRUE, FALSE, | |||
#' FALSE, or a number of Fourier terms to generate. | #' or a number of Fourier terms to generate. | |||
#' @param daily.seasonality Fit daily seasonality. Can be 'auto', TRUE, | #' @param daily.seasonality Fit daily seasonality. Can be 'auto', TRUE, FALSE, | |||
#' FALSE, or a number of Fourier terms to generate. | #' or a number of Fourier terms to generate. | |||
#' @param holidays data frame with columns holiday (character) and ds (date | #' @param holidays data frame with columns holiday (character) and ds (date | |||
#' type)and optionally columns lower_window and upper_window which specify a | #' type)and optionally columns lower_window and upper_window which specify a | |||
#' range of days around the date to be included as holidays. lower_window=-2 | #' range of days around the date to be included as holidays. lower_window=-2 | |||
#' will include 2 days prior to the date as holidays. Also optionally can have | #' will include 2 days prior to the date as holidays. Also optionally can have | |||
#' a column prior_scale specifying the prior scale for each holiday. | #' a column prior_scale specifying the prior scale for each holiday. | |||
#' @param seasonality.mode 'additive' (default) or 'multiplicative'. | #' @param seasonality.mode 'additive' (default) or 'multiplicative'. | |||
#' @param seasonality.prior.scale Parameter modulating the strength of the | #' @param seasonality.prior.scale Parameter modulating the strength of the | |||
#' seasonality model. Larger values allow the model to fit larger seasonal | #' seasonality model. Larger values allow the model to fit larger seasonal | |||
#' fluctuations, smaller values dampen the seasonality. Can be specified for | #' fluctuations, smaller values dampen the seasonality. Can be specified for | |||
#' individual seasonalities using add_seasonality. | #' individual seasonalities using add_seasonality. | |||
#' @param holidays.prior.scale Parameter modulating the strength of the holiday | #' @param holidays.prior.scale Parameter modulating the strength of the holiday | |||
#' components model, unless overridden in the holidays input. | #' components model, unless overridden in the holidays input. | |||
#' @param changepoint.prior.scale Parameter modulating the flexibility of the | #' @param changepoint.prior.scale Parameter modulating the flexibility of the | |||
#' automatic changepoint selection. Large values will allow many changepoints, | #' automatic changepoint selection. Large values will allow many changepoints, | |||
#' small values will allow few changepoints. | #' small values will allow few changepoints. | |||
#' @param mcmc.samples Integer, if greater than 0, will do full Bayesian | #' @param mcmc.samples Integer, if greater than 0, will do full Bayesian | |||
#' inference with the specified number of MCMC samples. If 0, will do MAP | #' inference with the specified number of MCMC samples. If 0, will do MAP | |||
#' estimation. | #' estimation. | |||
#' @param interval.width Numeric, width of the uncertainty intervals provided | #' @param interval.width Numeric, width of the uncertainty intervals provided | |||
#' for the forecast. If mcmc.samples=0, this will be only the uncertainty | #' for the forecast. If mcmc.samples=0, this will be only the uncertainty in | |||
#' in the trend using the MAP estimate of the extrapolated generative model. | #' the trend using the MAP estimate of the extrapolated generative model. If | |||
#' If mcmc.samples>0, this will be integrated over all model parameters, | #' mcmc.samples>0, this will be integrated over all model parameters, which | |||
#' which will include uncertainty in seasonality. | #' will include uncertainty in seasonality. | |||
#' @param uncertainty.samples Number of simulated draws used to estimate | #' @param uncertainty.samples Number of simulated draws used to estimate | |||
#' uncertainty intervals. Settings this value to 0 or False will disable | #' uncertainty intervals. Settings this value to 0 or False will disable | |||
#' uncertainty estimation and speed up the calculation. | #' uncertainty estimation and speed up the calculation. | |||
#' @param backend Whether to use the "rstan" or "cmdstanr" backend to fit the | ||||
#' model. If not provided, uses the R_STAN_BACKEND environment variable. | ||||
#' @param fit Boolean, if FALSE the model is initialized but not fit. | #' @param fit Boolean, if FALSE the model is initialized but not fit. | |||
#' @param ... Additional arguments, passed to \code{\link{fit.prophet}} | #' @param ... Additional arguments, passed to \code{\link{fit.prophet}} | |||
#' | #' | |||
#' @return A prophet model. | #' @return A prophet model. | |||
#' | #' | |||
#' @examples | #' @examples | |||
#' \dontrun{ | #' \dontrun{ | |||
#' history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'), | #' history <- data.frame(ds = seq(as.Date('2015-01-01'), as.Date('2016-01-01'), by = 'd'), | |||
#' y = sin(1:366/200) + rnorm(366)/10) | #' y = sin(1:366/200) + rnorm(366)/10) | |||
#' m <- prophet(history) | #' m <- prophet(history) | |||
skipping to change at line 102 | skipping to change at line 104 | |||
daily.seasonality = 'auto', | daily.seasonality = 'auto', | |||
holidays = NULL, | holidays = NULL, | |||
seasonality.mode = 'additive', | seasonality.mode = 'additive', | |||
seasonality.prior.scale = 10, | seasonality.prior.scale = 10, | |||
holidays.prior.scale = 10, | holidays.prior.scale = 10, | |||
changepoint.prior.scale = 0.05, | changepoint.prior.scale = 0.05, | |||
mcmc.samples = 0, | mcmc.samples = 0, | |||
interval.width = 0.80, | interval.width = 0.80, | |||
uncertainty.samples = 1000, | uncertainty.samples = 1000, | |||
fit = TRUE, | fit = TRUE, | |||
backend = NULL, | ||||
... | ... | |||
) { | ) { | |||
if (!is.null(changepoints)) { | if (!is.null(changepoints)) { | |||
n.changepoints <- length(changepoints) | n.changepoints <- length(changepoints) | |||
} | } | |||
if (is.null(backend)) backend <- get_stan_backend() | ||||
m <- list( | m <- list( | |||
growth = growth, | growth = growth, | |||
changepoints = changepoints, | changepoints = changepoints, | |||
n.changepoints = n.changepoints, | n.changepoints = n.changepoints, | |||
changepoint.range = changepoint.range, | changepoint.range = changepoint.range, | |||
yearly.seasonality = yearly.seasonality, | yearly.seasonality = yearly.seasonality, | |||
weekly.seasonality = weekly.seasonality, | weekly.seasonality = weekly.seasonality, | |||
daily.seasonality = daily.seasonality, | daily.seasonality = daily.seasonality, | |||
holidays = holidays, | holidays = holidays, | |||
seasonality.mode = seasonality.mode, | seasonality.mode = seasonality.mode, | |||
seasonality.prior.scale = seasonality.prior.scale, | seasonality.prior.scale = seasonality.prior.scale, | |||
changepoint.prior.scale = changepoint.prior.scale, | changepoint.prior.scale = changepoint.prior.scale, | |||
holidays.prior.scale = holidays.prior.scale, | holidays.prior.scale = holidays.prior.scale, | |||
mcmc.samples = mcmc.samples, | mcmc.samples = mcmc.samples, | |||
interval.width = interval.width, | interval.width = interval.width, | |||
uncertainty.samples = uncertainty.samples, | uncertainty.samples = uncertainty.samples, | |||
backend = backend, | ||||
specified.changepoints = !is.null(changepoints), | specified.changepoints = !is.null(changepoints), | |||
start = NULL, # This and following attributes are set during fitting | start = NULL, # This and following attributes are set during fitting | |||
y.scale = NULL, | y.scale = NULL, | |||
logistic.floor = FALSE, | logistic.floor = FALSE, | |||
t.scale = NULL, | t.scale = NULL, | |||
changepoints.t = NULL, | changepoints.t = NULL, | |||
seasonalities = list(), | seasonalities = list(), | |||
extra_regressors = list(), | extra_regressors = list(), | |||
country_holidays = NULL, | country_holidays = NULL, | |||
stan.fit = NULL, | stan.fit = NULL, | |||
skipping to change at line 297 | skipping to change at line 303 | |||
#' | #' | |||
#' @return numeric time difference | #' @return numeric time difference | |||
#' | #' | |||
#' @keywords internal | #' @keywords internal | |||
time_diff <- function(ds1, ds2, units = "days") { | time_diff <- function(ds1, ds2, units = "days") { | |||
return(as.numeric(difftime(ds1, ds2, units = units))) | return(as.numeric(difftime(ds1, ds2, units = units))) | |||
} | } | |||
#' Prepare dataframe for fitting or predicting. | #' Prepare dataframe for fitting or predicting. | |||
#' | #' | |||
#' Adds a time index and scales y. Creates auxillary columns 't', 't_ix', | #' Adds a time index and scales y. Creates auxiliary columns 't', 't_ix', | |||
#' 'y_scaled', and 'cap_scaled'. These columns are used during both fitting | #' 'y_scaled', and 'cap_scaled'. These columns are used during both fitting | |||
#' and predicting. | #' and predicting. | |||
#' | #' | |||
#' @param m Prophet object. | #' @param m Prophet object. | |||
#' @param df Data frame with columns ds, y, and cap if logistic growth. Any | #' @param df Data frame with columns ds, y, and cap if logistic growth. Any | |||
#' specified additional regressors must also be present. | #' specified additional regressors must also be present. | |||
#' @param initialize_scales Boolean set scaling factors in m from df. | #' @param initialize_scales Boolean set scaling factors in m from df. | |||
#' | #' | |||
#' @return list with items 'df' and 'm'. | #' @return list with items 'df' and 'm'. | |||
#' | #' | |||
skipping to change at line 813 | skipping to change at line 819 | |||
#' Dataframe with seasonality features. | #' Dataframe with seasonality features. | |||
#' Includes seasonality features, holiday features, and added regressors. | #' Includes seasonality features, holiday features, and added regressors. | |||
#' | #' | |||
#' @param m Prophet object. | #' @param m Prophet object. | |||
#' @param df Dataframe with dates for computing seasonality features and any | #' @param df Dataframe with dates for computing seasonality features and any | |||
#' added regressors. | #' added regressors. | |||
#' | #' | |||
#' @return List with items | #' @return List with items | |||
#' seasonal.features: Dataframe with regressor features, | #' seasonal.features: Dataframe with regressor features, | |||
#' prior.scales: Array of prior scales for each colum of the features | #' prior.scales: Array of prior scales for each column of the features | |||
#' dataframe. | #' dataframe. | |||
#' component.cols: Dataframe with indicators for which regression components | #' component.cols: Dataframe with indicators for which regression components | |||
#' correspond to which columns. | #' correspond to which columns. | |||
#' modes: List with keys 'additive' and 'multiplicative' with arrays of | #' modes: List with keys 'additive' and 'multiplicative' with arrays of | |||
#' component names for each mode of seasonality. | #' component names for each mode of seasonality. | |||
#' | #' | |||
#' @keywords internal | #' @keywords internal | |||
make_all_seasonality_features <- function(m, df) { | make_all_seasonality_features <- function(m, df) { | |||
seasonal.features <- data.frame(row.names = 1:nrow(df)) | seasonal.features <- data.frame(row.names = 1:nrow(df)) | |||
prior.scales <- c() | prior.scales <- c() | |||
skipping to change at line 1230 | skipping to change at line 1236 | |||
dat$cap <- rep(0, nrow(history)) # Unused inside Stan | dat$cap <- rep(0, nrow(history)) # Unused inside Stan | |||
kinit <- linear_growth_init(history) | kinit <- linear_growth_init(history) | |||
} else if (m$growth == 'flat') { | } else if (m$growth == 'flat') { | |||
dat$cap <- rep(0, nrow(history)) # Unused inside Stan | dat$cap <- rep(0, nrow(history)) # Unused inside Stan | |||
kinit <- flat_growth_init(history) | kinit <- flat_growth_init(history) | |||
} else if (m$growth == 'logistic') { | } else if (m$growth == 'logistic') { | |||
dat$cap <- history$cap_scaled # Add capacities to the Stan data | dat$cap <- history$cap_scaled # Add capacities to the Stan data | |||
kinit <- logistic_growth_init(history) | kinit <- logistic_growth_init(history) | |||
} | } | |||
if (exists(".prophet.stan.model", where = prophet_model_env)) { | model <- .load_model(m$backend) | |||
model <- get('.prophet.stan.model', envir = prophet_model_env) | ||||
} else { | ||||
model <- stanmodels$prophet | ||||
} | ||||
stan_init <- function() { | stan_init <- function() { | |||
list(k = kinit[1], | list(k = kinit[1], | |||
m = kinit[2], | m = kinit[2], | |||
delta = array(rep(0, length(m$changepoints.t))), | delta = array(rep(0, length(m$changepoints.t))), | |||
beta = array(rep(0, ncol(seasonal.features))), | beta = array(rep(0, ncol(seasonal.features))), | |||
sigma_obs = 1 | sigma_obs = 1 | |||
) | ) | |||
} | } | |||
if (min(history$y) == max(history$y) & | if (min(history$y) == max(history$y) & | |||
(m$growth %in% c('linear', 'flat'))) { | (m$growth %in% c('linear', 'flat'))) { | |||
# Nothing to fit. | # Nothing to fit. | |||
m$params <- stan_init() | m$params <- stan_init() | |||
m$params$sigma_obs <- 0. | m$params$sigma_obs <- 0. | |||
n.iteration <- 1. | n.iteration <- 1. | |||
} else if (m$mcmc.samples > 0) { | ||||
args <- list( | ||||
object = model, | ||||
data = dat, | ||||
init = stan_init, | ||||
iter = m$mcmc.samples | ||||
) | ||||
args <- utils::modifyList(args, list(...)) | ||||
m$stan.fit <- do.call(rstan::sampling, args) | ||||
m$params <- rstan::extract(m$stan.fit) | ||||
n.iteration <- length(m$params$k) | ||||
} else { | } else { | |||
args <- list( | if (m$mcmc.samples > 0) { | |||
object = model, | args <- .stan_args(model, dat, stan_init, m$backend, type = "mcmc", m$mcmc | |||
data = dat, | .samples, ...) | |||
init = stan_init, | model_output <- .sampling(args, m$backend) | |||
algorithm = if(dat$T < 100) {'Newton'} else {'LBFGS'}, | } else { | |||
iter = 1e4, | args <- .stan_args(model, dat, stan_init, m$backend, type = "optimize", .. | |||
as_vector = FALSE | .) | |||
) | model_output <- .fit(args, m$backend) | |||
args <- utils::modifyList(args, list(...)) | ||||
m$stan.fit <- do.call(rstan::optimizing, args) | ||||
if (m$stan.fit$return_code != 0) { | ||||
message( | ||||
'Optimization terminated abnormally. Falling back to Newton optimizer.' | ||||
) | ||||
args$algorithm = 'Newton' | ||||
m$stan.fit <- do.call(rstan::optimizing, args) | ||||
} | } | |||
m$params <- m$stan.fit$par | m$stan.fit <- model_output$stan_fit | |||
n.iteration <- 1 | m$params <- model_output$params | |||
n.iteration <- model_output$n_iteration | ||||
} | } | |||
# Cast the parameters to have consistent form, whether full bayes or MAP | # Cast the parameters to have consistent form, whether full bayes or MAP | |||
for (name in c('delta', 'beta')){ | for (name in c('delta', 'beta')){ | |||
m$params[[name]] <- matrix(m$params[[name]], nrow = n.iteration) | m$params[[name]] <- matrix(m$params[[name]], nrow = n.iteration) | |||
} | } | |||
# rstan::sampling returns 1d arrays; converts to atomic vectors. | # rstan::sampling returns 1d arrays; converts to atomic vectors. | |||
for (name in c('k', 'm', 'sigma_obs')){ | for (name in c('k', 'm', 'sigma_obs')){ | |||
m$params[[name]] <- c(m$params[[name]]) | m$params[[name]] <- c(m$params[[name]]) | |||
} | } | |||
# If no changepoints were requested, replace delta with 0s | # If no changepoints were requested, replace delta with 0s | |||
if (m$n.changepoints == 0) { | if (m$n.changepoints == 0) { | |||
skipping to change at line 1670 | skipping to change at line 1651 | |||
#' @param freq 'day', 'week', 'month', 'quarter', 'year', 1(1 sec), 60(1 minute) or 3600(1 hour). | #' @param freq 'day', 'week', 'month', 'quarter', 'year', 1(1 sec), 60(1 minute) or 3600(1 hour). | |||
#' @param include_history Boolean to include the historical dates in the data | #' @param include_history Boolean to include the historical dates in the data | |||
#' frame for predictions. | #' frame for predictions. | |||
#' | #' | |||
#' @return Dataframe that extends forward from the end of m$history for the | #' @return Dataframe that extends forward from the end of m$history for the | |||
#' requested number of periods. | #' requested number of periods. | |||
#' | #' | |||
#' @export | #' @export | |||
make_future_dataframe <- function(m, periods, freq = 'day', | make_future_dataframe <- function(m, periods, freq = 'day', | |||
include_history = TRUE) { | include_history = TRUE) { | |||
# For backwards compatability with previous zoo date type, | # For backwards compatibility with previous zoo date type, | |||
if (freq == 'm') { | if (freq == 'm') { | |||
freq <- 'month' | freq <- 'month' | |||
} | } | |||
if (is.null(m$history.dates)) { | if (is.null(m$history.dates)) { | |||
stop('Model must be fit before this can be used.') | stop('Model must be fit before this can be used.') | |||
} | } | |||
dates <- seq(max(m$history.dates), length.out = periods + 1, by = freq) | dates <- seq(max(m$history.dates), length.out = periods + 1, by = freq) | |||
dates <- dates[2:(periods + 1)] # Drop the first, which is max(history$ds) | dates <- dates[2:(periods + 1)] # Drop the first, which is max(history$ds) | |||
if (include_history) { | if (include_history) { | |||
dates <- c(m$history.dates, dates) | dates <- c(m$history.dates, dates) | |||
End of changes. 22 change blocks. | ||||
76 lines changed or deleted | 58 lines changed or added |