diff --git a/DESCRIPTION b/DESCRIPTION index 40872949..f6489b5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CLVTools Title: Tools for Customer Lifetime Value Estimation -Version: 0.12.0 -Date: 2025-09-22 +Version: 0.12.1 +Date: 2025-11-06 Authors@R: c( person(given="Patrick", family="Bachmann", email = "pbachma@ethz.ch", role = c("cre","aut")), person(given="Niels", family="Kuebler", email = "niels.kuebler@uzh.ch", role = "aut"), diff --git a/NEWS.md b/NEWS.md index 51480455..01ddeb1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# CLVTools 0.12.1 + +### NEW FEATURES +* `newcustomer()` prediction: Include the initial transaction in the predicted number of orders + + + # CLVTools 0.12.0 ### NEW FEATURES diff --git a/R/f_generics_clvfittedtransactions.R b/R/f_generics_clvfittedtransactions.R index c4eaf868..8a455e12 100644 --- a/R/f_generics_clvfittedtransactions.R +++ b/R/f_generics_clvfittedtransactions.R @@ -335,7 +335,8 @@ setMethod("clv.controlflow.predict.new.customer", signature = signature(clv.fitt check_err_msg("Parameter newdata has to be output from calling `newcustomer()`!") } - return(drop(clv.model.predict.new.customer( + # 1+: Include initial order + return(1 + drop(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, clv.newcustomer=clv.newcustomer))) diff --git a/R/f_generics_clvfittedtransactionsdyncov.R b/R/f_generics_clvfittedtransactionsdyncov.R index 9318c968..6f70e5b9 100644 --- a/R/f_generics_clvfittedtransactionsdyncov.R +++ b/R/f_generics_clvfittedtransactionsdyncov.R @@ -107,7 +107,8 @@ setMethod(f = "clv.controlflow.predict.new.customer", signature = signature(clv. 
tp.prediction.end=tp.prediction.end)) - return(clv.model.predict.new.customer( + # 1+: Include initial order + return(1 + clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, clv.newcustomer=clv.newcustomer)) diff --git a/R/f_generics_clvfittedtransactionsstaticcov.R b/R/f_generics_clvfittedtransactionsstaticcov.R index b21c84d2..1122274c 100644 --- a/R/f_generics_clvfittedtransactionsstaticcov.R +++ b/R/f_generics_clvfittedtransactionsstaticcov.R @@ -91,7 +91,8 @@ setMethod(f = "clv.controlflow.predict.new.customer", signature = signature(clv. check_err_msg(check_user_data_predict_newcustomer_staticcov(clv.fitted=clv.fitted, clv.newcustomer=clv.newcustomer)) - return(drop(clv.model.predict.new.customer( + # 1+: Include initial order + return(1 + drop(clv.model.predict.new.customer( clv.model = clv.fitted@clv.model, clv.fitted = clv.fitted, clv.newcustomer=clv.newcustomer))) diff --git a/R/f_interface_newcustomer.R b/R/f_interface_newcustomer.R index f9d05f75..91c6c34d 100644 --- a/R/f_interface_newcustomer.R +++ b/R/f_interface_newcustomer.R @@ -4,11 +4,10 @@ #' @description #' The methods documented here are to be used together with #' \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to obtain -#' the expected number of transactions of an average newly alive customer and +#' the expected number of transactions of an average, yet-to-be acquired customer and #' with \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to obtain -#' the expected spending of an average newly alive customer. -#' This prediction is only sensible for (fictional) customers without order history: -#' Customers which just came alive and have not had the chance to reveal any more of their behavior. +#' the expected spending of an average yet-to-be acquired customer. +#' See the \code{Method} subsection in Details for more explanations. 
#' #' The methods described here produce the data required as input to #' \code{predict(newdata=)} to make this new customer prediction. @@ -16,16 +15,20 @@ #' See details for the required format. #' #' \code{newcustomer()}, \code{newcustomer.static()}, \code{newcustomer.dynamic()}: -#' To predict the number of transactions a single, fictional, average new customer is expected to make in -#' the \code{num.periods} periods since making the first transaction ("coming alive"). +#' To predict the number of transactions a single, fictional, average, yet-to-be acquired +#' customer is expected to make in the first \code{num.periods} periods. #' -#' \code{newcustomer.spending()}: To estimate how much a single, fictional, average -#' new customer is expected to spend on average per transaction. +#' \code{newcustomer.spending()}: To estimate how much a single, fictional, average, +#' yet-to-be acquired customer is expected to spend on average per transaction. +#' Note that the spending model should be fit with \code{remove.first.transaction=FALSE} +#' because the spending predictions are also used for the first orders. #' -#' @param num.periods A positive, numeric scalar indicating the number of periods to predict. +#' +#' +#' @param num.periods A positive, numeric scalar indicating the number of periods to predict from the initial transaction. #' @param data.cov.life Numeric-only covariate data for the lifetime process for a single customer, \code{data.table} or \code{data.frame}. See details. #' @param data.cov.trans Numeric-only covariate data for the transaction process for a single customer, \code{data.table} or \code{data.frame}. See details. -#' @param first.transaction For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive") for which a prediction is made. +#' @param first.transaction For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive"). 
#' Has to be within the time range of the covariate data. #' #' @seealso \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to use the output of the methods described here. @@ -52,14 +55,23 @@ #' additionally required because the exact covariates that are active during the prediction period have #' to be known. #' +#' +#' \subsection{Method}{ +#' These predictions are for average, prospective customers: Yet-to-be acquired +#' customers which still have to place their first order. +#' Therefore, the predicted number of expected orders also includes the initial purchase (1+). +#' The subsequent orders in the first \code{t} periods are then predicted using the unconditional expectation. +#' In case of the Pareto/NBD this is +#' +#' \deqn{1 + E[X(t)]= 1 + \frac{r \beta}{\alpha (s-1)} \left[ 1- \left (\frac{\beta}{\beta+t} \right)^{s-1} \right].} +#' } +#' #' @returns #' \item{newcustomer()}{An object of class \code{clv.newcustomer.no.cov}} #' \item{newcustomer.static()}{An object of class \code{clv.newcustomer.static.cov}} #' \item{newcustomer.dynamic()}{An object of class \code{clv.newcustomer.dynamic.cov}} #' \item{newcustomer.spending()}{An object of class \code{clv.newcustomer.spending}} #' -#' -#' #' @examples #' \donttest{ #' data("apparelTrans") @@ -96,7 +108,9 @@ #' #' #' # Spending model -#' gg.apparel <- gg(clv.data.apparel) +#' # Note: remove.first.transaction=FALSE as the predicted spending will be multiplied +#' # with the total number of orders that also includes the initial purchase +#' gg.apparel <- gg(clv.data.apparel, remove.first.transaction=FALSE) #' predict(gg.apparel, newdata = newcustomer.spending()) #' #' diff --git a/README.md b/README.md index ddfbaa63..220d7797 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,12 @@ CLVs in continuous non-contractual business settings such as retailers, probabilistic customer attrition models are the preferred choice in literature and practice. 
+Below, we provide a broad overview of the functionalities of CLVTools and a quickstart tutorial. More detailed information is provided in the following documents: +- For more information on the terminology and general modeling challenges when assessing customers' future value look at the vignette ["Probabilistic Models for Analyzing Customer Purchase Behavior: A Primer"](https://cran.r-project.org/web/packages/CLVTools/vignettes/CLVTools_intuitive_explanations.pdf). +- For a comprehensive case study with CLVTools look at the vignette ["Walkthrough for the CLVTools Package"](https://cran.r-project.org/web/packages/CLVTools/vignettes/CLVTools.pdf). +- For advanced modeling techniques look at the vignette ["Advanced and Very Advanced Modeling Techniques in CLVTools"](https://cran.r-project.org/web/packages/CLVTools/vignettes/CLVTools_advanced_techniques.pdf). +- To understand the internal object-oriented architecture of CLVTools look at the vignette ["Classes in CLVTools"](https://cran.r-project.org/web/packages/CLVTools/vignettes/CLVTools_classes.pdf). + The R package `CLVTools` provides an efficient and easy to use implementation framework for probabilistic customer attrition models in non-contractual settings. Building up on the learnings of other diff --git a/cran-comments.md b/cran-comments.md index a3ca625c..473f9437 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,24 +1,16 @@ # Comment from the authors -This is version 0.12.0 of the CLVTools package. 
The most relevant changes in this version are: -* Add 3 new vignettes covering: Advanced modelling techniques, model intuition, and the internal class system -* Add method `hessian()` to calculate hessian matrix for already fitted models -* Correct significance indicators NA in `summary()` -* Add new parameter to data preparation method -* Renaming prediction output columns -* Fix CRAN notes - +* `newcustomer()` prediction: Include the initial transaction in the predicted number of orders # Test environments - -## Testthat -Tests provide coverage of roughly 91.5 percent (covr) and ensure that all functionalities work correctly for all models in all settings +Tests provide coverage of roughly 91.5 percent (covr): * Winbuilder devel, release, and old-release * macbuilder release -## R CMD check results +# R CMD check results 0 errors | 0 warnings | 1 note * sub-directories of 1Mb or more (varying from 4Mb to 16.6Mb, we are linking against RcppArmadillo and the GNU GSL) diff --git a/man/newcustomer.Rd b/man/newcustomer.Rd index a156ba82..8b4ab4ee 100644 --- a/man/newcustomer.Rd +++ b/man/newcustomer.Rd @@ -21,13 +21,13 @@ newcustomer.dynamic( newcustomer.spending() } \arguments{ -\item{num.periods}{A positive, numeric scalar indicating the number of periods to predict.} +\item{num.periods}{A positive, numeric scalar indicating the number of periods to predict from the initial transaction.} \item{data.cov.life}{Numeric-only covariate data for the lifetime process for a single customer, \code{data.table} or \code{data.frame}. See details.} \item{data.cov.trans}{Numeric-only covariate data for the transaction process for a single customer, \code{data.table} or \code{data.frame}. See details.} -\item{first.transaction}{For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive") for which a prediction is made. 
+\item{first.transaction}{For dynamic covariate models only: The time point of the first transaction of the customer ("coming alive"). Has to be within the time range of the covariate data.} } \value{ @@ -39,11 +39,10 @@ Has to be within the time range of the covariate data.} \description{ The methods documented here are to be used together with \link[CLVTools:predict.clv.fitted.transactions]{predict (transactions)} to obtain -the expected number of transactions of an average newly alive customer and +the expected number of transactions of an average, yet-to-be acquired customer and with \link[CLVTools:predict.clv.fitted.spending]{predict (spending)} to obtain -the expected spending of an average newly alive customer. -This prediction is only sensible for (fictional) customers without order history: -Customers which just came alive and have not had the chance to reveal any more of their behavior. +the expected spending of an average yet-to-be acquired customer. +See the \code{Method} subsection in Details for more explanations. The methods described here produce the data required as input to \code{predict(newdata=)} to make this new customer prediction. @@ -51,11 +50,13 @@ This is mostly covariate data for static and dynamic covariate models. See details for the required format. \code{newcustomer()}, \code{newcustomer.static()}, \code{newcustomer.dynamic()}: -To predict the number of transactions a single, fictional, average new customer is expected to make in -the \code{num.periods} periods since making the first transaction ("coming alive"). +To predict the number of transactions a single, fictional, average, yet-to-be acquired +customer is expected to make in the first \code{num.periods} periods. -\code{newcustomer.spending()}: To estimate how much a single, fictional, average -new customer is expected to spend on average per transaction. 
+\code{newcustomer.spending()}: To estimate how much a single, fictional, average, +yet-to-be acquired customer is expected to spend on average per transaction. +Note that the spending model should be fit with \code{remove.first.transaction=FALSE} +because the spending predictions are also used for the first orders. } \details{ The covariate data has to contain one column for every covariate parameter in the fitted model. Only numeric values are allowed, no factors or characters. @@ -77,6 +78,17 @@ See examples. For models with dynamic covariates, the time point of the first purchase (\code{first.transaction}) is additionally required because the exact covariates that are active during the prediction period have to be known. + + +\subsection{Method}{ +These predictions are for average, prospective customers: Yet-to-be acquired +customers which still have to place their first order. +Therefore, the predicted number of expected orders also includes the initial purchase (1+). +The subsequent orders in the first \code{t} periods are then predicted using the unconditional expectation. 
+In case of the Pareto/NBD this is + +\deqn{1 + E[X(t)]= 1 + \frac{r \beta}{\alpha (s-1)} \left[ 1- \left (\frac{\beta}{\beta+t} \right)^{s-1} \right].} +} } \examples{ \donttest{ @@ -114,7 +126,9 @@ predict( # Spending model -gg.apparel <- gg(clv.data.apparel) +# Note: remove.first.transaction=FALSE as the predicted spending will be multiplied +# with the total number of orders that also includes the initial purchase +gg.apparel <- gg(clv.data.apparel, remove.first.transaction=FALSE) predict(gg.apparel, newdata = newcustomer.spending()) diff --git a/tests/testthat/helper_s3_fitted_plot.R b/tests/testthat/helper_s3_fitted_plot.R index 936a004b..34a5ff7d 100644 --- a/tests/testthat/helper_s3_fitted_plot.R +++ b/tests/testthat/helper_s3_fitted_plot.R @@ -313,6 +313,6 @@ fct.testthat.runability.clvfittedspending.plot <- function(fitted.spending){ skip_on_cran() expect_silent(res.plot.10 <- plot(fitted.spending, n = 10, verbose=FALSE)) expect_silent(res.plot.20 <- plot(fitted.spending, n = 20, verbose=FALSE)) - expect_false(isTRUE(all.equal(res.plot.10, res.plot.20))) + expect_false(isTRUE(all.equal(res.plot.10@layers, res.plot.20@layers))) }) } diff --git a/tests/testthat/helper_testthat_correctness_transactions.R b/tests/testthat/helper_testthat_correctness_transactions.R index 5138b5d6..83ed2673 100644 --- a/tests/testthat/helper_testthat_correctness_transactions.R +++ b/tests/testthat/helper_testthat_correctness_transactions.R @@ -218,9 +218,9 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.regularization.lambda.0 } fct.testthat.correctness.clvfittedtransactions.nocov.predict.newcustomer.0.for.num.periods.eq.0 <- function(clv.fitted){ - test_that("nocov: predict newcustomer 0 for t=0", { + test_that("nocov: predict newcustomer==1 for t=0", { expect_silent(pred <- predict(clv.fitted, newdata=newcustomer(num.periods = 0))) - expect_true(pred == 0) + expect_true(pred == 1) }) } @@ -284,7 +284,7 @@ 
fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.dif } fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.for.num.periods.eq.0 <- function(m.fitted.static){ - test_that("staticcov: predict(newcustomer) 0 for t=0", { + test_that("staticcov: predict(newcustomer)==1 for t=0", { df.cov <- fct.helper.default.newcustomer.covdata.static() expect_silent(pred <- predict( m.fitted.static, @@ -292,7 +292,7 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.f num.periods = 0, data.cov.life = df.cov, data.cov.trans = df.cov))) - expect_true(pred == 0) + expect_true(pred == 1) }) } diff --git a/tests/testthat/test_correctness_pnbd_dyncov.R b/tests/testthat/test_correctness_pnbd_dyncov.R index e6c494c6..0e50997f 100644 --- a/tests/testthat/test_correctness_pnbd_dyncov.R +++ b/tests/testthat/test_correctness_pnbd_dyncov.R @@ -273,14 +273,14 @@ fct.testthat.correctness.dyncov.predict.newcustomer <- function(){ p.dyn <- fct.helper.dyncov.quickfit.apparel.data() df.cov <- fct.helper.default.newcustomer.covdata.dyncov() - test_that("dyncov: predict newcustomer 0 for t=0", { + test_that("dyncov: predict newcustomer==1 for t=0", { expect_silent(pred <- predict(p.dyn, newdata=newcustomer.dynamic( num.periods = 0, data.cov.life = df.cov, data.cov.trans = df.cov, first.transaction = "2000-01-04" ))) - expect_equal(pred, 0) + expect_equal(pred, 1) }) test_that("dyncov predict newcustomer different results for different covs", { diff --git a/vignettes/CLVTools.Rmd b/vignettes/CLVTools.Rmd index 086691d1..47c45191 100644 --- a/vignettes/CLVTools.Rmd +++ b/vignettes/CLVTools.Rmd @@ -11,13 +11,28 @@ output: latex_engine: xelatex toc: true number_sections: yes +papersize: A4 bibliography: bibliography.bib vignette: > %\VignetteIndexEntry{The CLVTools Package} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} +abstract: | + This vignette is a hands-on guide to the R package `CLVTools` for modeling 
and forecasting + customer base dynamics. It shows how to construct `clv.data` objects, estimate latent + attrition models (Pareto/NBD, BG/NBD, GGom/NBD), and generate individual-level forecasts: + conditional expected transactions (CET), probability of being alive (PAlive), and discounted + expected residual (or finite-horizon) transactions (DERT/DECT). We demonstrate the use of + time-invariant and time-varying covariates, optional purchase–attrition correlation via a + Sarmanov specification, and regularization and equality constraints for covariate effects. + The vignette also covers the Gamma/Gamma spending model for predicting mean spend and + computing CLV, and provides reproducible code for summaries, diagnostics, and plots. + Guidance on data preparation, estimation/holdout splitting, optimizer settings, and result + interpretation is included throughout. --- +\newpage + ```{r setup, include = FALSE} knitr::opts_chunk$set(