2 methods to automatically find the ideal number of bins for your histograms
library(ggplot2)
library(patchwork) # used to compose the plots for the comparisons
library(hrbrthemes) # used for the theme of plots
# Make hrbrthemes' ipsum theme the default for every plot created below.
theme_set(theme_ipsum())
# Load the built-in mtcars data set used throughout the examples.
data(mtcars)
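The Freedman–Diaconis rule sets the bin width to \(h = 2 \times \mathrm{IQR} \times n^{-1/3}\), where \(\mathrm{IQR}\) is the interquartile range and \(n\) is the number of observations. The number of bins is then \((\max - \min) / (2 \times \mathrm{IQR} \times n^{-1/3})\).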
#' bins_fd
#' @description Returns the number of histogram bins suggested by the
#'   Freedman-Diaconis rule: the range of the data divided by the bin width
#'   `2 * IQR(vec) / n^(1/3)`, rounded up to a whole number. Non-finite values
#'   (`NA`, `NaN`, `Inf`, `-Inf`) are ignored and `n` is the number of
#'   remaining values.
#' @param vec numeric vector
#' @return number of bins: a single whole number, never less than 1. When the
#'   bin width cannot be computed (fewer than two finite values, or an
#'   interquartile range of zero) 1 is returned.
bins_fd <- function(vec) {
  # Drop missing/infinite values: IQR() errors on NA and range() would be
  # infinite, and such values are not drawn in a histogram anyway.
  vec <- vec[is.finite(vec)]
  n_obs <- length(vec)
  if (n_obs < 2) {
    return(1)
  }

  bin_width <- 2 * IQR(vec) / n_obs^(1 / 3)
  if (bin_width <= 0) {
    # Zero IQR (constant or heavily tied data) would give a division by zero,
    # i.e. NaN or Inf bins.
    return(1)
  }

  # A bin count has to be a whole number; round up so the bins are never
  # wider than the width given by the rule.
  max(1, ceiling(diff(range(vec)) / bin_width))
}
The square-root rule sets the number of bins to the square root of the number of observations, \(\sqrt{n}\).
#' bins_sqrt
#' @description Returns the number of histogram bins given by the square-root
#'   rule: the square root of the number of observations, rounded up to a
#'   whole number. Only finite values count as observations (`NA`, `NaN`,
#'   `Inf` and `-Inf` are ignored).
#' @param vec numeric vector
#' @return number of bins: a single whole number, never less than 1
bins_sqrt <- function(vec) {
  n_obs <- sum(is.finite(vec))
  # A bin count has to be a whole number, and at least one bin is needed even
  # for an empty vector.
  max(1, ceiling(sqrt(n_obs)))
}
# Histogram of mpg whose bin count is computed by bins_fd().
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(bins = bins_fd(mtcars[["mpg"]]))
#' histogram_density
#' @description Plots the histogram of a variable on the density scale,
#'   drawn over the variable's density curve.
#' @param data a data.frame
#' @param var name of the column of `data` to plot, as a string
#' @param bins_fn function that receives the column `data[[var]]` and returns
#'   the number of bins; its result is rounded up to a whole number
#'   (if `NULL` defaults to 30 bins)
#' @param title title of the plot
#' @return ggplot object
histogram_density <- function(data, var, bins_fn = NULL, title = "") {
  if (is.null(bins_fn)) {
    bins <- 30
  } else {
    bins <- bins_fn(data[[var]])
    # Fail here with a clear message rather than later, at draw time, inside
    # ggplot2 (e.g. a rule dividing by a zero IQR yields NaN or Inf).
    if (!is.numeric(bins) || length(bins) != 1 || !is.finite(bins)) {
      stop("`bins_fn` must return a single finite number of bins.",
        call. = FALSE
      )
    }
    # geom_histogram() needs a whole, positive number of bins.
    bins <- max(1, ceiling(bins))
  }

  # `.data[[var]]` and `after_stat()` replace the deprecated `aes_string()`
  # and `..density..` notations.
  ggplot(data = data, aes(x = .data[[var]])) +
    geom_density(fill = "red", alpha = 0.2) +
    geom_histogram(
      aes(y = after_stat(density)),
      bins = bins,
      alpha = 0.5
    ) +
    # Keep the plain column name as the x-axis label.
    labs(x = var) +
    ggtitle(title)
}
# Compare the three binning choices side by side for mpg.
var <- "mpg"
# bins_fn is left at its default (NULL), i.e. 30 bins.
a <- histogram_density(data = mtcars, var = var, title = "Default")
b <- histogram_density(
  data = mtcars, var = var,
  bins_fn = bins_sqrt, title = "Square root"
)
c <- histogram_density(
  data = mtcars, var = var,
  bins_fn = bins_fd, title = "Freedman-Diaconis"
)
# Lay the three plots out in a single row.
a + b + c + patchwork::plot_layout(ncol = 3)
# Compare the three binning choices side by side for wt.
var <- "wt"
# bins_fn is left at its default (NULL), i.e. 30 bins.
a <- histogram_density(data = mtcars, var = var, title = "Default")
b <- histogram_density(
  data = mtcars, var = var,
  bins_fn = bins_sqrt, title = "Square root"
)
c <- histogram_density(
  data = mtcars, var = var,
  bins_fn = bins_fd, title = "Freedman-Diaconis"
)
# Lay the three plots out in a single row.
a + b + c + patchwork::plot_layout(ncol = 3)
“Wikipedia - Freedman Diaconis Rule.” 2018. https://en.wikipedia.org/w/index.php?title=Freedman%E2%80%93Diaconis_rule&oldid=844512210.