Plot scatter plot with 2D density

2d density plot is helpful for examining the connection between 2 numerical variables. It divides the plot area into several little fragments to prevent overlapping (as in the scatterplot next to it) and shows the number of points in each fragment.
density
Author
Published

Saturday, May 11, 2024

Modified

Tuesday, May 14, 2024

library(ggpubr)
## Loading required package: ggplot2
library(MASS)
library(viridis)
## Loading required package: viridisLite
library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:MASS':
## 
##     area
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
### Add 2d density estimation
plot_data <- iris |> mutate(Species = factor(Species)) |> as_tibble()

sp <- ggplot(plot_data, aes(x = Sepal.Length, y = Sepal.Width))+
    geom_point()+
    theme_bw()

### Show the countour only
sp + geom_density_2d()


### Show the area only, with gradient color
sp + stat_density_2d(aes(fill = ..level..), geom = "polygon")
## Warning: The dot-dot notation (`..level..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(level)` instead.


### Change gradient color: custom
sp + stat_density_2d(aes(fill = ..level..), geom = "polygon") +
    gradient_fill(c("white", "steelblue"))


### Change gradient color: custom
sp + stat_density_2d(
    aes(
        fill = after_stat(nlevel)), geom = "polygon", 
        n = 200, bins = 10, contour = TRUE
) +
    scale_fill_viridis_c(option = "A")

    # gradient_fill(c("white", "steelblue"))

### Category data
sp + stat_density_2d(aes(alpha = ..level.., fill = Species), geom = "polygon") +
    scale_fill_manual(values = c("red", "steelblue", "green"))


sp + stat_density_2d(
    aes(alpha = ..level.., fill = Species), geom = "polygon",
     n = 200, bins = 10, contour = TRUE
) +
    scale_fill_manual(values = c("red", "steelblue", "green")) +
    facet_wrap(~Species)


sp + stat_density_2d(
    aes(alpha = after_stat(nlevel), fill = Species), geom = "polygon",
     n = 200, bins = 10, contour = TRUE
) +
    scale_fill_manual(values = c("red", "steelblue", "green")) +
    facet_wrap(~Species)


### By adding to stat_density_2d the argument bins to avoid overplotting, 
### control and draw the attention to a number of density areas in a very economical fashion.
sp + 
stat_density_2d(
    aes(alpha = ..level.., fill = Species), geom = "polygon", bins = 4
) +
    scale_fill_manual(values = c("red", "steelblue", "green"))


### Assigning manually the colours, NA for those levels we do not want to plot. Main disadvantage, we should know the number of levels and colours needed in advance (or compute them)
sp +
stat_density_2d(geom = "polygon", aes(fill = as.factor(..level..))) +
  scale_fill_manual(
    values = c(NA, NA, NA, NA, NA,"#BDD7E7", "#6BAED6", "#3182BD", "#08519C")
)


###
sp + geom_density_2d_filled() +
  scale_fill_brewer()
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Blues is 9
## Returning the palette you asked for with that many colors


### Change the gradient color: RColorBrewer palette
sp + stat_density_2d(aes(fill = ..level..), geom = "polygon") +
    gradient_fill("YlOrRd")


### Area + contour
sp + stat_density_2d(aes(fill = ..level..), geom = "polygon", colour = "white") +
    gradient_fill("YlOrRd")


### Using raster
sp +
    stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    theme(legend.position = "none")


### Call the palette with a number
sp +
    stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
    scale_fill_distiller(palette = 4, direction = -1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    theme(legend.position = "none")



### The direction argument allows to reverse the palette
sp +
    stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
    scale_fill_distiller(palette = 4, direction = 1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    theme(legend.position = "none")



### Call the palette using a name.
sp +
    stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
    scale_fill_distiller(palette = "Spectral", direction = 1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    theme(legend.position = "none")

plot_data <-
  data.frame(X = c(rnorm(300, 3, 2.5), rnorm(150, 7, 2)),
             Y = c(rnorm(300, 6, 2.5), rnorm(150, 2, 2)),
             Label = c(rep('A', 300), rep('B', 150)))


library(ggplot2)
library(MASS)
library(tidyr)
#Calculate the range
xlim <- range(plot_data$X)
ylim <-range(plot_data$Y)


#Genrate the kernel density for each group
newplot_data <- plot_data %>% group_by(Label) %>% do(Dens=kde2d(.$X, .$Y, n=100, lims=c(xlim,ylim)))

#Transform the density in  data.frame
newplot_data  %<>%  do(Label=.$Label, V=expand.grid(.$Dens$x,.$Dens$y), Value=c(.$Dens$z)) %>% do(data.frame(Label=.$Label,x=.$V$Var1, y=.$V$Var2, Value=.$Value))

#Untidy data and chose the value for each point.
#In this case chose the value of the label with highest value  
   newplot_data  %<>%   spread( Label,value=Value) %>%
        mutate(Level = if_else(A>B, A, B), Label = if_else(A>B,"A", "B"))
# Contour plot
ggplot(newplot_data, aes(x,y, z=Level, fill=Label, alpha=..level..))  + stat_contour(geom="polygon")

### Define the functio using kde2d
# Get density of points in 2 dimensions.
# @param x A numeric vector.
# @param y A numeric vector.
# @param n Create a square n by n grid to compute density.
# @return The density within each square.
get_density <- function(x, y, ...) {
  dens <- MASS::kde2d(x, y, ...)
  ix <- findInterval(x, dens$x)
  iy <- findInterval(y, dens$y)
  ii <- cbind(ix, iy)
  return(dens$z[ii])
}

### Example data
set.seed(1)
dat <- data.frame(
  x = c(
    rnorm(1e4, mean = 0, sd = 0.1),
    rnorm(1e3, mean = 0, sd = 0.1)
  ),
  y = c(
    rnorm(1e4, mean = 0, sd = 0.1),
    rnorm(1e3, mean = 0.1, sd = 0.2)
  )
)

### Split the plot into a 100 by 100 grid of squares and then color the points 
### by the estimated density in each square
dat$density <- get_density(dat$x, dat$y, n = 100)

### Points are overplotted, so you can’t see the peak density:
ggplot(dat) + geom_point(aes(x, y))


ggplot(dat) + geom_point(aes(x, y, color = density)) + 
    scale_color_viridis()


### set n = 15 (the squares in the grid are too big)
dat$density <- get_density(dat$x, dat$y, n = 15)
ggplot(dat) + geom_point(aes(x, y, color = density)) + 
    scale_color_viridis()


### And what if you modify the bandwidth of the normal kernel with h = c(1, 1)?
dat$density <- get_density(dat$x, dat$y, h = c(1, 1), n = 100)
ggplot(dat) + geom_point(aes(x, y, color = density)) + scale_color_viridis()

# Generate example data
set.seed(123)
df <- data.frame(matrix(rnorm(1000), ncol=10))
df$type <- sample(c("WT", "MUT/HET"), nrow(df), replace = TRUE)

# Perform UMAP
umap_result <- umap(df %>% select(-type))

# Prepare data for plotting
umap_df <- as.data.frame(umap_result$layout)
colnames(umap_df) <- c("UMAP1", "UMAP2")
umap_df$type <- df$type

# Function to create density plots with customized legend
plot_density <- function(data, title, color_low, color_high) {
  ggplot(data, aes(x = UMAP1, y = UMAP2)) +
    geom_density_2d_filled(aes(fill = after_stat(level)), bins = 30) +  # Ensure enough bins for continuous fill
    scale_fill_gradient(low = color_low, high = color_high, name = "Density", 
                        labels = c("Low", "High")) +
    theme_minimal() +
    labs(title = title) +
    theme(
      legend.position = "top",
      legend.title = element_text(size = 10),
      legend.text = element_text(size = 8),
      plot.title = element_text(hjust = 0.5)
    ) +
    guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                                 title.position = "top", title.hjust = 0.5,
                                 label.position = "bottom"))
}

# WT Density Plot
wt_density_plot <- plot_density(umap_df %>% filter(type == "WT"), "WT density", "white", "blue")

# Display the plot
print(wt_density_plot)