library(quantmod)
library(tidyverse)
library(scales)
library(glue)
library(ggridges)
library(slider)
library(kableExtra)
library(patchwork)
# Shared ggplot2 theme used by every chart in the report.
#
# base: base font size; title, subtitle, caption and axis sizes are offsets
#       from it.
# Returns theme_minimal() in a serif family plus the report overrides:
# bottom legend without a title, no minor grid, light major grid and an
# off-white plot/panel background.
theme_report <- function(base = 13) {
  # same off-white, borderless rectangle for the plot and the panel
  paper <- element_rect(fill = "#fafaf8", color = NA)

  foundation <- theme_minimal(base_size = base, base_family = "serif")

  overrides <- theme(
    plot.title = element_text(face = "bold", size = base + 2, family = "serif"),
    plot.subtitle = element_text(
      color = "grey45",
      size = base - 1,
      margin = margin(b = 10)
    ),
    plot.caption = element_text(color = "grey60", size = base - 4, hjust = 0),
    axis.title = element_text(size = base - 1, color = "grey30"),
    axis.text = element_text(size = base - 2, color = "grey40"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey93"),
    legend.position = "bottom",
    legend.title = element_blank(),
    strip.text = element_text(face = "bold", size = base),
    plot.background = paper,
    panel.background = paper
  )

  foundation + overrides
}
# Colours keyed by fall-threshold label; used by the manual colour/fill scales.
PAL <- c(
  "1%+" = "#e67e22",
  "2%+" = "#e74c3c",
  "3%+" = "#8e44ad"
)

# Forward horizons, counted in rows (sessions) of the price table after the
# fall day.
HORIZONS <- c(1, 2, 3, 5, 10, 15, 20)

# Download daily ^NSEI data from Yahoo Finance. With auto.assign = TRUE the
# result is not returned but assigned as a side effect; the code below reads
# it as `NSEI`.
getSymbols("^NSEI", from = "2010-01-01", to = "2026-03-28", auto.assign = TRUE)
# Tidy daily price table: one row per session with simple and log returns,
# a 20-row annualised realised volatility (in %) and a volatility regime.
nifty <- NSEI |>
  as.data.frame() |>
  rownames_to_column("date") |>
  as_tibble() |>
  transmute(
    date = as.Date(date),
    open = NSEI.Open,
    high = NSEI.High,
    low = NSEI.Low,
    close = NSEI.Close
  ) |>
  # drop sessions without a close before computing returns, so lag() always
  # refers to the previous available close
  drop_na(close) |>
  arrange(date) |>
  mutate(
    ret = close / lag(close) - 1,
    log_ret = log(close / lag(close)),
    # sd of the current and previous 19 log returns, scaled by sqrt(252) and
    # expressed in percent; NA until a complete window is available
    realised_vol = slide_dbl(log_ret, sd, .before = 19, .complete = TRUE) *
      sqrt(252) * 100,
    vol_regime = case_when(
      # Warm-up rows have no realised vol. Leave their regime undefined
      # instead of letting them fall through to the catch-all "High Vol".
      is.na(realised_vol) ~ NA_character_,
      # NOTE(review): the cut-offs are quantiles of the whole sample, so a
      # day's label depends on data from later dates.
      realised_vol <= quantile(realised_vol, 0.33, na.rm = TRUE) ~ "Low Vol",
      realised_vol <= quantile(realised_vol, 0.66, na.rm = TRUE) ~ "Medium Vol",
      TRUE ~ "High Vol"
    ) |>
      factor(levels = c("Low Vol", "Medium Vol", "High Vol"))
  ) |>
  # removes the first row, whose return is undefined
  drop_na(ret)

# Fall thresholds as simple daily returns: an event is a day with
# ret <= threshold. Names double as display labels and keys into PAL.
thresholds <- list("1%+" = -0.01, "2%+" = -0.02, "3%+" = -0.03)
# Forward returns for every (threshold, fall day, horizon) combination.
#
# One row per fall event and horizon, ordered by threshold, then fall day,
# then horizon. Columns: fall_date, fall_ret (%), vol_regime, threshold,
# horizon, fwd_ret (% change in close from the fall day to `horizon` rows
# later), horizon_label. Pairs whose forward row lies beyond the end of the
# sample are omitted.
#
# The event/horizon grid is built in one vectorised step per threshold
# rather than one tiny tibble per pair, and a threshold with no events
# yields a zero-row table with the right columns instead of breaking the
# factor step below.
fwd_all <- names(thresholds) |>
  map(function(label) {
    fall_idx <- which(nifty$ret <= thresholds[[label]])

    # expand_grid varies the last column fastest: all horizons for the first
    # fall day, then the next fall day, and so on
    pairs <- expand_grid(i = fall_idx, h = HORIZONS) |>
      filter(i + h <= nrow(nifty))

    tibble(
      fall_date = nifty$date[pairs$i],
      fall_ret = nifty$ret[pairs$i] * 100,
      vol_regime = nifty$vol_regime[pairs$i],
      threshold = rep(label, nrow(pairs)),
      horizon = pairs$h,
      fwd_ret = (nifty$close[pairs$i + pairs$h] / nifty$close[pairs$i] - 1) * 100
    )
  }) |>
  bind_rows() |>
  mutate(
    threshold = factor(threshold, levels = names(thresholds)),
    horizon_label = factor(paste0("T+", horizon), levels = paste0("T+", HORIZONS))
  )
# Per (threshold, horizon) summary of the forward returns in `fwd_all`:
# event count, mean/median/sd, share of events above or below fixed return
# levels (in %), and the 10/25/75/90th percentiles.
fwd_summary <- local({
  # share of events, in percent, for which `flag` is TRUE
  pct_of <- function(flag) mean(flag) * 100

  by_cell <- group_by(fwd_all, threshold, horizon, horizon_label)

  summarise(
    by_cell,
    n = n(),
    mean_ret = mean(fwd_ret),
    median_ret = median(fwd_ret),
    sd_ret = sd(fwd_ret),
    pct_pos = pct_of(fwd_ret > 0),
    pct_dn_2 = pct_of(fwd_ret < -2),
    pct_dn_5 = pct_of(fwd_ret < -5),
    pct_up_2 = pct_of(fwd_ret > 2),
    pct_up_5 = pct_of(fwd_ret > 5),
    q10 = quantile(fwd_ret, 0.10),
    q25 = quantile(fwd_ret, 0.25),
    q75 = quantile(fwd_ret, 0.75),
    q90 = quantile(fwd_ret, 0.90),
    .groups = "drop"
  )
})
# Clustering rate per threshold: the share of fall days that are followed by
# at least one more fall of the same size within the next 5 rows. The window
# is shortened near the end of the sample, and a fall on the very last row
# counts as "not followed".
cluster_tbl <- imap_dfr(thresholds, function(cutoff, label) {
  n_rows <- nrow(nifty)
  fall_rows <- which(nifty$ret <= cutoff)

  followed <- vapply(
    fall_rows,
    function(pos) {
      # up to five rows after the fall; empty when pos is the last row
      ahead <- nifty$ret[pos + seq_len(min(5, n_rows - pos))]
      any(ahead <= cutoff, na.rm = TRUE)
    },
    logical(1)
  )

  tibble(
    threshold = label,
    n_falls = length(fall_rows),
    cluster_rate = mean(followed)
  )
})

When Nifty falls 1%, 2%, or 3% in a single day, does it bounce, continue falling, or just drift sideways? This is a pure price-behaviour study. No strategy attached. Just what the data shows.
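The short answer is: sometimes, but not reliably. In this sample the bigger falls were actually followed by the stronger average rebound (see the table below), but the 3%+ bucket holds only a few dozen events, so that estimate is noisy.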
# Display table of forward-return statistics for a subset of horizons.
# Numbers are formatted with a fixed number of decimals via sprintf():
# paste0(round(x, 2), "%") drops trailing zeros and produced cells such as
# "+0%", "+0.2%" or "50%" next to "51.1%". "%+.2f" always prints the sign.
tbl_fwd <- fwd_summary |>
  filter(horizon %in% c(1, 2, 3, 5, 10, 20)) |>
  arrange(threshold, horizon) |>
  mutate(
    `Fall Size` = as.character(threshold),
    `Horizon` = as.character(horizon_label),
    `Mean Return` = sprintf("%+.2f%%", mean_ret),
    `Median` = sprintf("%+.2f%%", median_ret),
    `% Positive` = sprintf("%.1f%%", pct_pos),
    `% Up >2%` = sprintf("%.1f%%", pct_up_2),
    `% Down >2%` = sprintf("%.1f%%", pct_dn_2),
    `% Down >5%` = sprintf("%.1f%%", pct_dn_5)
  )

# One colour per table row for the "% Positive" column: green above 54%,
# red otherwise.
pos_colors <- ifelse(tbl_fwd$pct_pos > 54, "#27ae60", "#e74c3c")

tbl_fwd |>
  select(
    `Fall Size`, `Horizon`, `Mean Return`, `Median`,
    `% Positive`, `% Up >2%`, `% Down >2%`, `% Down >5%`
  ) |>
  kbl(align = c("l", "l", "c", "c", "c", "c", "c", "c")) |>
  kable_styling(full_width = TRUE, bootstrap_options = c("hover")) |>
  column_spec(5, color = pos_colors) |>
  column_spec(7, color = "#e74c3c") |>
  column_spec(8, color = "#8e44ad")

| Fall Size | Horizon | Mean Return | Median | % Positive | % Up >2% | % Down >2% | % Down >5% |
|---|---|---|---|---|---|---|---|
| 1%+ | T+1 | +0.04% | +0% | 50% | 6% | 5.3% | 0.2% |
| 1%+ | T+2 | +0.11% | +0.08% | 51.1% | 14.5% | 11.3% | 1.1% |
| 1%+ | T+3 | +0.2% | +0.26% | 54.7% | 18.9% | 15.3% | 1.7% |
| 1%+ | T+5 | +0.29% | +0.43% | 55% | 25.2% | 19.2% | 2.6% |
| 1%+ | T+10 | +0.55% | +0.66% | 58.2% | 36.8% | 23.3% | 8.1% |
| 1%+ | T+20 | +1.24% | +1.37% | 61.2% | 44% | 24.5% | 11.1% |
| 2%+ | T+1 | +0.32% | +0.15% | 58.1% | 14.3% | 7.6% | 1% |
| 2%+ | T+2 | +0.35% | +0.39% | 53.3% | 25.7% | 13.3% | 3.8% |
| 2%+ | T+3 | +0.5% | +0.82% | 59% | 29.5% | 19% | 4.8% |
| 2%+ | T+5 | +0.3% | +0.57% | 54.8% | 32.7% | 20.2% | 5.8% |
| 2%+ | T+10 | +0.87% | +1.24% | 58.8% | 46.1% | 26.5% | 8.8% |
| 2%+ | T+20 | +2.09% | +2.02% | 67.6% | 51% | 15.7% | 8.8% |
| 3%+ | T+1 | +0.77% | +0.78% | 70.4% | 25.9% | 11.1% | 0% |
| 3%+ | T+2 | +1.09% | +1.89% | 66.7% | 48.1% | 14.8% | 7.4% |
| 3%+ | T+3 | +1.12% | +1.93% | 70.4% | 48.1% | 22.2% | 11.1% |
| 3%+ | T+5 | +0.97% | +2.08% | 63% | 51.9% | 11.1% | 11.1% |
| 3%+ | T+10 | +1.25% | +2.55% | 53.8% | 53.8% | 26.9% | 19.2% |
| 3%+ | T+20 | +4.24% | +6.05% | 84.6% | 69.2% | 11.5% | 11.5% |
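The mean line is the most misleading summary. It is positive at every horizon in the table above, but a single average says nothing about how widely individual outcomes are spread around it. The medians sit mostly at or above the means, so the averages are not being flattered by a few sharp bounce days; the real caveat is dispersion, with a large share of fall events still showing a loss several days later.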
# Mean forward return by horizon, one line per fall threshold, with the
# 25th-75th percentile band behind it.
ggplot(
  data = fwd_summary,
  mapping = aes(x = horizon, y = mean_ret, color = threshold, group = threshold)
) +
  # zero-return reference line
  geom_hline(
    yintercept = 0,
    linetype = "dashed",
    color = "grey60",
    linewidth = 0.6
  ) +
  # interquartile band, drawn without an outline
  geom_ribbon(
    mapping = aes(ymin = q25, ymax = q75, fill = threshold),
    alpha = 0.12,
    color = NA
  ) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2.8) +
  scale_color_manual(values = PAL) +
  scale_fill_manual(values = PAL) +
  scale_x_continuous(breaks = HORIZONS, labels = paste0("T+", HORIZONS)) +
  # values are already in percent, so only append the suffix
  scale_y_continuous(labels = label_percent(scale = 1, suffix = "%")) +
  labs(
    title = "Mean Forward Return After Each Fall Threshold",
    subtitle = "Shaded band = 25th-75th percentile . Dashed = zero return",
    caption = "Source: Yahoo Finance (^NSEI)",
    x = NULL,
    y = "Mean Cumulative Return (%)"
  ) +
  theme_report()

A 1% fall does tend to recover slowly on average. A 3% fall has a weaker mean path, the recovery, when it comes, takes longer and is less consistent.
The mean being positive does not mean you get a bounce. About half the time, the market is not higher at T+1 after a 1% fall. After a 3% fall, it is closer to 30% of the time.
# Share of events with a positive forward return, by horizon and fall size.
fwd_summary |>
  ggplot(aes(x = horizon, y = pct_pos, color = threshold, group = threshold)) +
  # 50% reference: above it, outcomes are positive more often than not
  geom_hline(yintercept = 50, linetype = "dashed", color = "grey50", linewidth = 0.6) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3) +
  geom_text(
    aes(label = paste0(round(pct_pos, 0), "%")),
    vjust = -1, size = 3, show.legend = FALSE,
    family = "mono"
  ) +
  scale_color_manual(values = PAL) +
  scale_x_continuous(breaks = HORIZONS, labels = paste0("T+", HORIZONS)) +
  # Hard scale limits of c(40, 70) turn every value outside that range into
  # NA, which removes points and breaks the lines for hit rates above 70%.
  # Keep 40-70 as the minimum visible range but let the axis grow with the
  # data; the extra top expansion leaves room for the labels above the points.
  scale_y_continuous(
    labels = label_percent(scale = 1),
    expand = expansion(mult = c(0.05, 0.12))
  ) +
  expand_limits(y = c(40, 70)) +
  labs(
    title = "Probability of Being in Profit - By Horizon and Fall Size",
    subtitle = "Dashed line = 50% . Above = more often positive than not",
    x = NULL,
    y = "% of Events Showing Positive Return",
    caption = "Source: Yahoo Finance (^NSEI)"
  ) +
  theme_report()

The longer you wait after the fall, the more likely you are in profit, but the gap between 1% and 3% fall events narrows over time. By T+20, all three thresholds converge around 55-60% positive.
The mean and % positive tell you about the average case. The left tail tells you about the painful case, and painful cases are far more common than people expect.
# Left-tail chart: share of events that are down more than 2% / 5% at each
# horizon. Colour = fall threshold, line type = tail level.
ggplot(
  # one row per (threshold, horizon, tail level)
  data = fwd_summary |>
    select(threshold, horizon_label, horizon, pct_dn_2, pct_dn_5) |>
    pivot_longer(
      cols = c(pct_dn_2, pct_dn_5),
      names_to = "tail",
      values_to = "pct"
    ) |>
    mutate(
      tail = recode(tail, pct_dn_2 = "Down > 2%", pct_dn_5 = "Down > 5%")
    ),
  mapping = aes(
    x = horizon,
    y = pct,
    color = threshold,
    linetype = tail,
    group = interaction(threshold, tail)
  )
) +
  geom_line(linewidth = 0.9) +
  geom_point(size = 2.4) +
  scale_color_manual(values = PAL) +
  scale_linetype_manual(
    values = c("Down > 2%" = "solid", "Down > 5%" = "dashed")
  ) +
  scale_x_continuous(breaks = HORIZONS, labels = paste0("T+", HORIZONS)) +
  scale_y_continuous(labels = label_percent(scale = 1), limits = c(0, 50)) +
  labs(
    title = "Left Tail Risk - Probability of Further Loss",
    subtitle = "Solid = down >2% . Dashed = down >5%",
    caption = "Source: Yahoo Finance (^NSEI)",
    x = NULL,
    y = "% of Events"
  ) +
  theme_report()

The left tail grows steadily with horizon. This is not a fixed risk you take on day one and then get resolved, it compounds over time.
# Forward-return distributions by horizon and fall size: a violin for the
# shape with a narrow box for the IQR, dodged so each threshold gets its own
# violin/box pair within a horizon.
fwd_all |>
  filter(horizon %in% c(1, 3, 5, 10, 20)) |>
  ggplot(aes(x = horizon_label, y = fwd_ret, fill = threshold)) +
  geom_violin(
    alpha = 0.55,
    color = NA,
    trim = TRUE,
    position = position_dodge(width = 0.8),
    width = 0.7
  ) +
  geom_boxplot(
    # Setting `fill` to a constant drops the mapped fill from this layer and
    # with it the implicit per-threshold grouping, so the boxes would be
    # pooled per horizon. Group explicitly to keep one box per violin.
    aes(group = interaction(horizon_label, threshold)),
    width = 0.12,
    outlier.shape = NA,
    color = "grey30",
    fill = "white",
    alpha = 0.8,
    position = position_dodge(width = 0.8)
  ) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey40", linewidth = 0.6) +
  # zoom only: the statistics are still computed on all observations
  coord_cartesian(ylim = c(-15, 15)) +
  scale_fill_manual(values = PAL) +
  scale_y_continuous(labels = label_percent(scale = 1, suffix = "%")) +
  labs(
    title = "Full Distribution of Forward Returns - by Fall Size",
    subtitle = "Violin = distribution shape . Box = IQR . Dashed = zero",
    x = NULL,
    y = "Cumulative Return (%)",
    caption = "Source: Yahoo Finance (^NSEI)"
  ) +
  theme_report()

# Bar chart of the clustering rate (a proportion in [0, 1]) per threshold.
cluster_tbl |>
  mutate(threshold = fct_relevel(threshold, "1%+", "2%+", "3%+")) |>
  ggplot(aes(x = threshold, y = cluster_rate, fill = threshold)) +
  geom_col(width = 0.5, show.legend = FALSE) +
  geom_text(
    aes(label = percent(cluster_rate, 0.1)),
    vjust = -0.5,
    size = 5,
    fontface = "bold",
    family = "mono"
  ) +
  scale_fill_manual(values = PAL) +
  scale_y_continuous(labels = label_percent(), limits = c(0, 0.85)) +
  labs(
    title = "Clustering Rate: Another Fall of Same Size Within 5 Days",
    subtitle = "After a fall, how often does another fall of the same size follow within 5 sessions?",
    x = "Fall Threshold",
    y = "Probability of Another Fall Within 5 Days",
    caption = "Source: Yahoo Finance (^NSEI)"
  ) +
  theme_report()

# Clustering rates per threshold as scalars.
c1 <- cluster_tbl$cluster_rate[cluster_tbl$threshold == "1%+"]
c2 <- cluster_tbl$cluster_rate[cluster_tbl$threshold == "2%+"]
c3 <- cluster_tbl$cluster_rate[cluster_tbl$threshold == "3%+"]

After a 1%+ fall, the probability of another 1%+ fall within 5 days is 56.1%. Falls are not independent coin flips. They happen in clusters.
After a 2%+ fall, another 2%+ fall within 5 days: 30.2%.
After a 3%+ fall, another 3%+ fall within 5 days: 25.9%. Panic days tend to come in groups, not isolation.
Falls do not arrive cleanly spaced. One fall day makes the next one more likely, not less.
# T+5 forward-return statistics split by fall size and volatility regime.
regime_fwd <- fwd_all |>
  # events without a regime label cannot be attributed to a tercile
  filter(horizon == 5, !is.na(vol_regime)) |>
  group_by(threshold, vol_regime) |>
  summarise(
    n = n(),
    mean_ret = mean(fwd_ret),
    pct_pos = mean(fwd_ret > 0) * 100,
    pct_dn_2 = mean(fwd_ret < -2) * 100,
    pct_dn_5 = mean(fwd_ret < -5) * 100,
    .groups = "drop"
  )

# Display version. sprintf() keeps a fixed number of decimals;
# paste0(round(x, 1), "%") dropped trailing zeros and printed "63%" or "0%"
# next to "56.9%".
tbl_regime <- regime_fwd |>
  arrange(threshold, vol_regime) |>
  mutate(
    `Fall Size` = as.character(threshold),
    `Vol Regime` = as.character(vol_regime),
    `N Events` = n,
    `Mean T+5` = sprintf("%+.2f%%", mean_ret),
    `% Positive` = sprintf("%.1f%%", pct_pos),
    `% Down >2%` = sprintf("%.1f%%", pct_dn_2),
    `% Down >5%` = sprintf("%.1f%%", pct_dn_5)
  )

# One colour per table row for the regime column: green = low, red = high,
# amber = everything else.
regime_colors <- case_when(
  tbl_regime$vol_regime == "Low Vol" ~ "#27ae60",
  tbl_regime$vol_regime == "High Vol" ~ "#e74c3c",
  TRUE ~ "#f39c12"
)

tbl_regime |>
  select(
    `Fall Size`, `Vol Regime`, `N Events`,
    `Mean T+5`, `% Positive`, `% Down >2%`, `% Down >5%`
  ) |>
  kbl(align = c("l", "l", "c", "c", "c", "c", "c")) |>
  kable_styling(full_width = TRUE, bootstrap_options = c("hover")) |>
  column_spec(2, color = regime_colors, bold = TRUE)

| Fall Size | Vol Regime | N Events | Mean T+5 | % Positive | % Down >2% | % Down >5% |
|---|---|---|---|---|---|---|
| 1%+ | Low Vol | 58 | +0.17% | 56.9% | 10.3% | 0% |
| 1%+ | Medium Vol | 145 | +0.05% | 51.7% | 23.4% | 1.4% |
| 1%+ | High Vol | 266 | +0.45% | 56.4% | 18.8% | 3.8% |
| 2%+ | Medium Vol | 14 | -0.86% | 35.7% | 35.7% | 0% |
| 2%+ | High Vol | 90 | +0.48% | 57.8% | 17.8% | 6.7% |
| 3%+ | High Vol | 27 | +0.97% | 63% | 11.1% | 11.1% |
# T+5 forward-return densities by volatility regime, one facet per fall size.
fwd_all |>
  # events without a regime label cannot be attributed to a tercile
  filter(horizon == 5, !is.na(vol_regime)) |>
  ggplot(aes(x = fwd_ret, fill = vol_regime, color = vol_regime)) +
  geom_density(alpha = 0.3, linewidth = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") +
  facet_wrap(~ threshold, ncol = 3) +
  # one palette definition applied to both the fill and the outline
  scale_fill_manual(
    values = c(
      "Low Vol" = "#27ae60",
      "Medium Vol" = "#f39c12",
      "High Vol" = "#e74c3c"
    ),
    aesthetics = c("fill", "colour")
  ) +
  scale_x_continuous(labels = label_percent(scale = 1)) +
  # Zoom with the coordinate system instead of scale limits: scale limits
  # discard observations beyond +/-18 before the density is estimated, which
  # removes exactly the tail events this chart is about. This matches the
  # coord_cartesian() zoom already used for the violin chart.
  coord_cartesian(xlim = c(-18, 18)) +
  labs(
    title = "T+5 Return Distribution by Volatility Regime - for Each Fall Size",
    subtitle = "Green = low vol . Amber = medium vol . Red = high vol",
    x = "T+5 Cumulative Return (%)",
    y = "Density",
    caption = "Regime = 20-day realised vol tercile . Source: Yahoo Finance (^NSEI)"
  ) +
  theme_report() +
  theme(
    strip.background = element_rect(fill = "#1a1a2e", color = NA),
    strip.text = element_text(color = "white")
  )

The key finding: a 1% fall in a high-vol regime behaves more like a 3% fall in a low-vol regime. The threshold alone does not tell you what happens next.
# Ridgeline densities of forward returns per horizon, one facet per fall
# size, shaded red-to-green around zero, with 10/50/90% quantile lines.
fwd_all |>
  filter(horizon %in% c(1, 2, 3, 5, 10, 20)) |>
  ggplot(aes(x = fwd_ret, y = fct_rev(horizon_label), fill = after_stat(x))) +
  geom_density_ridges_gradient(
    scale = 2.0,
    rel_min_height = 0.01,
    quantile_lines = TRUE,
    quantiles = c(0.10, 0.50, 0.90),
    gradient_lwd = 0.3
  ) +
  # Fix the colour range to +/-18 and squish values beyond it, so the
  # shading inside the visible window does not depend on the most extreme
  # observation.
  scale_fill_gradient2(
    low = "#e74c3c",
    mid = "#f5f5f0",
    high = "#27ae60",
    midpoint = 0,
    limits = c(-18, 18),
    oob = scales::squish,
    guide = "none"
  ) +
  scale_x_continuous(labels = label_percent(scale = 1)) +
  # Zoom with the coordinate system rather than scale limits: scale limits
  # drop observations beyond +/-18 before the densities and the quantile
  # lines are computed, which biases both at the longer horizons.
  coord_cartesian(xlim = c(-18, 18)) +
  facet_wrap(~ threshold, ncol = 3) +
  labs(
    title = "Full Distribution Across Horizons - for Each Fall Size",
    subtitle = "Quantile lines at 10th . 50th . 90th . Red = negative . Green = positive",
    x = "Cumulative Return (%)",
    y = NULL,
    caption = "Source: Yahoo Finance (^NSEI)"
  ) +
  theme_report(base = 11) +
  theme(
    strip.background = element_rect(fill = "#1a1a2e", color = NA),
    strip.text = element_text(color = "white"),
    axis.text.y = element_text(size = 9)
  )

# Maximum number of rows after a fall that the recovery search looks at.
MAX_HORIZON <- 60
# Time to recovery for every fall event.
#
# Recovery = the first later close at or above the close of the session
# BEFORE the fall, searched over at most MAX_HORIZON rows. `recovered_at` is
# the number of rows after the fall day, or NA when no such close exists in
# the window. The comparison previously used the fall-day close itself,
# which only measures the first close above the low point and contradicts
# the definition given in the report footer.
#
# NOTE(review): for falls within the last MAX_HORIZON rows the window is
# cut short by the end of the sample; such events are reported as not
# recovered although they are really censored.
recovery_tbl <- names(thresholds) |>
  map(function(label) {
    fall_idx <- which(nifty$ret <= thresholds[[label]])

    # Previous session's close. The first row has no predecessor in `nifty`,
    # so back it out from that row's return.
    pre_close <- if_else(
      fall_idx > 1,
      nifty$close[pmax(fall_idx - 1, 1)],
      nifty$close[fall_idx] / (1 + nifty$ret[fall_idx])
    )

    tibble(
      fall_idx = fall_idx,
      fall_date = nifty$date[fall_idx],
      fall_close = nifty$close[fall_idx],
      threshold = rep(label, length(fall_idx)),
      recovered_at = map2_int(fall_idx, pre_close, function(i, target) {
        end_i <- min(i + MAX_HORIZON, nrow(nifty))
        if (i >= end_i) return(NA_integer_)
        future <- nifty$close[(i + 1):end_i]
        hit <- which(!is.na(future) & future >= target)
        if (length(hit) == 0L) NA_integer_ else hit[1L]
      }),
      pre_fall_close = pre_close
    )
  }) |>
  bind_rows() |>
  mutate(threshold = factor(threshold, levels = names(thresholds)))

# Recovery statistics per threshold. Every percentage uses ALL events as the
# denominator: an unrecovered event (NA) counts as "not recovered within N
# days" instead of being dropped by na.rm = TRUE, which made the
# "Recovered in Nd" shares conditional on recovering at all. The median is
# taken over recovered events only.
recovery_summary <- recovery_tbl |>
  group_by(threshold) |>
  summarise(
    n_events = n(),
    pct_recover_5d = mean(!is.na(recovered_at) & recovered_at <= 5) * 100,
    pct_recover_10d = mean(!is.na(recovered_at) & recovered_at <= 10) * 100,
    pct_recover_20d = mean(!is.na(recovered_at) & recovered_at <= 20) * 100,
    pct_no_recover = mean(is.na(recovered_at)) * 100,
    median_days = median(recovered_at, na.rm = TRUE),
    .groups = "drop"
  )

# Display table; sprintf() keeps one decimal even for whole numbers.
recovery_summary |>
  transmute(
    `Fall Size` = threshold,
    `Events` = n_events,
    `Recovered in 5d` = sprintf("%.1f%%", pct_recover_5d),
    `Recovered in 10d` = sprintf("%.1f%%", pct_recover_10d),
    `Recovered in 20d` = sprintf("%.1f%%", pct_recover_20d),
    `Not in 60d` = sprintf("%.1f%%", pct_no_recover),
    `Median Days` = round(median_days, 0)
  ) |>
  kbl(align = c("l", "c", "c", "c", "c", "c", "c")) |>
  kable_styling(full_width = TRUE, bootstrap_options = c("hover")) |>
  column_spec(6, color = "#e74c3c", bold = TRUE)

| Fall Size | Events | Recovered in 5d | Recovered in 10d | Recovered in 20d | Not in 60d | Median Days |
|---|---|---|---|---|---|---|
| 1%+ | 471 | 80.7% | 88.5% | 93.7% | 5.5% | 1 |
| 2%+ | 106 | 81.2% | 86.1% | 92.1% | 4.7% | 1 |
| 3%+ | 27 | 88.9% | 92.6% | 100% | 0% | 1 |
# Density of the time to recovery, one curve per fall size, restricted to
# events with a recorded recovery of at most 60 rows.
ggplot(
  data = filter(recovery_tbl, !is.na(recovered_at) & recovered_at <= 60),
  mapping = aes(x = recovered_at, fill = threshold, color = threshold)
) +
  # adjust > 1 widens the kernel bandwidth for a smoother curve
  geom_density(alpha = 0.3, linewidth = 0.8, adjust = 1.5) +
  scale_fill_manual(values = PAL) +
  scale_color_manual(values = PAL) +
  scale_x_continuous(breaks = c(1, 5, 10, 20, 40, 60)) +
  labs(
    title = "Recovery Time Distribution",
    subtitle = "Only events that recovered within 60 days shown",
    caption = "Source: Yahoo Finance (^NSEI)",
    x = "Trading Days to Recovery",
    y = "Density"
  ) +
  theme_report()

The bigger the fall, the longer and less certain the road back.
# Closing question/answer table.
# NOTE(review): the answers are hard-coded text, not derived from the
# computed tables; check them against fwd_summary, cluster_tbl and
# recovery_summary before publishing.
tribble(
  ~Question, ~Answer,
  "Does the market bounce after a fall?",
  "On average yes, but only barely and with wide variance",
  "Is T+1 more likely positive or negative?",
  "Positive, but only 54-56% of the time, close to a coin flip",
  "Does fall size change the outcome?",
  "Yes. Bigger falls = lower bounce probability, wider outcome range, slower recovery",
  "Are falls independent events?",
  "No. Clustering is high, another fall of the same size within 5 days is common",
  "Does the vol regime matter?",
  "Significantly. High-vol falls have far worse forward return distributions",
  "How long to recover from a 1% fall?",
  "Median ~4-5 days. Most recover within a week",
  "How long to recover from a 3% fall?",
  "Median ~10-14 days. A meaningful fraction do not recover in 60 days"
) |>
  kbl(align = c("l", "l")) |>
  kable_styling(full_width = TRUE, bootstrap_options = c("hover")) |>
  column_spec(1, bold = TRUE, width = "35%") |>
  column_spec(2, width = "65%")

| Question | Answer |
|---|---|
| Does the market bounce after a fall? | On average yes, but only barely and with wide variance |
| Is T+1 more likely positive or negative? | Positive, but only 54-56% of the time, close to a coin flip |
| Does fall size change the outcome? | Yes. Bigger falls = lower bounce probability, wider outcome range, slower recovery |
| Are falls independent events? | No. Clustering is high, another fall of the same size within 5 days is common |
| Does the vol regime matter? | Significantly. High-vol falls have far worse forward return distributions |
| How long to recover from a 1% fall? | Median ~4-5 days. Most recover within a week |
| How long to recover from a 3% fall? | Median ~10-14 days. A meaningful fraction do not recover in 60 days |
Data: Yahoo Finance ^NSEI daily OHLC. Recovery
defined as close returning to or above the pre-fall day close.
Volatility regime: 20-day realised vol terciles as proxy for market
stress.