Study C: Grid Resolution

Registration Benchmark v2

Setup

Code

library(ggplot2)
library(data.table)
library(patchwork)

base_dir <- here::here()
source(file.path(base_dir, "sim-analyze.R"))

mcols <- method_colors()
mlabs <- method_labels()
dlabs <- dgp_short_labels()

Code

dt <- load_study_c()

Study C tests whether grid resolution is a confound for method comparison. SRVF operates on SRSFs (numerical derivatives) which amplify discretization artifacts at finer grids. A preliminary pilot (job 5131384) showed SRVF warp MISE 1.82× worse at \(n_\text{grid}=201\) vs 101.

Design: 5 DGPs \(\times\) 3 grid sizes (51, 101, 201) \(\times\) 3 noise (0, 0.1, 0.3) \(\times\) 5 methods \(\times\) severity=1.0 = 225 cells \(\times\) 50 reps = 11,250 runs.

Code

dgps_used <- c("D01", "D03", "D04", "D09", "D12")
knitr::kable(
  dgp_factors()[dgp %in% dgps_used],
  caption = "DGPs in Study C: covering both templates, all 3 warp types, and none/highrank amplitude."
)

DGPs in Study C: covering both templates, all 3 warp types, and none/highrank amplitude.
dgp	template	warp_type	amplitude
D01	harmonic	simple	none
D03	wiggly	complex	none
D04	harmonic	affine	none
D09	harmonic	simple	highrank
D12	wiggly	complex	highrank

1 Overview

1.1 Failure Rates

Code

fail_dt <- dt[,
  .(n_fail = sum(failure), n_total = .N, rate = mean(failure)),
  by = .(method, n_grid)
]
fail_any <- fail_dt[n_fail > 0]
if (nrow(fail_any) == 0) {
  cat("No failures across all 11,250 runs.\n\n")
} else {
  cat("**Failure rates by method and grid:**\n\n")
  print(knitr::kable(fail_dt[n_fail > 0]))
}

Failure rates by method and grid:

method	n_grid	n_fail	n_total	rate
landmark_auto	51	3	750	0.004
landmark_auto	101	6	750	0.008
landmark_auto	201	1	750	0.001

1.2 Warp MISE by Grid \(\times\) Method

The central question: does grid resolution systematically affect method performance?

Code

ggplot(
  dt[failure == FALSE],
  aes(y = method, x = warp_mise, fill = n_grid)
) +
  geom_boxplot(outlier.size = 0.3, outlier.alpha = 0.2, position = "dodge") +
  facet_wrap(~noise_sd, labeller = labeller(noise_sd = function(x) paste0("noise=", x))) +
  scale_y_discrete(labels = mlabs) +
  scale_x_log10() +
  scale_fill_brewer(palette = "Set2", name = "Grid size") +
  labs(y = NULL, x = "Warp MISE (log scale)") +
  theme_benchmark()

Warp MISE by grid size and method, faceted by noise level. Each boxplot pools over all 5 DGPs and 50 reps.

2 Grid Degradation Ratios

Grid degradation ratios compare each grid size to the baseline \(n_\text{grid}=101\). Values > 1 mean worse performance at that grid; < 1 means better.

Code

ratio_rep <- dcast(
  dt[failure == FALSE, .(dgp, method, noise_sd, rep, n_grid, warp_mise)],
  dgp + method + noise_sd + rep ~ n_grid,
  value.var = "warp_mise"
)
ratio_rep[, ratio_51 := `51` / `101`]
ratio_rep[, ratio_201 := `201` / `101`]
ratio_rep <- melt(
  ratio_rep,
  id.vars = c("dgp", "method", "noise_sd", "rep"),
  measure.vars = c("ratio_51", "ratio_201"),
  variable.name = "n_grid",
  value.name = "ratio"
)
ratio_rep[, n_grid := sub("^ratio_", "", n_grid)]
ratio_dt <- ratio_rep[, .(
  ratio = median(ratio, na.rm = TRUE),
  q25 = quantile(ratio, 0.25, na.rm = TRUE),
  q75 = quantile(ratio, 0.75, na.rm = TRUE)
), by = .(dgp, method, n_grid, noise_sd)]

ggplot(ratio_dt, aes(x = ratio, y = method, color = method)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "grey40") +
  geom_point(size = 2, alpha = 0.7) +
  geom_errorbarh(aes(xmin = q25, xmax = q75), height = 0.15, alpha = 0.3) +
  geom_text(aes(label = dgp), size = 3, vjust = -0.8, show.legend = FALSE) +
  stat_summary(fun = median, geom = "crossbar", width = 0.4,
    color = "black", linewidth = 0.4) +
  facet_grid(n_grid ~ noise_sd,
    labeller = labeller(
      noise_sd = function(x) paste0("noise=", x),
      n_grid = function(x) paste0("grid=", x)
    )
  ) +
  scale_color_manual(values = mcols, guide = "none") +
  scale_y_discrete(labels = mlabs) +
  scale_x_continuous(trans = "log2",
    breaks = c(0.25, 0.5, 1, 2, 4),
    labels = c("1/4", "1/2", "1", "2", "4")
  ) +
  labs(x = expression("Warp MISE ratio vs grid=101 (" * log[2] * " scale)"), y = NULL) +
  theme_benchmark()

Warp MISE ratio relative to grid=101, per method and noise level. Each labeled point is one DGP; horizontal bars show the IQR of paired per-rep ratios and the crossbar shows the median across DGPs. Values > 1 (right of dashed line) indicate degradation.

Code

# Median ratio per method x grid x noise
ratio_by_noise <- ratio_dt[,
  .(median_ratio = median(ratio, na.rm = TRUE)),
  by = .(method, n_grid, noise_sd)
]
ratio_uncertainty <- mean_se_table(ratio_dt, "ratio", c("method", "n_grid", "noise_sd"))

# SRVF noise-dependent reversal (the key finding)
srvf_rats <- ratio_by_noise[method == "srvf"]
srvf_unc <- ratio_uncertainty[method == "srvf"]
cat("**Key finding: SRVF grid sensitivity reverses with noise level.**\n\n")

Key finding: SRVF grid sensitivity reverses with noise level.

Code

tbl_lines <- c(
  "SRVF warp MISE ratios (vs grid=101):\n",
  "| Noise | grid=51 | grid=201 |",
  "|-------|---------|----------|"
)
for (ns in c("0", "0.1", "0.3")) {
  r51 <- srvf_rats[n_grid == "51" & noise_sd == ns, median_ratio]
  r201 <- srvf_rats[n_grid == "201" & noise_sd == ns, median_ratio]
  se51 <- srvf_unc[n_grid == "51" & noise_sd == ns, se]
  se201 <- srvf_unc[n_grid == "201" & noise_sd == ns, se]
  tbl_lines <- c(tbl_lines, sprintf(
    "| %s | %.2f ± %.2f | %.2f ± %.2f |",
    ns, r51, se51, r201, se201
  ))
}
cat(paste(tbl_lines, collapse = "\n"), "\n\n")

SRVF warp MISE ratios (vs grid=101):

Noise	grid=51	grid=201
0	2.26 ± 0.87	0.91 ± 0.19
0.1	0.93 ± 0.21	1.73 ± 0.92
0.3	0.73 ± 0.02	1.20 ± 0.19

Code

cat("At noise=0, coarser grid *hurts* SRVF — DP alignment on 51 points is too ",
  "imprecise to resolve the warp. At noise>0, finer grid *hurts* SRVF — ",
  "numerical differentiation amplifies noise at 201 points, acting as ",
  "de facto anti-regularization.\n\n")

At noise=0, coarser grid hurts SRVF — DP alignment on 51 points is too imprecise to resolve the warp. At noise>0, finer grid hurts SRVF — numerical differentiation amplifies noise at 201 points, acting as de facto anti-regularization.

Code

# All methods summary
cat("**All methods** (median ratio, pooled over DGPs and noise — ",
  "descriptive only; SRVF pooled medians mask the noise-dependent reversal above):\n\n")

All methods (median ratio, pooled over DGPs and noise — descriptive only; SRVF pooled medians mask the noise-dependent reversal above):

Code

ratio_summary <- ratio_dt[,
  .(
    median_ratio = median(ratio, na.rm = TRUE),
    mean_ratio = mean(ratio, na.rm = TRUE),
    se_ratio = if (.N > 1) sd(ratio, na.rm = TRUE) / sqrt(.N) else 0,
    min_ratio = min(ratio, na.rm = TRUE),
    max_ratio = max(ratio, na.rm = TRUE)
  ),
  by = .(method, n_grid)
]
for (g in sort(unique(as.character(ratio_summary$n_grid)))) {
  cat(sprintf("*Grid = %s:*\n\n", g))
  sub <- ratio_summary[n_grid == g][order(median_ratio)]
  for (i in seq_len(nrow(sub))) {
    cat(sprintf("- **%s**: median %.2f; mean %.2f ± %.2f; range [%.2f, %.2f]\n",
      mlabs[sub$method[i]], sub$median_ratio[i],
      sub$mean_ratio[i], sub$se_ratio[i],
      sub$min_ratio[i], sub$max_ratio[i]))
  }
  cat("\n")
}

Grid = 201:

FDA (default): median 0.97; mean 0.96 ± 0.05; range [0.62, 1.29]
FDA (crit 1): median 0.99; mean 0.98 ± 0.03; range [0.68, 1.19]
Landmark (auto): median 1.01; mean 1.01 ± 0.01; range [0.87, 1.07]
Affine (S+S): median 1.02; mean 1.03 ± 0.03; range [0.85, 1.27]
SRVF: median 1.24; mean 1.71 ± 0.39; range [0.17, 6.39]

Grid = 51:

SRVF: median 0.93; mean 1.46 ± 0.38; range [0.30, 5.33]
Affine (S+S): median 0.99; mean 0.99 ± 0.05; range [0.73, 1.41]
Landmark (auto): median 1.00; mean 1.04 ± 0.03; range [0.84, 1.37]
FDA (default): median 1.04; mean 1.01 ± 0.04; range [0.64, 1.26]
FDA (crit 1): median 1.06; mean 1.07 ± 0.05; range [0.80, 1.43]

3 Template MISE by Grid

Code

ggplot(
  dt[failure == FALSE],
  aes(y = method, x = template_mise, fill = n_grid)
) +
  geom_boxplot(outlier.size = 0.3, outlier.alpha = 0.2, position = "dodge") +
  facet_wrap(~noise_sd, labeller = labeller(noise_sd = function(x) paste0("noise=", x))) +
  scale_y_discrete(labels = mlabs) +
  scale_x_log10() +
  scale_fill_brewer(palette = "Set2", name = "Grid size") +
  labs(y = NULL, x = "Template MISE (log scale)") +
  theme_benchmark()

Template MISE by grid size and method, faceted by noise level.

Code

t_ratio_rep <- dcast(
  dt[failure == FALSE, .(dgp, method, noise_sd, rep, n_grid, template_mise)],
  dgp + method + noise_sd + rep ~ n_grid,
  value.var = "template_mise"
)
t_ratio_rep[, ratio_51 := `51` / `101`]
t_ratio_rep[, ratio_201 := `201` / `101`]
t_ratio_rep <- melt(
  t_ratio_rep,
  id.vars = c("dgp", "method", "noise_sd", "rep"),
  measure.vars = c("ratio_51", "ratio_201"),
  variable.name = "n_grid",
  value.name = "ratio"
)
t_ratio_rep[, n_grid := sub("^ratio_", "", n_grid)]
t_ratio_dt <- t_ratio_rep[, .(
  ratio = median(ratio, na.rm = TRUE),
  q25 = quantile(ratio, 0.25, na.rm = TRUE),
  q75 = quantile(ratio, 0.75, na.rm = TRUE)
), by = .(dgp, method, n_grid, noise_sd)]

ggplot(t_ratio_dt, aes(x = ratio, y = method, color = method)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "grey40") +
  geom_point(size = 2, alpha = 0.7) +
  geom_errorbarh(aes(xmin = q25, xmax = q75), height = 0.15, alpha = 0.3) +
  geom_text(aes(label = dgp), size = 3, vjust = -0.8, show.legend = FALSE) +
  stat_summary(fun = median, geom = "crossbar", width = 0.4,
    color = "black", linewidth = 0.4) +
  facet_grid(n_grid ~ noise_sd,
    labeller = labeller(
      noise_sd = function(x) paste0("noise=", x),
      n_grid = function(x) paste0("grid=", x)
    )
  ) +
  scale_color_manual(values = mcols, guide = "none") +
  scale_y_discrete(labels = mlabs) +
  scale_x_continuous(trans = "log2",
    breaks = c(0.25, 0.5, 1, 2, 4),
    labels = c("1/4", "1/2", "1", "2", "4")
  ) +
  labs(x = expression("Template MISE ratio vs grid=101 (" * log[2] * " scale)"), y = NULL) +
  theme_benchmark()

Template MISE ratio relative to grid=101. Each labeled point is one DGP and horizontal bars show the IQR of paired per-rep ratios. Values < 1 mean that grid improves template estimation.

4 Elastic Distance by Grid

SRSF elastic distance is reparameterization-invariant. A numerical convergence test shows that finer grids yield slightly larger elastic distances for fixed function pairs (~4% from grid=51 to 1001 for smooth functions), but this effect is far too small to explain the patterns below. The large grid effects visible under noise are genuine: finer grids provide more noisy observation points to the template estimator, degrading the estimated template shape.

Code

if ("template_elastic_dist" %in% names(dt) && any(!is.na(dt$template_elastic_dist))) {
  ggplot(
    dt[failure == FALSE & !is.na(template_elastic_dist)],
    aes(y = method, x = template_elastic_dist, fill = n_grid)
  ) +
    geom_boxplot(outlier.size = 0.3, outlier.alpha = 0.2, position = "dodge") +
    facet_wrap(~noise_sd,
      labeller = labeller(noise_sd = function(x) paste0("noise=", x))) +
    scale_y_discrete(labels = mlabs) +
    scale_x_log10() +
    scale_fill_brewer(palette = "Set2", name = "Grid size") +
    labs(y = NULL, x = "Elastic distance (log scale)") +
    theme_benchmark()
} else {
  cat("*Elastic distance not available in results.*")
}

Elastic (SRSF) template distance by grid size and method.

Code

if ("template_elastic_dist" %in% names(dt) && any(!is.na(dt$template_elastic_dist))) {
  e_ratio_rep <- dcast(
    dt[failure == FALSE & !is.na(template_elastic_dist),
      .(dgp, method, noise_sd, rep, n_grid, template_elastic_dist)],
    dgp + method + noise_sd + rep ~ n_grid,
    value.var = "template_elastic_dist"
  )
  e_ratio_rep[, ratio_51 := `51` / `101`]
  e_ratio_rep[, ratio_201 := `201` / `101`]
  e_ratio_rep <- melt(
    e_ratio_rep,
    id.vars = c("dgp", "method", "noise_sd", "rep"),
    measure.vars = c("ratio_51", "ratio_201"),
    variable.name = "n_grid",
    value.name = "ratio"
  )
  e_ratio_rep[, n_grid := sub("^ratio_", "", n_grid)]
  e_ratio_dt <- e_ratio_rep[, .(
    ratio = median(ratio, na.rm = TRUE),
    q25 = quantile(ratio, 0.25, na.rm = TRUE),
    q75 = quantile(ratio, 0.75, na.rm = TRUE)
  ), by = .(dgp, method, n_grid, noise_sd)]

  print(
    ggplot(e_ratio_dt, aes(x = ratio, y = method, color = method)) +
      geom_vline(xintercept = 1, linetype = "dashed", color = "grey40") +
      geom_point(size = 2, alpha = 0.7) +
      geom_errorbarh(aes(xmin = q25, xmax = q75), height = 0.15, alpha = 0.3) +
      geom_text(aes(label = dgp), size = 3, vjust = -0.8, show.legend = FALSE) +
      stat_summary(fun = median, geom = "crossbar", width = 0.4,
        color = "black", linewidth = 0.4) +
      facet_grid(n_grid ~ noise_sd,
        labeller = labeller(
          noise_sd = function(x) paste0("noise=", x),
          n_grid = function(x) paste0("grid=", x)
        )
      ) +
      scale_color_manual(values = mcols, guide = "none") +
      scale_y_discrete(labels = mlabs) +
      scale_x_continuous(trans = "log2",
        breaks = c(0.25, 0.5, 1, 2, 4),
        labels = c("1/4", "1/2", "1", "2", "4")
      ) +
      labs(
        x = expression("Elastic distance ratio vs grid=101 (" * log[2] * " scale)"),
        y = NULL
      ) +
      theme_benchmark()
  )
}

Elastic distance ratio relative to grid=101. Each labeled point is one DGP and horizontal bars show the IQR of paired per-rep ratios. Values > 1 mean that grid degrades template shape recovery.

Code

if ("template_elastic_dist" %in% names(dt) && any(!is.na(dt$template_elastic_dist))) {
  e_ratio_dt2 <- dt[failure == FALSE & !is.na(template_elastic_dist),
    .(elastic_dist = median(template_elastic_dist, na.rm = TRUE)),
    by = .(dgp, method, n_grid, noise_sd)
  ]
  e_base2 <- e_ratio_dt2[n_grid == "101",
    .(dgp, method, noise_sd, base = elastic_dist)]
  e_ratio_dt2 <- merge(e_ratio_dt2, e_base2, by = c("dgp", "method", "noise_sd"))
  e_ratio_dt2[, ratio := elastic_dist / base]
  e_ratio_dt2 <- e_ratio_dt2[n_grid != "101"]

  e_summary <- e_ratio_dt2[,
    .(median_ratio = median(ratio, na.rm = TRUE),
      min_ratio = min(ratio, na.rm = TRUE),
      max_ratio = max(ratio, na.rm = TRUE)),
    by = .(method, n_grid)
  ]

  cat("**Elastic distance ratios vs grid=101** (median [min, max] across DGPs and noise):\n\n")
  for (g in sort(unique(as.character(e_summary$n_grid)))) {
    cat(sprintf("*Grid = %s:*\n\n", g))
    sub <- e_summary[n_grid == g][order(median_ratio)]
    for (i in seq_len(nrow(sub))) {
      cat(sprintf("- **%s**: %.2f [%.2f, %.2f]\n",
        mlabs[sub$method[i]], sub$median_ratio[i],
        sub$min_ratio[i], sub$max_ratio[i]))
    }
    cat("\n")
  }

  # Check if elastic distance shows same SRVF pattern as warp MISE
  srvf_e <- e_ratio_dt2[method == "srvf",
    .(median_ratio = median(ratio, na.rm = TRUE)),
    by = .(n_grid, noise_sd)
  ]
  srvf_e201_n0 <- srvf_e[n_grid == "201" & noise_sd == "0", median_ratio]
  srvf_e201_n3 <- srvf_e[n_grid == "201" & noise_sd == "0.3", median_ratio]
  if (length(srvf_e201_n0) > 0 && length(srvf_e201_n3) > 0) {
    cat(sprintf(
      "SRVF elastic distance ratio at grid=201: %.2f (noise=0), %.2f (noise=0.3). ",
      srvf_e201_n0, srvf_e201_n3
    ))
    if (srvf_e201_n3 > srvf_e201_n0) {
      cat("Finer grids degrade template shape recovery more under noise, ",
        "consistent with the warp MISE pattern.\n")
    } else {
      cat("The pattern differs from warp MISE — elastic distance is less ",
        "sensitive to the grid × noise interaction for template estimation.\n")
    }
  }
}

Elastic distance ratios vs grid=101 (median [min, max] across DGPs and noise):

Grid = 201:

Affine (S+S): 1.08 [0.85, 1.88]
FDA (crit 1): 1.10 [0.70, 2.02]
Landmark (auto): 1.16 [0.95, 1.84]
FDA (default): 1.19 [0.94, 2.17]
SRVF: 1.50 [0.48, 2.50]

Grid = 51:

SRVF: 0.83 [0.46, 2.03]
Affine (S+S): 0.99 [0.54, 1.16]
FDA (crit 1): 1.04 [0.52, 1.75]
FDA (default): 1.05 [0.53, 1.37]
Landmark (auto): 1.12 [0.87, 1.32]

SRVF elastic distance ratio at grid=201: 0.75 (noise=0), 1.53 (noise=0.3). Finer grids degrade template shape recovery more under noise, consistent with the warp MISE pattern.

5 Alignment Error by Grid

Alignment error measures the practical downstream impact: \(\text{mean}_i \int (\hat{x}_i^{\text{aligned}} - \tilde{x}_i)^2\,dt\). Do grid artifacts in warp estimation translate into worse alignment?

Code

ggplot(
  dt[failure == FALSE],
  aes(y = method, x = alignment_error, fill = n_grid)
) +
  geom_boxplot(outlier.size = 0.3, outlier.alpha = 0.2, position = "dodge") +
  facet_wrap(~noise_sd, labeller = labeller(noise_sd = function(x) paste0("noise=", x))) +
  scale_y_discrete(labels = mlabs) +
  scale_x_log10() +
  scale_fill_brewer(palette = "Set2", name = "Grid size") +
  labs(y = NULL, x = "Alignment error (log scale)") +
  theme_benchmark()

Alignment error by grid size and method, faceted by noise level.

Code

ae_ratio_rep <- dcast(
  dt[failure == FALSE, .(dgp, method, noise_sd, rep, n_grid, alignment_error)],
  dgp + method + noise_sd + rep ~ n_grid,
  value.var = "alignment_error"
)
ae_ratio_rep[, ratio_51 := `51` / `101`]
ae_ratio_rep[, ratio_201 := `201` / `101`]
ae_ratio_rep <- melt(
  ae_ratio_rep,
  id.vars = c("dgp", "method", "noise_sd", "rep"),
  measure.vars = c("ratio_51", "ratio_201"),
  variable.name = "n_grid",
  value.name = "ratio"
)
ae_ratio_rep[, n_grid := sub("^ratio_", "", n_grid)]
ae_ratio_dt <- ae_ratio_rep[, .(
  ratio = median(ratio, na.rm = TRUE),
  q25 = quantile(ratio, 0.25, na.rm = TRUE),
  q75 = quantile(ratio, 0.75, na.rm = TRUE)
), by = .(dgp, method, n_grid, noise_sd)]

ggplot(ae_ratio_dt, aes(x = ratio, y = method, color = method)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "grey40") +
  geom_point(size = 2, alpha = 0.7) +
  geom_errorbarh(aes(xmin = q25, xmax = q75), height = 0.15, alpha = 0.3) +
  geom_text(aes(label = dgp), size = 3, vjust = -0.8, show.legend = FALSE) +
  stat_summary(fun = median, geom = "crossbar", width = 0.4,
    color = "black", linewidth = 0.4) +
  facet_grid(n_grid ~ noise_sd,
    labeller = labeller(
      noise_sd = function(x) paste0("noise=", x),
      n_grid = function(x) paste0("grid=", x)
    )
  ) +
  scale_color_manual(values = mcols, guide = "none") +
  scale_y_discrete(labels = mlabs) +
  scale_x_continuous(trans = "log2",
    breaks = c(0.25, 0.5, 1, 2, 4),
    labels = c("1/4", "1/2", "1", "2", "4")
  ) +
  labs(x = expression("Alignment error ratio vs grid=101 (" * log[2] * " scale)"),
    y = NULL) +
  theme_benchmark()

Alignment error ratio relative to grid=101, per method and noise level. Each labeled point is one DGP; horizontal bars show the IQR of paired per-rep ratios and the crossbar shows the median across DGPs.

Code

ae_ratio_summary <- ae_ratio_dt[,
  .(median_ratio = median(ratio, na.rm = TRUE),
    min_ratio = min(ratio, na.rm = TRUE),
    max_ratio = max(ratio, na.rm = TRUE)),
  by = .(method, n_grid)
]

cat("**Alignment error ratios vs grid=101** (median [min, max] across DGPs and noise):\n\n")

Alignment error ratios vs grid=101 (median [min, max] across DGPs and noise):

Code

for (g in sort(unique(as.character(ae_ratio_summary$n_grid)))) {
  cat(sprintf("*Grid = %s:*\n\n", g))
  sub <- ae_ratio_summary[n_grid == g][order(median_ratio)]
  for (i in seq_len(nrow(sub))) {
    cat(sprintf("- **%s**: %.2f [%.2f, %.2f]\n",
      mlabs[sub$method[i]], sub$median_ratio[i],
      sub$min_ratio[i], sub$max_ratio[i]))
  }
  cat("\n")
}

Grid = 201:

FDA (default): 1.00 [0.93, 1.12]
Affine (S+S): 1.01 [0.84, 1.23]
Landmark (auto): 1.04 [0.93, 1.10]
FDA (crit 1): 1.04 [0.92, 1.15]
SRVF: 1.27 [0.51, 6.20]

Grid = 51:

SRVF: 0.79 [0.35, 2.32]
Affine (S+S): 0.91 [0.55, 1.34]
Landmark (auto): 0.96 [0.83, 1.24]
FDA (default): 0.97 [0.92, 1.37]
FDA (crit 1): 1.00 [0.86, 1.19]

Code

# Compare with warp MISE ratios
warp_ratio_summary <- ratio_dt[,
  .(warp_median = median(ratio, na.rm = TRUE)),
  by = .(method, n_grid)
]
ae_ratio_summary2 <- ae_ratio_dt[,
  .(ae_median = median(ratio, na.rm = TRUE)),
  by = .(method, n_grid)
]
both <- merge(warp_ratio_summary, ae_ratio_summary2, by = c("method", "n_grid"))
r_grid <- cor(both$warp_median, both$ae_median, use = "complete.obs")
cat(sprintf(
  "Correlation between warp MISE and alignment error grid ratios: r = %.2f. ",
  r_grid))

Correlation between warp MISE and alignment error grid ratios: r = 0.89.

Code

if (r_grid > 0.9) {
  cat("Grid effects on warp recovery translate almost directly into alignment quality.\n\n")
} else {
  cat("Grid effects on alignment error partially decouple from warp MISE — ",
    "alignment quality has some robustness to grid-induced warp artifacts.\n\n")
}

Grid effects on alignment error partially decouple from warp MISE — alignment quality has some robustness to grid-induced warp artifacts.

Code

# SRVF-specific: does the noise-dependent reversal hold for alignment error too?
srvf_ae <- ae_ratio_dt[method == "srvf",
  .(median_ratio = median(ratio, na.rm = TRUE)),
  by = .(n_grid, noise_sd)]
srvf_ae51_n0 <- srvf_ae[n_grid == "51" & noise_sd == "0", median_ratio]
srvf_ae51_n3 <- srvf_ae[n_grid == "51" & noise_sd == "0.3", median_ratio]
srvf_ae201_n0 <- srvf_ae[n_grid == "201" & noise_sd == "0", median_ratio]
srvf_ae201_n3 <- srvf_ae[n_grid == "201" & noise_sd == "0.3", median_ratio]

if (length(srvf_ae51_n0) > 0 && length(srvf_ae201_n3) > 0) {
  cat(sprintf(
    "SRVF alignment error ratios: grid=51 is %.2f (noise=0), %.2f (noise=0.3); grid=201 is %.2f (noise=0), %.2f (noise=0.3). ",
    srvf_ae51_n0, srvf_ae51_n3, srvf_ae201_n0, srvf_ae201_n3))
  reversal <- (srvf_ae51_n0 > 1 && srvf_ae51_n3 < 1) ||
    (srvf_ae201_n0 < 1 && srvf_ae201_n3 > 1)
  if (reversal) {
    cat("The noise-dependent reversal observed for warp MISE is also present for alignment error.\n")
  } else {
    cat("The noise-dependent reversal pattern differs somewhat for alignment error.\n")
  }
}

SRVF alignment error ratios: grid=51 is 1.69 (noise=0), 0.68 (noise=0.3); grid=201 is 0.97 (noise=0), 1.16 (noise=0.3). The noise-dependent reversal observed for warp MISE is also present for alignment error.

6 Grid \(\times\) Noise Interaction

The key question: does the combination of fine grid + high noise compound for SRVF?

Code

int_dt <- dt[failure == FALSE,
  .(
    warp_mise = median(warp_mise, na.rm = TRUE),
    q25 = quantile(warp_mise, 0.25, na.rm = TRUE),
    q75 = quantile(warp_mise, 0.75, na.rm = TRUE)
  ),
  by = .(dgp, method, n_grid, noise_sd)
]

ggplot(int_dt, aes(x = n_grid, y = warp_mise, group = dgp, color = dgp)) +
  geom_line(alpha = 0.7) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = q25, ymax = q75), width = 0.08, alpha = 0.25) +
  facet_grid(method ~ noise_sd,
    scales = "free_y",
    labeller = labeller(
      noise_sd = function(x) paste0("noise=", x),
      method = mlabs
    )
  ) +
  scale_y_log10() +
  labs(x = "Grid size", y = "Warp MISE (log scale)", color = "DGP") +
  theme_benchmark()

Grid x noise interaction: warp MISE profiles per method. Points show medians and vertical bars show IQRs; each line connects the 3 grid sizes for one DGP. Steeper upward slopes at grid=201 for noise=0.3 indicate grid-noise compounding. Note: y-axis scales differ across methods due to large baseline differences.

Code

# Decompose compound effect into main effects + interaction
compound_dt <- dt[failure == FALSE,
  .(warp_mise = median(warp_mise, na.rm = TRUE)),
  by = .(dgp, method, n_grid, noise_sd)
]

# For SRVF: decompose the compound ratio
# Compound = (grid=201, noise=0.3) / (grid=101, noise=0)
# Noise main effect = (grid=101, noise=0.3) / (grid=101, noise=0)
# Grid main effect = (grid=201, noise=0) / (grid=101, noise=0)
# Interaction = compound / (noise_main * grid_main)

ref_clean <- compound_dt[n_grid == "101" & noise_sd == "0",
  .(dgp, method, ref = warp_mise)]
noise_main <- compound_dt[n_grid == "101" & noise_sd == "0.3",
  .(dgp, method, noisy = warp_mise)]
grid_main <- compound_dt[n_grid == "201" & noise_sd == "0",
  .(dgp, method, fine = warp_mise)]
stress <- compound_dt[n_grid == "201" & noise_sd == "0.3",
  .(dgp, method, stressed = warp_mise)]

decomp <- Reduce(function(a, b) merge(a, b, by = c("dgp", "method")),
  list(ref_clean, noise_main, grid_main, stress))
decomp[, `:=`(
  compound = stressed / ref,
  noise_effect = noisy / ref,
  grid_effect = fine / ref,
  interaction = (stressed / ref) / ((noisy / ref) * (fine / ref))
)]

cat("**Compound effect decomposition** ",
  "(grid=201/noise=0.3 vs grid=101/noise=0):\n\n")

Compound effect decomposition (grid=201/noise=0.3 vs grid=101/noise=0):

Code

decomp_summary <- decomp[,
  .(
    compound = median(compound, na.rm = TRUE),
    noise_main = median(noise_effect, na.rm = TRUE),
    grid_main = median(grid_effect, na.rm = TRUE),
    interaction = median(interaction, na.rm = TRUE)
  ),
  by = method
][order(-compound)]

tbl2 <- c(
  "| Method | Compound | Noise main | Grid main | Interaction |",
  "|--------|----------|------------|-----------|-------------|"
)
for (i in seq_len(nrow(decomp_summary))) {
  tbl2 <- c(tbl2, sprintf("| %s | %.1f\u00d7 | %.1f\u00d7 | %.2f\u00d7 | %.2f\u00d7 |",
    mlabs[decomp_summary$method[i]],
    decomp_summary$compound[i],
    decomp_summary$noise_main[i],
    decomp_summary$grid_main[i],
    decomp_summary$interaction[i]))
}
cat(paste(tbl2, collapse = "\n"), "\n\n")

Method	Compound	Noise main	Grid main	Interaction
SRVF	21.7×	13.2×	0.91×	1.21×
Landmark (auto)	1.1×	1.0×	1.05×	0.94×
FDA (crit 1)	0.8×	1.0×	1.06×	0.78×
FDA (default)	0.8×	0.9×	0.98×	0.88×
Affine (S+S)	0.7×	0.7×	0.99×	0.98×

Code

srvf_decomp <- decomp_summary[method == "srvf"]
cat(sprintf(
  "For SRVF, the compound ratio of %.1f× decomposes into a noise main effect ",
  srvf_decomp$compound
))

For SRVF, the compound ratio of 21.7× decomposes into a noise main effect

Code

cat(sprintf(
  "of %.1f×, a grid main effect of %.2f×, and a super-multiplicative interaction ",
  srvf_decomp$noise_main, srvf_decomp$grid_main
))

of 13.2×, a grid main effect of 0.91×, and a super-multiplicative interaction

Code

cat(sprintf(
  "of %.2f×. ", srvf_decomp$interaction
))

of 1.21×.

Code

if (srvf_decomp$interaction > 1.1) {
  cat("The interaction exceeds 1, confirming that grid and noise effects ",
    "are super-additive for SRVF — fine grids amplify the noise problem ",
    "beyond what either factor alone would predict.\n\n")
} else if (srvf_decomp$interaction < 0.9) {
  cat("The interaction is below 1, suggesting the compound effect is ",
    "sub-multiplicative — the large compound ratio is mostly driven by ",
    "the noise main effect.\n\n")
} else {
  cat("The interaction is near 1, suggesting the compound effect is ",
    "approximately multiplicative — grid and noise contribute independently ",
    "with no synergy.\n\n")
}

The interaction exceeds 1, confirming that grid and noise effects are super-additive for SRVF — fine grids amplify the noise problem beyond what either factor alone would predict.

Code

# Note on extreme max values
max_compound <- decomp[, .(max_compound = max(compound)), by = method]
extreme <- max_compound[max_compound > 10]
if (nrow(extreme) > 0) {
  cat("*Note:* Extreme compound ratios (")
  cat(paste(sprintf("%s: %.0f×", mlabs[extreme$method], extreme$max_compound),
    collapse = ", "))
  cat(") are driven by DGPs where the baseline (grid=101, noise=0) warp MISE ",
    "is very small, making the ratio sensitive to small absolute differences.\n\n")
}

Note: Extreme compound ratios (SRVF: 202×, Affine (S+S): 54×) are driven by DGPs where the baseline (grid=101, noise=0) warp MISE is very small, making the ratio sensitive to small absolute differences.

7 Per-DGP Profiles

Code

profile_dt <- dt[failure == FALSE,
  .(
    median = median(warp_mise, na.rm = TRUE),
    q25 = quantile(warp_mise, 0.25, na.rm = TRUE),
    q75 = quantile(warp_mise, 0.75, na.rm = TRUE)
  ),
  by = .(dgp, method, n_grid, noise_sd)
]

ggplot(profile_dt, aes(x = n_grid, y = median, color = noise_sd, group = noise_sd)) +
  geom_line(linewidth = 0.7) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = q25, ymax = q75), width = 0.1, alpha = 0.4) +
  facet_grid(dgp ~ method, scales = "free_y",
    labeller = labeller(method = mlabs, dgp = dlabs)) +
  scale_y_log10() +
  scale_color_brewer(palette = "Dark2", name = "Noise") +
  labs(x = "Grid size", y = "Warp MISE (log scale)") +
  theme_benchmark(base_size = 9)

Per-DGP warp MISE by grid size, faceted by method. Lines connect grid levels within each noise condition. IQR error bars show variability across replications.

8 Timing

Code

ggplot(
  dt[failure == FALSE],
  aes(x = n_grid, y = time_per_curve, fill = n_grid)
) +
  geom_boxplot(outlier.size = 0.3, outlier.alpha = 0.2) +
  facet_wrap(~method, nrow = 1, labeller = labeller(method = mlabs)) +
  scale_y_log10() +
  scale_fill_brewer(palette = "Set2", name = "Grid size") +
  labs(x = "Grid size", y = "Time per curve (sec, log scale)") +
  theme_benchmark()

Computation time per curve (seconds) by grid size and method. Common y-axis (log scale) enables direct comparison. SRVF scales steeply due to O(n²) DP alignment.

Code

timing_dt <- dt[failure == FALSE,
  .(time_per_curve = median(time_per_curve, na.rm = TRUE)),
  by = .(method, n_grid)
]
timing_wide <- dcast(timing_dt, method ~ n_grid, value.var = "time_per_curve")

# Compute timing ratios
timing_wide[, ratio_201_101 := `201` / `101`]
timing_wide[, method := mlabs[method]]
cat("**Median time per curve (seconds) by method and grid size:**\n\n")

Median time per curve (seconds) by method and grid size:

Code

print(knitr::kable(timing_wide, digits = 4))

method	51	101	201	ratio_201_101
SRVF	0.0517	0.1809	0.705	3.900
CC (FPC1 cc)	0.1130	0.1171	0.118	1.005
CC (L2 dist)	0.2074	0.2057	0.191	0.927
Affine (S+S)	0.0991	0.1204	0.129	1.074
Landmark (auto)	0.0033	0.0036	0.004	1.122

Code

cat("\n")

Code

srvf_ratio <- timing_wide[method == "SRVF", ratio_201_101]
cat(sprintf(
  "SRVF timing scales %.1f× from grid=101 to grid=201 (DP alignment is O(n²)). ",
  srvf_ratio
))

SRVF timing scales 3.9× from grid=101 to grid=201 (DP alignment is O(n²)).

Code

cat("FDA methods show negligible timing change across grid sizes.\n")

FDA methods show negligible timing change across grid sizes.

9 Summary

Code

# Summary: for each method, median warp MISE ratio at grid=51 and grid=201 vs 101
ratio_summary_all <- ratio_dt[,
  .(median_ratio = median(ratio, na.rm = TRUE)),
  by = .(method, n_grid, noise_sd)
]
summary_wide <- dcast(ratio_summary_all,
  method + noise_sd ~ n_grid,
  value.var = "median_ratio"
)
summary_wide[, method := mlabs[method]]
cat("**Median warp MISE ratio vs grid=101** (by method and noise level):\n\n")

Median warp MISE ratio vs grid=101 (by method and noise level):

Code

print(knitr::kable(summary_wide, digits = 2))

method	noise_sd	51	201
SRVF	0	2.26	0.91
SRVF	0.1	0.93	1.73
SRVF	0.3	0.73	1.20
FDA (default)	0	1.00	0.98
FDA (default)	0.1	1.04	0.97
FDA (default)	0.3	1.11	0.88
FDA (crit 1)	0	0.99	1.06
FDA (crit 1)	0.1	1.10	0.99
FDA (crit 1)	0.3	1.13	0.86
Affine (S+S)	0	0.86	0.99
Affine (S+S)	0.1	0.93	1.02
Affine (S+S)	0.3	1.05	1.06
Landmark (auto)	0	1.00	1.05
Landmark (auto)	0.1	0.97	1.01
Landmark (auto)	0.3	1.04	1.00

Code

cat("\n")

Code

cat("**Key findings:**\n\n")

Key findings:

Code

# 1. SRVF reversal
srvf_51_n0 <- ratio_dt[method == "srvf" & n_grid == "51" & noise_sd == "0",
  median(ratio, na.rm = TRUE)]
srvf_51_n3 <- ratio_dt[method == "srvf" & n_grid == "51" & noise_sd == "0.3",
  median(ratio, na.rm = TRUE)]
srvf_201_n0 <- ratio_dt[method == "srvf" & n_grid == "201" & noise_sd == "0",
  median(ratio, na.rm = TRUE)]
srvf_201_n3 <- ratio_dt[method == "srvf" & n_grid == "201" & noise_sd == "0.3",
  median(ratio, na.rm = TRUE)]

cat(sprintf(
  "1. **SRVF grid sensitivity reverses with noise.** At noise=0, grid=51 ",
  "degrades SRVF (%.2f×) while grid=201 helps (%.2f×). At noise=0.3, ",
  "grid=51 helps (%.2f×) while grid=201 degrades (%.2f×). Coarse grids ",
  "act as implicit regularization by smoothing numerical derivatives.\n\n",
  srvf_51_n0, srvf_201_n0, srvf_51_n3, srvf_201_n3
))

SRVF grid sensitivity reverses with noise. At noise=0, grid=51

Code

# 2. FDA stability
fda_def_range <- ratio_dt[method == "fda_default",
  .(lo = min(ratio, na.rm = TRUE), hi = max(ratio, na.rm = TRUE))]
cat(sprintf(
  "2. **FDA methods are grid-insensitive.** fda_default ratios range from ",
  "%.2f to %.2f across all grids, noise levels, and DGPs. fda_crit1 shows ",
  "slight improvement at finer grids under noise.\n\n",
  fda_def_range$lo, fda_def_range$hi
))

FDA methods are grid-insensitive. fda_default ratios range from

Code

# 3. Timing
cat("3. **SRVF timing scales steeply** (~4× per grid doubling due to O(n²) DP ",
  "alignment). FDA timing is essentially constant across grids.\n\n")

SRVF timing scales steeply (~4× per grid doubling due to O(n²) DP alignment). FDA timing is essentially constant across grids.

Code

# 4. Practical
cat("4. **Grid=101 is a robust default** for `tf_register()`. It avoids SRVF ",
  "degradation at finer grids while providing sufficient resolution for FDA ",
  "methods. Users who need finer grids with SRVF should investigate whether ",
  "penalization (Study B) can mitigate the derivative noise amplification — ",
  "this hypothesis is not tested in Study C.\n\n")

Grid=101 is a robust default for tf_register(). It avoids SRVF degradation at finer grids while providing sufficient resolution for FDA methods. Users who need finer grids with SRVF should investigate whether penalization (Study B) can mitigate the derivative noise amplification — this hypothesis is not tested in Study C.

Code

cat("5. **Grid resolution is a confound for SRVF benchmarking.** ",
  "Method comparisons involving SRVF at grid=201 (or finer) ",
  "will underestimate SRVF performance relative to FDA methods, especially ",
  "under noise. Studies A and B use grid=101, which is neutral territory.\n")

Grid resolution is a confound for SRVF benchmarking. Method comparisons involving SRVF at grid=201 (or finer) will underestimate SRVF performance relative to FDA methods, especially under noise. Studies A and B use grid=101, which is neutral territory.