-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
after going through a few iterations of the first for loop in https://github.com/tdhock/aum/blob/main/vignettes/line-search.Rmd
I executed this code
N.seq <- as.integer(10^seq(2,log10(max(diff.list$subtrain$example)),l=10))
N.seq <- as.integer(10^seq(log10(100),log10(1600),l=10))
atime.list <- atime::atime(
N=N.seq,
setup={
maxIterations <- N*(N-1)/2
X.subtrain <- X.keep[index.list$subtrain,]
X.sub <- X.subtrain[1:N,]
diff.sub <- diff.list$subtrain[example %in% seq(0,N-1)]
},
result=TRUE,
seconds.limit=1,
times=1,
aum_line_search={
nb.weight.search <- aum::aum_line_search(
diff.sub,
maxIterations=maxIterations,
feature.mat=X.sub,
weight.vec=weight.vec)
list(
total.iterations=nrow(nb.weight.search$line_search_result),
iterations.to.min=nb.weight.search$line_search_result[,which.min(aum)])
})
atime.list$measurements[, total.iterations := sapply(
result, function(L)L$total.iterations)]
atime.list$measurements[, iterations.to.min := sapply(
result, function(L)L$iterations.to.min)]
best.list <- atime::references_best(atime.list, unit.col.vec=c("total.iterations","iterations.to.min"))
best.ref <- best.list$ref[each.sign.rank==1]
library(ggplot2)
gg <- ggplot()+
theme_bw()+
facet_grid(unit ~ ., scales="free")+
geom_line(aes(
N, empirical, color=expr.name),
data=best.list$meas)+
scale_x_log10()+
scale_y_log10("median line, min/max band")
gg.show <- gg+
directlabels::geom_dl(aes(
N, empirical, color=expr.name, label=expr.name),
method="right.polygons",
data=best.list$meas)+
theme(legend.position="none")+
coord_cartesian(xlim=c(min(N.seq),max(best.list$meas$N)*5))
gg.ref <- gg.show+
geom_line(aes(
N, reference, group=paste(fun.name, expr.name)),
color="grey",
data=best.ref)
gg.ref+
directlabels::geom_dl(aes(
N, reference,
label.group=paste(fun.name, expr.name),
label=fun.name),
data=best.ref,
color="grey",
method="left.polygons")
and I got this plot
which suggests that the number of iterations in line search is quadratic, and so is the number of iterations to get to the min.
@phase Would be nice to have a vignette that explores this more systematically,
- do this asymptotic analysis for every step of gradient descent on full data?
- do gradient descent on different data sizes, and keep track of these metrics at each step?
Metadata
Metadata
Assignees
Labels
No labels