In this section we address the high-dimensional case, where the function class is sparse linear, i.e. \(g(a) = \langle \alpha, a\rangle\), where \(\|\alpha\|_0 := \{j\in [p]\,|\,|\alpha_j|>0\} \leq s\). We will consider \(\ell_1\) relaxations for the minimax optimization problem with \(\ell_1\)-balls for the adversary. We remove the non-smoothness of the \(\ell_1\) regularization by lifting the parameter \(\alpha\) to a \(2p\)-dimensional positive orthant. Consider two vectors \(\rho^{+}, \rho^{-} \geq 0\) and then setting \(\alpha = \rho^{+} - \rho^{-}\), with \(\rho = \left(\rho^{+}; \rho^{-}\right)\). Observe that for any feasible \(\bar{\alpha}\), the solution \(\rho_i^{+} = \alpha_i 1\left\{\alpha_i > 0\right\}\) and \(\rho_i^{-} = \alpha_i 1\left\{\alpha_i \leq 0\right\}\) is still feasible and achieves the same objective, by the linearity of the loss function. Moreover, any solution \(\rho\), maps to a feasible solution \(\alpha\) and thus the two optimization programs have the same optimal solutions.
Estimator 1
The minimax problem is:
(1)\[\min_{\|\alpha\|_1 \leq V_1} \max _{\|\theta_1\|_1 \leq 1} L(\alpha, \theta) := \min_{\|\alpha\|_1 \leq V_1} \max _{\|\theta_1\|_1 \leq 1} 2\langle \mathbb{E}_n [(y - \langle \alpha, a \rangle)c'], \theta_1 \rangle - \mathbb{E}_n [\langle c', \theta_1 \rangle^2] + \mu' \|\alpha\|_1\]
which can be written as:
\[\min _{\rho \geq 0, \|\rho\|_1 \leq V_1} \max _{\omega_1 \geq 0, \|\omega_1\|_1 = 1} \ell(\rho, \omega_1)\]
where
\[\ell(\rho, \omega_1) := 2 \omega_1^{\top} \mathbb{E}_n [u_1 y] - 2 \omega_1^{\top} \mathbb{E}_n [u_1 v_1^{\top}] \rho - \omega_1^{\top} \mathbb{E}_n [u_1 u_1^{\top}] \omega_1 + \mu' \sum_{i=1}^{2 p} \rho_i.\]
Moreover, \(v_1 = (a, -a)\), \(u_1 = (c', -c')\); and \(\theta_1 = \omega_1^{+} - \omega_1^{-}\), \(\alpha = \rho^+ - \rho^{-}\).
FTRL iterates for Estimator 1
Consider the iterates for \(t=1,\ldots, T\):
\begin{aligned}
\tilde{\rho}_{t+1} &= \exp\left(-\frac{\eta}{V_1} \left\{\sum_{\tau \leq t} -2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1\tau} -2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1t} + (t+1)\mu' \right\} - 1\right) \\
\rho_{t+1} &= \tilde{\rho}_{t+1} \min\left\{1, \frac{V_1}{\| \tilde{\rho}_{t+1} \|_1}\right\},
\end{aligned}
\begin{aligned}
\tilde{\omega}_{1,t+1} &= \tilde{\omega}_{1,t} \exp\bigg(2\eta\left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_{t} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t}\right\} \\
&\qquad -\eta\left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_{t-1} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t-1}\right\}\bigg) \\
\omega_{1,t+1} &= \frac{\tilde{\omega}_{1,t+1}}{\|\tilde{\omega}_{1,t+1}\|_1}
\end{aligned}
with \(\tilde{\rho}_{-1} = \tilde{\rho}_{0} = \frac{1}{e}\), \(\tilde{\omega}_{1,-1} = \tilde{\omega}_{1,0} = \frac{1}{2p}\), and \(\eta = \frac{1}{8 \|\mathbb{E}_n [v_1 u_1^{\top}]\|_\infty}\).
Then, \(\bar{\rho} = \frac{1}{T}\sum_{t=1}^{T} \rho_t\), \(\bar{\alpha} = \bar{\rho}^{+} - \bar{\rho}^{-}\) is a \(O(T^{-1})\)-approximate solution for (1).
Proof
The proof will match symbols with Proposition 17. Let
\[\Theta = \{\rho \;|\; \rho \geq 0,\, \|\rho\|_1 \leq V_1\}\;,\quad W = \{\omega_1 \;|\; \omega_1 \geq 0, \|\omega_1\|_1 = 1\}\]
be the convex feasibility sets. Note that \(\ell\) is convex in \(\rho\) and concave in \(\omega_1\). Since
\[\begin{split}\begin{aligned}
\nabla_{\rho} \ell(\rho, \omega_1) &= -2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_1 + \mu' \\
\nabla_{\omega_1} \ell(\rho, \omega_1) &= 2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho - 2 \mathbb{E}_n [u_1 u_1^{\top}] \omega_1
\end{aligned}\end{split}\]
the Lipschitzness property is satisfied with \(L = 2 \|\mathbb{E}_n [v_1 u_1^{\top}]\|_\infty\):
\[\begin{split}\begin{aligned}
\left\|\nabla_\rho \ell(\rho, \omega_1) - \nabla_\rho \ell(\rho, \omega_1^{\prime})\right\|_{\infty} &= \left\|2 \mathbb{E}_n [v u^{\top}] (\omega_1 - \omega_1^{\prime})\right\|_{\infty} \leq 2 \|\mathbb{E}_n [v u^{\top}]\|_{\infty} \left\|\omega_1 - \omega_1^{\prime}\right\|_1 \\
\left\|\nabla_{\omega_{1}} \ell(\rho, \omega_{1}) - \nabla_{\omega_{1}} \ell(\rho^{\prime}, \omega_{1})\right\|_{\infty} &= \left\|2 \mathbb{E}_n [u v^{\top}] (\rho - \rho^{\prime})\right\|_{\infty} \leq 2 \|\mathbb{E}_n [v u^{\top}]\|_{\infty} \left\|\rho - \rho^{\prime}\right\|_1
\end{aligned}\end{split}\]
Consider the entropic regularizers \(R_{min}(\rho) = V_1 \sum_{i=1}^{2p} \rho_i \log (\rho_i)\), and \(R_{max}(\omega_1) = \sum_{i=1}^{2p} \omega_{1i} \log (\omega_{1i})\) which are \(1\)-strongly convex in the spaces \(\Theta\), and \(W\) respectively. Then, the iterates satisfy:
\begin{aligned}
\rho_{t+1} &= \underset{\rho \geq 0, \|\rho\|_1 \leq V_1}{\operatorname{argmin}} \rho^{\top} \left(\sum_{\tau \leq t} \left\{-2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1\tau} + \mu'\right\} - 2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1t} + \mu'\right) + \frac{V_1}{\eta} \sum_{i=1}^{2p} \rho_i \log (\rho_i) \\
\tilde{\rho}_{t+1} &= \exp\left(-\frac{\eta}{V_1} \left\{\sum_{\tau \leq t} -2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1\tau} -2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1t} + (t+1)\mu' \right\} - 1\right) \\
\rho_{t+1} &= \tilde{\rho}_{t+1} \min\left\{1, \frac{V_1}{\| \tilde{\rho}_{t+1} \|_1}\right\},
\end{aligned}
\begin{aligned}
\omega_{1,t+1} &= \underset{\|\omega_1\|_1 \leq 1}{\operatorname{argmax}} \omega_1^{\top} \left(\sum_{\tau \leq t} \left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_{\tau} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \omega_{1\tau} \right\} \right. \\
&\qquad \left. + 2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_{t} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \omega_{1t} \right) - \frac{1}{\eta} \sum_{i=1}^{2p} \omega_{1i} \log (\omega_{1i}) \\
\tilde{\omega}_{1,t+1} &= \tilde{\omega}_{1,t} \exp\left(2\eta \left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_{t} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t}\right\} \right. \\
&\qquad \left. -\eta \left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_{t-1} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t-1}\right\}\right) \\
\omega_{1,t+1} &= \frac{\tilde{\omega}_{1,t+1}}{\|\tilde{\omega}_{1,t+1}\|_1}
\end{aligned}
with \(\omega_{1,-1} = \omega_{1,0} = \frac{1}{2p}\). Therefore, by Proposition 17, the ensemble
\[\bar{\rho} = \frac{1}{T} \sum_{t=1}^T \rho_t\]
is \(O\left(\frac{1}{T}\right)\)-approximate solution for the minimax objective.
Duality Gap
The ensembles \(\bar{\alpha}\), \(\bar{\theta_1}\) can be thought of as primal and dual solutions and we can use the duality gap as a certificate for convergence of the algorithm.
\begin{aligned}
\text { Duality Gap } &:= \max _{\|\theta_1\|_1 \leq 1 } L(\bar{\alpha}, \theta_1) - \min _{\|\alpha\|_1 \leq V_1} L(\alpha, \bar{\theta_1}) \\
&\leq \left(\mathbb{E}_n [(y - \langle \bar{\alpha}, a \rangle)c']\right)^{\top} \mathbb{E}_n [c' c'^{\top}]^{\dagger} \left(\mathbb{E}_n [(y - \langle \bar{\alpha}, a \rangle)c']\right) + \mu' \|\bar{\alpha}\|_1 \\
&\quad - \left(\bar{\theta_1}^{\top} \mathbb{E}_n [c'y] + V_1 \left\{\mu' - 2 \|\mathbb{E}_n [a c'^{\top}] \bar{\theta_1}\|_\infty \right\}^{-} - \bar{\theta_1}^{\top} \mathbb{E}_n [c' c'^{\top}] \bar{\theta_1}\right) := \text{ tol}
\end{aligned}
Estimator 3 - (Ridge)
The joint estimator is:
(3)\[\begin{split}\hat\alpha, \hat\beta := \underset{\|\beta\|_1 \leq V_2}{\operatorname{argmin}_{\|\alpha\|_1 \leq V_1}} \underset{\|\theta_2\|_1 \leq 1}{\max_{\|\theta_1\|_1 \leq 1}} & 2\langle \mathbb{E}_n [(y - \langle \alpha, a \rangle)c'], \theta_1 \rangle - \mathbb{E}_n [\langle c', \theta_1 \rangle^2] + \mu' \mathbb{E}_n [\langle a, \alpha \rangle^2] \\
& + 2\langle \mathbb{E}_n [(\langle \alpha, a \rangle - \langle \beta, b \rangle)c], \theta_2 \rangle - \mathbb{E}_n [\langle c, \theta_2 \rangle^2] + \mu \mathbb{E}_n [\langle b, \beta \rangle^2]\end{split}\]
and the problem is equivalent to:
\[\underset{\rho_2 \geq 0, \|\rho_2\|_1 \leq V_2}{\min_{\rho_1 \geq 0, \|\rho_1\|_1 \leq V_1}} \underset{\omega_2 \geq 0, \|\omega_2\|_1 = 1}{\max_{\omega_1 \geq 0, \|\omega_1\|_1 = 1}} \ell(\{\rho_1, \rho_2\}, \{\omega_1, \omega_2\})\]
\[\begin{split}\begin{aligned}
\ell(\{\rho_1, \rho_2\}, \{\omega_1, \omega_2\}) := & 2 \omega_1^{\top} \mathbb{E}_n [u_1 y] - 2 \omega_1^{\top} \mathbb{E}_n \left[u_1 v_1^{\top}\right] \rho_1 - \omega_1^{\top} \mathbb{E}_n \left[u_1 u_1^{\top}\right] \omega_1 + \mu' \rho_1^\top \mathbb{E}_n [v_1 v_1^{\top}] \rho_1 \\
& + 2 \omega_2^{\top} \mathbb{E}_n [u_2 v_1^{\top}] \rho_1 - 2 \omega_2^{\top} \mathbb{E}_n \left[u_2 v_2^{\top}\right] \rho_2 - \omega_2^{\top} \mathbb{E}_n \left[u_2 u_2^{\top}\right] \omega_2 + \mu \rho_2^\top \mathbb{E}_n [v_2 v_2^{\top}] \rho_2
\end{aligned}\end{split}\]
and \(v_1 = (a, -a)\), \(v_2 = (b, -b)\), \(u_1 = (c', -c')\), \(u_2 = (c, -c)\).
FTRL iterates for Estimator 3 (Ridge)
Consider the iterates for \(t = 1, \ldots, T\):
\begin{aligned}
\tilde{\rho}_{1,t+1} &= \exp\left(-\frac{\eta}{V_1}\left\{\sum_{\tau = 1}^{t} \left(-2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1,\tau} + 2 \mu' \mathbb{E}_n [v_1 v_1^\top] \tilde{\rho}_{1,\tau} + 2 \mathbb{E}_n [v_1 u_2^\top] \omega_{2,\tau}\right)\right. \right. \\
& \left. \left. -2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1,t} + 2 \mu' \mathbb{E}_n [v_1 v_1^\top] \tilde{\rho}_{1,t} + 2 \mathbb{E}_n [v_1 u_2^\top] \omega_{2,t} \right\} - 1\right) \\
\rho_{1,t+1} &= \tilde{\rho}_{1,t+1} \min\left\{1, \frac{V_1}{\| \tilde{\rho}_{1,t+1} \|_1}\right\}, \\
\tilde{\rho}_{2,t+1} &= \exp\left(-\frac{\eta}{V_2}\left\{\sum_{\tau = 1}^{t} \left(-2 \mathbb{E}_n [v_2 u_2^\top] \omega_{2,\tau} + 2 \mu \mathbb{E}_n [v_2 v_2^\top] \tilde{\rho}_{2,\tau}\right)\right. \right. \\
& \left. \left. -2 \mathbb{E}_n [v_2 u_2^\top] \omega_{2,t} + 2 \mu \mathbb{E}_n [v_2 v_2^\top] \tilde{\rho}_{2,t} \right\} - 1\right) \\
\rho_{2,t+1} &= \tilde{\rho}_{2,t+1} \min\left\{1, \frac{V_2}{\| \tilde{\rho}_{2,t+1} \|_1}\right\},
\end{aligned}
\begin{aligned}
\tilde{\omega}_{1,t+1} &= \tilde{\omega}_{1,t} \exp\bigg(2\eta\left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^\top] \rho_{1,t} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t}\right\} \\
&\qquad - \eta\left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^\top] \rho_{1,t-1} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t-1}\right\}\bigg) \\
\omega_{1,t+1} &= \frac{\tilde{\omega}_{1,t+1}}{\|\tilde{\omega}_{1,t+1}\|_1} \\
\tilde{\omega}_{2,t+1} &= \tilde{\omega}_{2,t} \exp\bigg(2\eta\left\{2 \mathbb{E}_n [u_2 v_1^\top] \rho_{1,t} - 2 \mathbb{E}_n [u_2 v_2^\top] \rho_{2,t} - 2 \mathbb{E}_n [u_2 u_2^{\top}] \tilde{\omega}_{2,t}\right\} \\
&\qquad - \eta\left\{2 \mathbb{E}_n [u_2 v_1^\top] \rho_{1,t-1} - 2 \mathbb{E}_n [u_2 v_2^\top] \rho_{2,t-1} - 2 \mathbb{E}_n [u_2 u_2^{\top}] \tilde{\omega}_{2,t-1}\right\}\bigg) \\
\omega_{2,t+1} &= \frac{\tilde{\omega}_{2,t+1}}{\|\tilde{\omega}_{2,t+1}\|_1}
\end{aligned}
with \(\tilde{\rho}_{1,-1} = \tilde{\rho}_{1,0} = \tilde{\rho}_{2,-1} = \tilde{\rho}_{2,0} = \frac{1}{e}\) and \(\omega_{1,-1} = \omega_{1,0} = \omega_{2,-1} = \omega_{2,0} = \frac{1}{2p}\), and \(\eta = [16\max\left\{\left\|\mathbb{E}_n [v_1 u_1^\top]\right\|_\infty, \left\|\mathbb{E}_n [v_1 u_2^\top]\right\|_\infty, \left\|\mathbb{E}_n [v_2 u_2^\top]\right\|_\infty\right\}]^{-1}\).
Then,
\[\begin{split}\bar{\rho_1} = \frac{1}{T} \sum_{t=1}^{T} \rho_{1,t}, \quad \bar\alpha = \bar\rho_1^{+} - \bar\rho_1^{-} \\
\bar{\rho_2} = \frac{1}{T} \sum_{t=1}^{T} \rho_{2,t}, \quad \bar\beta = \bar\rho_2^{+} - \bar\rho_2^{-}\end{split}\]
are a \(O(T^{-1})\)-approximate solution for (3).
Proof
We will match symbols with Proposition 17. Let
\[\begin{split}\Theta = \{\rho_1 \;|\; \rho_1 \geq 0,\, \|\rho_1\|_1 \leq V_1\} \times \{\rho_2 \;|\; \rho_2 \geq 0,\, \|\rho_2\|_1 \leq V_2\} \\
W = \{\omega_1 \;|\; \omega_1 \geq 0, \|\omega_1\|_1 = 1\} \times \{\omega_2 \;|\; \omega_2 \geq 0, \|\omega_2\|_1 = 1\}\end{split}\]
be the convex feasibility sets. Note that \(\ell\) is convex in \((\rho_1, \rho_2)\) and concave in \((\omega_1, \omega_2)\). Equip the spaces \(\Theta\) and \(W\) with the direct sum \(1\)-norm:
\[\|(\rho_1, \rho_2)\|_1 = \|\rho_1\|_1 + \|\rho_2\|_1\]
with dual norm:
\[\|(\rho_1, \rho_2)\|_\infty = \max\{\|\rho_1\|_\infty, \|\rho_2\|_\infty\}\]
Now, the derivatives are given by:
\[\begin{split}\begin{aligned}
\nabla_{(\rho_1, \rho_2)} \ell(\{\rho_1, \rho_2\}, \{\omega_1, \omega_2\}) &= \begin{pmatrix}
-2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_1 + 2 \mu' \mathbb{E}_n [v_1 v_1^\top] \rho_1 + 2 \mathbb{E}_n [v_1 u_2^\top] \omega_2 \\
-2 \mathbb{E}_n [v_2 u_2^{\top}] \omega_2 + 2 \mu \mathbb{E}_n [v_2 v_2^\top] \rho_2
\end{pmatrix}^\top \\
\nabla_{(\omega_1, \omega_2)} \ell(\{\rho_1, \rho_2\}, \{\omega_1, \omega_2\}) &= \begin{pmatrix}
2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^{\top}] \rho_1 - 2 \mathbb{E}_n [u_1 u_1^{\top}] \omega_1 \\
2 \mathbb{E}_n [u_2 v_1^\top] \rho_1 - 2 \mathbb{E}_n [u_2 v_2^{\top}] \rho_2 - 2 \mathbb{E}_n [u_2 u_2^{\top}] \omega_2
\end{pmatrix}^\top
\end{aligned}\end{split}\]
The Lipschitzness property is satisfied with \(L = 2 \max\left\{\left\|2 \mathbb{E}_n [v_1 u_1^\top]\right\|_\infty, \left\|2 \mathbb{E}_n [v_1 u_2^\top]\right\|_\infty, \left\|2 \mathbb{E}_n [v_2 u_2^\top]\right\|_\infty\right\}\):
\[\begin{split}\begin{aligned}
&\left\|\nabla_{(\rho_1, \rho_2)} \ell(\{\rho_1, \rho_2\}, \{\omega_1, \omega_2\}) - \nabla_{(\rho_1, \rho_2)} \ell(\{\rho_1, \rho_2\}, \{\omega_1', \omega_2'\})\right\|_{\infty} \\
&= \left\|\left(-2 \mathbb{E}_n [v_1 u_1^\top](\omega_1 - \omega_1') + 2 \mathbb{E}_n [v_1 u_2^\top](\omega_2 - \omega_2'), -2 \mathbb{E}_n [v_2 u_2^\top](\omega_2 - \omega_2')\right)\right\|_{\infty} \\
&= \max\left\{\left\|-2 \mathbb{E}_n [v_1 u_1^\top](\omega_1 - \omega_1') + 2 \mathbb{E}_n [v_1 u_2^\top](\omega_2 - \omega_2')\right\|_\infty, \left\|-2 \mathbb{E}_n [v_2 u_2^\top](\omega_2 - \omega_2')\right\|_\infty\right\} \\
&\leq \max\left\{\left\|2 \mathbb{E}_n [v_1 u_1^\top]\right\|_\infty \left\|(\omega_1 - \omega_1')\right\|_{1} + \left\|2 \mathbb{E}_n [v_1 u_2^\top]\right\|_\infty \left\|(\omega_2 - \omega_2')\right\|_{1}, \left\|2 \mathbb{E}_n [v_2 u_2^\top]\right\|_\infty \left\|(\omega_2 - \omega_2')\right\|_{1}\right\} \\
&\leq 2 \max\left\{\left\|2 \mathbb{E}_n [v_1 u_1^\top]\right\|_\infty, \left\|2 \mathbb{E}_n [v_1 u_2^\top]\right\|_\infty, \left\|2 \mathbb{E}_n [v_2 u_2^\top]\right\|_\infty\right\} \left[\left\|(\omega_1 - \omega_1')\right\|_{1} + \left\|(\omega_2 - \omega_2')\right\|_{1}\right]
\end{aligned}\end{split}\]
and similarly for the Lipschitzness of \(\nabla_{(\omega_1, \omega_2)} \ell(\{\rho_1, \rho_2\}, \{\omega_1, \omega_2\})\).
Consider the following entropic regularizers:
\[\begin{split}\begin{aligned}
R_{min}(\rho_1, \rho_2) &= V_1 \sum_{i=1}^{2p} \rho_{1i} \log (\rho_{1i}) + V_2 \sum_{i=1}^{2p} \rho_{2i} \log (\rho_{2i}) \\
R_{max}(\omega_1, \omega_2) &= \sum_{i=1}^{2p} \omega_{1i} \log (\omega_{1i}) + \sum_{i=1}^{2p} \omega_{2i} \log (\omega_{2i})
\end{aligned}\end{split}\]
which are \(1\)-strongly convex in the spaces \(\Theta\), and \(W\) respectively.
To find the iterates it remains to solve:
\[\begin{split}\begin{aligned}
(\rho_{1,t+1}, \rho_{2,t+1}) &= \operatorname{argmin}_{\rho_1, \rho_2} \big(\rho_1, \rho_2\big)^\top \left(\sum_{\tau = 1}^{t} \left\{\nabla_{(\rho_1, \rho_2)} \ell(\{\rho_{1,\tau}, \rho_{2,\tau}\}, \{\omega_{1,\tau}, \omega_{2,\tau}\})\right\} \right. \\
&+\left. \nabla_{(\rho_1, \rho_2)} \ell(\{\rho_{1,t}, \rho_{2,t}\}, \{\omega_{1,t}, \omega_{2,t}\})\right) + R_{min}(\rho_1, \rho_2)
\end{aligned}\end{split}\]
Given the derivatives computed above, and that the problem is separable in \(\rho_1\), \(\rho_2\), the iterates are:
\[\begin{split}\begin{aligned}
\tilde{\rho}_{1,t+1} &= \exp\left(-\frac{\eta}{V_1}\left\{\sum_{\tau = 1}^{t} \left(-2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1,\tau} + 2 \mu' \mathbb{E}_n [v_1 v_1^\top] \tilde{\rho}_{1,\tau} + 2 \mathbb{E}_n [v_1 u_2^\top] \omega_{2,\tau}\right)\right.\right. \\
&\left.\left.-2 \mathbb{E}_n [v_1 u_1^{\top}] \omega_{1,t} + 2 \mu' \mathbb{E}_n [v_1 v_1^\top] \tilde{\rho}_{1,t} + 2 \mathbb{E}_n [v_1 u_2^\top] \omega_{2,t}\right\} - 1\right) \\
\rho_{1,t+1} &= \tilde{\rho}_{1,t+1} \min\left\{1, \frac{V_1}{\| \tilde{\rho}_{1,t+1} \|_1}\right\}, \\
\tilde{\rho}_{2,t+1} &= \exp\left(-\frac{\eta}{V_2}\left\{\sum_{\tau = 1}^{t} \left(-2 \mathbb{E}_n [v_2 u_2^\top] \omega_{2,\tau} + 2 \mu \mathbb{E}_n [v_2 v_2^\top] \tilde{\rho}_{2,\tau}\right)\right.\right. \\
&\left.\left.-2 \mathbb{E}_n [v_2 u_2^\top] \omega_{2,t} + 2 \mu \mathbb{E}_n [v_2 v_2^\top] \tilde{\rho}_{2,t}\right\} - 1\right) \\
\rho_{2,t+1} &= \tilde{\rho}_{2,t+1} \min\left\{1, \frac{V_2}{\| \tilde{\rho}_{2,t+1} \|_1}\right\},
\end{aligned}\end{split}\]
\begin{aligned}
\tilde{\omega}_{1,t+1} &= \tilde{\omega}_{1,t} \exp\bigg(2\eta\left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^\top] \rho_{1,t} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t}\right\} \\
&\qquad - \eta\left\{2 \mathbb{E}_n [u_1 y] - 2 \mathbb{E}_n [u_1 v_1^\top] \rho_{1,t-1} - 2 \mathbb{E}_n [u_1 u_1^{\top}] \tilde{\omega}_{1,t-1}\right\}\bigg) \\
\omega_{1,t+1} &= \frac{\tilde{\omega}_{1,t+1}}{\|\tilde{\omega}_{1,t+1}\|_1} \\
\tilde{\omega}_{2,t+1} &= \tilde{\omega}_{2,t} \exp\bigg(2\eta\left\{2 \mathbb{E}_n [u_2 v_1^\top] \rho_{1,t} - 2 \mathbb{E}_n [u_2 v_2^\top] \rho_{2,t} - 2 \mathbb{E}_n [u_2 u_2^{\top}] \tilde{\omega}_{2,t}\right\} \\
&\qquad - \eta\left\{2 \mathbb{E}_n [u_2 v_1^\top] \rho_{1,t-1} - 2 \mathbb{E}_n [u_2 v_2^\top] \rho_{2,t-1} - 2 \mathbb{E}_n [u_2 u_2^{\top}] \tilde{\omega}_{2,t-1}\right\}\bigg) \\
\omega_{2,t+1} &= \frac{\tilde{\omega}_{2,t+1}}{\|\tilde{\omega}_{2,t+1}\|_1}
\end{aligned}
with \(\tilde{\rho}_{1,-1} = \tilde{\rho}_{1,0} = \tilde{\rho}_{2,-1} = \tilde{\rho}_{2,0} = \frac{1}{e}\) and \(\omega_{1,-1} = \omega_{1,0} = \omega_{2,-1} = \omega_{2,0} = \frac{1}{2p}\).
Putting everything together, by Proposition 17 the ensembles:
\[\bar{\rho_1} = \frac{1}{T} \sum_{t=1}^T \rho_{1,t}, \quad \bar{\rho_2} = \frac{1}{T} \sum_{t=1}^T \rho_{2,t}\]
are a \(O\left(\frac{1}{T}\right)\)-approximate solution for the minimax objective.
Estimator 3 - (\(\ell_1\)-norm)
The joint estimator is
(4)\[\begin{split}\begin{aligned}
\hat\alpha, \hat\beta &:= \underset{\|\beta\|_1 \leq V_2}{\operatorname{argmin}_{\|\alpha\|_1 \leq V_1}} \underset{\|\theta_2\|_1\leq 1}{\max_{\|\theta_1\|_1\leq 1}} \left(
2\langle\mathbb{E}_n[(y-\langle\alpha, a\rangle)c'],\theta_1\rangle -\mathbb{E}_n[\langle c',\theta_1\rangle^2]+\mu'\|\alpha\|_1 \right.\\
&\left. + 2\langle\mathbb{E}_n[(\langle\alpha, a\rangle-\langle\beta, b\rangle)c],\theta_2\rangle -\mathbb{E}_n[\langle c,\theta_2\rangle^2]+\mu\|\beta\|_1
\right)
\end{aligned}\end{split}\]
This minimax problem can be reformulated as
\[\underset{\rho_2 \geq 0,\|\rho_2\|_1 \leq V_2}{\min_{\rho_1 \geq 0,\|\rho_1\|_1 \leq V_1}} \underset{\omega_2\geq 0, \|\omega_2\|_1\leq 1}{\max_{\omega_1\geq 0, \|\omega_1\|_1= 1}} \ell(\{\rho_1,\rho_2\}, \{\omega_1,\omega_2\})\]
where
\[\begin{split}\ell(\{\rho_1,\rho_2\}, \{\omega_1,\omega_2\}) :=
2\omega_1^{\top} \mathbb{E}_n[u_1 y] - 2\omega_1^{\top} \mathbb{E}_n\left[u_1 v_1^{\top}\right] \rho_1 - \omega_1^{\top} \mathbb{E}_n\left[u_1 u_1^{\top}\right]\omega_1 + \mu' \sum_{i=1}^{2 p} \rho_{1i} \\
+ 2\omega_2^{\top} \mathbb{E}_n[u_2 v_1^{\top}] \rho_1 - 2\omega_2^{\top} \mathbb{E}_n\left[u_2 v_2^{\top}\right] \rho_2 - \omega_2^{\top} \mathbb{E}_n\left[u_2 u_2^{\top}\right]\omega_2 + \mu \sum_{i=1}^{2 p} \rho_{2i}\end{split}\]
and \(v_1 = (a, -a)\), \(v_2 = (b, -b)\), \(u_1 = (c',-c')\), \(u_2 = (c,-c)\).
We state without proof, the algorithm for an approximate solution:
FTRL iterates for Estimator 3 (\(\ell_1\)-norm)
Consider the iterates for \(t=1,\ldots, T\):
\begin{aligned}
\tilde{\rho}_{1,t+1} &= \exp\left(-\frac{\eta}{V_1}\left\{\sum_{\tau=1}^{t} \bigg(-2\mathbb{E}_n[v_1u_1^{\top}]\omega_{1,\tau} + 2\mathbb{E}_n[v_1u_2^\top]\omega_{2,\tau}\bigg)\right.\right. \\
&\left.\left.-2\mathbb{E}_n[v_1u_1^{\top}]\omega_{1,t} + 2\mathbb{E}_n[v_1u_2^\top]\omega_{2,t} + (t+1)\mu'\right\}-1\right) \\
\rho_{1,t+1} &= \tilde{\rho}_{1,t+1}\min\left\{1, \frac{V_1}{\| \tilde{\rho}_{1,t+1}\|_1}\right\},\\
\tilde{\rho}_{2,t+1} &= \exp\left(-\frac{\eta}{V_2}\left\{\sum_{\tau=1}^{t} \bigg(-2\mathbb{E}_n[v_2u_2^\top]\omega_{2,\tau}\bigg)-2\mathbb{E}_n[v_2u_2^\top]\omega_{2,t}+(t+1)\mu\right\}-1\right)\\
\rho_{2,t+1} &= \tilde{\rho}_{2,t+1}\min\left\{1, \frac{V_2}{\| \tilde{\rho}_{2,t+1}\|_1}\right\},
\end{aligned}
\begin{aligned}
\tilde{\omega}_{1,t+1} &= \tilde\omega_{1,t}\exp\bigg(2\eta\left\{2\mathbb{E}_n[u_1y]-2\mathbb{E}_n[u_1v_1^\top]\rho_{1,t} - 2\mathbb{E}_n[u_1u_1^{\top}]\tilde\omega_{1,t}\right\} \\
&\qquad -\eta\left\{2\mathbb{E}_n[u_1y]-2\mathbb{E}_n[u_1v_1^\top]\rho_{1,t-1} - 2\mathbb{E}_n[u_1u_1^{\top}]\tilde\omega_{1,t-1}\right\}\bigg)\\
\omega_{1,t+1} &= \frac{\tilde{\omega}_{1,t+1}}{\|\tilde{\omega}_{1,t+1}\|_1}\\
\tilde{\omega}_{2,t+1} &= \tilde\omega_{2,t}\exp\bigg(2\eta\left\{2\mathbb{E}_n[u_2v_1^\top]\rho_{1,t}-2\mathbb{E}_n[u_2v_2^\top]\rho_{2,t} - 2\mathbb{E}_n[u_2u_2^{\top}]\tilde\omega_{2,t}\right\} \\
&\qquad -\eta\left\{2\mathbb{E}_n[u_2v_1^\top]\rho_{1,t-1}-2\mathbb{E}_n[u_2v_2^\top]\rho_{2,t-1} - 2\mathbb{E}_n[u_2u_2^{\top}]\tilde\omega_{2,t-1}\right\}\bigg)\\
\omega_{2,t+1} &= \frac{\tilde{\omega}_{2,t+1}}{\|\tilde{\omega}_{2,t+1}\|_1}
\end{aligned}
with \(\tilde\rho_{1,-1} = \tilde\rho_{1,0} = \tilde\rho_{2,-1} = \tilde\rho_{2,0}= \frac{1}{e}\) and \(\omega_{1,-1}=\omega_{1,0} = \omega_{2,-1}=\omega_{2,0}= \frac{1}{2p}\), and \(\eta =[16\max\left\{\left\|\mathbb{E}_n[v_1u_1^\top]\right\|_\infty, \left\|\mathbb{E}_n[v_1u_2^\top]\right\|_\infty, \left\| \mathbb{E}_n[v_2u_2^\top]\right\|_\infty\right\}]^{-1}\).
Then,
\begin{aligned}
\bar{\rho_1} = \frac{1}{T}\sum_{t=1}^{T}\rho_{1,t}\,,\quad \bar\alpha = \bar\rho_1^{+}-\bar\rho_1^{-} \\
\bar{\rho_2} = \frac{1}{T}\sum_{t=1}^{T}\rho_{2,t}\,,\quad \bar\beta = \bar\rho_2^{+}-\bar\rho_2^{-}
\end{aligned}
are a \(O(T^{-1})\)-approximate solution for (4).