\usemodule  [bib]
\definealternativestyle [emph]     [\em]     []


\setupblank       [line]
\setupwhitespace  [small]
\setupindenting   [medium,yes]
\setupinterlinespace    [auto,medium]

\setuppapersize [letter]
                [letter]

\setuplayout[
                  width=middle,
                 height=middle,
              %location=middle,
                topspace=1.0in,
              bottomspace=.5in,
          bottomdistance=0in,
                   bottom=.5in,
               backspace=1.5in,
                cutspace=1.0in,
              leftmargin=1.0in,
             rightmargin=0.5in,
      leftmargindistance=0.1in,
     rightmargindistance=0.1in,
                  header=0.0in,
                 footer=0.25in,
           headerdistace=0.0in,
         footerdistance=0.25in,
                    marking=on,
                      grid=no,
    ]

\setupformulas
  [   indentnext=auto,
     spacebefore=none,
  ]

\usemodule  [mathsets]
\definemathset[EXP] [text={E}]
\definemathset[PR]  [text=\Pr,left=(,right=)]

\unexpanded\def\stackrel#1#2%
    {\mathrel{\mathop{#2}\limits^{#1}}}

%D Theorems Setup <<<

\definetextbackground
    [theoremframe]
    [           mp=background:random,
          location=paragraph,
     rulethickness=1pt,
             width=broad,
        leftoffset=1em,
       rightoffset=1em,
    	framecolor=black,
            before={\testpage[3]\blank[big]},
             after={\blank[big]}
    ]
\startuseMPgraphic{background:random}
   path p;
   for i = 1 upto nofmultipars :
    p = (multipars[i]
     topenlarged 10pt
     bottomenlarged 10pt) randomized 4pt ;
   % fill p withcolor lightgray ;
   draw p withcolor \MPvar{linecolor}
    withpen pencircle scaled \MPvar{linewidth};
   endfor;
\stopuseMPgraphic

\setupenumerations
  [        title=yes,
           stopper=.,
    location=serried,
         width=broad,
        style=normal,
   titledistance=1ex,
      distance=0.5em,
      indentnext=yes,
       indenting=yes,
       way=bychapter,
    before=\starttheoremframe,
    after=\stoptheoremframe,
  ]

\defineenumeration [problem]    [text=Problem]
\defineenumeration [definition] [text=Definition]
\defineenumeration [theorem]    [text=Theorem]
\defineenumeration [lemma]      [text=Lemma]
\defineenumeration [corollary]  [text=Corollary]

\defineenumeration
  [proof]
  [       text=Proof,
     headstyle=italic,
    titlestyle=italic,
      distance=1ex,
         style=normal,
        number=no,
     titleleft=,
    titleright=,
       stopper=.,
        before=\blank,
         after=\blank,
   closesymbol=\math{\square},
  ]

% >>>


\def\IE{i.e.}
\def\EG{e.g.}


\setupcolors[state=start]

\swapmacros{\phi}{\varphi}

\def\ALPHABET#1{{\cal  #1}}
\def\FSPACE  #1{{\cal  #1}}
\let\FIELD   \fraktur

\def\1#1{\presuper{1}{#1}}
\def\2#1{\presuper{2}{#1}}
\def\3#1{\presuper{3}{#1}}
\def\4#1{\presuper{4}{#1}}
\def\SOME#1{\presuper{i}{#1}}

\let\DEFINED=\colonequals
\let\BYDEFINITION=\equalscolon
\let\ESTIMATE=\hat
\def\COST{{\cal J}}

\def\presuper#1#2%
  {\mathop{}%
   \mathopen{\vphantom{#2}}^{#1}%
   \kern-\scriptspace%
   #2}
\abbreviation {RHS} {right hand side}

\def\DESIGN
  {\dosingleempty\doDESIGN}

\def\doDESIGN[#1]%
  {\doifelsenothing{#1}
    {\math{G^1, L^1, G^2, L^2}}
    {\math{G^{1,#1},L^{1,#1}, G^{2,#1},L^{2,#1}}}}

\def\STAGE#1{\1 #1$, $\2 #1$, $\3 #1$ and $\4 #1}

\starttext

Observe that by definition $\SOME {π}_t$ satisfies (S1). Part~1 of \in
Lemma[lem:info-states] shows that they satisfy (S2); part~2 shows that they
satisfy (S3). (S4) is satisfied by definition. Next we show how to obtain a
sequential decomposition using these information states. 

\subsubject[sec:equivalent] An equivalent optimization problem

Consider a centralized deterministic optimization problem with state space
alternating between $\STAGE {{Π}}$ and action space alternating between $\FSPACE
G^1_t$, $\FSPACE L^1_t$, $\FSPACE G^2_t$, and $\FSPACE L^2_t$. The system
dynamics are given by~\in[eq:transformations] and at each stage $t$ the
decision rules $g^1_t$, $l^1_t$, $g^2_t$, and $l^2_t$ are determined according
to \emph{meta|-|functions} or \emph{meta|-|rules} $\STAGE {{Δ}_t}$, where $\1
{Δ}_t$ is a function from $\1 {Π}$ to $\FSPACE G^1_t$, $\2 {Δ}_t$ is a function
from $\2 {Π}$ to $\FSPACE L^1_t$, $\3 Δ_t$ is a function from $\3 Π_t$ to
$\FSPACE G^2_t$, and  $\4 {Δ}_t$ is a function from $\4 {Π}$ to $\FSPACE L^2_t$.
Thus the system equations~\in[eq:transformations] can be written as
\startsubformulas[eq:system equations]
  \placeformula \startformula \startalign[m=2, distance=3em]
      \NC g^1_t        \EQ \1 {Δ}_t(\1 {π}_t),
      \NC \2 {π}_t     \EQ \1 Q(g^1_t) \1 {π}_t, \NR[+]
      \NC l^1_t        \EQ \2 {Δ}_t(\2 {π}_t),
      \NC \3 {π}_t     \EQ \2 Q(l^1_t) \2 {π}_t, \NR[+]
      \NC g^2_t        \EQ \3 {Δ}_t(\3 {π}_t),
      \NC \3 {π}_t     \EQ \3 Q(g^2_t) \1 {π}_t, \NR[+]
      \NC l^2_t        \EQ \4 {Δ}_t(\4 {π}_t),
      \NC \1 {π}_{t+1} \EQ \4 Q(l^2_t) \4 {π}_t. \NR[+]
  \stopalign \stopformula 
\stopsubformulas
The initial state $\1 {π}_1 = \PR{X_1, Y_1}$ is given (in terms of $P_{X_1}$ and
$P_{N^1_1}$).  An instantaneous cost $\ESTIMATE ρ_t(\4 {π}_t)$ is incurred at
each stage. The choice $(\1 {Δ}_1, \2 {Δ}_1, \3 {Δ}_1, \4 Δ_t,\allowbreak
\dots, \allowbreak \1 {Δ}_T, \2 {Δ}_T, \3 {Δ}_T, \4 Δ_T)$ is called a
\emph{meta|-|strategy} or a \emph{meta|-|design} and denoted by ${Δ}^T$. The
performance of a meta||strategy is given by the total cost incurred by that
meta|-|strategy, i.e.,
\placeformula[eq:cost meta-strategy] \startformula
  \COST_T({Δ}^T (\1 {π}_1) = ∑_{t=1}^T \ESTIMATE ρ_t(\4 {π}_t).
\stopformula

\blank[medium] \noindentation
Now consider the following optimization problem:

\startproblem[prob:deterministic]
  Consider the dynamic system~\in[eq:system equations] with known
  transformations $\STAGE {Q_t}$. The initial state $\1 {π}_1$ is given.
  Determine a meta||strategy ${Δ}^T$ to minimize the total cost given
  by~\in[eq:cost meta-strategy].
\stopproblem

Given any meta|-|strategy ${Δ}^T$, the time evolution of $\SOME {π}_t$ is
deterministic; $\SOME {π}_t$ and the corresponding $\SOME {φ}_t$ can be determined
from~\in[eq:system equations]. Thus, for a given initial states $\1 {π}_t$,
there is a communication strategy corresponding to any choice of
meta|-|strategy. Further, we can rewrite the performance criterion
of~\in[eq:cost] as 
\placeformula[eq:information cost] \startformula \startalign
  \NC \COST_T(\DESIGN) \EQ \EXP{∑_{t=1}^T ρ_t(X_t, U^1_t, U^2_t) | \DESIGN }
      \NR
  \NC \NC \stackrel{(a)}= ∑_{t=1}^T \EXP{ ρ_t(X_t, U^1_t, U^2_t) | \1
      {φ}^{t+1}} \NR
  \NC \NC \stackrel{(b)}= ∑_{t=1}^T \ESTIMATE ρ_t(\4 {π}_t) \NR
  \NC \NC \BYDEFINITION \COST_T({Δ}^T | \1 {π}_1) \NR[+]
\stopalign \stopformula
where $(a)$ follows from the sequential ordering of system variables and $(b)$
follows from \in Lemma[lem:info-states]. Thus, if ${Δ}^{*,T}$ is an optimal
meta|-|strategy for \in Problem[prob:deterministic] then the design
$(\DESIGN[*])$ corresponding to ${Δ}^{*,T}$ is an optimal design for \in
Problem[prob:two-agent]. Hence, \in Problem[prob:deterministic] is equivalent to
\in Problems[prob:two-agent]. Now we provide an algorithm to determine an
optimal meta|-|strategy for \in Problem[prob:deterministic].

\subsubject[sec:DP] The global optimization algorithm 

\in Problem[prob:deterministic] can be formulated as a classical centralized
optimization problem by considering the information state $\SOME {π}_t$ is the
\quotation{controlled state} at time $\SOME t$, the decision rule $\SOME {φ}_t$
($g^1_t$, $l^1_t$, $g^2_t$, or $l^2_t$ depending on $i$) as the
\quotation{control action} (or decision) at time $\SOME t$, and the
meta|-|function $\SOME {Δ}_t$ as the \quotation{control law} at time $\SOME t$.
Hence, an optimal meta|-|strategy for \in Problem[prob:deterministic] is given
by the optimal \quotation{control strategy} of the centralized optimization
problem and can be determined as follows:

\starttheorem[thm:DP] {Global optimization algorithm}
  An optimal meta|-|strategy ${Δ}^{*,T}$ for \in Problem[prob:deterministic]
  (and consequently an optimal communication strategy for \in
  Problem[prob:two-agent]) can be determined by the solution of the following
  nested optimality equations. For all $\SOME {π_t} \in \SOME {Π_t}$, $i = 1$,
  $2$, $3$, $4$, $t = 1,\dots,T$, define
  \startsubformulas[eq:DP]
  \placeformula \startformula \startalign 
    \NC \1 V_{T+1}(\1 {π}_T) \EQ 0, \NR[+]
  \intertext{and for $t=1,\dots, T$}
    \NC \1 V_t(\1 {π}_t) \EQ \inf_{g^1_t \in \FSPACE G^1_t} 
        \2 V_t\big(\1 Q_t(g^1_t) \1 {π}_t \big),              \NR[+]
    \NC \2 V_t(\2 {π}_t) \EQ \inf_{l^1_t \in \FSPACE L^1_t} 
        \3 V_t\big(\2 Q_t(l^1_t) \2 {π}_t \big),              \NR[+]
    \NC \3 V_t(\3 {π}_t) \EQ \inf_{g^2_t \in \FSPACE G^2_t} 
        \4 V_t\big(\3 Q_t(g^2_t) \3 {π}_t \big),              \NR[+]
    \NC \4 V_t(\4 {π}_t) \EQ \ESTIMATE ρ_t(\4 π_t) + 
        \inf_{l^2_t \in \FSPACE L^2_t} 
        \1 V_{t+1}\big(\4 Q_t(l^2_t) \4 {π}_t \big).              \NR[+]
  \stopalign \stopformula
  \stopsubformulas
  The functions $\SOME V_t$ are called \emph{value functions}; they represent
  the minimum expected future cost that the system in state $\SOME {π}$ will
  incur from time $\SOME t$ onwards. These value functions can be determined
  iteratively by moving backwards in time. The optimal performance of \in
  Problem[prob:deterministic] (and \in Problem[prob:two-agent]) is given by
  \placeformula[+] \startformula
    \COST^*_T = \1 V_1(\1 {π}_1).
  \stopformula
  For any $\SOME t$ and $\SOME {π}$, the $\arg\min$ (or $\arg\inf$) in the
  \RHS\ of $\SOME V_t(\SOME {π})$ equals to the optimal value of the
  meta|-|function $\SOME {Δ}_t (\SOME {π}_t)$. Thus, solving  for the value
  functions for all values of the information state also determines an optimal
  meta|-|strategy ${Δ}^{*,T}$ for \in Problem[prob:deterministic].
  Relations~\in[eq:system equations] can be used to determine optimal
  communication strategy for \in Problem[prob:two-agent]. 
\stoptheorem

\startproof
  This is a standard result for a deterministic optimization problem,
  see~\cite[extras={, Chapter~2}][KumarVaraiya:1986].
\stopproof

Observe that the four step $T$|-|stage sequential decomposition
of~\in[eq:DP] can be combined into a one|-|step $T$|-|stage sequential
decomposition
\placeformula[eq:one step] \startformula \startalign
  \NC 
  \1 V_t(\1 {π}_t) = 
    \smash{\inf_{ \docramped\scriptstyle{\startsubstack 
             \NC g^1_t \in \FSPACE G^1_t \NR 
             \NC l^1_t \in \FSPACE L^1_t \NR
             \NC g^2_t \in \FSPACE G^2_t \NR 
             \NC l^2_t \in \FSPACE L^2_t \NR
           \stopsubstack}}}
    \Bigg[ \NC
      \hat {ρ}_t \bigg( \big( 
              \3 Q_t(g^2_t) ∘ \2 Q_t(l^1_t) ∘ \1 Q_t(g^1_t)\bigg) \1 π_t
                      \bigg) 
            \NR
        \NC \NC \quad
          + \1 V_{t+1} \bigg( 
          \Big( \4 Q_t(l^2_t) ∘ \3 Q_t(g^2_t) ∘ \2 Q_t(l^1_t) ∘ \1 Q_t(g^1_t)
          \Big) \1 π_t 
          \bigg) 
         \Bigg]  \NR[+]
 \stopalign \stopformula
which is a deterministic dynamic program in function spade. We present a finer
decomposition in \in Theorem[thm:DP] which corresponds to the refinement of
time presented in \in Figure[fig:sequential-two-agent]; the decomposition given
by~\in[eq:DP] has a smaller search space than the decomposition given
in~\in[eq:one step].
        
\section[sec:homogeneous] The time homogeneous case

In many scenarios  the system is time|-|homogeneous, \IE, the spaces $\ALPHABET
X_t$, $\ALPHABET Y^1_t$, $\ALPHABET Y^2_t$, $\ALPHABET S^1_t$, $\ALPHABET
S^2_t$, $\ALPHABET U^1_t$, $\ALPHABET U^2_t$, $\ALPHABET W_t$, $\ALPHABET
N^1_t$, and $\ALPHABET N^2_t$, the noise statistics $P_{W_t}$, $P_{N^1_t}$ and
$P_{N^2_t}$, the plant function $f_t(⋅)$, the observation functions $h^1_t(⋅)$
and $h^2_t(⋅)$, and the cost function $ρ_t(⋅)$ do not depend on time $t$. If the
system of \in Problem[prob:two-agent] is time|-|homogeneous, some of the results
derived in the previous section can be simplified. The spaces $\FSPACE G^1_t$,
$\FSPACE L^1_t$, $\FSPACE G^2_t$, $\FSPACE L^2_t$, $\STAGE {Π_t}$ do not depend
on time; we can drop the subscript $t$ and simply denote them by $\FSPACE G^1$,
$\FSPACE L^1$, $\FSPACE G^2$, $\FSPACE L^2$, $\STAGE {Π}$, respectively.
Furthermore, the transformations $\STAGE {Q_t}$ and the function $\ESTIMATE ρ_t$
of \in Lemma[lem:info-states] do not depend on $t$; thus, we can denote them by
$\1 Q$, $\2 Q$, $\3 Q$, $\4 Q$ and $\ESTIMATE ρ$, respectively. Hence \in
Problem[prob:deterministic] reduces to a time|-|homogeneous problem|<|the state
space, the action space, the system update equations, and the instantaneous
distortion do not depend on $t$. Hence we can simplify \in Theorem[thm:DP] as
follows.

\startcorollary
  If the system of \in Problem[prob:two-agent] is time|-|homogeneous, the nested
  optimality equations~\in[eq:DP] can be written as
  \startsubformulas
  \placeformula \startformula \startalign 
    \NC \1 V_{T+1}(\1 {π}) \EQ 0, \NR[+]
  \intertext{and for $t=1,\dots, T$}
    \NC \1 V_t(\1 {π}) \EQ \inf_{g^1_t \in \FSPACE G^1} 
        \2 V_t\big(\1 Q(g^1_t) \1 {π} \big),              \NR[+]
    \NC \2 V_t(\2 {π}) \EQ \inf_{l^1_t \in \FSPACE L^1} 
        \3 V_t\big(\2 Q(l^1_t) \2 {π} \big),              \NR[+]
    \NC \3 V_t(\3 {π}) \EQ \inf_{g^2_t \in \FSPACE G^2} 
        \4 V_t\big(\3 Q(g^2_t) \3 {π} \big),              \NR[+]
    \NC \4 V_t(\4 {π}) \EQ \ESTIMATE ρ(\4 π) + 
        \inf_{l^2_t \in \FSPACE L^2} 
        \1 V_{t+1}\big(\4 Q(l^2_t) \4 {π} \big).              \NR[+]
  \stopalign \stopformula
  \stopsubformulas

\stopcorollary

Notice that in the above equations $\1 Q$, $\2 Q$, $\3 Q$, $\4 Q$, and $\hat {ρ}$ do not depend
on $t$. 


\stoptext