%%% You have to change the name of the current file using your name.
%%% So if you are J. Lember, please name it as lember.tex !
%%% If you want to compile this file, please compile the file named "compile_this.tex".
%% Title of the presentation
\title{On the variance and uniqueness of longest common subsequence}
%% Authors, their affiliation and email
\authors{J\"uri Lember\inst{1} and Heinrich Matzinger\inst{2}}
\university{\inst{1} University of Tartu, Estonia, juri.lember@ut.ee\\
\inst{2} University of Bielefeld, Germany, matzing@mathematik.uni-bielefeld.de }
%% Authors' family names for the abstract index
\index{Lember}
\index{Matzinger}
\smallskip
%% keywords
\noindent {\bf Keywords}: Chvatal-Sankoff conjecture, longest common subsequence, optimal alignment, variance bound.
\bigskip
%% Abstract of the presentation
%%Please insert the abstract of your talk avoiding \newcommand definitions and figures.
%%Equations should be presented using \begin{equation*} and \end{equation*}.
%%Abstract can include references with \cite{key}.
%%Maximum length of your abstract is one page!
We consider the case, where $X$ and $Y$ are the first $n$ elements of
ergodic processes $X_1,X_2,\ldots$ and $Y_1,Y_2,\ldots$,
respectively. Then $L_n$ is a random variable that increases with
$n$, hence the ratio $L_n/n$ -- a measure of relatedness -- is of
interest. In order to make statistical inferences about the
relatedness of $X$ and $Y$, it is crucial to understand the
asymptotics of $L_n/n$ in the case when $X$ and $Y$ are independent.
Using the subaddivity, it is easy to see that in this case
\begin{equation*}
\frac{L_n}{n}\to \gamma
\end{equation*}
where $\gamma$, sometimes called Chvetal-Sankoff constant, is unknown even for the simplest case
where $X$ and $Y$ are i.i.d.~Bernoulli with parameter 0.5.
Another problem is the asymptotic behavior of the variance ${\rm
Var}(L_n)$. In their pioneering paper \cite{chvatal1975}, Chvatal and Sankoff
conjectured: if $X$ and $Y$ are iid Bernoulli,
then ${\rm Var}(L_n)=o(n^{\frac{2}{3}})$.
As shown in \cite{lkk2011}, such a lower bound is useful when
proving asymptotic results for segmentation. For a general overview about hidden Markov models, we refer to \cite{cappe2005}.
%% References of the abstract
%% If you have references, include them as follows.
%% Do not send your .bib file!
%% If you don't have any references please delete the next lines.
\begin{thebibliography}{1}
% journal article
\bibitem{chvatal1975}
Chvatal, V., Sankoff, D. (1975). Longest common subsequences of two random
sequences. \textit{Journal of Applied Probability} \textbf{14}, 306--315.
% book
\bibitem{cappe2005}
Capp\'{e}, O., Moulines, E., Ryd\'{e}n, T. (2005). \textit{Inference in Hidden Markov Models}. Springer, New York.
% book article
\bibitem{lkk2011}
Lember, J., Kuljus, K., Koloydenko, A. (2011). Theory of segmentation. In: Dymarski, P. (ed), \textit{Hidden Markov Models, Theory and Applications}. InTech, 51--84.
\end{thebibliography}