% Scale the whole diagram to the current line width, keeping the aspect ratio.
% The `%' right after the opening brace stops the end of line from becoming a
% space token inside the box that \resizebox measures and scales.
\resizebox{1.0\linewidth}{!}{%
\begin{tikzpicture} %[node distance=1.5cm, font=\sffamily]

% Node and edge styles used by the panels of this picture.
% Written with \tikzset and /.style, which replaces the deprecated \tikzstyle
% syntax; key names and values are unchanged and stay local to this picture.
% NOTE(review): the colour `dg' is not defined in this file section --
% presumably it comes from the preamble; confirm.
\tikzset{
  % Base look of every token box: small rounded rectangle with black text.
  box/.style={rectangle, minimum height=0.46cm, rounded corners=0.08cm, inner sep=2pt, text=black, font=\small},
  % Query boxes; qn is the wider variant.
  q/.style={box, minimum width=0.3cm, fill=red!30},
  qn/.style={q, minimum width=0.55cm},
  % Answer boxes; an is the wider variant.
  a/.style={box, minimum width=0.4cm, fill=orange!50!blue!20},
  an/.style={a, minimum width=0.55cm},
  % Context box.
  ctx/.style={box, minimum width=0.8cm, fill=black!20},
  % KV-cache boxes; kvn is the wider variant, derived from kv like qn and an.
  kv/.style={box, minimum width=0.65cm, fill=dg!60},
  kvn/.style={kv, minimum width=0.77cm},
  % Arrows between boxes.
  arrow/.style={-stealth, line width=0.25mm},
  % Text-only styles.
  txt/.style={font=\small},
  % NOTE: this shadows TikZ's built-in `label' key inside this picture; the
  % name is kept because nodes in this picture are styled with it.
  label/.style={font=\scriptsize},
}


% ---------------------------------------------------------------------------
% Panel (a): each query row has its own prefill + evict arrow, cache and answer.
% ---------------------------------------------------------------------------
% Top row: context and first query, the cache built from them, and the answer.
\path node[ctx] (ctx) {CTX}
      node[qn, right=0cm of ctx] (query1) {$\text{Q}_1$}
      node[kvn, right=0.8cm of query1] (kv) {$\text{KV}_1$}
      node[an, right=0.75cm of kv] (ans) {$\text{A}_1$};
% Bottom row: the same chain for the n-th query, placed below the first row.
\path node[ctx, below=0.3cm of ctx] (ctx2) {CTX}
      node[qn, right=0cm of ctx2] (query2) {$\text{Q}_n$}
      node[kvn, right=0.8cm of query2] (kv2) {$\text{KV}_n$}
      node[an, right=0.75cm of kv2] (ans2) {$\text{A}_n$};

% Arrows of the top row carry the stage names; "prefill" is stacked directly
% on top of "+ evict" by using a negative offset.
\draw[arrow] (query1) -- node[label, above] (arr1) {+ evict} (kv);
\node[label, above=-0.25cm of arr1] {prefill};
\draw[arrow] (kv) -- node[label, above] (arr2) {decode} (ans);
% Arrows of the bottom row are unlabelled.
\draw[arrow] (query2) -- (kv2);
\draw[arrow] (kv2) -- (ans2);

% Vertical ellipses between the two rows: a rotated \cdots centred below each
% arrow label (anchor=center overrides the anchor chosen by `below').
\foreach \arrowlabel in {arr1, arr2} {
  \node[below=0.36cm of \arrowlabel, rotate=90, anchor=center] {$\cdots$};
}

% Panel title and summary line, left-aligned with the context box.
\path (ctx.west |- 0,0.85) node[anchor=west] {\hspace{-0.3em}(a) Query-aware KV eviction};
\path (ctx.west |- 0,-1.38) node[label,anchor=west] {\hspace{-0.3em}\xmark\ Repetitive prefill.\hspace{0.2em} \cmark\ Good performance.};

% Dashed vertical separator, 0.52 to the right of the answer box.
\coordinate (sepA) at ($(ans.east)+(0.52, 0)$);
\draw[dashed] (sepA |- 0,-1.3) -- (sepA |- 0,0.8);


% ---------------------------------------------------------------------------
% Panel (b): one prefill + evict pass with Q_1; the resulting cache KV_1 also
% feeds the later queries Q_2 ... Q_n.
% ---------------------------------------------------------------------------
% Main chain, placed to the right of the previous panel's answer box.
\path node[ctx, right=1cm of ans] (ctx) {CTX}
      node[qn, right=0cm of ctx] (query1) {$\text{Q}_1$}
      node[kvn, right=0.8cm of query1] (kv) {$\text{KV}_1$}
      node[an, right=1.4cm of kv] (ans) {$\text{A}_1$};

% Later queries sit to the right of the cache, below the main chain.
\path node[qn, right=0.2cm of kv, yshift=-0.5cm] (query2) {$\text{Q}_2$}
      node[qn, below=0.15cm of query2] (query3) {$\text{Q}_n$};
% Their answers share the x position of A_1 and the y position of their query.
\path (query2 -| ans) node[an] (ans2) {$\text{A}_2$}
      (query3 -| ans) node[an] (ans3) {$\text{A}_n$};

% Labelled arrows of the main chain; "prefill" is stacked on "+ evict".
\draw[arrow] (query1) -- node[label, above] (arr1) {+ evict} (kv);
\node[label, above=-0.25cm of arr1] {prefill};
\draw[arrow] (kv) -- node[label, above] {decode} (ans);

% The single cache fans out to every later query.
\foreach \laterquery in {query2, query3} {
  \draw[arrow] (kv) -- (\laterquery.west);
}
% Query-to-answer arrows; the empty node arr2 only marks where the vertical
% ellipsis below is anchored.
\draw[arrow] (query2) -- node[label, above] (arr2) {} (ans2);
\draw[arrow] (query3) -- (ans3);

% Vertical ellipsis between the Q_2 and Q_n rows.
\node[below=0.3cm of arr2, rotate=90, anchor=center] {$\cdots$};

% Panel title and two summary lines, left-aligned with the context box.
\path (ctx.west |- 0,0.85) node[anchor=west] {\hspace{-0.3em}(b) Reusing query-dependent cache};
\path (ctx.west |- 0,-1.1) node[label, anchor=west] {\hspace{-0.3em}\cmark\ One-time prefill.};
\path (ctx.west |- 0,-1.38) node[label, anchor=west] {\hspace{-0.3em}\xmark\ \hspace{0.2em}Low performance.};

% Dashed vertical separator, 0.52 to the right of the answer column.
\coordinate (sepB) at ($(ans.east)+(0.52, 0)$);
\draw[dashed] (sepB |- 0,-1.3) -- (sepB |- 0,0.8);


% ---------------------------------------------------------------------------
% Panel (c), proposed framework: the cache is built from the context alone
% (no query box feeds into KV) and then serves every query.
% ---------------------------------------------------------------------------
% Context and cache; positioned relative to the previous panel's cache node.
% `right=... of' already anchors the node at its west side.
\path node[ctx, right=2.9cm of kv, yshift=-0.35cm] (ctx) {CTX}
      node[kv, right=0.8cm of ctx] (kv) {KV};
% Queries fan out from the upper and lower right corners of the cache box,
% each followed by its answer.
\path node[qn, right=0.2cm of kv.north east, yshift=0.2cm] (qfirst) {$\text{Q}_1$}
      node[qn, right=0.2cm of kv.south east, yshift=-0.2cm] (qlast) {$\text{Q}_n$}
      node[an, right=0.75cm of qfirst] (afirst) {$\text{A}_1$}
      node[an, right=0.75cm of qlast] (alast) {$\text{A}_n$};

% Vertical ellipsis between the two query rows (rotated \cdots, centred).
\node[right=1.1cm of kv, rotate=90, anchor=center] {$\cdots$};

% Prefill + evict goes straight from the context to the cache.
\draw[arrow] (ctx) -- node[label, above] (arr1) {+ evict} (kv);
\node[label, above=-0.25cm of arr1] {prefill};
% The shared cache feeds each query, and each query decodes to its answer.
\foreach \querynode in {qfirst, qlast} {
  \draw[arrow] (kv) -- (\querynode.west);
}
\draw[arrow] (qfirst) -- node[label, above=0cm] {decode} (afirst);
\draw[arrow] (qlast) -- (alast);

% Panel title and two summary lines, left-aligned with the context box.
\path (ctx.west |- 0,0.85) node[anchor=west] {\hspace{-0.3em}(c) Proposed framework};
\path (ctx.west |- 0,-1.1) node[label, anchor=west] {\hspace{-0.3em}\cmark\ One-time prefill.};
\path (ctx.west |- 0,-1.38) node[label, anchor=west] {\hspace{-0.3em}\cmark\ Good performance.};


% The `%' after \end{tikzpicture} keeps the end-of-line space out of the box
% that the enclosing \resizebox scales.
\end{tikzpicture}%
}