\begin{tikzpicture}

% Shared style for the value-annotation nodes used in the plots of this
% picture: tiny black text, anchored at its south edge, shifted by -1pt in y.
% Defined with \tikzset because \tikzstyle is deprecated; the resulting style
% is the same.
% NOTE(review): the name `label` coincides with TikZ's built-in `label` node
% option and hides it inside this picture. The name is kept because the
% annotation nodes in this picture refer to the style by this name.
\tikzset{label/.style={font=\tiny, anchor=south, black, yshift=-1pt}}

\begin{groupplot}[
    % Arrangement of the group: four columns with a fixed horizontal gap.
    group style={
        columns=4,
        horizontal sep=1.15cm,
        vertical sep=0cm,
    },
    % Defaults shared by every plot in the group: bar charts of a fixed size,
    % drawn with thick outlines.
    ybar,
    width=3.85cm,
    height=3.7cm,
    every axis plot/.append style={thick},
    % Axis labels: pulled slightly towards the tick labels, footnote-sized.
    xlabel shift=-0.1cm,
    ylabel shift=-0.12cm,
    xlabel near ticks,
    ylabel near ticks,
    label style={font=\footnotesize},
    % Grid: dashed horizontal major grid lines only.
    ymajorgrids=true,
    major grid style={dashed},
    % Ticks: small tick labels, tick marks on the left/bottom side only.
    tick label style={font=\scriptsize},
    tick pos=left,
]

% First plot (group c1r1): bars from the `decoding` column against the
% `ratio` column of data/profile.csv.
\nextgroupplot[
    xlabel={KV cache ratio},
    ylabel={Attention latency (ms)},
    % Bar geometry and horizontal padding around the outermost bars
    bar width=4pt,
    enlarge x limits={abs=0.25cm},
    % Explicit ticks, printed with one fixed decimal on both axes
    xtick={0.2, 0.4, 0.6, 0.8, 1.0},
    ytick={0.0, 0.1, 0.2, 0.3, 0.4},
    x tick label style={/pgf/number format/.cd, fixed, fixed zerofill, precision=1},
    y tick label style={/pgf/number format/.cd, fixed, fixed zerofill, precision=1},
    % Vertical range
    ymin=0, ymax=0.45,
]
\addplot table[x=ratio, y=decoding, col sep=comma]{data/profile.csv};
% Hand-placed value annotations, one per x position. The numbers are written
% out here rather than read from the CSV, so they must be kept in sync with
% the data file manually.
\node[label] at (axis cs:0.2,0.17) {0.17};
\node[label] at (axis cs:0.4,0.22) {0.22};
\node[label] at (axis cs:0.6,0.27) {0.27};
\node[label] at (axis cs:0.8,0.34) {0.34};
\node[label] at (axis cs:1.0,0.39) {0.39};

% Second plot (group c2r1): bars from the `kvcache` column against the
% `ratio` column of data/profile.csv, drawn in translucent red.
\nextgroupplot[
    xlabel={KV cache ratio},
    ylabel={KV memory (GB)},
    % Bar geometry and horizontal padding around the outermost bars
    bar width=4pt,
    enlarge x limits={abs=0.25cm},
    % Explicit x ticks with one decimal; y tick labels printed as integers
    xtick={0.2, 0.4, 0.6, 0.8, 1.0},
    x tick label style={/pgf/number format/.cd, fixed, fixed zerofill, precision=1},
    y tick label style={/pgf/number format/.cd, fixed, fixed zerofill, precision=0},
    % Vertical range
    ymin=0, ymax=19,
]
\addplot[draw=red, fill=red, fill opacity=0.3]
    table[x=ratio, y=kvcache, col sep=comma]{data/profile.csv};
% Hand-placed value annotations, one per x position. The numbers are written
% out here rather than read from the CSV, so they must be kept in sync with
% the data file manually.
\node[label] at (axis cs:0.2,3.3) {3.3};
\node[label] at (axis cs:0.4,6.5) {6.5};
\node[label] at (axis cs:0.6,9.8) {9.8};
\node[label] at (axis cs:0.8,13.1) {13.1};
\node[label] at (axis cs:1.0,16.3) {16.3};



% Third plot (group c3r1): bars from the `time` column against the `chunk`
% column of data/profile.csv. The x axis is categorical (symbolic coords).
\nextgroupplot[
    % Extra horizontal offset applied to this plot
    xshift=0.36cm,
    xlabel={Repeat chunk size},
    ylabel={Compute time (s)},
    ylabel shift=-0.2cm,
    % Bar geometry and horizontal padding around the outermost bars
    bar width=4pt,
    enlarge x limits={abs=0.25cm},
    % Categorical x positions, displayed with a "k" suffix
    symbolic x coords={0.5, 1, 2, 4, 8},
    xtick={0.5, 1, 2, 4, 8},
    xticklabels={0.5k, 1k, 2k, 4k, 8k},
    % Vertical ticks and range
    ytick={0, 20, 40, 60, 80, 100},
    ymin=0.0,
    ymax=110,
]
\addplot table[x=chunk, y=time, col sep=comma]{data/profile.csv};
% Hand-placed value annotations, one per x position. The numbers are written
% out here rather than read from the CSV, so they must be kept in sync with
% the data file manually. The first one carries an extra vertical nudge.
\node[label, yshift=-0.5pt] at (axis cs:0.5,95.8) {95.8};
\node[label] at (axis cs:1,75.4) {75.4};
\node[label] at (axis cs:2,65.9) {65.9};
\node[label] at (axis cs:4,71.9) {71.9};
\node[label] at (axis cs:8,87.2) {87.2};

% Dashed horizontal reference line at y=31.3 across the full axis width
% (presumably the prefill baseline, judging by the commented-out label
% below -- confirm).
% Each endpoint takes its x from an axis edge (rel axis cs) and its y from the
% data coordinate through the perpendicular `|-` syntax, so the line is one
% straight path with a continuous dash pattern. The earlier version anchored
% the ends at 0.3 of the axis height, which only approximates y=31.3
% (31.3/110 is about 0.285) and produced a small vertical jog at both edges.
\draw[line width=1.3pt, dashed]
    ({rel axis cs:0,0} |- {axis cs:0.5,31.3}) --
    ({rel axis cs:1,0} |- {axis cs:0.5,31.3});

% \node[anchor=south east, font=\footnotesize] at (axis description cs:0.99,0.26) {prefill};


% Fourth plot (group c4r1): bars from the `memory` column against the `chunk`
% column of data/profile.csv, drawn in translucent red. The x axis is
% categorical (symbolic coords).
\nextgroupplot[
    xlabel={Repeat chunk size},
    ylabel={Peak memory (GB)},
    % Bar geometry and horizontal padding around the outermost bars
    bar width=4pt,
    enlarge x limits={abs=0.25cm},
    % Categorical x positions, displayed with a "k" suffix
    symbolic x coords={0.5, 1, 2, 4, 8},
    xtick={0.5, 1, 2, 4, 8},
    xticklabels={0.5k, 1k, 2k, 4k, 8k},
    % Vertical range
    ymin=0.0,
    ymax=44.8,
]
\addplot[draw=red, fill=red, fill opacity=0.3]
    table[x=chunk, y=memory, col sep=comma]{data/profile.csv};
% Hand-placed value annotations, one per x position. The numbers are written
% out here rather than read from the CSV, so they must be kept in sync with
% the data file manually. Some carry an extra vertical nudge.
\node[label] at (axis cs:0.5,30.5) {30.5};
\node[label, yshift=0.5pt] at (axis cs:1,30.7) {30.7};
\node[label, yshift=1pt] at (axis cs:2,31.1) {31.1};
\node[label, yshift=0.5pt] at (axis cs:4,32.5) {32.5};
\node[label] at (axis cs:8,38.8) {38.8};

% Dashed horizontal reference line at y=30.1 across the full axis width
% (presumably the prefill baseline, judging by the commented-out label
% below -- confirm).
% Each endpoint takes its x from an axis edge (rel axis cs) and its y from the
% data coordinate through the perpendicular `|-` syntax, so the line is one
% straight path with a continuous dash pattern. The earlier version anchored
% the ends at 0.7 of the axis height, which only approximates y=30.1
% (30.1/44.8 is about 0.672) and produced a small vertical jog at both edges.
\draw[line width=1.3pt, dashed]
    ({rel axis cs:0,0} |- {axis cs:0.5,30.1}) --
    ({rel axis cs:1,0} |- {axis cs:0.5,30.1});

% \node[anchor=south east, font=\footnotesize] at (axis description cs:0.99,0.5) {prefill};
\end{groupplot}


% Panel titles: each is centred on the midpoint between the north anchors of
% two neighbouring plots, lifted by 0.25cm, with the text sitting on that point.
\path ($(group c1r1.north)!0.5!(group c2r1.north)+(0,0.25cm)$)
    node[align=center, anchor=south] (title1) {(a) Inference efficiency (decoding)};
\path ($(group c3r1.north)!0.5!(group c4r1.north)+(0,0.25cm)$)
    node[align=center, anchor=south] (title2) {(b) Compression overhead};

\end{tikzpicture}
