// scenes-13.jsx — "What it takes to run a model" + "Why prices differ" (cost physics)

function GpuCard({ x, y, fill, label, o = 1, over = false }) {
  const w = 132, h = 168, col = over ? COLORS.coral : COLORS.green;
  return (
    <div style={{ position:'absolute', left:x, top:y, width:w, height:h, opacity:o, transform:`translateY(${(1-o)*16}px)`,
      background:COLORS.bgPanel, border:`1.8px solid ${COLORS.inkFaint}`, borderRadius:12, padding:10, transition:'none' }}>
      <div style={{ fontFamily:FONTS.mono, fontSize:14, color:COLORS.inkDim, textAlign:'center', marginBottom:8 }}>{label}</div>
      <div style={{ position:'relative', width:'100%', height:h-58, background:'#0a0d16', borderRadius:7, overflow:'hidden', border:`1px solid ${COLORS.inkFaint}55` }}>
        <div style={{ position:'absolute', bottom:0, left:0, right:0, height:`${clamp(fill,0,1)*100}%`,
          background:`linear-gradient(180deg, ${col}, ${col}aa)`, transition:'none' }} />
        <div style={{ position:'absolute', inset:0, display:'flex', alignItems:'center', justifyContent:'center',
          fontFamily:FONTS.mono, fontSize:13, color:'#fff', textShadow:'0 1px 3px #000' }}>{Math.round(clamp(fill,0,1)*80)} GB</div>
      </div>
    </div>
  );
}

// ════════════════════════════════════════════════════════════════════════════
// SCENE — WHAT IT TAKES TO RUN A MODEL  (length 22s)
// ════════════════════════════════════════════════════════════════════════════
function ScenePhysicsRun() {
  const { localTime: lt } = useSprite();
  const setup = ramp(lt, 0.2, 1.0);

  // weight grid (left)
  const gx = 130, gy = 300, gc = 14, gr = 9, cell = 22, gap = 6;
  const fillGrid = ramp(lt, 1.6, 4.6);
  const wCount = Math.round(70 * ramp(lt, 1.8, 4.4));   // billions

  // gpu cards (center)
  const gpuPhase = lt > 5.0;
  const card1 = clamp(ramp(lt, 5.4, 7.0), 0, 1);        // → 80GB
  const card2 = clamp(ramp(lt, 6.6, 8.2), 0, 1) * 0.75; // → 60GB (140 total)

  // per-token compute
  const tokPhase = lt > 9.0;
  // kv cache callback
  const kvPhase = lt > 13.0;
  const costPhase = lt > 17.0;

  return (
    <>
      <Bg accent={COLORS.blue} />
      <Eyebrow lt={lt} a={0.4} b={22} n="17" label="What it takes to run a model" color={COLORS.blue} />

      {/* weights grid */}
      <div style={{ position:'absolute', left:gx, top:gy-44, opacity:setup, fontFamily:FONTS.mono, fontSize:17, letterSpacing:'0.1em', textTransform:'uppercase', color:COLORS.inkDim }}>the weights</div>
      <div style={{ position:'absolute', left:gx, top:gy, display:'grid', gridTemplateColumns:`repeat(${gc}, ${cell}px)`, gap, opacity:setup }}>
        {Array.from({length:gc*gr}).map((_,i)=>{
          const on = i < gc*gr*fillGrid;
          return <div key={i} style={{ width:cell, height:cell, borderRadius:4, background: on?COLORS.blue+'cc':COLORS.blue+'1f', border:`1px solid ${COLORS.blue}55` }} />;
        })}
      </div>
      <div style={{ position:'absolute', left:gx, top:gy+gr*(cell+gap)+14, opacity:fillGrid, width:gc*(cell+gap) }}>
        <div style={{ fontFamily:FONTS.math, fontSize:46, fontWeight:700, color:COLORS.blue, lineHeight:1 }}>{wCount}B</div>
        <div style={{ fontFamily:FONTS.sans, fontSize:18, color:COLORS.inkDim }}>weights — matrices, not magic</div>
      </div>

      {/* GPU cards */}
      {gpuPhase && (
        <>
          <div style={{ position:'absolute', left:760, top:gy-44, opacity:ramp(lt,5.0,5.6), fontFamily:FONTS.mono, fontSize:17, letterSpacing:'0.1em', textTransform:'uppercase', color:COLORS.inkDim }}>loaded into GPU memory</div>
          <GpuCard x={760} y={gy} fill={card1} label="H100 · 80GB" o={ramp(lt,5.2,5.9)} />
          <GpuCard x={912} y={gy} fill={card2} label="H100 · 80GB" o={ramp(lt,6.4,7.1)} />
          <div style={{ position:'absolute', left:760, top:gy+185, opacity:ramp(lt,7.0,7.8), width:284, textAlign:'center',
            fontFamily:FONTS.math, fontSize:24, color:COLORS.ink }}>
            70B × 2 bytes = <b style={{color:COLORS.blue}}>140 GB</b>
            <div style={{ fontFamily:FONTS.sans, fontSize:17, color:COLORS.inkDim, marginTop:4 }}>frontier models → a whole cluster</div>
          </div>
        </>
      )}

      {/* per-token compute / kv */}
      {tokPhase && (
        <div style={{ position:'absolute', left:1240, top:gy-10, opacity:pulse(lt,9.0,17.0,0.4), width:560 }}>
          <div style={{ fontFamily:FONTS.sans, fontSize:21, color:COLORS.ink, marginBottom:10 }}>Every <b style={{color:COLORS.yellow}}>token</b> is multiplied through <b>all</b> of them:</div>
          <div style={{ fontFamily:FONTS.math, fontSize:38, color:COLORS.yellow }}>≈ 140 GFLOP <span style={{fontSize:24, color:COLORS.inkDim}}>/ token</span></div>
          {kvPhase && (
            <div style={{ marginTop:26, opacity:ramp(lt,13.2,13.9), borderTop:`1px solid ${COLORS.inkFaint}55`, paddingTop:18 }}>
              <div style={{ fontFamily:FONTS.sans, fontSize:20, color:COLORS.purple }}>…and the <b>KV cache</b> for your context eats VRAM too —</div>
              <div style={{ fontFamily:FONTS.sans, fontSize:18, color:COLORS.inkDim, marginTop:4 }}>longer context literally needs more GPU.</div>
            </div>
          )}
        </div>
      )}

      {/* cost chain */}
      {costPhase && (
        <div style={{ position:'absolute', left:1240, top:gy+250, opacity:pulse(lt,17.2,22,0.4), width:580 }}>
          <div style={{ fontFamily:FONTS.math, fontSize:26, color:COLORS.ink }}>
            <span style={{color:COLORS.coral}}>$2–4 / GPU-hour</span> × GPU-seconds ÷ tokens
          </div>
          <div style={{ fontFamily:FONTS.sans, fontSize:19, color:COLORS.inkDim, marginTop:6 }}>= the price per token. You're renting GPU time, sliced thin.</div>
        </div>
      )}

      <Caption lt={lt} a={1.2} b={4.8}>
        A model is just <b style={{color:COLORS.blue}}>billions of numbers</b> — the trained weights.
      </Caption>
      <Caption lt={lt} a={5.0} b={8.8}>
        To run, every weight must sit in <b>GPU memory</b> — big models need many GPUs.
      </Caption>
      <Caption lt={lt} a={9.0} b={12.8} color={COLORS.yellow}>
        And each token is multiplied through <b>all</b> of them — that's the compute you rent.
      </Caption>
      <Caption lt={lt} a={13.2} b={17.0} color={COLORS.purple}>
        Context isn't free either — its KV cache takes VRAM that could serve other users.
      </Caption>
      <Caption lt={lt} a={17.2} b={22} color={COLORS.coral}>
        GPUs cost dollars an hour — that hourly rate, divided by tokens, is your price.
      </Caption>
    </>
  );
}

// ════════════════════════════════════════════════════════════════════════════
// SCENE — WHY PRICES DIFFER  (length 23s)
// ════════════════════════════════════════════════════════════════════════════
function ScenePhysicsWhy() {
  const { localTime: lt } = useSprite();
  const setup = ramp(lt, 0.2, 1.0);

  // input vs output lanes
  const inN = 10, outN = 6;
  const inLight = ramp(lt, 1.8, 2.6);                    // all at once
  const outAt = (i) => 3.4 + i*0.5;
  const nOut = Array.from({length:outN}).filter((_,i)=> lt > outAt(i)).length;

  const spectrumPhase = lt > 8.4;
  const moePhase = lt > 12.6;
  const recapPhase = lt > 17.6;

  const laneY1 = 250, laneY2 = 400, lx = 150, sq = 38, sgap = 10;

  return (
    <>
      <Bg accent={COLORS.coral} />
      <Eyebrow lt={lt} a={0.4} b={23} n="18" label="Why prices differ" color={COLORS.coral} />

      {/* INPUT lane */}
      <div style={{ position:'absolute', left:lx, top:laneY1-38, opacity:setup, fontFamily:FONTS.mono, fontSize:18, color:COLORS.blue, letterSpacing:'0.08em' }}>INPUT · prefill</div>
      <div style={{ position:'absolute', left:lx, top:laneY1, display:'flex', gap:sgap }}>
        {Array.from({length:inN}).map((_,i)=>(
          <div key={i} style={{ width:sq, height:sq, borderRadius:6, background:COLORS.blue+(inLight>0.5?'cc':'22'), border:`1.4px solid ${COLORS.blue}`, opacity:setup }} />
        ))}
      </div>
      {inLight>0.4 && (
        <div style={{ position:'absolute', left:lx, top:laneY1+sq+10, width:inN*(sq+sgap)-sgap, height:8, borderRadius:5,
          background:`linear-gradient(90deg, ${COLORS.green}, ${COLORS.green}aa)`, opacity:inLight, boxShadow:`0 0 14px ${COLORS.green}66` }} />
      )}
      <div style={{ position:'absolute', left:lx + inN*(sq+sgap)+20, top:laneY1+2, opacity:inLight, width:360,
        fontFamily:FONTS.sans, fontSize:20, color:COLORS.green }}>all tokens in <b>one parallel pass</b><br/><span style={{color:COLORS.inkDim, fontSize:17}}>GPU fully used → efficient → cheap</span></div>

      {/* OUTPUT lane */}
      <div style={{ position:'absolute', left:lx, top:laneY2-38, opacity:ramp(lt,3.0,3.6), fontFamily:FONTS.mono, fontSize:18, color:COLORS.yellow, letterSpacing:'0.08em' }}>OUTPUT · decode</div>
      <div style={{ position:'absolute', left:lx, top:laneY2, display:'flex', gap:sgap }}>
        {Array.from({length:outN}).map((_,i)=>{
          const on = i < nOut;
          const newest = i === nOut-1;
          return (
            <React.Fragment key={i}>
              <div style={{ width:sq, height:sq, borderRadius:6, background: on?COLORS.yellow+'cc':COLORS.yellow+'18',
                border:`1.4px solid ${COLORS.yellow}${on?'':'55'}`, boxShadow: newest?`0 0 12px ${COLORS.yellow}`:'none' }} />
            </React.Fragment>
          );
        })}
      </div>
      {nOut>0 && Array.from({length:nOut}).map((_,i)=>(
        <div key={'p'+i} style={{ position:'absolute', left:lx+i*(sq+sgap), top:laneY2+sq+10, width:sq, height:8, borderRadius:5,
          background:COLORS.coral, opacity:0.8 }} />
      ))}
      <div style={{ position:'absolute', left:lx + outN*(sq+sgap)+20, top:laneY2+2, opacity:ramp(lt,6.0,6.8), width:430,
        fontFamily:FONTS.sans, fontSize:20, color:COLORS.coral }}>one token at a time, <b>its own pass</b><br/><span style={{color:COLORS.inkDim, fontSize:17}}>GPU underused → ~5× the cost per token</span></div>

      {/* price spectrum */}
      {spectrumPhase && (() => {
        const o = pulse(lt, 8.6, 23, 0.4);
        const sx = 200, sw = 1520, sy = 660;
        const marks = [
          { p:0.08, t:'8B dense', s:'1 GPU · cheap', c:COLORS.green },
          { p:0.46, t:'MoE 200B / ~20B active', s:'runs lean for its size', c:COLORS.yellow, moe:true },
          { p:0.92, t:'frontier dense', s:'a cluster · costly', c:COLORS.coral },
        ];
        return (
          <div style={{ opacity:o }}>
            <div style={{ position:'absolute', left:sx, top:sy-44, fontFamily:FONTS.sans, fontSize:22, fontWeight:600, color:COLORS.ink }}>What drives the spread: <span style={{color:COLORS.inkDim, fontWeight:400, fontSize:19}}>parameters you must load &amp; multiply</span></div>
            <div style={{ position:'absolute', left:sx, top:sy, width:sw, height:12, borderRadius:8,
              background:`linear-gradient(90deg, ${COLORS.green}, ${COLORS.yellow}, ${COLORS.coral})` }} />
            <div style={{ position:'absolute', left:sx, top:sy+20, width:sw, display:'flex', justifyContent:'space-between', fontFamily:FONTS.mono, fontSize:15, color:COLORS.inkDim }}>
              <span>cheaper →</span><span>→ pricier</span>
            </div>
            {marks.map((m,i)=>{
              const mo = (m.moe ? ramp(lt, 12.8, 13.6) : ramp(lt, 9.0+i*0.4, 9.7+i*0.4));
              if (mo<=0.01) return null;
              return (
                <div key={i} style={{ position:'absolute', left:sx + m.p*sw, top:sy-18, transform:'translateX(-50%)', opacity:mo, textAlign:'center', width:260 }}>
                  <div style={{ width:20, height:20, borderRadius:'50%', background:m.c, margin:'0 auto', boxShadow:`0 0 14px ${m.c}`, border:'2px solid #fff3' }} />
                  <div style={{ fontFamily:FONTS.sans, fontSize:18, fontWeight:600, color:m.c, marginTop:48 }}>{m.t}</div>
                  <div style={{ fontFamily:FONTS.sans, fontSize:15, color:COLORS.inkDim }}>{m.s}</div>
                </div>
              );
            })}
          </div>
        );
      })()}

      {/* recap drivers */}
      {recapPhase && (
        <div style={{ position:'absolute', left:'50%', top:792, transform:'translate(-50%,0)', opacity:pulse(lt,17.8,23,0.4), display:'flex', gap:14 }}>
          {[['active parameters',COLORS.blue],['input vs output',COLORS.yellow],['GPU $/hour',COLORS.coral],['batching & demand',COLORS.green]].map(([t,c],i)=>(
            <span key={i} style={{ fontFamily:FONTS.mono, fontSize:17, color:c, padding:'7px 15px', borderRadius:999, background:c+'1f', border:`1px solid ${c}55`, whiteSpace:'nowrap' }}>{t}</span>
          ))}
        </div>
      )}

      <Caption lt={lt} a={1.6} b={5.8}>
        <b style={{color:COLORS.blue}}>Input</b> is processed all at once — the GPU runs flat-out, so it's cheap.
      </Caption>
      <Caption lt={lt} a={6.0} b={8.4} color={COLORS.coral}>
        <b>Output</b> comes one token at a time — the GPU idles between them, so it costs ~5× more.
      </Caption>
      <Caption lt={lt} a={8.6} b={12.4}>
        Across models, price tracks the <b>parameters</b> you must hold in memory and multiply.
      </Caption>
      <Caption lt={lt} a={12.8} b={17.4} color={COLORS.yellow}>
        <b>Mixture-of-Experts</b> fires only a few experts per token — big brain, small bill.
      </Caption>
      <Caption lt={lt} a={17.8} b={23}>
        So the drivers: <b>active params, input vs output, GPU rate, and how well it batches</b>.
      </Caption>
    </>
  );
}

// Publish this file's components as properties of `window`.
window.ScenePhysicsRun = ScenePhysicsRun;
window.ScenePhysicsWhy = ScenePhysicsWhy;
window.GpuCard = GpuCard;
