
  <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
      <title>KShivendu</title>
      <link>https://www.kshivendu.dev/blog</link>
      <description>Kumar Shivendu&#39;s blog</description>
      <language>en-us</language>
      <managingEditor>hi@kshivendu.dev (Kumar Shivendu)</managingEditor>
      <webMaster>hi@kshivendu.dev (Kumar Shivendu)</webMaster>
      <lastBuildDate>Tue, 12 May 2026 00:00:00 GMT</lastBuildDate>
      <atom:link href="https://www.kshivendu.dev/tags/compression/feed.xml" rel="self" type="application/rss+xml"/>
      
  <item>
    <guid>https://www.kshivendu.dev/blog/tokenization-compression</guid>
    <title>Tokenization is a Compression Codec Nobody Uses That Way</title>
    <link>https://www.kshivendu.dev/blog/tokenization-compression</link>
    <description>Vector databases compress text payloads with generic codecs like LZ4, but not with token-aware schemes. BPE tokenization + entropy coding gives you 5x lossless compression using infrastructure already in every ML stack.</description>
    <pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate>
    <author>hi@kshivendu.dev (Kumar Shivendu)</author>
    <category>compression</category><category>vector-databases</category><category>internals</category><category>nlp</category>
  </item>

    </channel>
  </rss>
