<!DOCTYPE html><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="viewport" content="width=device-width"><meta name="x-apple-disable-message-reformatting"><title>TLDR Data</title><meta name="color-scheme" content="light dark"><meta name="supported-color-schemes" content="light dark"><style type="text/css">
/* Declare that the message supports both light and dark color schemes. */
:root {
  color-scheme: light dark;
  supported-color-schemes: light dark;
}

/* Border-box sizing for every element and its :before/:after boxes. */
*,
*:before,
*:after {
  -webkit-box-sizing: border-box;
  -moz-box-sizing: border-box;
  box-sizing: border-box;
}

/* Keep text at 100% size (vendor-prefixed text-size-adjust). */
* {
  -ms-text-size-adjust: 100%;
  -webkit-text-size-adjust: 100%;
}

/* Page, body and the outer .document table fill the viewport with no margin or padding. */
html, body, .document {
  width: 100% !important;
  height: 100% !important;
  margin: 0;
  padding: 0;
}

/* Font rendering hints for the whole message. */
body {
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  text-rendering: optimizeLegibility;
}

/* Zero the margin of any div whose inline style contains "margin: 16px 0"
   (presumably a wrapper injected by a mail client; confirm which one). */
div[style*="margin: 16px 0"] {
  margin: 0 !important;
}

/* Set the Office-specific (mso-*) left/right table spacing to zero. */
table, td {
  mso-table-lspace: 0pt;
  mso-table-rspace: 0pt;
}

/* Tables: no cell spacing, collapsed borders, fixed layout, horizontally centered. */
table {
  border-spacing: 0;
  border-collapse: collapse;
  table-layout: fixed;
  margin: 0 auto;
}

/* Images: never wider than their container, no border, bicubic scaling hint. */
img {
  -ms-interpolation-mode: bicubic;
  max-width: 100%;
  border: 0;
}

/* Elements carrying the x-apple-data-detectors attribute inherit the text color
   and lose their underline. */
*[x-apple-data-detectors] {
  color: inherit !important;
  text-decoration: none !important;
}

/* .x-gmail-data-detectors (and descendants) and .aBn: no bottom border, default cursor. */
.x-gmail-data-detectors,
.x-gmail-data-detectors *,
.aBn {
  border-bottom: 0 !important;
  cursor: default !important;
}

/* Buttons animate every property change over 200ms. */
.btn {
  -webkit-transition: all 200ms ease;
  transition: all 200ms ease;
}

/* Button hover color. */
.btn:hover {
  background-color: #f67575;
  border-color: #f67575;
}

/* Default typography for every element. */
* {
  font-family: Arial, Helvetica, sans-serif;
  font-size: 18px;
}
/* Screens up to 600px wide: containers, stacked cells and buttons go full width. */
@media screen and (max-width: 600px) {
  /* The fixed-width container becomes fluid. */
  .container {
    width: 100%;
    margin: auto;
  }

  /* .stack elements become full-width blocks, so columns pile vertically. */
  .stack {
    display: block !important;
    width: 100% !important;
    max-width: 100% !important;
  }

  /* Buttons span the full width with centered labels. */
  .btn {
    display: block;
    width: 100%;
    text-align: center;
  }
}
/* Default (light) palette: near-white background with near-black text, forced
   onto every common text and layout element. */
body, .body,
table, tr, td,
div, p, span,
h1, h2, h3, h4, h5, h6 {
  background-color: #FEFEFE !important;
  color: #010101 !important;
}

/* Dark palette: same element list, applied when the reader prefers a dark scheme. */
@media (prefers-color-scheme: dark) {
  body, .body,
  table, tr, td,
  div, p, span,
  h1, h2, h3, h4, h5, h6 {
    background-color: #27292D !important;
    color: #FEFEFE !important;
  }
}

/* Links take the surrounding text color and are always underlined. */
a {
  color: inherit !important;
  text-decoration: underline !important;
}
</style><!--[if mso | ie]>
<style type="text/css">
/* This stylesheet sits inside an [if mso | ie] conditional comment.
   Links get the light palette explicitly here. */
a {
  background-color: #FEFEFE !important;
  color: #010101 !important;
}

/* Dark palette for links when a dark color scheme is preferred. */
@media (prefers-color-scheme: dark) {
  a {
    background-color: #27292D !important;
    color: #FEFEFE !important;
  }
}
</style>
<![endif]--></head><body class="">
<div style="display: none; max-height: 0px; overflow: hidden;">While core data systems remain stable, a Cambrian explosion of supporting tools is reshaping how teams build on top of data β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β β </div>
<div style="display: none; max-height: 0px; overflow: hidden;">
<br>
</div>
<table align="center" class="document"><tbody><tr><td valign="top">
<table align="center" border="0" cellpadding="0" cellspacing="0" class="container" width="600"><tbody><tr class="inner-body"><td>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr class="header"><td bgcolor="" class="container">
<table width="100%"><tbody><tr><td class="container">
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" style="margin-top: 0px;" width="100%"><tbody><tr><td style="padding: 0px;">
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div style="text-align: center;">
<span style="margin-right: 0px;"><a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Ftldr.tech%2Fdata%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/oFDusK0ymWaAihAgfu1GM4XLXl070Kt9oScqgQZN_VA=427" rel="noopener noreferrer" target="_blank"><span>Sign Up</span></a>
|<span style="margin-right: 2px; margin-left: 2px;"><a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fadvertise.tldr.tech%3Futm_source=tldrdata%26utm_medium=newsletter%26utm_campaign=advertisetopnav/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/o7hf6wVbdPEJ8SlVZJRkn_Kut0gobLGwvEzq6FJiHnk=427" rel="noopener noreferrer" target="_blank"><span>Advertise</span></a></span>|<span style="margin-left: 2px;"><a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fa.tldrnewsletter.com%2Fweb-version%3Fep=1%26lc=1670a604-84b7-11f0-bcf5-55fc1d40139c%26p=b0b2d84e-ad99-11f0-ad8f-81926d27deee%26pt=campaign%26t=1760955478%26s=0f9c1c3306d529a91efe8c46e356d47048236fdfd3389d47c6da305dd7db960b/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/RLqgvzUx6kPN232ucgrZzApavmaEW9wC7Cl2y35Q8sg=427"><span>View Online</span></a></span>
<br>
</span></div>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="text-align: center;"><span data-darkreader-inline-color="" style="--darkreader-inline-color:#3db3ff; color: rgb(51, 175, 255) !important; font-size: 30px;">T</span><span style="font-size: 30px;"><span data-darkreader-inline-color="" style="color: rgb(232, 192, 96) !important; --darkreader-inline-color:#e8c163; font-size:30px;">L</span><span data-darkreader-inline-color="" style="color: rgb(101, 195, 173) !important; --darkreader-inline-color:#6ec7b2; font-size:30px;">D</span></span><span data-darkreader-inline-color="" style="--darkreader-inline-color:#dd6e6e; color: rgb(220, 107, 107) !important; font-size: 30px;">R</span>
<br>
</td></tr></tbody></table>
<br>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody></tbody></table>
<table style="table-layout: fixed; width:100%;" width="100%"><tbody><tr><td style="padding:0;border-collapse:collapse;border-spacing:0;margin:0;">
<div style="text-align: center;">
<h1><strong>TLDR Data <span id="date">2025-10-20</span></strong></h1>
</div>
</td></tr></tbody></table>
<table style="table-layout: fixed; width:100%;" width="100%"><tbody></tbody></table>
</td></tr></tbody></table>
</td></tr></tbody></table>
</td></tr>
<tr bgcolor=""><td class="container">
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td style="padding: 0px;">
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;"><span style="font-size: 36px;">📱</span></div></div>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;">
<h1><strong>Deep Dives</strong></h1>
</div>
</div>
</td></tr></tbody></table>
<table style="table-layout: fixed; width: 100%;" width="100%"><tbody><tr><td style="padding:0;border-collapse:collapse;border-spacing:0;margin:0;" valign="top">
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fa16z.com%2Femerging-architectures-for-modern-data-infrastructure%2F%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/HItLABUwrdc7FVHgbOnhSpVbjIxU3x02DSNTf52WjTY=427">
<span>
<strong>Emerging Architectures for Modern Data Infrastructure (10 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
While core data systems (warehouses, ingest, transformation) remain stable, a Cambrian explosion of supporting tools (metric layers, observability, data apps) is reshaping how teams build on top of data. a16z introduces the data platform hypothesis: vendors are becoming platforms with common backends and APIs, letting “frontend” developers build apps without redoing plumbing. As logic and apps move into the data tier, it may shift the balance from ETL and BI toward warehouse-native application layers.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fmedium.com%2Ffresha-data-engineering%2Fthe-good-the-bad-and-the-automq-5aa7a8748e71%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/17UUynKfq1WL9c2AUDO-F8_t0DGOSBaoltPF9ehnuuU=427">
<span>
<strong>The Good, The Bad, and The AutoMQ (7 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Kafka is undergoing a major architectural shift toward diskless, shared-storage design driven by cloud-native architecture maturation, as indicated by the proposals led by Slack, Aiven, and AutoMQ. Fast Tiering (Slack), which offloads active logs to fast object stores, could optimize replication costs by cutting broker footprint and cross-AZ traffic, while Diskless Topics (Aiven) redesigns Kafka altogether with brokers as compute nodes and object storage as the source of truth. Unified Shared Storage (AutoMQ) proposes a pluggable storage engine, seamlessly supporting both disk-backed and object-store backends (already implemented). Together, these efforts mark an irreversible shift where streaming and storage converge, redefining what Kafka can mean.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fmedium.com%2F@vincent_daniel%2Fdeletion-vectors-and-puffin-files-merge-in-the-new-v3-iceberg-format-565188036d0c%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/3FiW4fxLJ61YWMZ79O9nc2Z1sYc7PPyjCGXrRd2Of5Y=427">
<span>
<strong>Deletion Vectors and Puffin Files Merge in the New v3 Iceberg Format (3 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Apache Iceberg v3 introduces deletion vectors, enabling efficient row-level deletes without file rewrites and offering significant CDC MERGE performance gains compared to v2. Benchmarks on Amazon EMR with Spark 3.5.5 show 2x (filtered read) to 4x (merge) performance improvements for Iceberg v3 over v2. Deletion vectors simultaneously optimize storage, S3 access, and compute costs, as well as operational ones, by changing file compaction from a routine necessity to an occasional task.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fnetflixtechblog.com%2Fhow-and-why-netflix-built-a-real-time-distributed-graph-part-1-ingesting-and-processing-data-80113e124acc%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/kkCgQvzsh1tHSfJZeoaX9wEIsnUnUtgpXmN0kcE5N5Y=427">
<span>
<strong>How and Why Netflix Built a Real-Time Distributed Graph: Part 1 — Ingesting and Processing Data Streams at Internet Scale (7 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
As Netflix's ecosystem expands, traditional data warehouses struggle with integrating cross-device, cross-vertical events in real-time. They built Real-Time Distributed Graph (RDG), a system designed to unify siloed member interaction data across streaming, ads, live events, and gaming. RDG uses graph structures for efficient relationship queries, pattern detection, and schema flexibility, enabling faster insights for personalization and operations.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fneo4j.com%2Fblog%2Fgenai%2Fadvanced-rag-techniques%2F%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/D9wCFZXgqHiS58hfforMwbUIDnfaPl8cnE4AX3JxuFw=427">
<span>
<strong>Advanced RAG Techniques for High-Performance LLM Applications (8 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Advanced Retrieval-Augmented Generation (RAG) architectures address LLM production challenges by integrating hybrid retrieval (semantic + lexical), metadata filtering, reranking, and structured chunking with knowledge graph-driven context. Implementing stepwise agentic planning, query expansion, and context distillation further improves precision, recall, and grounding. Knowledge graphs enable multi-hop reasoning, higher explainability, and traceable provenance, supporting scalable, trustworthy GenAI applications.
</span>
</span>
</div>
</td></tr></tbody></table>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;"><span style="font-size: 36px;">🚀</span></div>
</div>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;">
<h1><strong>Opinions & Advice</strong></h1>
</div>
</div>
</td></tr></tbody></table>
<table style="table-layout: fixed; width: 100%;" width="100%"><tbody><tr><td style="padding:0;border-collapse:collapse;border-spacing:0;margin:0;" valign="top">
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.ssp.sh%2Fblog%2Fagentic-data-modeling%2F%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/4ndDDv1j6c9HHCauKQ3N4YTyPmGkFzJ2-l0ILuR6PNI=427">
<span>
<strong>Data Modeling for the Agentic Era: Semantics, Speed, and Stewardship (27 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
AI agents can generate queries or dashboards in seconds, but without structure, they're just guessing. Agentic workflows should build on three essential pillars: Semantics, a curated metrics layer where agents query well-defined business entities and measures; Speed, sub-second analytics so humans can instantly verify AI outputs; and Stewardship - guardrails, human oversight, and versions that keep agents reliable and compliant. These principles create safe and effective systems where humans stay in the driver's seat and AI acts like a BI pair-programmer.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.crunchydata.com%2Fblog%2Fis-postgres-read-heavy-or-write-heavy-and-why-should-you-care%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/0h1yR1zip3h0UoHN0EZk5sY3Y57LHLX-WNt8d0l3SgU=427">
<span>
<strong>Is Postgres Read Heavy or Write Heavy? (And Why Should You Care) (10 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Classifying Postgres workloads as read-heavy or write-heavy is crucial for targeted performance tuning, resource allocation, and scaling. Since reads are often cheaper (via caching) than writes (which involve WAL logging, index updates, and flushes), misclassification leads to inefficiencies, e.g., over-indexing hurts writes, while under-caching slows reads. Most databases lean read-heavy (10:1 ratio), but monitor for proactive optimization.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fjack-vanlightly.com%2Fblog%2F2025%2F10%2F15%2Fwhy-im-not-a-fan-of-zero-copy-apache-kafka-apache-iceberg%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/aFKSZyLj5JqgpsxD1MIj5sJaLV-yGgEToQeyqiH2QTo=427">
<span>
<strong>Why I'm Not a Fan of Zero-copy Apache Kafka-Apache Iceberg (8 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
In the zero-copy approach, Kafka brokers tier log segments directly to Iceberg tables for dual use by streaming consumers and analytics, which erodes Kafka's strengths. It shifts costs from storage to compute and creates hybrid systems prone to conflicts; the author instead favors logical unification (e.g., via schemas and reliable data movement) over physical sharing. Alternatively, materialization's duplication is already standard and preferable for decoupling.
</span>
</span>
</div>
</td></tr></tbody></table>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;"><span style="font-size: 36px;">💻</span></div>
</div>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;">
<h1><strong>Launches & Tools</strong></h1>
</div>
</div>
</td></tr></tbody></table>
<table style="table-layout: fixed; width: 100%;" width="100%"><tbody><tr><td style="padding:0;border-collapse:collapse;border-spacing:0;margin:0;" valign="top">
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fcloud.google.com%2Fresources%2Fcontent%2Fdata-science-guide%3Fe=48754805%26hl=en%26utm_source=cloud_sfdc%26utm_medium=email%26utm_campaign=FY25-Q3-GLOBAL-ENT36892-website-dl-datasciencebook-99286%26utm_content=tldr%26utm_term=oct_20/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/JcUbA1XMHeEXezLYF1BMgKLMklwcluoKY-0y1hA97Mk=427">
<span>
<strong>Guide to Data Science on Google Cloud: From Demand Forecasting to Agentic Flows (Sponsor)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
This guide helps you get started with data science workflows on Google Cloud and offers a range of <a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fcloud.google.com%2Fresources%2Fcontent%2Fdata-science-guide%3Fe=48754805%26hl=en%26utm_source=cloud_sfdc%26utm_medium=email%26utm_campaign=FY25-Q3-GLOBAL-ENT36892-website-dl-datasciencebook-99286%26utm_content=tldr%26utm_term=oct_20/2/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/2DNv8v54AyNgOqTwNNdYVHigh4EHCUtnuCdnogfGNWU=427" rel="noopener noreferrer nofollow" target="_blank"><span>real-world use cases (with code)</span></a> for you to explore. See how you can tackle challenges, ranging from "traditional" ML problems such as segmentation to agentic AI automation. <a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fcloud.google.com%2Fresources%2Fcontent%2Fdata-science-guide%3Fe=48754805%26hl=en%26utm_source=cloud_sfdc%26utm_medium=email%26utm_campaign=FY25-Q3-GLOBAL-ENT36892-website-dl-datasciencebook-99286%26utm_content=tldr%26utm_term=oct_20/3/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/fz1PCjxR9K2ad-3OhfXs7Y0n0kpyMl0x3QuwVXF_uvM=427" rel="noopener noreferrer nofollow" target="_blank"><span>Read the guide</span></a>
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fflink.apache.org%2F2025%2F10%2F15%2Fapache-flink-agents-0.1.0-release-announcement%2F%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/23yY8QAxLLi5j0I-a-rIxBwa65RqIApecUL-_xPRUtc=427">
<span>
<strong>Apache Flink Agents 0.1.0 Release Announcement (5 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Apache Flink Agents 0.1.0 introduces a unified framework for integrating event-driven AI agents that incorporate LLMs, tools, memory, and orchestration directly into Flink's streaming runtime. It provides massive scalability, millisecond latency, exactly-once processing, and state management, with native support for Python and Java and seamless integration with Flink DataStream and Table APIs.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fgithub.com%2Fopendatalab%2FMinerU%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/ANmdoAXodUJbJHy8MJt38c6riFuq6WRveFLdyLh25GU=427">
<span>
<strong>MinerU — Open-Source Document Parser for Agentic Workflows (GitHub Repo)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
MinerU transforms complex documents (e.g., PDFs) into LLM-ready Markdown/JSON, preserving layout, tables, formulas, and reading order. Built for agents and enterprise use, it supports OCR in 84 languages, GPU/CPU/NPU acceleration, and outputs richly structured representations interpreted by downstream AI systems. Trade-offs: sophisticated deployment and resource requirements; edge cases (handwritten docs, comics) still limited.
</span>
</span>
</div>
</td></tr></tbody></table>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;"><span style="font-size: 36px;">🎁</span></div></div>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;"><h1><strong>Miscellaneous</strong></h1></div>
</div>
</td></tr></tbody></table>
<table bgcolor="" style="table-layout: fixed; width: 100%;" width="100%"><tbody><tr><td style="padding:0;border-collapse:collapse;border-spacing:0;margin:0;" valign="top">
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fengineering.fb.com%2F2025%2F10%2F17%2Fai-research%2Fscaling-llm-inference-innovations-tensor-parallelism-context-parallelism-expert-parallelism%2F%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/D6yR2hG1pO885JMF8dyVrw5hFfloo1Lrpx_mEQn27Bk=427">
<span>
<strong>Scaling LLM Inference: Innovations in Tensor Parallelism, Context Parallelism, and Expert Parallelism (3 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Meta has introduced advanced parallelism techniques, including tensor, context, and expert parallelism, to optimize LLM inference at scale, reducing end-to-end latency by 10–50% for key workloads compared with baseline libraries. Innovations include direct data access (DDA) algorithms for communication efficiency, fast-attention kernels for context parallelism enabling under one-minute inference of 10 million tokens, and near-linear scaling on multi-node clusters.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.zeropartydata.es%2Fp%2Feffective-human-supervision-the-minority%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/g4_H7wJTVrfLvdIkHL5qMR0FDRMivevKUObaeA4wuQc=427">
<span>
<strong>Effective Human Supervision: the M-AI-nority Report (11 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
“Human in the loop” AI compliance is often superficial, with organizations implementing nominal oversight (such as perfunctory human rubber-stamping and inadequate training) while real automated decision risks and mandated legal safeguards (GDPR, AI Act) are ignored. Documented failures (e.g., Amazon hiring, Dutch SyRI, Wells Fargo) illustrate how unchecked automation perpetuates bias, fails meaningful review, and exposes organizations to multimillion penalties and reputational risk. Ensure rigorous, context-aware human supervisory processes and robust metrics targeting both system performance and legal compliance, as mere technical “overrides” or compliance theater are insufficient under regulatory scrutiny.
</span>
</span>
</div>
</td></tr></tbody></table>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;"><span style="font-size: 36px;">⚡</span></div></div>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding-top: 0px; padding-bottom: 0px;">
<div class="text-block">
<div style="text-align: center;">
<h1><strong>Quick Links</strong></h1>
</div>
</div>
</td></tr></tbody></table>
<table bgcolor="" style="table-layout: fixed; width: 100%;" width="100%"><tbody><tr><td style="padding:0;border-collapse:collapse;border-spacing:0;margin:0;" valign="top">
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.linkedin.com%2Fposts%2Fdanthelion_the-dbt-fear-index-just-spiked-and-you-share-7384229937823707136-PzCO%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/o0R4dtC9vZTnpvjZMo8YVWsqjM5SbyLhIyjuJPfn5o0=427">
<span>
<strong>The “dbt Fear Index” Just Spiked, and You Should Be Worried (2 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
The surge in the “dbt Fear Index” shows the community's concern that the Fivetran + dbt Labs merger could lead to higher costs and tighter vendor control.
</span>
</span>
</div>
</td></tr></tbody></table>
<table align="center" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block">
<span>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.jumpingrivers.com%2Fblog%2Fwhats-new-py314%2F%3Futm_source=tldrdata/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/B2GDMM2L0sliHn3PKGQCqVJbPxQF8y8flXOki3MBFfc=427">
<span>
<strong>What's new for Python in 2025? (12 minute read)</strong>
</span>
</a>
<br>
<br>
<span style="font-family: 'Helvetica Neue', Helvetica, Arial, Verdana, sans-serif;">
Python 3.14 adds free-threading, allowing genuine multi-core execution for faster, CPU-bound data workflows.
</span>
</span>
</div>
</td></tr></tbody></table>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td align="left" style="word-break: break-word; vertical-align: top; padding: 5px 10px;">
<p style="padding: 0; margin: 0; font-size: 22px; color: #000000; line-height: 1.6; font-weight: bold;">
Want to advertise in TLDR? 📰
</p>
<div class="text-block" style="margin-top: 10px;">
If your company is interested in reaching an audience of data engineering professionals and decision makers, you may want to <a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fadvertise.tldr.tech%2F%3Futm_source=tldrdata%26utm_medium=newsletter%26utm_campaign=advertisecta/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/Oryz0GPxCC24XyHGEmQhaGFy3chcsMXOzCM2GZBH9nc=427"><strong><span>advertise with us</span></strong></a>.
</div>
<br>
<!-- New "Want to work at TLDR?" section -->
<p style="padding: 0; margin: 0; font-size: 22px; color: #000000; line-height: 1.6; font-weight: bold;">
Want to work at TLDR? 💼
</p>
<div class="text-block" style="margin-top: 10px;">
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fjobs.ashbyhq.com%2Ftldr.tech/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/Xq6gZ0mq08XTgXxfGwpSQ77VvQyDkox3v7XSm5ALHeY=427" rel="noopener noreferrer" style="color: #0000EE; text-decoration: underline;" target="_blank"><strong>Apply here</strong></a> or send a friend's resume to <a href="mailto:jobs@tldr.tech" style="color: #0000EE; text-decoration: underline;">jobs@tldr.tech</a> and get $1k if we hire them!
</div>
<br>
<div class="text-block">
If you have any comments or feedback, just respond to this email!
<br>
<br> Thanks for reading,
<br>
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.linkedin.com%2Fin%2Fjoelvanveluwen%2F/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/98_wJYiiTZ16o_Q0vgWFF5A0Na7N8eLHXkI-UCZScrY=427"><span>Joel Van Veluwen</span></a>, <a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.linkedin.com%2Fin%2Fjennytzurueyching%2F/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/5-ltIL0a3bMtHJa5W9Sj0rhjfdLRmvjuhvkEY5V3yMU=427"><span>Tzu-Ruey Ching</span></a> & <a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fwww.linkedin.com%2Fin%2Fremi-turpaud%2F/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/SzuO8RBQzb5Tt9lhnbr5Po-ANvF3Zc9RR8OlIujA0QE=427"><span>Remi Turpaud</span></a>
<br>
<br>
</div>
<br>
</td></tr></tbody></table>
<table align="center" bgcolor="" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td class="container" style="padding: 15px 15px;">
<div class="text-block" id="testing-id">
<a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Ftldr.tech%2Fdata%2Fmanage%3Femail=silk.theater.56%2540fwdnl.com/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/_DUqJx8o-QDB3mWxchuDTquQsLBKX6vnLM_U7gg2tS4=427">Manage your subscriptions</a> to our other newsletters on tech, startups, and programming. Or if TLDR Data isn't for you, please <a href="https://tracking.tldrnewsletter.com/CL0/https:%2F%2Fa.tldrnewsletter.com%2Funsubscribe%3Fep=1%26l=037ede50-92cc-11ee-b0f2-b761aa2217ad%26lc=1670a604-84b7-11f0-bcf5-55fc1d40139c%26p=b0b2d84e-ad99-11f0-ad8f-81926d27deee%26pt=campaign%26pv=4%26spa=1760954452%26t=1760955478%26s=30a8b1807f0f0a489df119ce2069bfb1edb7ba770c696a1feb188441d534d35b/1/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/jCmQ7qeOcoA119CUx84HJk3seDYLngINqB0I1tYHukI=427">unsubscribe</a>.
<br>
</div>
</td></tr></tbody></table>
</td></tr></tbody></table>
</td></tr></tbody></table>
</td></tr></tbody></table>
</td></tr></tbody></table>
<!-- Hidden 1x1 image served from the tracking host (presumably an open-tracking pixel).
     Loaded over HTTPS, matching every other tracking.tldrnewsletter.com URL in this
     message, so the request is not sent in clear text or blocked as mixed content. -->
<img alt="" src="https://tracking.tldrnewsletter.com/CI0/0100019a012032e8-b6060bba-92a6-4062-818e-dd02760f2bed-000000/Ur07Upe2Yj6qSkAAQ3j475aHDIK9_Ts9z2ATaRnKLig=427" style="display: none; width: 1px; height: 1px;">
</body></html>