From d31d4d4f53ad7cb9ffe8592e320da8f90ee4de28 Mon Sep 17 00:00:00 2001 From: Ognjen Bostjancic Date: Tue, 30 Jun 2026 12:56:36 +0200 Subject: [PATCH 1/3] docs(ai): Document stripping inline media from agent messages AI agent messages can carry inline media (base64 images, audio, files) that may hold sensitive information. The SDK does not strip this by default, so document a beforeSendSpan example that replaces inline media with a placeholder while preserving the rest of the conversation. Refs TET-2569 --- docs/ai/monitoring/agents/privacy.mdx | 70 +++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/docs/ai/monitoring/agents/privacy.mdx b/docs/ai/monitoring/agents/privacy.mdx index 532ea9b285450..313042a62fd51 100644 --- a/docs/ai/monitoring/agents/privacy.mdx +++ b/docs/ai/monitoring/agents/privacy.mdx @@ -41,3 +41,73 @@ However, the following AI agent span attributes are not protected by default: If you wish to enable Data Scrubbing for any of these fields you can add Organization or Project-level [Advanced Data Scrubbing](/product/data-management-settings/scrubbing/advanced-datascrubbing/) rule in **Security & Privacy** [settings](https://sentry.io/orgredirect/organizations/:orgslug/settings/organization/security-and-privacy) in the following format: `$span.data.''`. ![Additional Sensitive Fields](./img/advanced-scrubbing-rule.png) + +## Stripping Images and Media from Messages + +AI messages can carry inline media: base64-encoded images, audio, or files +embedded directly in the input and output. This media can hold sensitive +information, so you might want to keep it out of Sentry while preserving the +rest of the conversation — roles, text, and tool calls. + +The Sentry SDK does not strip media by default. You can remove it yourself in +[`beforeSendSpan`](/platforms/javascript/configuration/options/#beforeSendSpan), +which runs on every span before it's sent. The example below walks each AI +message and replaces any inline media with a placeholder, leaving the +surrounding structure intact. + +```javascript +const MEDIA_PLACEHOLDER = "[media stripped]"; + +// Span attributes whose values are JSON-encoded AI messages. +const AI_MESSAGE_ATTRIBUTES = [ + "gen_ai.input.messages", + "gen_ai.output.messages", + "gen_ai.system_instructions", +]; + +// We match on the value itself, not the field name, so we don't depend on +// provider-specific shapes (OpenAI / Anthropic / Google / Vercel AI SDK). +// Inline media is either a `data:` URI or a large raw base64 blob. Plain text +// and http(s) URLs contain characters outside the base64 set, so they're left +// untouched. +function isInlineMedia(value) { + if (typeof value !== "string") return false; + if (value.startsWith("data:")) return true; + return value.length > 1024 && /^[A-Za-z0-9+/=\s]+$/.test(value); +} + +// Recursively replace any inline media with a placeholder, preserving roles, +// text, and tool calls. +function stripInlineMedia(node) { + if (typeof node === "string") { + return isInlineMedia(node) ? MEDIA_PLACEHOLDER : node; + } + if (Array.isArray(node)) { + return node.map(stripInlineMedia); + } + if (node && typeof node === "object") { + return Object.fromEntries( + Object.entries(node).map(([key, value]) => [key, stripInlineMedia(value)]) + ); + } + return node; +} + +Sentry.init({ + // ... + beforeSendSpan(span) { + for (const key of AI_MESSAGE_ATTRIBUTES) { + const raw = span.data?.[key]; + if (typeof raw !== "string") continue; + + try { + span.data[key] = JSON.stringify(stripInlineMedia(JSON.parse(raw))); + } catch { + // Not JSON or an unexpected shape — leave the value as-is. + } + } + + return span; + }, +}); +``` From 02cc92d58798243c6cf361640de69a3286d8fc71 Mon Sep 17 00:00:00 2001 From: Ognjen Bostjancic Date: Tue, 30 Jun 2026 13:07:58 +0200 Subject: [PATCH 2/3] docs(ai): Frame media stripping example around customization Make isInlineMedia the clear customization point and drop the base64 implementation trivia from the comments, so readers focus on adapting the matching logic to their own provider. Refs TET-2569 --- docs/ai/monitoring/agents/privacy.mdx | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/ai/monitoring/agents/privacy.mdx b/docs/ai/monitoring/agents/privacy.mdx index 313042a62fd51..19907a40af99e 100644 --- a/docs/ai/monitoring/agents/privacy.mdx +++ b/docs/ai/monitoring/agents/privacy.mdx @@ -53,7 +53,8 @@ The Sentry SDK does not strip media by default. You can remove it yourself in [`beforeSendSpan`](/platforms/javascript/configuration/options/#beforeSendSpan), which runs on every span before it's sent. The example below walks each AI message and replaces any inline media with a placeholder, leaving the -surrounding structure intact. +surrounding structure intact. Adjust `isInlineMedia` to match how your AI +provider embeds media. ```javascript const MEDIA_PLACEHOLDER = "[media stripped]"; @@ -65,19 +66,15 @@ const AI_MESSAGE_ATTRIBUTES = [ "gen_ai.system_instructions", ]; -// We match on the value itself, not the field name, so we don't depend on -// provider-specific shapes (OpenAI / Anthropic / Google / Vercel AI SDK). -// Inline media is either a `data:` URI or a large raw base64 blob. Plain text -// and http(s) URLs contain characters outside the base64 set, so they're left -// untouched. +// Decide what should be treated as media. This default catches `data:` URIs +// and large base64 blobs; replace it with whatever matches your data best. function isInlineMedia(value) { if (typeof value !== "string") return false; if (value.startsWith("data:")) return true; return value.length > 1024 && /^[A-Za-z0-9+/=\s]+$/.test(value); } -// Recursively replace any inline media with a placeholder, preserving roles, -// text, and tool calls. +// Replace anything isInlineMedia flags, keeping roles, text, and tool calls. function stripInlineMedia(node) { if (typeof node === "string") { return isInlineMedia(node) ? MEDIA_PLACEHOLDER : node; From 590981165bda1334398a140d33dde6d3f7f3b926 Mon Sep 17 00:00:00 2001 From: Ognjen Bostjancic Date: Tue, 30 Jun 2026 14:33:30 +0200 Subject: [PATCH 3/3] docs(ai): Reword media stripping intro to focus on user control Frame the section around scrubbing data before it's sent, matching the phrasing used elsewhere in the privacy docs, instead of calling out what the SDK does not do by default. Refs TET-2569 --- docs/ai/monitoring/agents/privacy.mdx | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/ai/monitoring/agents/privacy.mdx b/docs/ai/monitoring/agents/privacy.mdx index 19907a40af99e..87a845dc9ed55 100644 --- a/docs/ai/monitoring/agents/privacy.mdx +++ b/docs/ai/monitoring/agents/privacy.mdx @@ -45,16 +45,14 @@ If you wish to enable Data Scrubbing for any of these fields you can add Organiz ## Stripping Images and Media from Messages AI messages can carry inline media: base64-encoded images, audio, or files -embedded directly in the input and output. This media can hold sensitive -information, so you might want to keep it out of Sentry while preserving the -rest of the conversation — roles, text, and tool calls. - -The Sentry SDK does not strip media by default. You can remove it yourself in -[`beforeSendSpan`](/platforms/javascript/configuration/options/#beforeSendSpan), -which runs on every span before it's sent. The example below walks each AI -message and replaces any inline media with a placeholder, leaving the -surrounding structure intact. Adjust `isInlineMedia` to match how your AI -provider embeds media. +embedded directly in the input and output. If this media might contain +sensitive information, you can scrub it before it's sent to Sentry while +keeping the rest of the conversation — roles, text, and tool calls — intact. + +Use [`beforeSendSpan`](/platforms/javascript/configuration/options/#beforeSendSpan), +which runs on every span before it's sent, to replace inline media with a +placeholder. The example below walks each AI message and scrubs the media it +finds. Adjust `isInlineMedia` to match how your AI provider embeds media. ```javascript const MEDIA_PLACEHOLDER = "[media stripped]";